#!/usr/bin/python2.3
#
# `de_obf_helper.py`
#
# Helper for deobfuscation of Javascript code.
#
# Author: follower@myrealbox.com
#
# Copyright: 2005
#
# License: GPL 2.0
#
# Version: 0.1.0
#
# MOTD: Are you all *insane*???
#
#
# Example usage: (Using Google Maps code)
#
# Note: All code must be pretty-printed in the files.
#
# * Copy file `maps.1.js`
#
# * Execute ('-d' = generate documentation):
#
# ./de_obf_helper.py -d maps.1.js > maps.1.html
#
# * Generates or updates these files:
#
# maps.1.html funcs-maps.1.js.txt maps.1.js-linenums.html
#
# * Edit `funcs-maps.1.js.txt`, the format is:
#
# class/function name<tab>args<tab>deobfuscated name<tab>description
#
# e.g.:
#
# Y<tab>Vc<tab>XSLT<tab>Handles XSLT processing
#
# * Re-run above '-d' command to refresh documentation.
#
# * Then, when target code has been modified and obfuscated differently:
#
# * Copy file `maps.2.js`
#
# * Execute ('-u' = upgrade functions/class metadata to new names):
#
# ./de_obf_helper.py -u maps.1.js maps.2.js
#
# * Generates or updates:
#
# funcs-maps.2.js.txt
#
# * Execute:
#
# ./de_obf_helper.py -d maps.2.js > maps.2.html
#
# and documentation matching the most recent obfuscation is generated.
#
# Note: The "upgrade" step is not perfect, it uses various heuristics to
# attempt to identify the same function/class in each file.
#
# Note: This code is very rough, with hard coded stuff all over the place,
# but it does the job for me so I figured I'd throw it up on the net.
# Perhaps a little too literally... :-)
#
import os
import re
import sys
import sets
import shutil
import difflib
import textwrap
# Matches a named function definition such as ``function foo(a,b)``.
# Group 1 is the function name, group 2 the raw (unsplit) argument list.
# Raw strings keep the regex escapes (\s, \() out of Python's own
# string-escape processing.
RE_FUNCTION_DEF = re.compile(r"function\s+?(.+?)\((.*?)\)")
#RE_FUNCTION_DEF = re.compile(r"^function\s+?(.+?)\((.*?)\)", re.DOTALL | re.MULTILINE)
# Matches a method assignment at the start of a line, such as
# ``Y.prototype.Vc=function(a)`` (no whitespace around the '=').
# Groups are (owner name, dotted member path, raw argument list).
RE_METHOD_DEF = re.compile(r"^(\w+?)\.([\w.]+?)=function\((.*?)\)", re.MULTILINE)
# TODO: Make this all lazy, but cached.
class SourcePropertyMixin(object): # TODO: Abstract out more functionality?
    """
    Mixin providing a read-only `source` property.

    The host object is expected to carry three attributes:

      * `_source`  - the sequence (in this file, the whole source text)
                     that is sliced.
      * `startIdx` - slice start offset.
      * `endIdx`   - slice end offset (exclusive).
    """

    def _getSource(self):
        """
        Return `self._source[self.startIdx:self.endIdx]`.
        """
        begin = self.startIdx
        end = self.endIdx
        return self._source[begin:end]

    source = property(fget = _getSource, doc = "")
class Function(SourcePropertyMixin, object): # TODO: Rename? & Class-ify?
    """
    Metadata for one Javascript function (or constructor) plus the
    methods that were attached to it.

    Attributes set by the constructor:

      * `name`, `args`          - name and raw argument string.
      * `newName`, `description`- user supplied deobfuscated name and notes.
      * `text`, `lineNumber`    - matched definition text and its line.
      * `startIdx`, `endIdx`    - offsets used by the `source` property.
      * `methods`               - list that callers append `Method`
                                  objects to (starts empty).
      * `otherName`             - starts as "".
    """

    def __init__(self, functionName, functionArgs,
                 functionNewName = "", functionDescription = "",
                 text=None, lineNumber = None,
                 startIdx = None, endIdx = None,
                 source = None):
        """
        Store the supplied values; see the class docstring for meanings.
        """
        # Backing data for the `source` property.
        self._source = source
        # Offset of function source within the source file.
        self.startIdx = startIdx
        self.endIdx = endIdx

        self.text = text
        self.lineNumber = lineNumber

        self.name = functionName
        self.args = functionArgs
        self.newName = functionNewName
        self.description = functionDescription

        self.methods = []
        self.otherName = "" # TODO: Handle this better. Key by file name?

    def update(self, source):
        """
        Copy the non-empty metadata of another instance onto this one.

        * `source` another Function instance.

        Only `args`, `newName` and `description` are considered, and a
        value is copied only when it is truthy. No check is made that the
        two objects have the same name.
        """
        for attr in ("args", "newName", "description"): #TODO:Improve this?
            candidate = getattr(source, attr)
            if not candidate:
                continue
            setattr(self, attr, candidate)

    def asExportString(self):
        """
        Return name, args, newName and description joined by tabs.
        """
        fields = (self.name, self.args, self.newName, self.description)
        return "\t".join(fields)

    def _getSignature(self):
        """
        Return the list of attached method names, in attachment order.
        """
        names = []
        for method in self.methods:
            names.append(method.name)
        return names

    signature = property(_getSignature, doc="")
# TODO: Make this static?
def functionFromString(metaData):
    """
    Build a `Function` from one tab delimited metadata record.

    * metaData - Tab delimited funcName, funcArgs, funcNewName, funcDesc
      (the same field order that `Function.asExportString` produces),
      without a trailing newline.

    Records with fewer than four fields are padded with empty strings, so
    a line whose trailing tabs were stripped still loads. Tabs beyond the
    third are kept as part of the description rather than raising.

    Returns the new `Function`; only the four metadata fields are set.
    """
    fields = metaData.split("\t", 3)
    # Pad short records so the unpacking below cannot fail.
    fields = fields + [""] * (4 - len(fields))
    funcName, funcArgs, funcNewName, funcDesc = fields
    return Function(funcName, funcArgs, funcNewName, funcDesc)
class Method(SourcePropertyMixin, object):
    """
    Metadata for one method assigned onto a function/class.

    Holds the method name (with any "prototype." text removed), its raw
    argument string, the matched definition text, a line number and the
    offsets/source used by the inherited `source` property.
    """

    def __init__(self, methodName, methodArgs,
                 text=None, lineNumber = None,
                 startIdx = None, endIdx = None,
                 source = None):
        """
        Store the supplied values; `methodName` has every occurrence of
        "prototype." removed before being stored as `name`.
        """
        # Backing data for the `source` property.
        self._source = source
        # Offset of function source within the source file.
        self.startIdx = startIdx
        self.endIdx = endIdx

        self.text = text
        self.lineNumber = lineNumber

        # TODO: Record if 'prototype'?
        self.name = methodName.replace("prototype.", "")
        self.args = methodArgs
def exportFuncs(filename, funcs):
    """
    Merge `funcs` with the metadata stored in `filename`, then rewrite it.

    * `filename` - path of the tab delimited metadata file.
    * `funcs`    - dict mapping function name to `Function`; it is
                   modified in place.

    If the file exists it is first copied to `<filename>.bak`. Each record
    in it is then parsed with `functionFromString`: a record whose name is
    already in `funcs` updates that entry (via `Function.update`), any
    other record is added to `funcs`. Finally every entry of `funcs` is
    written back, one `asExportString()` line each, sorted by name so the
    file does not reshuffle between runs.
    """
    if os.path.exists(filename):
        shutil.copy(filename, "%s.bak" % filename)
        fh = open(filename)
        try:
            lines = fh.readlines()
        finally:
            fh.close()
        for line in lines:
            # Strip only the line terminator; trailing tabs are field
            # separators and must survive.
            record = line.rstrip("\r\n")
            if not record:
                continue
            funcMetaData = functionFromString(record)
            existing = funcs.get(funcMetaData.name)
            if existing is None:
                funcs[funcMetaData.name] = funcMetaData
            else:
                existing.update(funcMetaData)
    names = list(funcs.keys())
    names.sort()
    fh = open(filename, "w")
    try:
        for name in names:
            fh.write("%s\n" % funcs[name].asExportString())
    finally:
        fh.close()
#BASE_RE_FUNCTION_REFERENCES = "([\w.])+?\s*?=\s*?%s\((.+?)\)"
#BASE_RE_FUNCTION_REFERENCES = "^[^\n]*?([\w.])+?\s*?=\s*(.*?)\s*%s\((.*?)\).*?$"
# TODO: Check this matches all references correctly.
#BASE_RE_FUNCTION_REFERENCES = "^.*?\W%s(\((.*?)\).*?$|($|\W.*?$))"
# Template for a whole line containing a call ``<name>(...)`` preceded by a
# non-word character. The %s receives the (regex-escaped) function name.
BASE_RE_FUNCTION_REFERENCES = r"^.*?\W%s\((.*?)\).*?$"
#BASE_RE_FUNCTION_REFERENCES = "^.*?\W%s(\((.*?)\).*?$|[\s;]*?$)"
def getReferences(source, funcName):
    """
    Return the source lines that appear to call `funcName`.

    * `source`   - full source text to search.
    * `funcName` - function name, taken literally (it is escaped before
                   being placed in the pattern, so names containing regex
                   metacharacters such as '$' are matched correctly).

    Returns a list with the full text of each matching line, in source
    order. Lines that start with "function" are left out.
    """
    re_functionReference = re.compile(
        BASE_RE_FUNCTION_REFERENCES % re.escape(funcName), re.MULTILINE)
    lines = []
    for match in re_functionReference.finditer(source):
        line = match.group(0)
        if not line.startswith("function"):
            lines.append(line)
    return lines
# Heading template; arguments are (level, CSS class, content, level).
# NOTE(review): the markup in this copy of the file had been stripped down
# to '%s ', which cannot accept the four arguments `heading` supplies. The
# tag below is reconstructed from those arguments - confirm against an
# original copy if one is available.
FORMAT_HEADING = '<h%d class="%s">%s</h%d>'
def heading(level, klass, content, id_ = None):
    """
    Return an HTML heading element.

    * `level`   - heading level (integer used for <hN>).
    * `klass`   - value of the class attribute.
    * `content` - inner HTML, inserted as-is (not escaped).
    * `id_`     - optional value for an id attribute.
    """
    template = FORMAT_HEADING
    if id_:
        # Splice the id attribute in at the first space (right after the
        # tag name). Any '%' in the id is doubled so the formatting step
        # below does not mistake it for a placeholder.
        idAttr = ' id="%s" ' % ("%s" % id_).replace("%", "%%")
        template = template.replace(' ', idAttr, 1)
    return template % (level, klass, content, level)
def paragraph(klass, content):
    """
    Return an HTML paragraph element.

    * `klass`   - value of the class attribute.
    * `content` - inner HTML, inserted as-is (not escaped).

    NOTE(review): the literal in this copy of the file had lost its markup
    (it spanned a line break and had one placeholder for two arguments);
    the <p> tag is reconstructed from the arguments - confirm against an
    original copy if one is available.
    """
    return '<p class="%s">%s</p>' % (klass, content)
def href(url, text):
    """
    Return an HTML link.

    * `url`  - value of the href attribute.
    * `text` - link content, inserted as-is (not escaped).

    NOTE(review): the literal in this copy of the file had lost its markup
    (one placeholder for two arguments); the <a> tag is reconstructed from
    the arguments - confirm against an original copy if one is available.
    """
    return '<a href="%s">%s</a>' % (url, text)
def table(datalines, columns):
    """
    Lay `datalines` out as a single-row HTML table of up to `columns`
    cells, filling each cell top to bottom before starting the next.

    * `datalines` - list of HTML fragments, one per entry.
    * `columns`   - maximum number of cells; values below 1 are treated
                    as 1.

    The per-cell count is rounded up, so the entries never spill into more
    than `columns` cells, and the loop always advances (the previous
    rounding-down produced a step of zero, and so an endless loop, when
    there were fewer entries than columns).

    NOTE(review): the markup literals in this copy of the file had been
    stripped; the table/td/br tags are reconstructed - confirm against an
    original copy if one is available.
    """
    if columns < 1:
        columns = 1
    total = len(datalines)
    # Ceiling division, never less than one entry per cell.
    numPerCol = max(1, (total + columns - 1) // columns)
    parts = []
    parts.append('<table width="100%"><tr>')
    count = 0
    while count < total:
        cell = "<br />\n".join(datalines[count:count + numPerCol])
        parts.append('<td width="%d%%" valign="top">%s</td>'
                     % (100 // columns, cell))
        count += numPerCol
    parts.append('</tr></table>')
    return "\n".join(parts)
# TODO: Find the proper one...
def escape(text):
    """
    Return `text` with '&' and '<' replaced by HTML entities.

    '&' is replaced first so the ampersands introduced for '<' are not
    escaped a second time.
    """
    return text.replace("&", "&amp;").replace("<", "&lt;")
def referenceList(func, references):
    """
    Return an HTML fragment listing lines that reference `func`.

    * `func`       - `Function` whose `name` (and `newName`, if set) is
                     used to rewrite the displayed lines.
    * `references` - list of source lines, e.g. from `getReferences`.

    Lines are stripped and de-duplicated in first-seen order, so the same
    input always yields the same output. At most MAX_REFS_TO_DISPLAY lines
    are shown, followed by an ellipsis entry when more exist. When the
    function has a `newName`, whole-word occurrences of the original name
    are replaced by it (with any "~" removed) before the line is escaped.

    NOTE(review): the list markup in this copy of the file had been
    stripped; the ul/li/code tags are reconstructed - confirm against an
    original copy if one is available.
    """
    MAX_REFS_TO_DISPLAY = 5
    parts = []
    parts.append(heading(3, "references", "References:"))

    seen = {}
    uniqueReferences = []
    for reference in references:
        reference = reference.strip()
        if reference not in seen:
            seen[reference] = True
            uniqueReferences.append(reference)

    renamer = None
    if func.newName:
        # TODO: Check this substitution handles all cases.
        # The name is escaped because obfuscated names may contain regex
        # metacharacters (e.g. '$'); a callable replacement avoids any
        # backslash/group-number interpretation of the new name.
        displayName = func.newName.replace("~", "")
        pattern = re.compile(r"(\W|^)%s(\W|$)" % re.escape(func.name))
        renamer = lambda m: m.group(1) + displayName + m.group(2)

    parts.append("<ul>")
    for reference in uniqueReferences[:MAX_REFS_TO_DISPLAY]:
        if renamer is not None:
            reference = pattern.sub(renamer, reference)
        parts.append('<li><code>%s</code></li>' % escape(reference))
    if len(uniqueReferences) > MAX_REFS_TO_DISPLAY:
        parts.append('<li>...</li>')
    parts.append("</ul>")
    return "\n".join(parts)
def formatFunction(f, sourceURL):
    """
    Return `(tocEntry, section)` HTML for one function.

    * `f`         - the `Function` to document.
    * `sourceURL` - URL of the line-numbered source page; the original
                    name links to `<sourceURL>#<f.lineNumber>`.

    The display name is `f.newName` with "~" removed, falling back to
    `f.name`. `tocEntry` is a link to the section's heading; `section`
    holds the heading, argument list, description and, when present, one
    paragraph per method.

    NOTE(review): the heading literal in this copy of the file had lost
    its markup (two placeholders for four arguments); the link is
    reconstructed from the arguments - confirm the anchor format against
    `formatLine` / an original copy.
    """
    name = f.newName.replace("~","") or f.name
    # %s rather than %d for the line number: entries that come only from
    # the metadata file have no line number (None).
    title = '%s [<a href="%s#%s">%s</a>]' % (name, sourceURL,
                                             f.lineNumber, f.name)
    parts = []
    parts.append(heading(2, "fname", title, name))
    parts.append(heading(3, "section", "Args:"))
    parts.append(paragraph("args",
                           f.args.replace(",",", "))) #TODO: Change formatting.
    parts.append(paragraph("desc", f.description))
    if f.methods:
        parts.append(heading(3, "section", "Methods:"))
        for m in f.methods:
            parts.append(paragraph("method", "%s(%s)" % (m.name, m.args)))
    return (href("#%s" % name, name) , "\n".join(parts))
def extractFunctionInfo(source):
    """
    Scan `source` for function and method definitions.

    * `source` - full text of a (pretty-printed) Javascript file.

    Returns a dict mapping function name to `Function`. Every match of
    RE_FUNCTION_DEF becomes a `Function` (a later definition with the same
    name replaces an earlier one). Every match of RE_METHOD_DEF whose
    owner is one of those functions is appended to that function's
    `methods` as a `Method`; methods on unknown owners are skipped.

    Offsets come from the regex match itself, so two definitions with
    identical text each get their own position, and line numbers are
    1-based for functions and methods alike.

    Raises ValueError (from `getBlockOffsets` / `str.index`) when no
    opening brace follows a definition.
    """
    funcs = {}
    for match in RE_FUNCTION_DEF.finditer(source):
        funcName, args = match.groups()
        startIdx = match.start()
        lineNumber = source.count("\n", 0, startIdx) + 1
        dummy, endIdx = getBlockOffsets(source, startIdx)
        funcs[funcName] = Function(funcName, args,
                                   text=match.group(0),
                                   lineNumber=lineNumber,
                                   startIdx=startIdx, endIdx=endIdx,
                                   source=source)
    #print "Functions found: %d" % (len(funcs))
    #print funcs.keys()
    for match in RE_METHOD_DEF.finditer(source):
        parent, methodName, args = match.groups()
        func = funcs.get(parent)
        if func is None:
            # It's adding to builtin object...
            #print parent #TODO: Output this...
            continue
        startIdx = match.start()
        lineNumber = source.count("\n", 0, startIdx) + 1
        dummy, endIdx = getBlockOffsets(source, startIdx)
        func.methods.append(Method(methodName, args,
                                   text=match.group(0),
                                   lineNumber=lineNumber,
                                   startIdx=startIdx, endIdx=endIdx,
                                   source=source))
    return funcs
def getBlockOffsets(source, startIdx):
    """
    Locate the brace-delimited block that starts at or after `startIdx`.

    * `source`   - text to search.
    * `startIdx` - offset from which to look for the opening "{".

    Returns `(openIdx, endIdx)` where `openIdx` is the offset of the
    opening brace and `endIdx` is one past its matching closing brace, so
    `source[openIdx:endIdx]` is the whole block.

    Raises ValueError when there is no "{" at or after `startIdx`, or when
    the block is never closed (instead of looping forever).

    Braces are counted textually; ones inside string literals or comments
    are not treated specially.
    """
    openIdx = source.index("{", startIdx) # TODO: Catch ValueError when no more?
    depth = 1
    idx = openIdx + 1
    while depth:
        closeIdx = source.find("}", idx)
        if closeIdx == -1:
            raise ValueError("unbalanced braces: block opened at offset %d "
                             "is never closed" % openIdx)
        # Is there another opening brace before that closing brace?
        nestedIdx = source.find("{", idx, closeIdx)
        if nestedIdx != -1:
            depth += 1
            idx = nestedIdx + 1
        else:
            depth -= 1
            idx = closeIdx + 1
    return (openIdx, idx)
def formatLine(lineNumber, text):
    """
    Return one line of the numbered source listing.

    * `lineNumber` - number used as the anchor name for this line.
    * `text`       - raw source line; it is HTML-escaped here.

    NOTE(review): the literal in this copy of the file had lost its markup
    (one placeholder for two arguments); the named anchor is a
    reconstruction - confirm against an original copy and against the
    links produced by `formatFunction`.
    """
    return '<a name="%s"></a> %s' % (lineNumber, escape(text))
def generateNumberedSource(sourceFilename, outputFilename):
    """
    Write an HTML listing of `sourceFilename` to `outputFilename`.

    Each source line is passed through `formatLine` with its 1-based line
    number, matching the 1-based `lineNumber` that `extractFunctionInfo`
    records for functions. An existing output file is first copied to
    `<outputFilename>.bak`. Both files are closed explicitly.

    NOTE(review): the wrapper literals in this copy of the file had lost
    their markup (each was a string spanning a line break); <pre> tags are
    a reconstruction - confirm against an original copy.
    """
    parts = []
    parts.append("<pre>\n")
    fh = open(sourceFilename)
    try:
        for index, text in enumerate(fh):
            parts.append(formatLine(index + 1, text))
    finally:
        fh.close()
    parts.append("</pre>\n")
    if os.path.exists(outputFilename):
        shutil.copy(outputFilename, "%s.bak" % outputFilename)
    out = open(outputFilename, "w")
    try:
        out.write("".join(parts))
    finally:
        out.close()
def generateDocumentation(source, funcs, sourceURL):
    """
    Write the HTML reference page to standard output.

    * `source`    - full source text, searched for references to each
                    function.
    * `funcs`     - dict of function name to `Function`.
    * `sourceURL` - URL of the line-numbered source page; the source and
                    metadata file names are derived from it by removing
                    "-linenums.html".

    Functions are ordered by lower-cased `newName` (falling back to the
    lower-cased `name`), with `name` as a tie-breaker so the sort never
    has to compare `Function` objects. The page is a four column table of
    contents followed by one section per function and, where any were
    found, its reference list.

    NOTE(review): the page markup in this copy of the file had been
    stripped (the intro had one placeholder for two arguments and one
    literal spanned a line break). The tags below are a minimal
    reconstruction; any original stylesheet and the link target for
    "Javascript deobfuscation helper" could not be recovered.
    """
    sourceName = sourceURL.replace("-linenums.html","")
    out = []
    out.append("""<html>
<head>
<title>Google Maps Classes and Functions Reference</title>
</head>""")
    out.append("<body>")
    out.append("""<h1>Google Maps Classes and Functions Reference</h1>
<p>Rough initial version. Based on original annotation from http://spaces.msn.com/members/sompost/ with additions.</p>
<p>Original name (as listed in <code>%s</code>) is given in square brackets and linked to the original source. Meta data is from <a href="%s">Google Maps meta data file</a>. Generated by Javascript deobfuscation helper.</p>
""" % (sourceName, "funcs-%s.txt" % sourceName))
    sections = []
    toc = []
    sortedFuncs = [(f.newName.lower() or f.name.lower(), f.name, f)
                   for f in funcs.values()]
    sortedFuncs.sort()
    for dummy, dummyName, f in sortedFuncs:
        tocEntry, section = formatFunction(f, sourceURL)
        toc.append(tocEntry)
        sections.append(section)
        references = getReferences(source, f.name)
        if references:
            sections.append(referenceList(f, references))
    out.append('<div class="toc">')
    out.append(table(toc, 4))
    out.append("</div>")
    out.append("\n".join(sections))
    out.append("</body>\n</html>")
    sys.stdout.write("\n".join(out) + "\n")
def usage():
    """
    Print the command line synopsis to standard output and exit.

    Both modes need file arguments ('-d' one file, '-u' the old and the
    new file), so the synopsis names them.

    Raises SystemExit.
    """
    # Single-argument parenthesised form: behaves the same as a statement
    # or as a function call.
    print("Usage: %s (-d <file> | -u <old file> <new file>)" % sys.argv[0])
    raise SystemExit
# TODO: Make this method of Function?
def getArgsSig(f):
    """
    Return the argument names of `f` as a list.

    `f.args` is split on commas; an empty (falsy) `args` gives an empty
    list rather than a list holding one empty string.
    """
    if not f.args:
        return []
    return f.args.split(",")
def findSignatureMatches(targetFunction, allFunctions):
    """
    Return the members of `allFunctions` that look like `targetFunction`.

    * `targetFunction` - the `Function` to find a counterpart for.
    * `allFunctions`   - list of candidate `Function` objects.

    Candidates are narrowed step by step, each step running only while
    more than one candidate remains:

      1. identical method-name signature;
      2. same number of arguments;
      3. function source text that is the single close match (difflib);
      4. first-method source text that is the single close match.

    If step 1 found nothing at all, the single closest method signature
    (difflib) is used instead. The result may hold zero, one or several
    functions; callers treat exactly one as a match.

    All comparisons use `targetFunction`; nothing is read from module
    globals.
    """
    # TODO: Refactor copy & pasting...
    # TODO: Cache any of this stuff?
    targetSignature = targetFunction.signature

    # Method signatures
    # This is the strongest match, so it is checked first.
    sigMatches = [mf for mf in allFunctions
                  if mf.signature == targetSignature]

    # Constructor/function arg count signatures
    if len(sigMatches) > 1:
        # Build a new list; removing while iterating skips elements.
        targetArgCount = len(getArgsSig(targetFunction))
        sigMatches = [mf for mf in sigMatches
                      if len(getArgsSig(mf)) == targetArgCount]

    # Constructor/function source code match
    if len(sigMatches) > 1:
        matchSources = [mf.source for mf in sigMatches]
        closeSources = difflib.get_close_matches(targetFunction.source,
                                                 matchSources,
                                                 n = 2)
        if len(closeSources) == 1:
            sigMatches = [sigMatches[matchSources.index(closeSources[0])]]
        # TODO: Handle multiple closematches?

    # First method source code match
    if (len(sigMatches) > 1) and targetFunction.methods:
        # TODO: Check methods are present.
        # TODO: Check against all methods?
        matchSources = [mf.methods[0].source for mf in sigMatches]
        closeSources = difflib.get_close_matches(
            targetFunction.methods[0].source, matchSources, n = 2)
        if len(closeSources) == 1:
            sigMatches = [sigMatches[matchSources.index(closeSources[0])]]
        # TODO: Handle multiple close matches?

    # Close method signatures
    if not sigMatches:
        allSigs = [mf.signature for mf in allFunctions]
        closeSigs = difflib.get_close_matches(targetSignature,
                                              allSigs,
                                              n = 2)
        if len(closeSigs) == 1:
            sigMatches = [allFunctions[allSigs.index(closeSigs[0])]]
        # TODO: Handle multiple close matches?
    return sigMatches
if __name__ == "__main__":
    # Command line: "-d <file>" documents one file, "-u <old> <new>"
    # carries metadata from the old file over to the new one. Anything
    # else prints the usage text and exits (usage() raises SystemExit).
    args = sys.argv[1:]
    if not args:
        usage()
    option = args[0]
    if option == "-d" and len(args) >= 2:
        sourceFilename1 = args[1]
    elif option == "-u" and len(args) >= 3:
        sourceFilename1 = args[1]
        sourceFilename2 = args[2]
    else:
        usage()

    fh = open(sourceFilename1)
    try:
        source1 = fh.read()
    finally:
        fh.close()
    funcs1 = extractFunctionInfo(source1)
    # Creates the metadata file on first use and merges in any edits made
    # to it since the last run.
    exportFuncs("funcs-%s.txt" % sourceFilename1, funcs1)

    if option == "-d": # "d"ocument
        linenumsFilename = "%s-linenums.html" % sourceFilename1
        generateNumberedSource(sourceFilename1, linenumsFilename)
        generateDocumentation(source1, funcs1, linenumsFilename)
    else: # "u"pgrade (the only other option accepted above)
        fh = open(sourceFilename2)
        try:
            source2 = fh.read()
        finally:
            fh.close()
        funcs2 = extractFunctionInfo(source2)
        # Work lists of functions still waiting for a counterpart.
        allFuncs1 = list(funcs1.values())
        allFuncs2 = list(funcs2.values())
        # Repeat full passes until a pass matches nothing: every match
        # shrinks the candidate pool, which can make a previously
        # ambiguous function unique.
        itemMatched = True
        while allFuncs1 and allFuncs2 and itemMatched:
            itemMatched = False
            # NOTE(review): keep this loop variable named `f` at module
            # scope unless findSignatureMatches is confirmed not to read
            # a global `f`.
            for f in allFuncs1[:]:
                sigMatches = findSignatureMatches(f, allFuncs2)
                if len(sigMatches) == 1:
                    itemMatched = True
                    matchedF = sigMatches[0]
                    allFuncs2.remove(matchedF)
                    allFuncs1.remove(f)
                    matchedF.update(f)
                    # TODO: Allow for manual confirmation.
                    #print "\n------------------------------"
                    #print f.name, matchedF.name, f.newName
                    #print f.source
                    #print matchedF.source
        if allFuncs1 and not allFuncs2:
            print("%d items exist in old file but not in new file." %
                  len(allFuncs1))
        if allFuncs2 and not allFuncs1:
            print("%d items exist in new file but not in old file." %
                  len(allFuncs2))
        if allFuncs1 and allFuncs2:
            print("%d items in old file and "
                  "%d items in new file not matched." % (len(allFuncs1),
                                                        len(allFuncs2)))
        exportFuncs("funcs-%s.txt" % sourceFilename2, funcs2)