#!/usr/bin/python2.3
#
# `de_obf_helper.py`
#
# Helper for deobfuscation of Javascript code.
#
# Author: follower@myrealbox.com
#
# Copyright: 2005
#
# License: GPL 2.0
#
# Version: 0.1.0
#
# MOTD: Are you all *insane*???
#
#
# Example usage: (Using Google Maps code)
#
#   Note: All code must be pretty-printed in the files.
#
#   * Copy file `maps.1.js`
#
#   * Execute ('-d' = generate documentation):
#
#       ./de_obf_helper.py -d maps.1.js > maps.1.html
#
#   * Generates or updates these files:
#
#       maps.1.html  funcs-maps.1.js.txt  maps.1.js-linenums.html
#
#   * Edit `funcs-maps.1.js.txt`, the format is (one entry per line):
#
#       class/function name<TAB>args<TAB>deobfuscated name<TAB>description
#
#     e.g.:
#
#       Y<TAB>Vc<TAB>XSLT<TAB>Handles XSLT processing
#
#   * Re-run above '-d' command to refresh documentation.
#
#   * Then, when target code has been modified and obfuscated differently:
#
#   * Copy file `maps.2.js`
#
#   * Execute ('-u' = upgrade functions/class metadata to new names):
#
#       ./de_obf_helper.py -u maps.1.js maps.2.js
#
#   * Generates or updates:
#
#       funcs-maps.2.js.txt
#
#   * Execute:
#
#       ./de_obf_helper.py -d maps.2.js > maps.2.html
#
#     and documentation matching the most recent obfuscation is generated.
#
# Note: The "upgrade" step is not perfect, it uses various heuristics to
#       attempt to identify the same function/class in each file.
#
# Note: This code is very rough, with hard coded stuff all over the place,
#       but it does the job for me so I figured I'd throw it up on the net.
#       Perhaps a little too literally... :-)
#
# NOTE(review): the HTML fragments emitted by this module were reconstructed
#       from the format arguments that consume them; confirm the exact
#       markup (attributes, stylesheet) against an upstream copy if one
#       is available.
#

import os
import re
import sys
import shutil
import difflib
import textwrap

try:
    set
except NameError:
    # Very old interpreters (Python 2.3) have no builtin `set` type.
    from sets import Set as set


# Matches `function name(args)`; group 1 is the name, group 2 the raw args.
RE_FUNCTION_DEF = re.compile(r"function\s+?(.+?)\((.*?)\)")
#RE_FUNCTION_DEF = re.compile("^function\s+?(.+?)\((.*?)\)", re.DOTALL | re.MULTILINE)

# Matches `Parent.some.path=function(args)` at the start of a line.
RE_METHOD_DEF = re.compile(r"^(\w+?)\.([\w.]+?)=function\((.*?)\)",
                           re.MULTILINE)

# Used by `getBlockOffsets` to hop from brace to brace.
_RE_BRACE = re.compile(r"[{}]")


def _emit(text=""):
    """Write `text` followed by a newline to standard output."""
    sys.stdout.write("%s\n" % text)


# TODO: Make this all lazy, but cached.
class SourcePropertyMixin(object): # TODO: Abstract out more functionality?
    """
    Adds a read-only `source` property to classes whose instances carry
    `_source` (the full file text), `startIdx` and `endIdx`.
    """

    def _getSource(self):
        """
        Return the slice of the file text covered by this item, or an
        empty string when no file text is attached (e.g. entries that
        were created from the metadata file only).
        """
        if self._source is None:
            return ""
        return self._source[self.startIdx:self.endIdx]

    source = property(_getSource,
                      doc="Source text of this item ('' if unknown).")


class Function(SourcePropertyMixin, object): # TODO: Rename? & Class-ify?
    """
    A Javascript function (or constructor) plus its deobfuscation metadata.
    """

    def __init__(self, functionName, functionArgs, functionNewName = "",
                 functionDescription = "", text=None, lineNumber = None,
                 startIdx = None, endIdx = None, source = None):
        """
        * `functionName` - name as it appears in the (obfuscated) source.
        * `functionArgs` - raw comma separated argument string.
        * `functionNewName` - deobfuscated name, if known.
        * `functionDescription` - free text description.
        * `text` - the matched definition text, e.g. "function Y(Vc)".
        * `lineNumber` - zero-based line of the definition in the file.
        * `startIdx`/`endIdx` - offsets of the function within `source`.
        * `source` - the complete text of the file the function is in.
        """
        self.name = functionName
        self.args = functionArgs
        self.newName = functionNewName
        self.description = functionDescription

        self.text = text
        self.lineNumber = lineNumber

        # Offset of function source within the source file.
        self.startIdx = startIdx
        self.endIdx = endIdx

        self._source = source

        self.methods = []

        self.otherName = "" # TODO: Handle this better. Key by file name?

    def update(self, source):
        """
        Copy every non-empty metadata attribute from `source` onto self.

        * `source` another Function instance.
        """
        #if source.name == self.name:
        for attr in ["args", "newName", "description"]: #TODO:Improve this?
            newValue = getattr(source, attr)
            if newValue:
                setattr(self, attr, newValue)
        #else:
        #    raise Exception("Functions are not of the same name")

    def asExportString(self):
        """
        Return the tab delimited metadata line for this function
        (name, args, new name, description), without a trailing newline.
        """
        return "\t".join([self.name, self.args, self.newName,
                          self.description])

    def _getSignature(self):
        """
        Return the list of method names, in definition order.
        """
        return [m.name for m in self.methods]

    signature = property(_getSignature,
                         doc="Method names attached to this function.")


# TODO: Make this static?
def functionFromString(metaData):
    """
    Build a metadata-only Function from one line of the metadata file.

    * metaData - Tab delimited funcName, funcArgs, funcNewName, funcDesc

    Missing trailing fields default to ""; any tab beyond the third is
    kept as part of the description.
    """
    fields = metaData.split("\t", 3)
    while len(fields) < 4:
        fields.append("")
    funcName, funcArgs, funcNewName, funcDesc = fields
    return Function(funcName, funcArgs, funcNewName, funcDesc)


class Method(SourcePropertyMixin, object):
    """
    A method assigned onto a Function (`Parent.name=function(...)`).
    """

    def __init__(self, methodName, methodArgs, text=None, lineNumber = None,
                 startIdx = None, endIdx = None, source = None):
        """
        * `methodName` - dotted path after the parent name; a leading
          "prototype." component is dropped.
        * `methodArgs` - raw comma separated argument string.
        * `text`, `lineNumber`, `startIdx`, `endIdx`, `source` - as for
          `Function`.
        """
        # TODO: Record if 'prototype'?
        self.name = methodName.replace("prototype.", "")
        self.args = methodArgs

        self.text = text
        self.lineNumber = lineNumber

        # Offset of function source within the source file.
        self.startIdx = startIdx
        self.endIdx = endIdx

        self._source = source


def exportFuncs(filename, funcs):
    """
    Merge the metadata file `filename` into `funcs` and write it back.

    * `filename` - tab delimited metadata file; need not exist yet.
      An existing file is first copied to `<filename>.bak`.
    * `funcs` - dict mapping function name to Function. It is updated
      in place: entries found in the file override the matching entry's
      non-empty metadata, and entries only present in the file are added.

    The file is rewritten with one line per entry, sorted by name.
    """
    if os.path.exists(filename):
        shutil.copy(filename, "%s.bak" % filename)
        fh = open(filename)
        try:
            lines = fh.readlines()
        finally:
            fh.close()
        for line in lines:
            # Only strip the line terminator; a final line without one
            # must not lose its last character.
            line = line.rstrip("\r\n")
            if not line:
                continue
            funcMetaData = functionFromString(line)
            existing = funcs.get(funcMetaData.name)
            if existing is None:
                funcs[funcMetaData.name] = funcMetaData
            else:
                existing.update(funcMetaData)

    names = list(funcs.keys())
    names.sort()
    fh = open(filename, "w")
    try:
        for name in names:
            fh.write("%s\n" % funcs[name].asExportString())
    finally:
        fh.close()


#BASE_RE_FUNCTION_REFERENCES = "([\w.])+?\s*?=\s*?%s\((.+?)\)"
#BASE_RE_FUNCTION_REFERENCES = "^[^\n]*?([\w.])+?\s*?=\s*(.*?)\s*%s\((.*?)\).*?$"
# TODO: Check this matches all references correctly.
#BASE_RE_FUNCTION_REFERENCES = "^.*?\W%s(\((.*?)\).*?$|($|\W.*?$))"
BASE_RE_FUNCTION_REFERENCES = r"^.*?\W%s\((.*?)\).*?$"
#BASE_RE_FUNCTION_REFERENCES = "^.*?\W%s(\((.*?)\).*?$|[\s;]*?$)"

def getReferences(source, funcName):
    """
    Return every line of `source` that calls `funcName`, excluding lines
    that start with "function" (i.e. the definition itself).
    """
    # The name is escaped because obfuscated names may contain regex
    # metacharacters such as "$".
    re_functionReference = re.compile(
        BASE_RE_FUNCTION_REFERENCES % re.escape(funcName), re.MULTILINE)

    references = []
    for match in re_functionReference.finditer(source):
        line = match.group(0)
        if not line.startswith("function"):
            references.append(line)
    return references


FORMAT_HEADING = '<h%d class="%s">%s</h%d>'

def heading(level, klass, content, id_ = None):
    """
    Return an HTML heading of the given `level` and CSS class `klass`,
    optionally carrying the anchor id `id_`.
    """
    template = FORMAT_HEADING
    if id_:
        # Ewww... splice the id attribute in at the first space. "%" is
        # doubled so the id survives the formatting step below.
        idAttr = ' id="%s" ' % str(id_).replace("%", "%%")
        template = template.replace(' ', idAttr, 1)

    return template % (level, klass, content, level)


def paragraph(klass, content):
    """
    Return `content` wrapped in an HTML paragraph with CSS class `klass`.
    """
    return '<p class="%s">%s</p>' % (klass, content)


def href(url, text):
    """
    Return an HTML link to `url` labelled `text`.
    """
    return '<a href="%s">%s</a>' % (url, text)


def table(datalines, columns):
    """
    Lay `datalines` out in a one-row HTML table of at most `columns`
    equally wide cells, filling each cell top to bottom.

    Raises ValueError if `columns` is less than one.
    """
    if columns < 1:
        raise ValueError("columns must be at least 1")

    parts = []
    parts.append('<table width="100%"><tr>')

    total = len(datalines)
    # Round up so the items never spill into more than `columns` cells,
    # and never step by zero (which would loop forever).
    numPerCol = max(1, (total + columns - 1) // columns)

    count = 0
    while count < total:
        parts.append('<td width="%d%%" valign="top">%s</td>' %
                     (100 // columns,
                      "<br />\n".join(datalines[count:count+numPerCol])))
        count += numPerCol

    parts.append('</tr></table>')

    return "\n".join(parts)


# TODO: Find the proper one...
def escape(text):
    """
    Return `text` with "&", "<" and ">" replaced by HTML entities.
    """
    # "&" must be replaced first so the entities added below survive.
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")


def referenceList(func, references):
    """
    Return an HTML section listing up to five distinct `references`
    (source lines) for `func`, with the obfuscated name replaced by the
    deobfuscated one when it is known.
    """
    MAX_REFS_TO_DISPLAY = 5

    parts = []
    parts.append(heading(3, "references", "References:"))

    # De-duplicate while keeping first-seen order so output is stable.
    seen = set()
    uniqueReferences = []
    for reference in references:
        reference = reference.strip()
        if reference not in seen:
            seen.add(reference)
            uniqueReferences.append(reference)

    newName = func.newName.replace("~", "")
    # Lookarounds (rather than consuming the neighbouring characters)
    # so that adjacent occurrences such as "a(a(1))" are all renamed.
    re_name = re.compile(r"(?<!\w)%s(?!\w)" % re.escape(func.name))

    parts.append("<ul>")
    for reference in uniqueReferences[:MAX_REFS_TO_DISPLAY]:
        if newName:
            # A callable replacement keeps backslashes in the new name
            # from being treated as group references.
            reference = re_name.sub(lambda match: newName, reference)
        parts.append('<li>%s</li>' % escape(reference))
    if len(uniqueReferences) > MAX_REFS_TO_DISPLAY:
        parts.append('<li>...</li>')
    parts.append("</ul>")

    return "\n".join(parts)


def formatFunction(f, sourceURL):
    """
    Return `(tocEntry, section)` HTML for Function `f`.

    * `sourceURL` - URL of the line-numbered source; the original name
      is linked to `<sourceURL>#<lineNumber>`.
    """
    parts = []

    name = f.newName.replace("~","") or f.name
    parts.append(heading(2, "fname",
                         '%s [<a href="%s#%s">%s</a>]' %
                         (name, sourceURL, f.lineNumber, f.name),
                         name))

    parts.append(heading(3, "section", "Args:"))
    parts.append(paragraph("args", f.args.replace(",",", "))) #TODO: Change formatting.

    parts.append(paragraph("desc", f.description))

    if f.methods:
        parts.append(heading(3, "section", "Methods:"))
        for m in f.methods:
            parts.append(paragraph("method", "%s(%s)" % (m.name, m.args)))

    return (href("#%s" % name, name) , "\n".join(parts))


def _blockEnd(source, startIdx, text):
    """
    Return the end offset of the brace block following `startIdx`, or the
    end of the matched definition `text` when no complete block follows.
    """
    try:
        dummy, endIdx = getBlockOffsets(source, startIdx)
    except ValueError:
        endIdx = startIdx + len(text)
    return endIdx


def extractFunctionInfo(source):
    """
    Scan Javascript `source` and return a dict mapping function name to
    Function, with methods (`Parent.x=function(...)` lines) attached to
    their parent. Methods whose parent is not a function found in
    `source` are ignored. A later definition of the same name replaces
    an earlier one.
    """
    funcs = {}

    for match in RE_FUNCTION_DEF.finditer(source):
        text = match.group(0)
        funcName, args = match.groups()
        # Use the match position; searching for `text` again would find
        # the first occurrence, which may be an earlier look-alike.
        startIdx = match.start()
        lineNumber = source[:startIdx].count("\n")
        endIdx = _blockEnd(source, startIdx, text)
        funcs[funcName] = Function(funcName, args, text=text,
                                   lineNumber=lineNumber,
                                   startIdx=startIdx, endIdx=endIdx,
                                   source=source)

    #print "Functions found: %d" % (len(funcs))
    #print funcs.keys()

    for match in RE_METHOD_DEF.finditer(source):
        text = match.group(0)
        parent, methodName, args = match.groups()
        func = funcs.get(parent)
        if func is None:
            # It's adding to builtin object...
            #print parent #TODO: Output this...
            continue
        startIdx = match.start()
        lineNumber = source[:startIdx].count("\n")
        endIdx = _blockEnd(source, startIdx, text)
        func.methods.append(Method(methodName, args, text=text,
                                   lineNumber=lineNumber,
                                   startIdx=startIdx, endIdx=endIdx,
                                   source=source))

    return funcs


def getBlockOffsets(source, startIdx):
    """
    Return `(start, end)` offsets of the first brace delimited block at
    or after `startIdx`: `start` is the offset of the opening "{" and
    `end` is one past its matching "}".

    Braces are counted textually, so braces inside string literals,
    regular expressions or comments are not treated specially.

    Raises ValueError if there is no "{" or the block is never closed.
    """
    startIdx = source.index("{", startIdx)

    depth = 0
    pos = startIdx
    while True:
        match = _RE_BRACE.search(source, pos)
        if match is None:
            raise ValueError("unbalanced braces in block starting at "
                             "offset %d" % startIdx)
        if match.group(0) == "{":
            depth += 1
        else:
            depth -= 1
            if depth == 0:
                return (startIdx, match.end())
        pos = match.end()


def formatLine(lineNumber, text):
    """
    Return source line `text` HTML-escaped and wrapped in an anchor named
    after `lineNumber`, so it can be linked to as `#<lineNumber>`.
    """
    return '<a name="%s">%s</a>' % (lineNumber, escape(text))


def generateNumberedSource(sourceFilename, outputFilename):
    """
    Write an HTML rendering of `sourceFilename` to `outputFilename` in
    which every line carries an anchor named after its zero-based line
    number. An existing output file is first copied to
    `<outputFilename>.bak`.
    """
    parts = []

    parts.append("<html><body><pre>")
    fh = open(sourceFilename)
    try:
        for lineNumber, text in enumerate(fh):
            parts.append(formatLine(lineNumber, text))
    finally:
        fh.close()
    parts.append("</pre></body></html>")

    if os.path.exists(outputFilename):
        shutil.copy(outputFilename, "%s.bak" % outputFilename)

    fh = open(outputFilename, "w")
    try:
        fh.write("".join(parts))
    finally:
        fh.close()


def generateDocumentation(source, funcs, sourceURL):
    """
    Print the HTML reference page to standard output.

    * `source` - text of the Javascript file (searched for references).
    * `funcs` - dict of Function as returned by `extractFunctionInfo`.
    * `sourceURL` - URL of the line-numbered source file; the names of
      the source and metadata files are derived from it.
    """
    sourceName = sourceURL.replace("-linenums.html", "")

    _emit("""<html>
<head>
<title>Google Maps Classes and Functions Reference</title>""")
    # NOTE(review): the original emitted additional head markup here
    # (presumably a stylesheet) that could not be recovered.
    _emit("</head><body>")

    # NOTE(review): "Javascript deobfuscation helper" was presumably a
    # link in the original; its target is unknown, so it is plain text.
    _emit("""
<h1>Google Maps Classes and Functions Reference</h1>

<p>Rough initial version. Based on original annotation from <a href="http://spaces.msn.com/members/sompost/">http://spaces.msn.com/members/sompost/</a> with additions.</p>

<p>Original name (as listed in %s) is given in square brackets and linked to the original source. Meta data is from <a href="%s">Google Maps meta data file</a>. Generated by Javascript deobfuscation helper.</p>

""" % (sourceName, "funcs-%s.txt" % sourceName))

    sections = []
    toc = []

    # The unique original name is the tie-breaker, so Function objects
    # themselves never need to be compared.
    sortedFuncs = [(f.newName.lower() or f.name.lower(), f.name, f)
                   for f in funcs.values()]
    sortedFuncs.sort()

    for dummy, dummyName, f in sortedFuncs:
        tocEntry, section = formatFunction(f, sourceURL)

        toc.append(tocEntry)
        sections.append(section)

        references = getReferences(source, f.name)
        if references:
            sections.append(referenceList(f, references))

    _emit("<hr />")
    _emit(table(toc, 4))
    _emit("<hr />")

    _emit("\n".join(sections))

    _emit("</body></html>")


def usage():
    """
    Print the command line synopsis and exit (raises SystemExit).
    """
    _emit("Usage: %s (-d <filename> | -u <old filename> <new filename>)"
          % sys.argv[0])
    raise SystemExit


# TODO: Make this method of Function?
def getArgsSig(f):
    """
    Return the argument names of `f` as a list ([] if it takes none).

    Based on function/constructor args?
    """
    if f.args:
        fargs = f.args.split(",")
    else:
        fargs = []
    return fargs


def findSignatureMatches(targetFunction, allFunctions):
    """
    Return the Functions in `allFunctions` that look like the same
    function as `targetFunction`; exactly one item means a confident
    match.

    Candidates are narrowed in turn by: identical method name lists,
    equal argument count, similar function source, similar source of the
    first method. If no candidate has an identical method name list, a
    single close match on the method name list is accepted instead.
    """
    # TODO: Refactor copy & pasting...
    # TODO: Cache any of this stuff?

    targetSignature = targetFunction.signature

    # Method signatures
    # This is strongest match so we not check anything else?
    sigMatches = [mf for mf in allFunctions
                  if mf.signature == targetSignature]

    # Constructor/function arg count signatures
    if len(sigMatches) > 1:
        targetArgCount = len(getArgsSig(targetFunction))
        # Build a new list; removing while iterating skips candidates.
        sigMatches = [mf for mf in sigMatches
                      if len(getArgsSig(mf)) == targetArgCount]

    # Constructor/function source code match
    if len(sigMatches) > 1:
        matchSources = [mf.source for mf in sigMatches]
        closeSources = difflib.get_close_matches(targetFunction.source,
                                                 matchSources, n = 2)
        if len(closeSources) == 1:
            sigMatches = [sigMatches[matchSources.index(closeSources[0])]]
        # TODO: Handle multiple closematches?

    # First method source code match
    if (len(sigMatches) > 1) and targetFunction.methods:
        # Every remaining candidate has the same method names as the
        # target, so each has a first method too.
        # TODO: Check against all methods?
        matchSources = [mf.methods[0].source for mf in sigMatches]
        closeSources = difflib.get_close_matches(
            targetFunction.methods[0].source, matchSources, n = 2)
        if len(closeSources) == 1:
            sigMatches = [sigMatches[matchSources.index(closeSources[0])]]
        # TODO: Handle multiple close matches?

    # Close method signatures
    if not sigMatches:
        allSigs = [mf.signature for mf in allFunctions]
        closeSigs = difflib.get_close_matches(targetSignature, allSigs,
                                              n = 2)
        if len(closeSigs) == 1:
            sigMatches = [allFunctions[allSigs.index(closeSigs[0])]]
        # TODO: Handle multiple close matches?

    return sigMatches


def _readFile(filename):
    """Return the complete content of `filename`."""
    fh = open(filename)
    try:
        return fh.read()
    finally:
        fh.close()


def _transferMetadata(allFuncs1, allFuncs2):
    """
    Copy metadata from old Functions (`allFuncs1`) onto the new Functions
    (`allFuncs2`) they match. Matched items are removed from both lists,
    so what remains afterwards is unmatched.
    """
    itemMatched = True

    # Each pass may remove ambiguity for the next one, so repeat until a
    # pass matches nothing.
    while allFuncs1 and allFuncs2 and itemMatched:
        itemMatched = False
        for f in allFuncs1[:]:
            sigMatches = findSignatureMatches(f, allFuncs2)
            if len(sigMatches) == 1:
                itemMatched = True
                matchedF = sigMatches[0]
                allFuncs2.remove(matchedF)
                allFuncs1.remove(f)
                matchedF.update(f) # TODO: Allow for manual confirmation.
                #print "\n------------------------------"
                #print f.name, matchedF.name, f.newName
                #print f.source
                #print matchedF.source


def main(argv):
    """
    Command line entry point; `argv` is the full argument vector
    (program name first). See the file header for usage.
    """
    if len(argv) < 2:
        usage()
    option = argv[1]

    sourceFilename2 = None
    if option == "-d":
        if len(argv) < 3:
            usage()
        sourceFilename1 = argv[2]
    elif option == "-u":
        if len(argv) < 4:
            usage()
        sourceFilename1 = argv[2]
        sourceFilename2 = argv[3]
    else:
        usage()

    source1 = _readFile(sourceFilename1)
    funcs1 = extractFunctionInfo(source1)
    exportFuncs("funcs-%s.txt" % sourceFilename1, funcs1)

    if option == "-d": # "d"ocument
        linenumsFilename = "%s-linenums.html" % sourceFilename1
        generateNumberedSource(sourceFilename1, linenumsFilename)

        generateDocumentation(source1, funcs1, linenumsFilename)

    else: # "u"pgrade
        source2 = _readFile(sourceFilename2)
        funcs2 = extractFunctionInfo(source2)

        # Real lists are needed: they are sliced and mutated below.
        allFuncs1 = list(funcs1.values())
        allFuncs2 = list(funcs2.values())

        _transferMetadata(allFuncs1, allFuncs2)

        if allFuncs1 and not allFuncs2:
            _emit("%d items exist in old file but not in new file." %
                  len(allFuncs1))

        if allFuncs2 and not allFuncs1:
            _emit("%d items exist in new file but not in old file." %
                  len(allFuncs2))

        if allFuncs1 and allFuncs2:
            _emit("%d items in old file and "
                  "%d items in new file not matched." %
                  (len(allFuncs1), len(allFuncs2)))

        exportFuncs("funcs-%s.txt" % sourceFilename2, funcs2)


if __name__ == "__main__":
    main(sys.argv)