From 16174505a42d94bc9765950e68de8a46a96965aa Mon Sep 17 00:00:00 2001 From: Mysterie Date: Thu, 11 Oct 2012 16:32:59 +0200 Subject: [PATCH] Cleaning code & patch --- setup.py | 3 +- test/test_loops.py | 13 +- test/test_tuples.py | 2 +- test_one | 16 - test_pythonlib => test_pythonlib.py | 82 +++--- scripts/uncompyle2 => uncompyle2.py | 4 +- uncompyle2/Scanner.py | 60 ---- uncompyle2/__init__.py | 147 ++++----- uncompyle2/disas.py | 24 +- uncompyle2/opcode/opcode_25.py | 20 +- uncompyle2/opcode/opcode_26.py | 20 +- uncompyle2/opcode/opcode_27.py | 20 +- uncompyle2/{Parser.py => parser.py} | 19 +- uncompyle2/scanner.py | 274 +++++++++++++++++ uncompyle2/{Scanner25.py => scanner25.py} | 336 ++++----------------- uncompyle2/{Scanner26.py => scanner26.py} | 344 +++++----------------- uncompyle2/{Scanner27.py => scanner27.py} | 277 +++-------------- uncompyle2/spark.py | 48 +-- uncompyle2/verify.py | 4 +- uncompyle2/{Walker.py => walker.py} | 103 +++---- 20 files changed, 713 insertions(+), 1103 deletions(-) delete mode 100755 test_one rename test_pythonlib => test_pythonlib.py (59%) rename scripts/uncompyle2 => uncompyle2.py (99%) delete mode 100755 uncompyle2/Scanner.py rename uncompyle2/{Parser.py => parser.py} (98%) create mode 100755 uncompyle2/scanner.py rename uncompyle2/{Scanner25.py => scanner25.py} (77%) rename uncompyle2/{Scanner26.py => scanner26.py} (77%) rename uncompyle2/{Scanner27.py => scanner27.py} (74%) rename uncompyle2/{Walker.py => walker.py} (96%) diff --git a/setup.py b/setup.py index d01d3172..d4de2d0c 100755 --- a/setup.py +++ b/setup.py @@ -10,6 +10,5 @@ setup (name = "uncompyle2", author = "Mysterie", author_email = "kajusska@gmail.com", url = "http://github.com/Mysterie/uncompyle2", - packages=['uncompyle2', 'uncompyle2.opcode'], - scripts=['scripts/uncompyle2'], + packages=['uncompyle2', 'uncompyle2.opcode'] ) diff --git a/test/test_loops.py b/test/test_loops.py index 462ef419..a5992317 100755 --- a/test/test_loops.py +++ b/test/test_loops.py @@ -15,7 +15,6 @@ for i in range(10): else: print 'Else' - i = 0 while i < 10: i = i+1 @@ -45,3 +44,15 @@ for x, y in [(1,2),(3,4)]: for x in (1, 2, 3): if x == 1: print x + +i = 0 +while i < 10: + i+=1 + for x in (1,2,3): + for y in (1,2,3): + if x == y and x == 1: + while i < 10: + print x + break + + diff --git a/test/test_tuples.py b/test/test_tuples.py index 82579985..56c31911 100755 --- a/test/test_tuples.py +++ b/test/test_tuples.py @@ -17,4 +17,4 @@ del x[1,2,3] x=[1,2,3] b=(1 for i in x if i) -b=(e for i in range(4) if i == 2 for j in range(7) if i + i % 2 == 0) \ No newline at end of file +b=(e for i in range(4) if i == 2) \ No newline at end of file diff --git a/test_one b/test_one deleted file mode 100755 index 6c62ff3e..00000000 --- a/test_one +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/sh - -file=$1 -shift -options=$@ - -BASEDIR=test/bytecode_2.6 -#BASEDIR=test/bytecode_2.0 -#BASEDIR=test/bytecode_2.1 -#BASEDIR=test/bytecode_2.2 - -if [ `dirname $file` == '.' ] ; then - file=$BASEDIR/test_$file.pyc -fi - -python2.7 -u ./scripts/uncompyle $options $file 2>&1 |less diff --git a/test_pythonlib b/test_pythonlib.py similarity index 59% rename from test_pythonlib rename to test_pythonlib.py index 367b16e3..baed1a35 100755 --- a/test_pythonlib +++ b/test_pythonlib.py @@ -1,6 +1,6 @@ -#!/usr/bin/env python -# emacs-mode: -*-python-*- -""" +#!/usr/bin/env python2.7 + +''' test_pythonlib -- uncompyle and verify Python libraries Usage-Examples: @@ -17,16 +17,17 @@ Step 1) Edit this file and add a new entry to 'test_options', eg. Step 2: Run the test: test_pythonlib --mylib # decompile 'mylib' test_pythonlib --mylib --verify # decompile verify 'mylib' -""" +''' -from uncompyle import main, verify +from uncompyle2 import main, verify +import getopt, sys import os, time, shutil from fnmatch import fnmatch #----- configure this for your needs +lib_prefix = ['.', '/usr/lib/', '/usr/local/lib/'] target_base = '/tmp/py-dis/' -lib_prefix = '/usr/lib' PYC = ('*.pyc', ) PYO = ('*.pyo', ) @@ -34,21 +35,22 @@ PYOC = ('*.pyc', '*.pyo') test_options = { # name: (src_basedir, pattern, output_base_suffix) - 'test': ('./test', PYOC, 'test'), - '1.5': (os.path.join(lib_prefix, 'python1.5'), PYC, 'python-lib1.5'), - '1.6': (os.path.join(lib_prefix, 'python1.6'), PYC, 'python-lib1.6'), - '2.0': (os.path.join(lib_prefix, 'python2.0'), PYC, 'python-lib2.0'), - '2.1': (os.path.join(lib_prefix, 'python2.1'), PYC, 'python-lib2.1'), - '2.2': (os.path.join(lib_prefix, 'python2.2'), PYC, 'python-lib2.2'), - '2.5': (os.path.join(lib_prefix, 'python2.5'), PYC, 'python-lib2.5'), - '2.6': (os.path.join(lib_prefix, 'python2.6'), PYC, 'python-lib2.6'), - '2.7': (os.path.join(lib_prefix, 'python2.7'), PYC, 'python-lib2.7') - } + 'test': ['test', PYC, 'test'], + '2.5': ['python2.5', PYC, 'python-lib2.5'], + '2.6': ['python2.6', PYC, 'python-lib2.6'], + '2.7': ['python2.7', PYC, 'python-lib2.7'] +} #----- -def do_tests(src_dir, patterns, target_dir, start_with=None, do_verify=0): +def help(): + print 'Usage-Examples:' + print 'test_pythonlib --all # decompile all tests (suite + libs)' + print 'test_pythonlib --all --verify # decomyile all tests and verify results' + print 'test_pythonlib --test # decompile only the testsuite' + print 'test_pythonlib --2.2 --verify # decompile and verify python lib 2.2' +def do_tests(src_dir, patterns, target_dir, start_with=None, do_verify=0): def visitor(files, dirname, names): files.extend( [os.path.normpath(os.path.join(dirname, n)) @@ -72,20 +74,25 @@ def do_tests(src_dir, patterns, target_dir, start_with=None, do_verify=0): pass print time.ctime() - main(src_dir, target_dir, files, [], do_verify=do_verify) - print time.ctime() + print 'Working directory: ', src_dir + try: + main(src_dir, target_dir, files, [], do_verify=do_verify) + except (KeyboardInterrupt, OSError): + print + exit(1) if __name__ == '__main__': - import getopt, sys - do_verify = 0 test_dirs = [] + checked_dirs = [] start_with = None test_options_keys = test_options.keys(); test_options_keys.sort() opts, args = getopt.getopt(sys.argv[1:], '', ['start-with=', 'verify', 'all', ] \ + test_options_keys ) + if not opts: + help() for opt, val in opts: if opt == '--verify': do_verify = 1 @@ -96,23 +103,20 @@ if __name__ == '__main__': elif opt == '--all': for val in test_options_keys: test_dirs.append(test_options[val]) - - for src_dir, pattern, target_dir in test_dirs: - if os.path.exists(src_dir): - target_dir = os.path.join(target_base, target_dir) - if os.path.exists(target_dir): - shutil.rmtree(target_dir, ignore_errors=1) - do_tests(src_dir, pattern, target_dir, start_with, do_verify) else: - print '### skipping', src_dir + help() + + for src_dir, pattern, target_dir in test_dirs: + for libpath in lib_prefix: + testpath = os.path.join(libpath, src_dir) + testlibfile = "%s/%s" % (testpath, 'os.py') + testfile = "%s/%s" % (testpath, 'test_empty.py') + if os.path.exists(testlibfile) or os.path.exists(testfile): + src_dir = testpath + checked_dirs.append([src_dir, pattern, target_dir]) -# python 1.5: - -# test/re_tests memory error -# test/test_b1 memory error - -# Verification notes: -# - xdrlib fails verification due the same lambda used twice -# (verification is successfull when using original .pyo as -# input) -# + for src_dir, pattern, target_dir in checked_dirs: + target_dir = os.path.join(target_base, target_dir) + if os.path.exists(target_dir): + shutil.rmtree(target_dir, ignore_errors=1) + do_tests(src_dir, pattern, target_dir, start_with, do_verify) diff --git a/scripts/uncompyle2 b/uncompyle2.py similarity index 99% rename from scripts/uncompyle2 rename to uncompyle2.py index 26067d23..6e83c2e5 100755 --- a/scripts/uncompyle2 +++ b/uncompyle2.py @@ -3,7 +3,7 @@ # # Copyright (c) 2000-2002 by hartmut Goebel # -""" +''' Usage: uncompyle [OPTIONS]... [ FILE | DIR]... Examples: @@ -38,7 +38,7 @@ Extensions of generated files: '.dis' successfully decompiled (and verified if --verify) '.dis_unverified' successfully decompile but --verify failed '.nodis' uncompyle failed (contact author for enhancement) -""" +''' Usage_short = \ "decomyple [--help] [--verify] [--showasm] [--showast] [-o ] FILE|DIR..." diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py deleted file mode 100755 index 50d12b9e..00000000 --- a/uncompyle2/Scanner.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 1999 John Aycock -# Copyright (c) 2000-2002 by hartmut Goebel -# Copyright (c) 2005 by Dan Pascu -# -# See main module for license. -# - -__all__ = ['Token', 'Scanner', 'getscanner'] - -import types -import disas as dis -from collections import namedtuple -from array import array -from operator import itemgetter - -class Token: - """ - Class representing a byte-code token. - - A byte-code token is equivalent to the contents of one line - as output by dis.dis(). - """ - def __init__(self, type_, attr=None, pattr=None, offset=-1, linestart=False): - self.type = intern(type_) - self.attr = attr - self.pattr = pattr - self.offset = offset - self.linestart = linestart - - def __cmp__(self, o): - if isinstance(o, Token): - # both are tokens: compare type and pattr - return cmp(self.type, o.type) or cmp(self.pattr, o.pattr) - else: - return cmp(self.type, o) - - def __repr__(self): return str(self.type) - def __str__(self): - pattr = self.pattr - if self.linestart: - return '\n%s\t%-17s %r' % (self.offset, self.type, pattr) - else: - return '%s\t%-17s %r' % (self.offset, self.type, pattr) - - def __hash__(self): return hash(self.type) - def __getitem__(self, i): raise IndexError - - -class Code: - """ - Class for representing code-objects. - - This is similar to the original code object, but additionally - the diassembled code is stored in the attribute '_tokens'. - """ - def __init__(self, co, scanner, classname=None): - for i in dir(co): - if i.startswith('co_'): - setattr(self, i, getattr(co, i)) - self._tokens, self._customize = scanner.disassemble(co, classname) diff --git a/uncompyle2/__init__.py b/uncompyle2/__init__.py index 5d1ed81e..1dbd2f20 100755 --- a/uncompyle2/__init__.py +++ b/uncompyle2/__init__.py @@ -1,39 +1,40 @@ -# Copyright (c) 1999 John Aycock -# Copyright (c) 2000 by hartmut Goebel -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# See the file 'CHANGES' for a list of changes -# -# NB. This is not a masterpiece of software, but became more like a hack. -# Probably a complete rewrite would be sensefull. hG/2000-12-27 -# +''' + Copyright (c) 1999 John Aycock + Copyright (c) 2000 by hartmut Goebel + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + See the file 'CHANGES' for a list of changes + + NB. This is not a masterpiece of software, but became more like a hack. + Probably a complete rewrite would be sensefull. hG/2000-12-27 +''' import sys, types, os -import Walker, verify, magics +import walker, verify, magics sys.setrecursionlimit(5000) -__all__ = ['uncompyle_file', 'uncompyle_file', 'main'] +__all__ = ['uncompyle_file', 'main'] def _load_file(filename): - """ + ''' load a Python source file and compile it to byte-code _load_module(filename: string): code_object @@ -43,26 +44,27 @@ def _load_file(filename): code_object: code_object compiled from this source code This function does NOT write any file! - """ + ''' fp = open(filename, 'rb') source = fp.read()+'\n' try: co = compile(source, filename, 'exec') except SyntaxError: - print >> sys.stderr, '>>Syntax error in', filename + print >> sys.stderr, '>>Syntax error in', filename, '\n' raise fp.close() return co def _load_module(filename): - """ + ''' load a module without importing it _load_module(filename: string): code_object filename: name of file containing Python byte-code object (normally a .pyc) code_object: code_object from this file - """ + ''' + import magics, marshal fp = open(filename, 'rb') magic = fp.read(4) @@ -79,33 +81,35 @@ def _load_module(filename): return version, co def uncompyle(version, co, out=None, showasm=0, showast=0): - """ + ''' diassembles a given code block 'co' - """ + ''' assert type(co) == types.CodeType # store final output stream for case of error __real_out = out or sys.stdout if co.co_filename: print >>__real_out, '#Embedded file name: %s' % co.co_filename - # diff scanner + # diff scanner if version == 2.7: - import Scanner27 as scan + import scanner27 as scan + scanner = scan.Scanner27() elif version == 2.6: - import Scanner26 as scan + import scanner26 as scan + scanner = scan.Scanner26() elif version == 2.5: - import Scanner25 as scan - - scanner = scan.Scanner(version) + import scanner25 as scan + scanner = scan.Scanner25() + scanner.setShowAsm(showasm, out) tokens, customize = scanner.disassemble(co) #sys.exit(0) # Build AST from disassembly. - walker = Walker.Walker(out, scanner, showast=showast) + walk = walker.Walker(out, scanner, showast=showast) try: - ast = walker.build_ast(tokens, customize) - except Walker.ParserError, e : # parser failed, dump disassembly + ast = walk.build_ast(tokens, customize) + except walker.ParserError, e : # parser failed, dump disassembly print >>__real_out, e raise @@ -114,20 +118,20 @@ def uncompyle(version, co, out=None, showasm=0, showast=0): # convert leading '__doc__ = "..." into doc string assert ast == 'stmts' try: - if ast[0][0] == Walker.ASSIGN_DOC_STRING(co.co_consts[0]): - walker.print_docstring('', co.co_consts[0]) + if ast[0][0] == walker.ASSIGN_DOC_STRING(co.co_consts[0]): + walk.print_docstring('', co.co_consts[0]) del ast[0] - if ast[-1] == Walker.RETURN_NONE: + if ast[-1] == walker.RETURN_NONE: ast.pop() # remove last node #todo: if empty, add 'pass' except: pass - walker.mod_globs = Walker.find_globals(ast, set()) - walker.gen_source(ast, customize) - for g in walker.mod_globs: - walker.write('global %s ## Warning: Unused global\n' % g) - if walker.ERROR: - raise walker.ERROR + walk.mod_globs = walker.find_globals(ast, set()) + walk.gen_source(ast, customize) + for g in walk.mod_globs: + walk.write('global %s ## Warning: Unused global' % g) + if walk.ERROR: + raise walk.ERROR def uncompyle_file(filename, outstream=None, showasm=0, showast=0): """ @@ -137,7 +141,7 @@ def uncompyle_file(filename, outstream=None, showasm=0, showast=0): uncompyle(version, co, outstream, showasm, showast) co = None -#---- main ------- +# ---- main ---- if sys.platform.startswith('linux') and os.uname()[2][:2] == '2.': def __memUsage(): @@ -151,7 +155,7 @@ else: def main(in_base, out_base, files, codes, outfile=None, showasm=0, showast=0, do_verify=0): - """ + ''' in_base base directory for input files out_base base directory for output files (ignored when files list of filenames to be uncompyled (relative to src_base) @@ -161,11 +165,12 @@ def main(in_base, out_base, files, codes, outfile=None, - outfile= (out_base is ignored) - files below out_base out_base=... - stdout out_base=None, outfile=None - """ + ''' def _get_outstream(outfile): dir = os.path.dirname(outfile) failed_file = outfile + '_failed' - if os.path.exists(failed_file): os.remove(failed_file) + if os.path.exists(failed_file): + os.remove(failed_file) try: os.makedirs(dir) except OSError: @@ -173,7 +178,6 @@ def main(in_base, out_base, files, codes, outfile=None, return open(outfile, 'w') of = outfile - tot_files = okay_files = failed_files = verify_failed_files = 0 for code in codes: @@ -203,30 +207,39 @@ def main(in_base, out_base, files, codes, outfile=None, if outfile: outstream.close() os.remove(outfile) + sys.stderr.write("\nLast file: %s " % (infile)) raise except: failed_files += 1 - sys.stderr.write("\n# Can't uncompyle %s\n" % infile) if outfile: outstream.close() os.rename(outfile, outfile + '_failed') - import traceback - traceback.print_exc() + else: + sys.stderr.write("\n# Can't uncompyle %s\n" % infile) + import traceback + traceback.print_exc() #raise - else: # uncompyle successfull + else: # uncompyle successfull if outfile: outstream.close() if do_verify: try: verify.compare_code_with_srcfile(infile, outfile) - print '\n# okay decompyling', infile, __memUsage() + if not outfile: print '\n# okay decompyling', infile, __memUsage() okay_files += 1 except verify.VerifyCmpError, e: verify_failed_files += 1 os.rename(outfile, outfile + '_unverified') - print >>sys.stderr, "### Error Verifiying", file - print >>sys.stderr, e + if not outfile: + print >>sys.stderr, "### Error Verifiying", file + print >>sys.stderr, e else: okay_files += 1 - print '\n# okay decompyling', infile, __memUsage() + if not outfile: print '\n# okay decompyling', infile, __memUsage() + if outfile: + sys.stdout.write("decompiled %i files: %i okay, %i failed, %i verify failed\r" % (tot_files, okay_files, failed_files, verify_failed_files)) + sys.stdout.flush() + if outfile: + sys.stdout.write("\n") + sys.stdout.flush() return (tot_files, okay_files, failed_files, verify_failed_files) diff --git a/uncompyle2/disas.py b/uncompyle2/disas.py index e732f6d3..4884bb32 100755 --- a/uncompyle2/disas.py +++ b/uncompyle2/disas.py @@ -139,7 +139,7 @@ def disassemble_string(code, lasti=-1, varnames=None, names=None, print '(' + cmp_op[oparg] + ')', print -disco = disassemble # XXX For backwards compatibility +disco = disassemble # XXX For backwards compatibility def findlabels(code): """Detect all offsets in a byte code which are jump targets. @@ -189,28 +189,6 @@ def findlinestarts(code): if lineno != lastlineno: yield (addr, lineno) -def setVersion(version): - if version == 2.7: - import uncompyle2.opcode.opcode_27 as opcodyn - elif version == 2.6: - import uncompyle2.opcode.opcode_26 as opcodyn - elif version == 2.5: - import uncompyle2.opcode.opcode_25 as opcodyn - - globals().update({'cmp_op': opcodyn.cmp_op}) - globals().update({'hasconst': opcodyn.hasconst}) - globals().update({'hasname': opcodyn.hasname}) - globals().update({'hasjrel': opcodyn.hasjrel}) - globals().update({'hasjabs': opcodyn.hasjabs}) - globals().update({'haslocal': opcodyn.haslocal}) - globals().update({'hascompare': opcodyn.hascompare}) - globals().update({'hasfree': opcodyn.hasfree}) - globals().update({'opname': opcodyn.opname}) - globals().update({'opmap': opcodyn.opmap}) - globals().update({'HAVE_ARGUMENT': opcodyn.HAVE_ARGUMENT}) - globals().update({'EXTENDED_ARG': opcodyn.EXTENDED_ARG}) - - def _test(): """Simple test program to disassemble a file.""" if sys.argv[1:]: diff --git a/uncompyle2/opcode/opcode_25.py b/uncompyle2/opcode/opcode_25.py index 699ecd91..94a668a7 100755 --- a/uncompyle2/opcode/opcode_25.py +++ b/uncompyle2/opcode/opcode_25.py @@ -1,12 +1,7 @@ - -""" +''' opcode module - potentially shared between dis and other modules which operate on bytecodes (e.g. peephole optimizers). -""" - -__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs", - "haslocal", "hascompare", "hasfree", "opname", "opmap", - "HAVE_ARGUMENT", "EXTENDED_ARG"] +''' cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is', 'is not', 'exception match', 'BAD') @@ -18,6 +13,7 @@ hasjabs = [] haslocal = [] hascompare = [] hasfree = [] +PJIF = PJIT = JA = JF = 0 opmap = {} opname = [''] * 256 @@ -27,6 +23,7 @@ del op def def_op(name, op): opname[op] = name opmap[name] = op + globals().update({name: op}) def name_op(name, op): def_op(name, op) @@ -40,6 +37,14 @@ def jabs_op(name, op): def_op(name, op) hasjabs.append(op) +def updateGlobal(): + globals().update({'PJIF': opmap['JUMP_IF_FALSE']}) + globals().update({'PJIT': opmap['JUMP_IF_TRUE']}) + globals().update({'JA': opmap['JUMP_ABSOLUTE']}) + globals().update({'JF': opmap['JUMP_FORWARD']}) + globals().update({k.replace('+','_'):v for (k,v) in opmap.items()}) + globals().update({'JUMP_OPs': map(lambda op: opname[op], hasjrel + hasjabs)}) + # Instruction opcodes for compiled code # Blank lines correspond to available opcodes @@ -182,4 +187,5 @@ def_op('CALL_FUNCTION_VAR_KW', 142) # 113 # #args + (#kwargs << 8) def_op('EXTENDED_ARG', 143) # 114 EXTENDED_ARG = 143 # 114 +updateGlobal() del def_op, name_op, jrel_op, jabs_op diff --git a/uncompyle2/opcode/opcode_26.py b/uncompyle2/opcode/opcode_26.py index 99856f43..250f8eda 100755 --- a/uncompyle2/opcode/opcode_26.py +++ b/uncompyle2/opcode/opcode_26.py @@ -1,12 +1,7 @@ - -""" +''' opcode module - potentially shared between dis and other modules which operate on bytecodes (e.g. peephole optimizers). -""" - -__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs", - "haslocal", "hascompare", "hasfree", "opname", "opmap", - "HAVE_ARGUMENT", "EXTENDED_ARG"] +''' cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is', 'is not', 'exception match', 'BAD') @@ -18,6 +13,7 @@ hasjabs = [] haslocal = [] hascompare = [] hasfree = [] +PJIF = PJIT = JA = JF = 0 opmap = {} opname = [''] * 256 @@ -27,6 +23,7 @@ del op def def_op(name, op): opname[op] = name opmap[name] = op + globals().update({name: op}) def name_op(name, op): def_op(name, op) @@ -39,6 +36,14 @@ def jrel_op(name, op): def jabs_op(name, op): def_op(name, op) hasjabs.append(op) + +def updateGlobal(): + globals().update({'PJIF': opmap['JUMP_IF_FALSE']}) + globals().update({'PJIT': opmap['JUMP_IF_TRUE']}) + globals().update({'JA': opmap['JUMP_ABSOLUTE']}) + globals().update({'JF': opmap['JUMP_FORWARD']}) + globals().update({k.replace('+','_'):v for (k,v) in opmap.items()}) + globals().update({'JUMP_OPs': map(lambda op: opname[op], hasjrel + hasjabs)}) # Instruction opcodes for compiled code # Blank lines correspond to available opcodes @@ -183,4 +188,5 @@ def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8) def_op('EXTENDED_ARG', 143) EXTENDED_ARG = 143 +updateGlobal() del def_op, name_op, jrel_op, jabs_op diff --git a/uncompyle2/opcode/opcode_27.py b/uncompyle2/opcode/opcode_27.py index e403365b..05b655b2 100755 --- a/uncompyle2/opcode/opcode_27.py +++ b/uncompyle2/opcode/opcode_27.py @@ -1,12 +1,7 @@ - -""" +''' opcode module - potentially shared between dis and other modules which operate on bytecodes (e.g. peephole optimizers). -""" - -__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs", - "haslocal", "hascompare", "hasfree", "opname", "opmap", - "HAVE_ARGUMENT", "EXTENDED_ARG"] +''' cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is', 'is not', 'exception match', 'BAD') @@ -18,6 +13,7 @@ hasjabs = [] haslocal = [] hascompare = [] hasfree = [] +PJIF = PJIT = JA = JF = 0 opmap = {} opname = [''] * 256 @@ -27,6 +23,7 @@ del op def def_op(name, op): opname[op] = name opmap[name] = op + globals().update({name: op}) def name_op(name, op): def_op(name, op) @@ -40,6 +37,14 @@ def jabs_op(name, op): def_op(name, op) hasjabs.append(op) +def updateGlobal(): + globals().update({'PJIF': opmap['POP_JUMP_IF_FALSE']}) + globals().update({'PJIT': opmap['POP_JUMP_IF_TRUE']}) + globals().update({'JA': opmap['JUMP_ABSOLUTE']}) + globals().update({'JF': opmap['JUMP_FORWARD']}) + globals().update({k.replace('+','_'):v for (k,v) in opmap.items()}) + globals().update({'JUMP_OPs': map(lambda op: opname[op], hasjrel + hasjabs)}) + # Instruction opcodes for compiled code # Blank lines correspond to available opcodes @@ -189,4 +194,5 @@ EXTENDED_ARG = 145 def_op('SET_ADD', 146) def_op('MAP_ADD', 147) +updateGlobal() del def_op, name_op, jrel_op, jabs_op diff --git a/uncompyle2/Parser.py b/uncompyle2/parser.py similarity index 98% rename from uncompyle2/Parser.py rename to uncompyle2/parser.py index cbbd7dbc..c6f92ca4 100755 --- a/uncompyle2/Parser.py +++ b/uncompyle2/parser.py @@ -1,9 +1,10 @@ -# Copyright (c) 1999 John Aycock -# Copyright (c) 2000-2002 by hartmut Goebel -# Copyright (c) 2005 by Dan Pascu -# -# See main module for license. -# +''' + Copyright (c) 1999 John Aycock + Copyright (c) 2000-2002 by hartmut Goebel + Copyright (c) 2005 by Dan Pascu + + See main module for license. +''' __all__ = ['parse', 'AST', 'ParserError', 'Parser'] @@ -11,7 +12,7 @@ from spark import GenericASTBuilder import string, exceptions, sys from UserList import UserList -from Scanner import Token +from scanner import Token class AST(UserList): def __init__(self, type, kids=[]): @@ -41,7 +42,7 @@ class ParserError(Exception): self.offset = offset def __str__(self): - return "Syntax error at or near `%r' token at offset %s" % \ + return "Syntax error at or near `%r' token at offset %s\n" % \ (self.token, self.offset) @@ -774,5 +775,5 @@ def parse(tokens, customize): raise Exception('unknown customize token %s' % k) p.addRule(rule, nop) ast = p.parse(tokens) -# p.cleanup() +# p.cleanup() return ast diff --git a/uncompyle2/scanner.py b/uncompyle2/scanner.py new file mode 100755 index 00000000..6f172be9 --- /dev/null +++ b/uncompyle2/scanner.py @@ -0,0 +1,274 @@ +# Copyright (c) 1999 John Aycock +# Copyright (c) 2000-2002 by hartmut Goebel +# Copyright (c) 2005 by Dan Pascu +# +# See main module for license. +# + +__all__ = ['Token', 'Scanner', 'Code'] + +import types +from collections import namedtuple +from array import array +from operator import itemgetter + +from uncompyle2.opcode import opcode_25, opcode_26, opcode_27 + +class Token: + ''' + Class representing a byte-code token. + + A byte-code token is equivalent to the contents of one line + as output by dis.dis(). + ''' + def __init__(self, type_, attr=None, pattr=None, offset=-1, linestart=False): + self.type = intern(type_) + self.attr = attr + self.pattr = pattr + self.offset = offset + self.linestart = linestart + + def __cmp__(self, o): + if isinstance(o, Token): + # both are tokens: compare type and pattr + return cmp(self.type, o.type) or cmp(self.pattr, o.pattr) + else: + return cmp(self.type, o) + + def __repr__(self): return str(self.type) + def __str__(self): + pattr = self.pattr + if self.linestart: + return '\n%s\t%-17s %r' % (self.offset, self.type, pattr) + else: + return '%s\t%-17s %r' % (self.offset, self.type, pattr) + + def __hash__(self): return hash(self.type) + def __getitem__(self, i): raise IndexError + +class Code: + ''' + Class for representing code-objects. + + This is similar to the original code object, but additionally + the diassembled code is stored in the attribute '_tokens'. + ''' + def __init__(self, co, scanner, classname=None): + for i in dir(co): + if i.startswith('co_'): + setattr(self, i, getattr(co, i)) + self._tokens, self._customize = scanner.disassemble(co, classname) + +class Scanner(object): + opc = None # opcode module + + def __init__(self, version): + if version == 2.7: + self.opc = opcode_27 + elif version == 2.6: + self.opc = opcode_26 + elif version == 2.5: + self.opc = opcode_25 + + return self.resetTokenClass() + + def setShowAsm(self, showasm, out=None): + self.showasm = showasm + self.out = out + + def setTokenClass(self, tokenClass): + assert type(tokenClass) == types.ClassType + self.Token = tokenClass + return self.Token + + def resetTokenClass(self): + return self.setTokenClass(Token) + + def get_target(self, pos, op=None): + if op is None: + op = self.code[pos] + target = self.get_argument(pos) + if op in self.opc.hasjrel: + target += pos + 3 + return target + + def get_argument(self, pos): + target = self.code[pos+1] + self.code[pos+2] * 256 + return target + + def print_bytecode(self): + for i in self.op_range(0, len(self.code)): + op = self.code[i] + if op in self.opc.hasjabs+self.opc.hasjrel: + dest = self.get_target(i, op) + print '%i\t%s\t%i' % (i, self.opc.opname[op], dest) + else: + print '%i\t%s\t' % (i, self.opc.opname[op]) + + def first_instr(self, start, end, instr, target=None, exact=True): + ''' + Find the first in the block from start to end. + is any python bytecode instruction or a list of opcodes + If is an opcode with a target (like a jump), a target + destination can be specified which must match precisely if exact + is True, or if exact is False, the instruction which has a target + closest to will be returned. + + Return index to it or None if not found. + ''' + code = self.code + assert(start>=0 and end<=len(code)) + + try: None in instr + except: instr = [instr] + + pos = None + distance = len(code) + for i in self.op_range(start, end): + op = code[i] + if op in instr: + if target is None: + return i + dest = self.get_target(i, op) + if dest == target: + return i + elif not exact: + _distance = abs(target - dest) + if _distance < distance: + distance = _distance + pos = i + return pos + + def last_instr(self, start, end, instr, target=None, exact=True): + ''' + Find the last in the block from start to end. + is any python bytecode instruction or a list of opcodes + If is an opcode with a target (like a jump), a target + destination can be specified which must match precisely if exact + is True, or if exact is False, the instruction which has a target + closest to will be returned. + + Return index to it or None if not found. + ''' + + code = self.code + if not (start>=0 and end<=len(code)): + return None + + try: None in instr + except: instr = [instr] + + pos = None + distance = len(code) + for i in self.op_range(start, end): + op = code[i] + if op in instr: + if target is None: + pos = i + else: + dest = self.get_target(i, op) + if dest == target: + distance = 0 + pos = i + elif not exact: + _distance = abs(target - dest) + if _distance <= distance: + distance = _distance + pos = i + return pos + + def all_instr(self, start, end, instr, target=None, include_beyond_target=False): + ''' + Find all in the block from start to end. + is any python bytecode instruction or a list of opcodes + If is an opcode with a target (like a jump), a target + destination can be specified which must match precisely. + + Return a list with indexes to them or [] if none found. + ''' + + code = self.code + assert(start>=0 and end<=len(code)) + + try: None in instr + except: instr = [instr] + + result = [] + for i in self.op_range(start, end): + op = code[i] + if op in instr: + if target is None: + result.append(i) + else: + t = self.get_target(i, op) + if include_beyond_target and t >= target: + result.append(i) + elif t == target: + result.append(i) + return result + + def op_size(self, op): + if op < self.opc.HAVE_ARGUMENT: + return 1 + else: + return 3 + + def op_range(self, start, end): + while start < end: + yield start + start += self.op_size(self.code[start]) + + def remove_mid_line_ifs(self, ifs): + filtered = [] + for i in ifs: + if self.lines[i].l_no == self.lines[i+3].l_no: + if self.code[self.prev[self.lines[i].next]] in (self.opc.PJIT, self.opc.PJIF): + continue + filtered.append(i) + return filtered + + def rem_or(self, start, end, instr, target=None, include_beyond_target=False): + ''' + Find all in the block from start to end. + is any python bytecode instruction or a list of opcodes + If is an opcode with a target (like a jump), a target + destination can be specified which must match precisely. + + Return a list with indexes to them or [] if none found. + ''' + + code = self.code + assert(start>=0 and end<=len(code)) + + try: None in instr + except: instr = [instr] + + result = [] + for i in self.op_range(start, end): + op = code[i] + if op in instr: + if target is None: + result.append(i) + else: + t = self.get_target(i, op) + if include_beyond_target and t >= target: + result.append(i) + elif t == target: + result.append(i) + + pjits = self.all_instr(start, end, self.opc.PJIT) + filtered = [] + for pjit in pjits: + tgt = self.get_target(pjit)-3 + for i in result: + if i <= pjit or i >= tgt: + filtered.append(i) + result = filtered + filtered = [] + return result + + def restrict_to_parent(self, target, parent): + '''Restrict pos to parent boundaries.''' + if not (parent['start'] < target < parent['end']): + target = parent['end'] + return target \ No newline at end of file diff --git a/uncompyle2/Scanner25.py b/uncompyle2/scanner25.py similarity index 77% rename from uncompyle2/Scanner25.py rename to uncompyle2/scanner25.py index 3f22697f..23153f94 100755 --- a/uncompyle2/Scanner25.py +++ b/uncompyle2/scanner25.py @@ -1,54 +1,32 @@ -# Copyright (c) 1999 John Aycock -# Copyright (c) 2000-2002 by hartmut Goebel -# Copyright (c) 2005 by Dan Pascu -# -# See main module for license. -# +''' + Copyright (c) 1999 John Aycock + Copyright (c) 2000-2002 by hartmut Goebel + Copyright (c) 2005 by Dan Pascu -__all__ = ['Token', 'Scanner', 'getscanner'] + See main module for license. +''' import types -import disas as dis from collections import namedtuple from array import array from operator import itemgetter from struct import * -from Scanner import Token, Code -class Scanner: - def __init__(self, version): - self.version = version - self.resetTokenClass() +from uncompyle2.opcode.opcode_25 import * +import disas as dis +import scanner as scan - dis.setVersion(version) - globals().update({'HAVE_ARGUMENT': dis.HAVE_ARGUMENT}) - globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()}) - globals().update({'PJIF': dis.opmap['JUMP_IF_FALSE']}) - globals().update({'PJIT': dis.opmap['JUMP_IF_TRUE']}) - globals().update({'JA': dis.opmap['JUMP_ABSOLUTE']}) - globals().update({'JF': dis.opmap['JUMP_FORWARD']}) - - self.JUMP_OPs = map(lambda op: dis.opname[op], - dis.hasjrel + dis.hasjabs) - - def setShowAsm(self, showasm, out=None): - self.showasm = showasm - self.out = out - - def setTokenClass(self, tokenClass): - assert type(tokenClass) == types.ClassType - self.Token = tokenClass - - def resetTokenClass(self): - self.setTokenClass(Token) - +class Scanner25(scan.Scanner): + def __init__(self): + self.Token = scan.Scanner.__init__(self, 2.5) + def disassemble(self, co, classname=None): - """ + ''' Disassemble a code object, returning a list of 'Token'. The main part of this procedure is modelled after dis.disassemble(). - """ + ''' rv = [] customize = {} Token = self.Token # shortcut @@ -93,7 +71,7 @@ class Scanner: delta = 0 self.restructCode(toDel) for x in toDel: - if self.code[x-delta] >= dis.HAVE_ARGUMENT: + if self.code[x-delta] >= HAVE_ARGUMENT: self.code.pop(x-delta) self.code.pop(x-delta) self.code.pop(x-delta) @@ -154,7 +132,7 @@ class Scanner: extended_arg = 0 for offset in self.op_range(0, n): op = self.code[offset] - opname = dis.opname[op] + op_name = opname[op] oparg = None; pattr = None if offset in cf: @@ -166,23 +144,23 @@ class Scanner: if op >= HAVE_ARGUMENT: oparg = self.get_argument(offset) + extended_arg extended_arg = 0 - if op == dis.EXTENDED_ARG: + if op == EXTENDED_ARG: raise 'TODO' extended_arg = oparg * 65536L continue - if op in dis.hasconst: + if op in hasconst: const = co.co_consts[oparg] if type(const) == types.CodeType: oparg = const if const.co_name == '': - assert opname == 'LOAD_CONST' - opname = 'LOAD_LAMBDA' + assert op_name == 'LOAD_CONST' + op_name = 'LOAD_LAMBDA' elif const.co_name == '': - opname = 'LOAD_GENEXPR' + op_name = 'LOAD_GENEXPR' elif const.co_name == '': - opname = 'LOAD_DICTCOMP' + op_name = 'LOAD_DICTCOMP' elif const.co_name == '': - opname = 'LOAD_SETCOMP' + op_name = 'LOAD_SETCOMP' # verify uses 'pattr' for comparism, since 'attr' # now holds Code(const) and thus can not be used # for comparism (todo: think about changing this) @@ -191,21 +169,21 @@ class Scanner: pattr = '' else: pattr = const - elif op in dis.hasname: + elif op in hasname: pattr = names[oparg] - elif op in dis.hasjrel: + elif op in hasjrel: pattr = repr(offset + 3 + oparg) - elif op in dis.hasjabs: + elif op in hasjabs: pattr = repr(oparg) - elif op in dis.haslocal: + elif op in haslocal: pattr = varnames[oparg] - elif op in dis.hascompare: - pattr = dis.cmp_op[oparg] - elif op in dis.hasfree: + elif op in hascompare: + pattr = cmp_op[oparg] + elif op in hasfree: pattr = free[oparg] if offset in self.toChange: if self.code[offset] == JA and self.code[oparg] == WITH_CLEANUP: - opname = 'SETUP_WITH' + op_name = 'SETUP_WITH' cf[oparg] = cf.get(oparg, []) + [offset] if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SLICE, UNPACK_SEQUENCE, @@ -220,30 +198,30 @@ class Scanner: self.code[offset-3] == LOAD_CLOSURE: continue else: - opname = '%s_%d' % (opname, oparg) + op_name = '%s_%d' % (op_name, oparg) if op != BUILD_SLICE: - customize[opname] = oparg + customize[op_name] = oparg elif op == JA: target = self.get_target(offset) if target < offset: if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \ and offset not in self.not_continue: - opname = 'CONTINUE' + op_name = 'CONTINUE' else: - opname = 'JUMP_BACK' + op_name = 'JUMP_BACK' elif op == LOAD_GLOBAL: try: if pattr == 'AssertionError' and rv and rv[-1] == 'JUMP_IF_TRUE': - opname = 'LOAD_ASSERT' + op_name = 'LOAD_ASSERT' except AttributeError: pass elif op == RETURN_VALUE: if offset in self.return_end_ifs: - opname = 'RETURN_END_IF' + op_name = 'RETURN_END_IF' if offset not in replace: - rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets)) + rv.append(Token(op_name, oparg, pattr, offset, linestart = offset in linestartoffsets)) else: rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets)) @@ -255,9 +233,9 @@ class Scanner: return rv, customize def getOpcodeToDel(self, i): - """ + ''' check validity of the opcode at position I and return a list of opcode to delete - """ + ''' opcode = self.code[i] opsize = self.op_size(opcode) if opcode == EXTENDED_ARG: @@ -380,10 +358,11 @@ class Scanner: return None def restructRelativeJump(self): - """ + ''' change relative JUMP_IF_FALSE/TRUE to absolut jump and remap the target of PJIF/PJIT - """ + ''' + for i in self.op_range(0, len(self.code)): if(self.code[i] in (PJIF,PJIT)): target = self.get_argument(i) @@ -398,9 +377,9 @@ class Scanner: self.restructJump(i, target) def restructCode(self, listDel): - """ + ''' restruct linestarts and jump destination after removing bad opcode - """ + ''' result = list() for block in self.linestarts: @@ -413,21 +392,24 @@ class Scanner: result.append((block[0]+startBlock, block[1])) self.linestarts = result - for change in self.toChange: + for index in xrange(len(self.toChange)): + change = self.toChange[index] + delta = 0 for toDel in listDel: if change > toDel: - self.toChange[self.toChange.index(change)] -= self.op_size(self.code[toDel]) + delta += self.op_size(self.code[toDel]) else: break + self.toChange[index] -= delta for jmp in self.op_range(0, len(self.code)): op = self.code[jmp] - if op in dis.hasjrel+dis.hasjabs: # jmp + if op in hasjrel+hasjabs: # jmp offset = 0 jmpTarget = self.get_target(jmp) for toDel in listDel: if toDel < jmpTarget: - if op in dis.hasjabs: + if op in hasjabs: offset-=self.op_size(self.code[toDel]) elif jmp < toDel: offset-=self.op_size(self.code[toDel]) @@ -436,7 +418,7 @@ class Scanner: self.restructJump(jmp, self.get_target(jmp)+offset) def restructJump(self, pos, newTarget): - if not (self.code[pos] in dis.hasjabs+dis.hasjrel): + if not (self.code[pos] in hasjabs+hasjrel): raise 'Can t change this argument. Opcode is not a jump' if newTarget > 0xFFFF: raise 'TODO' @@ -447,140 +429,6 @@ class Scanner: self.code[pos+2] = (target >> 8) & 0xFF self.code[pos+1] = target & 0xFF - def get_target(self, pos, op=None): - if op is None: - op = self.code[pos] - target = self.get_argument(pos) - if op in dis.hasjrel: - target += pos + 3 - return target - - def get_argument(self, pos): - target = self.code[pos+1] + self.code[pos+2] * 256 - return target - - def print_bytecode(self): - for i in self.op_range(0, len(self.code)): - op = self.code[i] - if op in dis.hasjabs+dis.hasjrel: - dest = self.get_target(i, op) - print '%i\t%s\t%i' % (i, dis.opname[op], dest) - else: - print '%i\t%s\t' % (i, dis.opname[op]) - - def first_instr(self, start, end, instr, target=None, exact=True): - """ - Find the first in the block from start to end. - is any python bytecode instruction or a list of opcodes - If is an opcode with a target (like a jump), a target - destination can be specified which must match precisely if exact - is True, or if exact is False, the instruction which has a target - closest to will be returned. - - Return index to it or None if not found. - """ - code = self.code - assert(start>=0 and end<=len(code)) - - try: None in instr - except: instr = [instr] - - pos = None - distance = len(code) - for i in self.op_range(start, end): - op = code[i] - if op in instr: - if target is None: - return i - dest = self.get_target(i, op) - if dest == target: - return i - elif not exact: - _distance = abs(target - dest) - if _distance < distance: - distance = _distance - pos = i - return pos - - def last_instr(self, start, end, instr, target=None, exact=True): - """ - Find the last in the block from start to end. - is any python bytecode instruction or a list of opcodes - If is an opcode with a target (like a jump), a target - destination can be specified which must match precisely if exact - is True, or if exact is False, the instruction which has a target - closest to will be returned. - - Return index to it or None if not found. - """ - - code = self.code - if not (start>=0 and end<=len(code)): - return None - - try: None in instr - except: instr = [instr] - - pos = None - distance = len(code) - for i in self.op_range(start, end): - op = code[i] - if op in instr: - if target is None: - pos = i - else: - dest = self.get_target(i, op) - if dest == target: - distance = 0 - pos = i - elif not exact: - _distance = abs(target - dest) - if _distance <= distance: - distance = _distance - pos = i - return pos - - def all_instr(self, start, end, instr, target=None, include_beyond_target=False): - """ - Find all in the block from start to end. - is any python bytecode instruction or a list of opcodes - If is an opcode with a target (like a jump), a target - destination can be specified which must match precisely. - - Return a list with indexes to them or [] if none found. - """ - - code = self.code - assert(start>=0 and end<=len(code)) - - try: None in instr - except: instr = [instr] - - result = [] - for i in self.op_range(start, end): - op = code[i] - if op in instr: - if target is None: - result.append(i) - else: - t = self.get_target(i, op) - if include_beyond_target and t >= target: - result.append(i) - elif t == target: - result.append(i) - return result - - def op_size(self, op): - if op < HAVE_ARGUMENT: - return 1 - else: - return 3 - - def op_range(self, start, end): - while start < end: - yield start - start += self.op_size(self.code[start]) - def build_stmt_indices(self): code = self.code start = 0; @@ -659,61 +507,11 @@ class Scanner: i = s slist += [len(code)] * (len(code)-len(slist)) - def remove_mid_line_ifs(self, ifs): - filtered = [] - for i in ifs: - if self.lines[i].l_no == self.lines[i+3].l_no: - if self.code[self.prev[self.lines[i].next]] in (PJIT, PJIF): - continue - filtered.append(i) - return filtered - - - def rem_or(self, start, end, instr, target=None, include_beyond_target=False): - """ - Find all in the block from start to end. - is any python bytecode instruction or a list of opcodes - If is an opcode with a target (like a jump), a target - destination can be specified which must match precisely. - - Return a list with indexes to them or [] if none found. - """ - - code = self.code - assert(start>=0 and end<=len(code)) - - try: None in instr - except: instr = [instr] - - result = [] - for i in self.op_range(start, end): - op = code[i] - if op in instr: - if target is None: - result.append(i) - else: - t = self.get_target(i, op) - if include_beyond_target and t >= target: - result.append(i) - elif t == target: - result.append(i) - - pjits = self.all_instr(start, end, PJIT) - filtered = [] - for pjit in pjits: - tgt = self.get_target(pjit)-3 - for i in result: - if i <= pjit or i >= tgt: - filtered.append(i) - result = filtered - filtered = [] - return result - def next_except_jump(self, start): - """ + ''' Return the next jump that was generated by an except SomeException: construct in a try...except...else clause or None if not found. - """ + ''' except_match = self.first_instr(start, self.lines[start].next, (PJIF)) if except_match: jmp = self.prev[self.get_target(except_match)] @@ -734,18 +532,12 @@ class Scanner: elif op in (SETUP_EXCEPT, SETUP_FINALLY): count_SETUP_ += 1 #return self.lines[start].next - - def restrict_to_parent(self, target, parent): - """Restrict pos to parent boundaries.""" - if not (parent['start'] < target < parent['end']): - target = parent['end'] - return target def detect_structure(self, pos, op=None): - """ + ''' Detect type of block structures and their boundaries to fix optimizied jumps in python2.3+ - """ + ''' # TODO: check the struct boundaries more precisely -Dan @@ -848,7 +640,7 @@ class Scanner: ## Add the except blocks i = end - while self.code[i] != END_FINALLY: + while i < len(self.code) and self.code[i] != END_FINALLY: jmp = self.next_except_jump(i) if jmp == None: # check i = self.next_stmt[i] @@ -863,7 +655,7 @@ class Scanner: end_else = self.get_target(jmp) if self.code[jmp] == JF: #self.fixed_jumps[i] = jmp - self.fixed_jumps[jmp] = -1 + self.fixed_jumps[jmp] = -1 self.structs.append({'type': 'except', 'start': i, 'end': jmp}) @@ -989,17 +781,14 @@ class Scanner: # self.fixed_jumps[pos] = self.restrict_to_parent(target, parent) def find_jump_targets(self, code): - """ + ''' Detect all offsets in a byte code which are jump targets. Return the list of offsets. This procedure is modelled after dis.findlables(), but here for each target the number of jumps are counted. - """ - - hasjrel = dis.hasjrel - hasjabs = dis.hasjabs + ''' n = len(code) self.structs = [{'type': 'root', @@ -1035,4 +824,3 @@ class Scanner: label = self.fixed_jumps[i] targets[label] = targets.get(label, []) + [i] return targets - diff --git a/uncompyle2/Scanner26.py b/uncompyle2/scanner26.py similarity index 77% rename from uncompyle2/Scanner26.py rename to uncompyle2/scanner26.py index a9a31585..75046b78 100755 --- a/uncompyle2/Scanner26.py +++ b/uncompyle2/scanner26.py @@ -1,54 +1,33 @@ -# Copyright (c) 1999 John Aycock -# Copyright (c) 2000-2002 by hartmut Goebel -# Copyright (c) 2005 by Dan Pascu -# -# See main module for license. -# +''' + Copyright (c) 1999 John Aycock + Copyright (c) 2000-2002 by hartmut Goebel + Copyright (c) 2005 by Dan Pascu -__all__ = ['Token', 'Scanner', 'getscanner'] + See main module for license. +''' import types -import disas as dis from collections import namedtuple from array import array from operator import itemgetter from struct import * -from Scanner import Token, Code -class Scanner: - def __init__(self, version): - self.version = version - self.resetTokenClass() +from uncompyle2.opcode.opcode_26 import * +import disas as dis +import scanner as scan - dis.setVersion(version) - globals().update({'HAVE_ARGUMENT': dis.HAVE_ARGUMENT}) - globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()}) - globals().update({'PJIF': dis.opmap['JUMP_IF_FALSE']}) - globals().update({'PJIT': dis.opmap['JUMP_IF_TRUE']}) - globals().update({'JA': dis.opmap['JUMP_ABSOLUTE']}) - globals().update({'JF': dis.opmap['JUMP_FORWARD']}) - - self.JUMP_OPs = map(lambda op: dis.opname[op], - dis.hasjrel + dis.hasjabs) - - def setShowAsm(self, showasm, out=None): - self.showasm = showasm - self.out = out - - def setTokenClass(self, tokenClass): - assert type(tokenClass) == types.ClassType - self.Token = tokenClass - - def resetTokenClass(self): - self.setTokenClass(Token) +class Scanner26(scan.Scanner): + def __init__(self): + self.Token = scan.Scanner.__init__(self, 2.6) def disassemble(self, co, classname=None): - """ + ''' Disassemble a code object, returning a list of 'Token'. The main part of this procedure is modelled after dis.disassemble(). - """ + ''' + rv = [] customize = {} Token = self.Token # shortcut @@ -59,7 +38,6 @@ class Scanner: self.prev = [0] # change jump struct self.restructRelativeJump() - # class and names if classname: classname = '_' + classname.lstrip('_') + '__' @@ -76,7 +54,7 @@ class Scanner: names = co.co_names varnames = co.co_varnames self.names = names - + # add instruction to remonde in "toDel" list toDel = [] # add instruction to change in "toChange" list @@ -91,7 +69,7 @@ class Scanner: delta = 0 self.restructCode(toDel) for x in toDel: - if self.code[x-delta] >= dis.HAVE_ARGUMENT: + if self.code[x-delta] >= HAVE_ARGUMENT: self.code.pop(x-delta) self.code.pop(x-delta) self.code.pop(x-delta) @@ -111,7 +89,7 @@ class Scanner: j = 0 linestarts = self.linestarts - + self.lines = [] linetuple = namedtuple('linetuple', ['l_no', 'next']) linestartoffsets = {a for (a, _) in linestarts} @@ -154,7 +132,7 @@ class Scanner: extended_arg = 0 for offset in self.op_range(0, n): op = self.code[offset] - opname = dis.opname[op] + op_name = opname[op] oparg = None; pattr = None if offset in cf: @@ -166,23 +144,23 @@ class Scanner: if op >= HAVE_ARGUMENT: oparg = self.get_argument(offset) + extended_arg extended_arg = 0 - if op == dis.EXTENDED_ARG: + if op == EXTENDED_ARG: raise 'TODO' extended_arg = oparg * 65536L continue - if op in dis.hasconst: + if op in hasconst: const = co.co_consts[oparg] if type(const) == types.CodeType: oparg = const if const.co_name == '': - assert opname == 'LOAD_CONST' - opname = 'LOAD_LAMBDA' + assert op_name == 'LOAD_CONST' + op_name = 'LOAD_LAMBDA' elif const.co_name == '': - opname = 'LOAD_GENEXPR' + op_name = 'LOAD_GENEXPR' elif const.co_name == '': - opname = 'LOAD_DICTCOMP' + op_name = 'LOAD_DICTCOMP' elif const.co_name == '': - opname = 'LOAD_SETCOMP' + op_name = 'LOAD_SETCOMP' # verify uses 'pattr' for comparism, since 'attr' # now holds Code(const) and thus can not be used # for comparism (todo: think about changing this) @@ -191,21 +169,21 @@ class Scanner: pattr = '' else: pattr = const - elif op in dis.hasname: + elif op in hasname: pattr = names[oparg] - elif op in dis.hasjrel: + elif op in hasjrel: pattr = repr(offset + 3 + oparg) - elif op in dis.hasjabs: + elif op in hasjabs: pattr = repr(oparg) - elif op in dis.haslocal: + elif op in haslocal: pattr = varnames[oparg] - elif op in dis.hascompare: - pattr = dis.cmp_op[oparg] - elif op in dis.hasfree: + elif op in hascompare: + pattr = cmp_op[oparg] + elif op in hasfree: pattr = free[oparg] if offset in self.toChange: if self.code[offset] == JA and self.code[oparg] == WITH_CLEANUP: - opname = 'SETUP_WITH' + op_name = 'SETUP_WITH' cf[oparg] = cf.get(oparg, []) + [offset] if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SLICE, UNPACK_SEQUENCE, @@ -220,30 +198,30 @@ class Scanner: self.code[offset-3] == LOAD_CLOSURE: continue else: - opname = '%s_%d' % (opname, oparg) + op_name = '%s_%d' % (op_name, oparg) if op != BUILD_SLICE: - customize[opname] = oparg + customize[op_name] = oparg elif op == JA: target = self.get_target(offset) if target < offset: if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \ and offset not in self.not_continue: - opname = 'CONTINUE' + op_name = 'CONTINUE' else: - opname = 'JUMP_BACK' + op_name = 'JUMP_BACK' elif op == LOAD_GLOBAL: try: if pattr == 'AssertionError' and rv and rv[-1] == 'JUMP_IF_TRUE': - opname = 'LOAD_ASSERT' + op_name = 'LOAD_ASSERT' except AttributeError: pass elif op == RETURN_VALUE: if offset in self.return_end_ifs: - opname = 'RETURN_END_IF' + op_name = 'RETURN_END_IF' if offset not in replace: - rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets)) + rv.append(Token(op_name, oparg, pattr, offset, linestart = offset in linestartoffsets)) else: rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets)) @@ -255,9 +233,9 @@ class Scanner: return rv, customize def getOpcodeToDel(self, i): - """ + ''' check validity of the opcode at position I and return a list of opcode to delete - """ + ''' opcode = self.code[i] opsize = self.op_size(opcode) @@ -276,19 +254,21 @@ class Scanner: pass else: toDel += [i+opsize] - # conditional tuple + # conditional tuple (not optimal at all, no good solution...) if self.code[i] == JA and self.code[i+opsize] == POP_TOP \ and self.code[i+opsize+1] == JA and self.code[i+opsize+4] == POP_BLOCK: jmpabs1target = self.get_target(i) jmpabs2target = self.get_target(i+opsize+1) - if jmpabs1target == jmpabs2target and self.code[jmpabs1target] == FOR_ITER: + if jmpabs1target == jmpabs2target and self.code[jmpabs1target] == FOR_ITER \ + and self.code[jmpabs1target-1] != GET_ITER: destFor = self.get_target(jmpabs1target) if destFor == i+opsize+4: setupLoop = self.last_instr(0, jmpabs1target, SETUP_LOOP) standarFor = self.last_instr(setupLoop, jmpabs1target, GET_ITER) - if standarFor == None: + if standarFor == None: self.restructJump(jmpabs1target, destFor+self.op_size(POP_BLOCK)) toDel += [setupLoop, i+opsize+1, i+opsize+4] + if len(toDel) > 0: return toDel return None @@ -377,10 +357,10 @@ class Scanner: return None def restructRelativeJump(self): - """ + ''' change relative JUMP_IF_FALSE/TRUE to absolut jump and remap the target of PJIF/PJIT - """ + ''' for i in self.op_range(0, len(self.code)): if(self.code[i] in (PJIF,PJIT)): target = self.get_argument(i) @@ -395,9 +375,9 @@ class Scanner: self.restructJump(i, target) def restructCode(self, listDel): - """ + ''' restruct linestarts and jump destination after removing a POP_TOP - """ + ''' result = list() for block in self.linestarts: startBlock = 0 @@ -409,21 +389,24 @@ class Scanner: result.append((block[0]+startBlock, block[1])) self.linestarts = result - for change in self.toChange: + for index in xrange(len(self.toChange)): + change = self.toChange[index] + delta = 0 for toDel in listDel: if change > toDel: - self.toChange[self.toChange.index(change)] -= self.op_size(self.code[toDel]) + delta += self.op_size(self.code[toDel]) else: break + self.toChange[index] -= delta for jmp in self.op_range(0, len(self.code)): op = self.code[jmp] - if op in dis.hasjrel+dis.hasjabs: # jmp + if op in hasjrel+hasjabs: # jmp offset = 0 jmpTarget = self.get_target(jmp) for toDel in listDel: if toDel < jmpTarget: - if op in dis.hasjabs: + if op in hasjabs: offset-=self.op_size(self.code[toDel]) elif jmp < toDel: offset-=self.op_size(self.code[toDel]) @@ -432,7 +415,7 @@ class Scanner: self.restructJump(jmp, self.get_target(jmp)+offset) def restructJump(self, pos, newTarget): - if not (self.code[pos] in dis.hasjabs+dis.hasjrel): + if not (self.code[pos] in hasjabs+hasjrel): raise 'Can t change this argument. Opcode is not a jump' if newTarget > 0xFFFF: raise 'TODO' @@ -442,140 +425,6 @@ class Scanner: raise 'TODO' self.code[pos+2] = (target >> 8) & 0xFF self.code[pos+1] = target & 0xFF - - def get_target(self, pos, op=None): - if op is None: - op = self.code[pos] - target = self.get_argument(pos) - if op in dis.hasjrel: - target += pos + 3 - return target - - def get_argument(self, pos): - target = self.code[pos+1] + self.code[pos+2] * 256 - return target - - def print_bytecode(self): - for i in self.op_range(0, len(self.code)): - op = self.code[i] - if op in dis.hasjabs+dis.hasjrel: - dest = self.get_target(i, op) - print '%i\t%s\t%i' % (i, dis.opname[op], dest) - else: - print '%i\t%s\t' % (i, dis.opname[op]) - - def first_instr(self, start, end, instr, target=None, exact=True): - """ - Find the first in the block from start to end. - is any python bytecode instruction or a list of opcodes - If is an opcode with a target (like a jump), a target - destination can be specified which must match precisely if exact - is True, or if exact is False, the instruction which has a target - closest to will be returned. - - Return index to it or None if not found. - """ - code = self.code - assert(start>=0 and end<=len(code)) - - try: None in instr - except: instr = [instr] - - pos = None - distance = len(code) - for i in self.op_range(start, end): - op = code[i] - if op in instr: - if target is None: - return i - dest = self.get_target(i, op) - if dest == target: - return i - elif not exact: - _distance = abs(target - dest) - if _distance < distance: - distance = _distance - pos = i - return pos - - def last_instr(self, start, end, instr, target=None, exact=True): - """ - Find the last in the block from start to end. - is any python bytecode instruction or a list of opcodes - If is an opcode with a target (like a jump), a target - destination can be specified which must match precisely if exact - is True, or if exact is False, the instruction which has a target - closest to will be returned. - - Return index to it or None if not found. - """ - - code = self.code - if not (start>=0 and end<=len(code)): - return None - - try: None in instr - except: instr = [instr] - - pos = None - distance = len(code) - for i in self.op_range(start, end): - op = code[i] - if op in instr: - if target is None: - pos = i - else: - dest = self.get_target(i, op) - if dest == target: - distance = 0 - pos = i - elif not exact: - _distance = abs(target - dest) - if _distance <= distance: - distance = _distance - pos = i - return pos - - def all_instr(self, start, end, instr, target=None, include_beyond_target=False): - """ - Find all in the block from start to end. - is any python bytecode instruction or a list of opcodes - If is an opcode with a target (like a jump), a target - destination can be specified which must match precisely. - - Return a list with indexes to them or [] if none found. - """ - - code = self.code - assert(start>=0 and end<=len(code)) - - try: None in instr - except: instr = [instr] - - result = [] - for i in self.op_range(start, end): - op = code[i] - if op in instr: - if target is None: - result.append(i) - else: - t = self.get_target(i, op) - if include_beyond_target and t >= target: - result.append(i) - elif t == target: - result.append(i) - return result - - def op_size(self, op): - if op < HAVE_ARGUMENT: - return 1 - else: - return 3 - - def op_range(self, start, end): - while start < end: - yield start - start += self.op_size(self.code[start]) def build_stmt_indices(self): code = self.code @@ -654,62 +503,12 @@ class Scanner: slist += [s] * (s-i) i = s slist += [len(code)] * (len(code)-len(slist)) - - def remove_mid_line_ifs(self, ifs): - filtered = [] - for i in ifs: - if self.lines[i].l_no == self.lines[i+3].l_no: - if self.code[self.prev[self.lines[i].next]] in (PJIT, PJIF): - continue - filtered.append(i) - return filtered - - - def rem_or(self, start, end, instr, target=None, include_beyond_target=False): - """ - Find all in the block from start to end. - is any python bytecode instruction or a list of opcodes - If is an opcode with a target (like a jump), a target - destination can be specified which must match precisely. - - Return a list with indexes to them or [] if none found. - """ - - code = self.code - assert(start>=0 and end<=len(code)) - - try: None in instr - except: instr = [instr] - - result = [] - for i in self.op_range(start, end): - op = code[i] - if op in instr: - if target is None: - result.append(i) - else: - t = self.get_target(i, op) - if include_beyond_target and t >= target: - result.append(i) - elif t == target: - result.append(i) - - pjits = self.all_instr(start, end, PJIT) - filtered = [] - for pjit in pjits: - tgt = self.get_target(pjit)-3 - for i in result: - if i <= pjit or i >= tgt: - filtered.append(i) - result = filtered - filtered = [] - return result def next_except_jump(self, start): - """ + ''' Return the next jump that was generated by an except SomeException: construct in a try...except...else clause or None if not found. - """ + ''' except_match = self.first_instr(start, self.lines[start].next, (PJIF)) if except_match: jmp = self.prev[self.get_target(except_match)] @@ -731,17 +530,11 @@ class Scanner: count_SETUP_ += 1 #return self.lines[start].next - def restrict_to_parent(self, target, parent): - """Restrict pos to parent boundaries.""" - if not (parent['start'] < target < parent['end']): - target = parent['end'] - return target - def detect_structure(self, pos, op=None): - """ + ''' Detect type of block structures and their boundaries to fix optimizied jumps in python2.3+ - """ + ''' # TODO: check the struct boundaries more precisely -Dan @@ -844,7 +637,7 @@ class Scanner: ## Add the except blocks i = end - while self.code[i] != END_FINALLY: + while i < len(self.code) and self.code[i] != END_FINALLY: jmp = self.next_except_jump(i) if jmp == None: # check i = self.next_stmt[i] @@ -985,17 +778,14 @@ class Scanner: # self.fixed_jumps[pos] = self.restrict_to_parent(target, parent) def find_jump_targets(self, code): - """ + ''' Detect all offsets in a byte code which are jump targets. Return the list of offsets. This procedure is modelled after dis.findlables(), but here for each target the number of jumps are counted. - """ - - hasjrel = dis.hasjrel - hasjabs = dis.hasjabs + ''' n = len(code) self.structs = [{'type': 'root', diff --git a/uncompyle2/Scanner27.py b/uncompyle2/scanner27.py similarity index 74% rename from uncompyle2/Scanner27.py rename to uncompyle2/scanner27.py index 4561c156..02f2c3ac 100755 --- a/uncompyle2/Scanner27.py +++ b/uncompyle2/scanner27.py @@ -1,53 +1,31 @@ -# Copyright (c) 1999 John Aycock -# Copyright (c) 2000-2002 by hartmut Goebel -# Copyright (c) 2005 by Dan Pascu -# -# See main module for license. -# +''' + Copyright (c) 1999 John Aycock + Copyright (c) 2000-2002 by hartmut Goebel + Copyright (c) 2005 by Dan Pascu -__all__ = ['Token', 'Scanner', 'getscanner'] + See main module for license. +''' import types -import disas as dis from collections import namedtuple from array import array from operator import itemgetter -from Scanner import Token, Code -class Scanner: - def __init__(self, version): - self.version = version - self.resetTokenClass() +from uncompyle2.opcode.opcode_27 import * +import disas as dis +import scanner as scan - dis.setVersion(version) - globals().update({'HAVE_ARGUMENT': dis.HAVE_ARGUMENT}) - globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()}) - globals().update({'PJIF': dis.opmap['POP_JUMP_IF_FALSE']}) - globals().update({'PJIT': dis.opmap['POP_JUMP_IF_TRUE']}) - globals().update({'JA': dis.opmap['JUMP_ABSOLUTE']}) - globals().update({'JF': dis.opmap['JUMP_FORWARD']}) - - self.JUMP_OPs = map(lambda op: dis.opname[op], - dis.hasjrel + dis.hasjabs) - - def setShowAsm(self, showasm, out=None): - self.showasm = showasm - self.out = out - - def setTokenClass(self, tokenClass): - assert type(tokenClass) == types.ClassType - self.Token = tokenClass - - def resetTokenClass(self): - self.setTokenClass(Token) +class Scanner27(scan.Scanner): + def __init__(self): + self.Token = scan.Scanner.__init__(self, 2.6) def disassemble(self, co, classname=None): - """ + ''' Disassemble a code object, returning a list of 'Token'. The main part of this procedure is modelled after dis.disassemble(). - """ + ''' rv = [] customize = {} Token = self.Token # shortcut @@ -129,27 +107,27 @@ class Scanner: k += 1 op = code[offset] - opname = dis.opname[op] + op_name = opname[op] oparg = None; pattr = None if op >= HAVE_ARGUMENT: oparg = code[offset+1] + code[offset+2] * 256 + extended_arg extended_arg = 0 - if op == dis.EXTENDED_ARG: + if op == EXTENDED_ARG: extended_arg = oparg * 65536L continue - if op in dis.hasconst: + if op in hasconst: const = co.co_consts[oparg] if type(const) == types.CodeType: oparg = const if const.co_name == '': - assert opname == 'LOAD_CONST' - opname = 'LOAD_LAMBDA' + assert op_name == 'LOAD_CONST' + op_name = 'LOAD_LAMBDA' elif const.co_name == '': - opname = 'LOAD_GENEXPR' + op_name = 'LOAD_GENEXPR' elif const.co_name == '': - opname = 'LOAD_DICTCOMP' + op_name = 'LOAD_DICTCOMP' elif const.co_name == '': - opname = 'LOAD_SETCOMP' + op_name = 'LOAD_SETCOMP' # verify uses 'pattr' for comparism, since 'attr' # now holds Code(const) and thus can not be used # for comparism (todo: think about changing this) @@ -158,17 +136,17 @@ class Scanner: pattr = '' else: pattr = const - elif op in dis.hasname: + elif op in hasname: pattr = names[oparg] - elif op in dis.hasjrel: + elif op in hasjrel: pattr = repr(offset + 3 + oparg) - elif op in dis.hasjabs: + elif op in hasjabs: pattr = repr(oparg) - elif op in dis.haslocal: + elif op in haslocal: pattr = varnames[oparg] - elif op in dis.hascompare: - pattr = dis.cmp_op[oparg] - elif op in dis.hasfree: + elif op in hascompare: + pattr = cmp_op[oparg] + elif op in hasfree: pattr = free[oparg] if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE, @@ -184,30 +162,30 @@ class Scanner: code[offset-3] == LOAD_CLOSURE: continue else: - opname = '%s_%d' % (opname, oparg) + op_name = '%s_%d' % (op_name, oparg) if op != BUILD_SLICE: - customize[opname] = oparg + customize[op_name] = oparg elif op == JA: target = self.get_target(offset) if target < offset: if offset in self.stmts and code[offset+3] not in (END_FINALLY, POP_BLOCK) \ and offset not in self.not_continue: - opname = 'CONTINUE' + op_name = 'CONTINUE' else: - opname = 'JUMP_BACK' + op_name = 'JUMP_BACK' elif op == LOAD_GLOBAL: try: if pattr == 'AssertionError' and rv and rv[-1] == 'POP_JUMP_IF_TRUE': - opname = 'LOAD_ASSERT' + op_name = 'LOAD_ASSERT' except AttributeError: pass elif op == RETURN_VALUE: if offset in self.return_end_ifs: - opname = 'RETURN_END_IF' + op_name = 'RETURN_END_IF' if offset not in replace: - rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets)) + rv.append(Token(op_name, oparg, pattr, offset, linestart = offset in linestartoffsets)) else: rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets)) @@ -218,127 +196,6 @@ class Scanner: print >>out return rv, customize - def get_target(self, pos, op=None): - if op is None: - op = self.code[pos] - target = self.code[pos+1] + self.code[pos+2] * 256 - if op in dis.hasjrel: - target += pos + 3 - return target - - def first_instr(self, start, end, instr, target=None, exact=True): - """ - Find the first in the block from start to end. - is any python bytecode instruction or a list of opcodes - If is an opcode with a target (like a jump), a target - destination can be specified which must match precisely if exact - is True, or if exact is False, the instruction which has a target - closest to will be returned. - - Return index to it or None if not found. - """ - code = self.code - assert(start>=0 and end<=len(code)) - - try: None in instr - except: instr = [instr] - - pos = None - distance = len(code) - for i in self.op_range(start, end): - op = code[i] - if op in instr: - if target is None: - return i - dest = self.get_target(i, op) - if dest == target: - return i - elif not exact: - _distance = abs(target - dest) - if _distance < distance: - distance = _distance - pos = i - return pos - - def last_instr(self, start, end, instr, target=None, exact=True): - """ - Find the last in the block from start to end. - is any python bytecode instruction or a list of opcodes - If is an opcode with a target (like a jump), a target - destination can be specified which must match precisely if exact - is True, or if exact is False, the instruction which has a target - closest to will be returned. - - Return index to it or None if not found. - """ - - code = self.code - if not (start>=0 and end<=len(code)): - return None - - try: None in instr - except: instr = [instr] - - pos = None - distance = len(code) - for i in self.op_range(start, end): - op = code[i] - if op in instr: - if target is None: - pos = i - else: - dest = self.get_target(i, op) - if dest == target: - distance = 0 - pos = i - elif not exact: - _distance = abs(target - dest) - if _distance <= distance: - distance = _distance - pos = i - return pos - - def all_instr(self, start, end, instr, target=None, include_beyond_target=False): - """ - Find all in the block from start to end. - is any python bytecode instruction or a list of opcodes - If is an opcode with a target (like a jump), a target - destination can be specified which must match precisely. - - Return a list with indexes to them or [] if none found. - """ - - code = self.code - assert(start>=0 and end<=len(code)) - - try: None in instr - except: instr = [instr] - - result = [] - for i in self.op_range(start, end): - op = code[i] - if op in instr: - if target is None: - result.append(i) - else: - t = self.get_target(i, op) - if include_beyond_target and t >= target: - result.append(i) - elif t == target: - result.append(i) - return result - - def op_size(self, op): - if op < HAVE_ARGUMENT: - return 1 - else: - return 3 - - def op_range(self, start, end): - while start < end: - yield start - start += self.op_size(self.code[start]) - def build_stmt_indices(self): code = self.code start = 0; @@ -428,52 +285,11 @@ class Scanner: filtered.append(i) return filtered - - def rem_or(self, start, end, instr, target=None, include_beyond_target=False): - """ - Find all in the block from start to end. - is any python bytecode instruction or a list of opcodes - If is an opcode with a target (like a jump), a target - destination can be specified which must match precisely. - - Return a list with indexes to them or [] if none found. - """ - - code = self.code - assert(start>=0 and end<=len(code)) - - try: None in instr - except: instr = [instr] - - result = [] - for i in self.op_range(start, end): - op = code[i] - if op in instr: - if target is None: - result.append(i) - else: - t = self.get_target(i, op) - if include_beyond_target and t >= target: - result.append(i) - elif t == target: - result.append(i) - - pjits = self.all_instr(start, end, PJIT) - filtered = [] - for pjit in pjits: - tgt = self.get_target(pjit)-3 - for i in result: - if i <= pjit or i >= tgt: - filtered.append(i) - result = filtered - filtered = [] - return result - def next_except_jump(self, start): - """ + ''' Return the next jump that was generated by an except SomeException: construct in a try...except...else clause or None if not found. - """ + ''' except_match = self.first_instr(start, self.lines[start].next, POP_JUMP_IF_FALSE) if except_match: @@ -493,17 +309,11 @@ class Scanner: elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY): count_SETUP_ += 1 - def restrict_to_parent(self, target, parent): - """Restrict pos to parent boundaries.""" - if not (parent['start'] < target < parent['end']): - target = parent['end'] - return target - def detect_structure(self, pos, op=None): - """ + ''' Detect type of block structures and their boundaries to fix optimizied jumps in python2.3+ - """ + ''' # TODO: check the struct boundaries more precisely -Dan @@ -751,17 +561,14 @@ class Scanner: def find_jump_targets(self, code): - """ + ''' Detect all offsets in a byte code which are jump targets. Return the list of offsets. This procedure is modelled after dis.findlables(), but here for each target the number of jumps are counted. - """ - - hasjrel = dis.hasjrel - hasjabs = dis.hasjabs + ''' n = len(code) self.structs = [{'type': 'root', diff --git a/uncompyle2/spark.py b/uncompyle2/spark.py index ac630f71..ea06c1c9 100755 --- a/uncompyle2/spark.py +++ b/uncompyle2/spark.py @@ -1,23 +1,25 @@ -# Copyright (c) 1998-2002 John Aycock -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +''' +Copyright (c) 1998-2002 John Aycock + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +''' __version__ = 'SPARK-0.7 (pre-alpha-7) uncompyle trim' @@ -107,7 +109,7 @@ class GenericParser: self.rule2func = {} self.rule2name = {} self.collectRules() - start = D['rules'][self._START][0][1][1] # Blech. + start = D['rules'][self._START][0][1][1] # Blech. self.augment(start) D['rule2func'] = self.rule2func D['makeSet'] = self.makeSet_fast @@ -267,12 +269,12 @@ class GenericParser: sets.append([]) if sets[i] == []: - break + break self.makeSet(tokens[i], sets, i) else: sets.append([]) self.makeSet(None, sets, len(tokens)) - + finalitem = (self.finalState(tokens), 0) if finalitem not in sets[-2]: if len(tokens) > 0: diff --git a/uncompyle2/verify.py b/uncompyle2/verify.py index 44bef997..6af4feff 100755 --- a/uncompyle2/verify.py +++ b/uncompyle2/verify.py @@ -7,7 +7,7 @@ import types import operator import dis -import uncompyle2, Scanner +import uncompyle2, scanner BIN_OP_FUNCS = { 'BINARY_POWER': operator.pow, @@ -284,7 +284,7 @@ def cmp_code_objects(version, code_obj1, code_obj2, name=''): getattr(code_obj1,member), getattr(code_obj2,member)) -class Token(Scanner.Token): +class Token(scanner.Token): """Token class with changed semantics for 'cmp()'.""" def __cmp__(self, o): diff --git a/uncompyle2/Walker.py b/uncompyle2/walker.py similarity index 96% rename from uncompyle2/Walker.py rename to uncompyle2/walker.py index 03ba9353..3a6f8229 100755 --- a/uncompyle2/Walker.py +++ b/uncompyle2/walker.py @@ -1,51 +1,52 @@ -# Copyright (c) 1999 John Aycock -# Copyright (c) 2000-2002 by hartmut Goebel -# Copyright (c) 2005 by Dan Pascu -# -# See main module for license. -# -# -# Decompilation (walking AST) -# -# All table-driven. Step 1 determines a table (T) and a path to a -# table key (K) from the node type (N) (other nodes are shown as O): -# -# N N N&K -# / | ... \ / | ... \ / | ... \ -# O O O O O K O O O -# | -# K -# -# MAP_R0 (TABLE_R0) MAP_R (TABLE_R) MAP_DIRECT (TABLE_DIRECT) -# -# The default is a direct mapping. The key K is then extracted from the -# subtree and used to find a table entry T[K], if any. The result is a -# format string and arguments (a la printf()) for the formatting engine. -# Escapes in the format string are: -# -# %c evaluate N[A] recursively* -# %C evaluate N[A[0]]..N[A[1]-1] recursively, separate by A[2]* -# %, print ',' if last %C only printed one item (for tuples--unused) -# %| tab to current indentation level -# %+ increase current indentation level -# %- decrease current indentation level -# %{...} evaluate ... in context of N -# %% literal '%' -# -# * indicates an argument (A) required. -# -# The '%' may optionally be followed by a number (C) in square brackets, which -# makes the engine walk down to N[C] before evaluating the escape code. -# +''' + Copyright (c) 1999 John Aycock + Copyright (c) 2000-2002 by hartmut Goebel + Copyright (c) 2005 by Dan Pascu + + See main module for license. + + + Decompilation (walking AST) + + All table-driven. Step 1 determines a table (T) and a path to a + table key (K) from the node type (N) (other nodes are shown as O): + + N N N&K + / | ... \ / | ... \ / | ... \ + O O O O O K O O O + | + K + + MAP_R0 (TABLE_R0) MAP_R (TABLE_R) MAP_DIRECT (TABLE_DIRECT) + + The default is a direct mapping. The key K is then extracted from the + subtree and used to find a table entry T[K], if any. The result is a + format string and arguments (a la printf()) for the formatting engine. + Escapes in the format string are: + + %c evaluate N[A] recursively* + %C evaluate N[A[0]]..N[A[1]-1] recursively, separate by A[2]* + %, print ',' if last %C only printed one item (for tuples--unused) + %| tab to current indentation level + %+ increase current indentation level + %- decrease current indentation level + %{...} evaluate ... in context of N + %% literal '%' + + * indicates an argument (A) required. + + The '%' may optionally be followed by a number (C) in square brackets, which + makes the engine walk down to N[C] before evaluating the escape code. +''' import sys, re, cStringIO from types import ListType, TupleType, DictType, \ EllipsisType, IntType, CodeType from spark import GenericASTTraversal -import Parser -from Parser import AST -from Scanner import Token, Code +import parser +from parser import AST +from scanner import Token, Code minint = -sys.maxint-1 @@ -385,7 +386,7 @@ escape = re.compile(r''' ( [{] (?P [^}]* ) [}] )) ''', re.VERBOSE) -class ParserError(Parser.ParserError): +class ParserError(parser.ParserError): def __init__(self, error, tokens): self.error = error # previous exception self.tokens = tokens @@ -1387,17 +1388,17 @@ class Walker(GenericASTTraversal, object): def build_ast(self, tokens, customize, isLambda=0, noneInNames=False): assert type(tokens) == ListType #assert isinstance(tokens[0], Token) - + if isLambda: tokens.append(Token('LAMBDA_MARKER')) try: - ast = Parser.parse(tokens, customize) - except Parser.ParserError, e: + ast = parser.parse(tokens, customize) + except parser.ParserError, e: raise ParserError(e, tokens) if self.showast: self.print_(repr(ast)) return ast - + if len(tokens) > 2 or len(tokens) == 2 and not noneInNames: if tokens[-1] == Token('RETURN_VALUE'): if tokens[-2] == Token('LOAD_CONST'): @@ -1406,14 +1407,14 @@ class Walker(GenericASTTraversal, object): tokens.append(Token('RETURN_LAST')) if len(tokens) == 0: return PASS - + # Build AST from disassembly. try: - ast = Parser.parse(tokens, customize) - except Parser.ParserError, e: + ast = parser.parse(tokens, customize) + except parser.ParserError, e: raise ParserError(e, tokens) if self.showast: self.print_(repr(ast)) - + return ast