diff --git a/.travis.yml b/.travis.yml index 24f55551..612a719e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,11 @@ sudo: false python: - '2.7' + - '3.3' - '3.4' +install: +- pip install -r requirements-dev.txt + script: - python ./setup.py develop && COMPILE='--compile' make check diff --git a/Makefile b/Makefile index 5d99c686..5705b9bf 100644 --- a/Makefile +++ b/Makefile @@ -18,8 +18,17 @@ TEST_TYPES=check-long check-short check-2.7 check-3.4 #: Default target - same as "check" all: check -#: Run working tests -check check-3.4 check-2.7: pytest +# Run all tests +check: + @PYTHON_VERSION=`$(PYTHON) -V 2>&1 | cut -d ' ' -f 2 | cut -d'.' -f1,2`; \ + $(MAKE) check-$$PYTHON_VERSION + +#: Tests for Python 2.7, 3.3 and 3.4 +check-2.7 check-3.3 check-3.4: pytest + $(MAKE) -C test $@ + +#:Tests for Python 2.6 (doesn't have pytest) +check-2.6: $(MAKE) -C test $@ #: Run py.test tests diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..cb64874e --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,2 @@ +pytest +flake8 diff --git a/test/Makefile b/test/Makefile index 850e5e2a..568b8ffe 100644 --- a/test/Makefile +++ b/test/Makefile @@ -25,6 +25,10 @@ check-2.6: check-bytecode-2.5 check-bytecode-2.7 #: Run working tests from Python 2.7 check-2.7: check-bytecode check-2.7-ok +#: Run working tests from Python 3.3 +check-3.3: check-bytecode check-bytecode-3.3 + $(PYTHON) test_pythonlib.py --bytecode-3.3 + #: Run working tests from Python 3.4 check-3.4: check-bytecode check-bytecode-3.4 $(PYTHON) test_pythonlib.py --bytecode-3.4 @@ -35,7 +39,7 @@ check-disasm: #: Check deparsing bytecode only check-bytecode: - $(PYTHON) test_pythonlib.py --bytecode-2.5 --bytecode-2.7 --bytecode-3.2 + $(PYTHON) test_pythonlib.py --bytecode-2.5 --bytecode-2.7 --bytecode-3.3 #: Check deparsing Python 2.5 check-bytecode-2.5: @@ -49,6 +53,10 @@ check-bytecode-2.7: check-bytecode-3.2: $(PYTHON) test_pythonlib.py --bytecode-3.2 +#: Check deparsing Python 3.3 +check-bytecode-3.3: + $(PYTHON) test_pythonlib.py --bytecode-3.3 + #: Check deparsing Python 3.4 check-bytecode-3.4: $(PYTHON) test_pythonlib.py --bytecode-3.4 diff --git a/test/bytecode_3.3/00_assign.pyc b/test/bytecode_3.3/00_assign.pyc new file mode 100644 index 00000000..5dbd81c7 Binary files /dev/null and b/test/bytecode_3.3/00_assign.pyc differ diff --git a/test/bytecode_3.3/00_import.pyc b/test/bytecode_3.3/00_import.pyc new file mode 100644 index 00000000..78fe8206 Binary files /dev/null and b/test/bytecode_3.3/00_import.pyc differ diff --git a/test/bytecode_3.3/00_pass.pyc b/test/bytecode_3.3/00_pass.pyc new file mode 100644 index 00000000..f6a95572 Binary files /dev/null and b/test/bytecode_3.3/00_pass.pyc differ diff --git a/test/bytecode_3.3/01_boolean.pyc b/test/bytecode_3.3/01_boolean.pyc new file mode 100644 index 00000000..3a6133d9 Binary files /dev/null and b/test/bytecode_3.3/01_boolean.pyc differ diff --git a/test/bytecode_3.3/01_positional.pyc b/test/bytecode_3.3/01_positional.pyc new file mode 100644 index 00000000..227e6c26 Binary files /dev/null and b/test/bytecode_3.3/01_positional.pyc differ diff --git a/test/bytecode_3.3/02_def.pyc b/test/bytecode_3.3/02_def.pyc new file mode 100644 index 00000000..8d84c270 Binary files /dev/null and b/test/bytecode_3.3/02_def.pyc differ diff --git a/test/bytecode_3.3/02_slice.pyc b/test/bytecode_3.3/02_slice.pyc new file mode 100644 index 00000000..0d1a3d43 Binary files /dev/null and b/test/bytecode_3.3/02_slice.pyc differ diff --git a/test/bytecode_3.3/05_if.pyc b/test/bytecode_3.3/05_if.pyc new file mode 100644 index 00000000..11eeb242 Binary files /dev/null and b/test/bytecode_3.3/05_if.pyc differ diff --git a/test/bytecode_3.3/05_ifelse.pyc b/test/bytecode_3.3/05_ifelse.pyc new file mode 100644 index 00000000..bde28025 Binary files /dev/null and b/test/bytecode_3.3/05_ifelse.pyc differ diff --git a/test/bytecode_3.3/10_for.pyc b/test/bytecode_3.3/10_for.pyc new file mode 100644 index 00000000..3379248f Binary files /dev/null and b/test/bytecode_3.3/10_for.pyc differ diff --git a/test/bytecode_3.3/10_keyword.pyc b/test/bytecode_3.3/10_keyword.pyc new file mode 100644 index 00000000..77259b55 Binary files /dev/null and b/test/bytecode_3.3/10_keyword.pyc differ diff --git a/test/bytecode_3.3/10_mixed_boolean.pyc b/test/bytecode_3.3/10_mixed_boolean.pyc new file mode 100644 index 00000000..ecd44155 Binary files /dev/null and b/test/bytecode_3.3/10_mixed_boolean.pyc differ diff --git a/test/bytecode_3.3/10_while.pyc b/test/bytecode_3.3/10_while.pyc new file mode 100644 index 00000000..7d2e8fa8 Binary files /dev/null and b/test/bytecode_3.3/10_while.pyc differ diff --git a/test/bytecode_3.3/15_assert.pyc b/test/bytecode_3.3/15_assert.pyc new file mode 100644 index 00000000..7848e4ef Binary files /dev/null and b/test/bytecode_3.3/15_assert.pyc differ diff --git a/test/bytecode_3.3/15_for_if.pyc b/test/bytecode_3.3/15_for_if.pyc new file mode 100644 index 00000000..e9050ae6 Binary files /dev/null and b/test/bytecode_3.3/15_for_if.pyc differ diff --git a/test/bytecompile-tests b/test/bytecompile-tests index 851c473d..3811d94a 100755 --- a/test/bytecompile-tests +++ b/test/bytecompile-tests @@ -55,7 +55,7 @@ tests['2.3'] = tests['2.2'] tests['2.5'] = tests['2.3'] # tests['2.7'] = ['mine'] + tests['2.6'] tests['2.7'] = [ - # 'simple_source/branching/ifelse', + 'simple_source/branching/ifelse', # 'simple_source/branching/if' 'simple_source/misc/assert', 'simple_source/misc/assign', @@ -79,7 +79,7 @@ for root, dirs, basenames in os.walk('simple_source'): simple_source.append(os.path.join(root, basename)[0:-3]) pass -tests['3.4'] = simple_source +tests['3.3'] = tests['3.4'] = simple_source total_tests = len(tests['2.7']) #tests['2.2'].sort(); print tests['2.2'] diff --git a/test/test_pythonlib.py b/test/test_pythonlib.py index b124d2c9..c36f94ef 100755 --- a/test/test_pythonlib.py +++ b/test/test_pythonlib.py @@ -67,6 +67,9 @@ test_options = { 'bytecode-3.2': ['bytecode_3.2', PYC, 'bytecode_3.2', 3.2], + 'bytecode-3.3': + ['bytecode_3.3', PYC, 'bytecode_3.3', 3.3], + 'bytecode-3.4': ['bytecode_3.4', PYC, 'bytecode_3.4', 3.4], diff --git a/uncompyle6/__init__.py b/uncompyle6/__init__.py index 35eb9179..b1c60eaf 100644 --- a/uncompyle6/__init__.py +++ b/uncompyle6/__init__.py @@ -42,8 +42,8 @@ PYTHON_VERSION_STR = "%s.%s" % (sys.version_info[0], sys.version_info[1]) sys.setrecursionlimit(5000) def check_python_version(program): - if not (sys.version_info[0:2] in ((2, 6), (2, 7), (3, 4))): - print('Error: %s requires %s Python 2.6, 2.7 or 3.4' % program, + if not (sys.version_info[0:2] in ((2, 6), (2, 7), (3, 2), (3, 3), (3, 4))): + print('Error: %s requires Python 2.6, 2.7, 3.2, 3.3, or 3.4' % program, file=sys.stderr) sys.exit(-1) return diff --git a/uncompyle6/opcodes/opcode_32.py b/uncompyle6/opcodes/opcode_32.py index 879e815c..1d456e0c 100644 --- a/uncompyle6/opcodes/opcode_32.py +++ b/uncompyle6/opcodes/opcode_32.py @@ -69,8 +69,10 @@ def_op('BINARY_TRUE_DIVIDE', 27) def_op('INPLACE_FLOOR_DIVIDE', 28) def_op('INPLACE_TRUE_DIVIDE', 29) -# Gone from Python 3 are -# Python 2's SLICE+0 .. SLICE+3 +# Gone from Python 3 are Python2's +# SLICE+0 .. SLICE+3 +# STORE_SLICE+0 .. STORE_SLICE+3 +# DELETE_SLICE+0 .. DELETE_SLICE+3 def_op('STORE_MAP', 54) def_op('INPLACE_ADD', 55) @@ -125,6 +127,9 @@ name_op('STORE_ATTR', 95) # Index in name list name_op('DELETE_ATTR', 96) # "" name_op('STORE_GLOBAL', 97) # "" name_op('DELETE_GLOBAL', 98) # "" + +# Python 2's DUP_TOPX is gone + def_op('LOAD_CONST', 100) # Index in const list hasconst.append(100) name_op('LOAD_NAME', 101) # Index in name list diff --git a/uncompyle6/opcodes/opcode_33.py b/uncompyle6/opcodes/opcode_33.py new file mode 100644 index 00000000..879e815c --- /dev/null +++ b/uncompyle6/opcodes/opcode_33.py @@ -0,0 +1,189 @@ + +""" +opcode module - potentially shared between dis and other modules which +operate on bytecodes (e.g. peephole optimizers). +""" + +__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs", + "haslocal", "hascompare", "hasfree", "opname", "opmap", + "HAVE_ARGUMENT", "EXTENDED_ARG"] + +cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is', + 'is not', 'exception match', 'BAD') + +hasconst = [] +hasname = [] +hasjrel = [] +hasjabs = [] +haslocal = [] +hascompare = [] +hasfree = [] + +opmap = {} +opname = [''] * 256 +for op in range(256): opname[op] = '<%r>' % (op,) +del op + +def def_op(name, op): + opname[op] = name + opmap[name] = op + +def name_op(name, op): + def_op(name, op) + hasname.append(op) + +def jrel_op(name, op): + def_op(name, op) + hasjrel.append(op) + +def jabs_op(name, op): + def_op(name, op) + hasjabs.append(op) + +# Instruction opcodes for compiled code +# Blank lines correspond to available opcodes + +def_op('STOP_CODE', 0) +def_op('POP_TOP', 1) +def_op('ROT_TWO', 2) +def_op('ROT_THREE', 3) +def_op('DUP_TOP', 4) +def_op('DUP_TOP_TWO', 5) + +def_op('NOP', 9) +def_op('UNARY_POSITIVE', 10) +def_op('UNARY_NEGATIVE', 11) +def_op('UNARY_NOT', 12) + +def_op('UNARY_INVERT', 15) + +def_op('BINARY_POWER', 19) +def_op('BINARY_MULTIPLY', 20) + +def_op('BINARY_MODULO', 22) +def_op('BINARY_ADD', 23) +def_op('BINARY_SUBTRACT', 24) +def_op('BINARY_SUBSCR', 25) +def_op('BINARY_FLOOR_DIVIDE', 26) +def_op('BINARY_TRUE_DIVIDE', 27) +def_op('INPLACE_FLOOR_DIVIDE', 28) +def_op('INPLACE_TRUE_DIVIDE', 29) + +# Gone from Python 3 are +# Python 2's SLICE+0 .. SLICE+3 + +def_op('STORE_MAP', 54) +def_op('INPLACE_ADD', 55) +def_op('INPLACE_SUBTRACT', 56) +def_op('INPLACE_MULTIPLY', 57) + +def_op('INPLACE_MODULO', 59) +def_op('STORE_SUBSCR', 60) +def_op('DELETE_SUBSCR', 61) +def_op('BINARY_LSHIFT', 62) +def_op('BINARY_RSHIFT', 63) +def_op('BINARY_AND', 64) +def_op('BINARY_XOR', 65) +def_op('BINARY_OR', 66) +def_op('INPLACE_POWER', 67) +def_op('GET_ITER', 68) +def_op('STORE_LOCALS', 69) + +def_op('PRINT_EXPR', 70) +def_op('LOAD_BUILD_CLASS', 71) + +# Python3 drops/changes: +# def_op('PRINT_ITEM', 71) +# def_op('PRINT_NEWLINE', 72) +# def_op('PRINT_ITEM_TO', 73) +# def_op('PRINT_NEWLINE_TO', 74) + +def_op('INPLACE_LSHIFT', 75) +def_op('INPLACE_RSHIFT', 76) +def_op('INPLACE_AND', 77) +def_op('INPLACE_XOR', 78) +def_op('INPLACE_OR', 79) +def_op('BREAK_LOOP', 80) +def_op('WITH_CLEANUP', 81) + +def_op('RETURN_VALUE', 83) +def_op('IMPORT_STAR', 84) + +def_op('YIELD_VALUE', 86) +def_op('POP_BLOCK', 87) +def_op('END_FINALLY', 88) +def_op('POP_EXCEPT', 89) + +HAVE_ARGUMENT = 90 # Opcodes from here have an argument: + +name_op('STORE_NAME', 90) # Index in name list +name_op('DELETE_NAME', 91) # "" +def_op('UNPACK_SEQUENCE', 92) # Number of tuple items +jrel_op('FOR_ITER', 93) +def_op('UNPACK_EX', 94) +name_op('STORE_ATTR', 95) # Index in name list +name_op('DELETE_ATTR', 96) # "" +name_op('STORE_GLOBAL', 97) # "" +name_op('DELETE_GLOBAL', 98) # "" +def_op('LOAD_CONST', 100) # Index in const list +hasconst.append(100) +name_op('LOAD_NAME', 101) # Index in name list +def_op('BUILD_TUPLE', 102) # Number of tuple items +def_op('BUILD_LIST', 103) # Number of list items +def_op('BUILD_SET', 104) # Number of set items +def_op('BUILD_MAP', 105) # Number of dict entries (upto 255) +name_op('LOAD_ATTR', 106) # Index in name list +def_op('COMPARE_OP', 107) # Comparison operator +hascompare.append(107) +name_op('IMPORT_NAME', 108) # Index in name list +name_op('IMPORT_FROM', 109) # Index in name list + +jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip +jabs_op('JUMP_IF_FALSE_OR_POP', 111) # Target byte offset from beginning of code +jabs_op('JUMP_IF_TRUE_OR_POP', 112) # "" +jabs_op('JUMP_ABSOLUTE', 113) # "" +jabs_op('POP_JUMP_IF_FALSE', 114) # "" +jabs_op('POP_JUMP_IF_TRUE', 115) # "" + +name_op('LOAD_GLOBAL', 116) # Index in name list + +jabs_op('CONTINUE_LOOP', 119) # Target address +jrel_op('SETUP_LOOP', 120) # Distance to target address +jrel_op('SETUP_EXCEPT', 121) # "" +jrel_op('SETUP_FINALLY', 122) # "" + +def_op('LOAD_FAST', 124) # Local variable number +haslocal.append(124) +def_op('STORE_FAST', 125) # Local variable number +haslocal.append(125) +def_op('DELETE_FAST', 126) # Local variable number +haslocal.append(126) + +def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3) +def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8) +def_op('MAKE_FUNCTION', 132) # Number of args with default values +def_op('BUILD_SLICE', 133) # Number of items +def_op('MAKE_CLOSURE', 134) +def_op('LOAD_CLOSURE', 135) +hasfree.append(135) +def_op('LOAD_DEREF', 136) +hasfree.append(136) +def_op('STORE_DEREF', 137) +hasfree.append(137) +def_op('DELETE_DEREF', 138) +hasfree.append(138) + +def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8) +def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8) +def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8) + +jrel_op('SETUP_WITH', 143) + +def_op('LIST_APPEND', 145) +def_op('SET_ADD', 146) +def_op('MAP_ADD', 147) + +def_op('EXTENDED_ARG', 144) +EXTENDED_ARG = 144 + +del def_op, name_op, jrel_op, jabs_op diff --git a/uncompyle6/opcodes/opcode_34.py b/uncompyle6/opcodes/opcode_34.py index 8c80d200..b54bcf4a 100644 --- a/uncompyle6/opcodes/opcode_34.py +++ b/uncompyle6/opcodes/opcode_34.py @@ -142,6 +142,9 @@ name_op('STORE_ATTR', 95) # Index in name list name_op('DELETE_ATTR', 96) # "" name_op('STORE_GLOBAL', 97) # "" name_op('DELETE_GLOBAL', 98) # "" + +# Python 2's DUP_TOPX is gone + def_op('LOAD_CONST', 100) # Index in const list hasconst.append(100) name_op('LOAD_NAME', 101) # Index in name list diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index ed9dc35d..69535244 100755 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -25,14 +25,11 @@ if PYTHON3: intern = sys.intern L65536 = 65536 - def cmp(a, b): - return (a > b) - (a < b) - def long(l): l else: L65536 = long(65536) # NOQA -from uncompyle6.opcodes import opcode_25, opcode_26, opcode_27, opcode_32, opcode_34 +from uncompyle6.opcodes import opcode_25, opcode_26, opcode_27, opcode_32, opcode_33, opcode_34 class Code: @@ -60,6 +57,8 @@ class Scanner(object): self.opc = opcode_25 elif version == 3.2: self.opc = opcode_32 + elif version == 3.3: + self.opc = opcode_33 elif version == 3.4: self.opc = opcode_34 @@ -280,7 +279,7 @@ class Scanner(object): return result def restrict_to_parent(self, target, parent): - '''Restrict pos to parent boundaries.''' + """Restrict target to parent structure boundaries.""" if not (parent['start'] < target < parent['end']): target = parent['end'] return target @@ -302,6 +301,9 @@ def get_scanner(version): elif version == 3.2: import uncompyle6.scanners.scanner32 as scan scanner = scan.Scanner32() + elif version == 3.3: + import uncompyle6.scanners.scanner33 as scan + scanner = scan.Scanner33() elif version == 3.4: import uncompyle6.scanners.scanner34 as scan scanner = scan.Scanner34() diff --git a/uncompyle6/scanners/scanner32.py b/uncompyle6/scanners/scanner32.py index 148b300b..8660c61e 100644 --- a/uncompyle6/scanners/scanner32.py +++ b/uncompyle6/scanners/scanner32.py @@ -1,6 +1,3 @@ -# Copyright (c) 1999 John Aycock -# Copyright (c) 2000-2002 by hartmut Goebel -# Copyright (c) 2005 by Dan Pascu # Copyright (c) 2015 by Rocky Bernstein """ Python 3.2 bytecode scanner/deparser @@ -12,552 +9,19 @@ for later use in deparsing. from __future__ import print_function -import dis, marshal -from collections import namedtuple -from array import array - -from uncompyle6.scanner import Token, L65536 - - -# Get all the opcodes into globals -globals().update(dis.opmap) -from uncompyle6.opcodes.opcode_27 import * +import uncompyle6.scanners.scanner33 as scan33 import uncompyle6.scanner as scan - class Scanner32(scan.Scanner): def __init__(self): scan.Scanner.__init__(self, 3.2) # check - def run(self, bytecode): - code_object = marshal.loads(bytecode) - tokens = self.tokenize(code_object) - return tokens - def disassemble(self, co, classname=None): - """ - Convert code object into a sequence of tokens. + return scan33.Scanner33().disassemble(co, classname) - The below is based on (an older version?) of Python dis.disassemble_bytes(). - """ - # Container for tokens - tokens = [] - customize = {} - self.code = code = array('B', co.co_code) - codelen = len(code) - self.build_lines_data(co) - self.build_prev_op() - - # Get jump targets - # Format: {target offset: [jump offsets]} - jump_targets = self.find_jump_targets() - - # self.lines contains (block,addrLastInstr) - if classname: - classname = '_' + classname.lstrip('_') + '__' - - def unmangle(name): - if name.startswith(classname) and name[-2:] != '__': - return name[len(classname) - 2:] - return name - - # free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ] - # names = [ unmangle(name) for name in co.co_names ] - # varnames = [ unmangle(name) for name in co.co_varnames ] - else: - # free = co.co_cellvars + co.co_freevars - # names = co.co_names - # varnames = co.co_varnames - pass - - # Scan for assertions. Later we will - # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those - # assertions - - self.load_asserts = set() - for i in self.op_range(0, codelen): - if self.code[i] == POP_JUMP_IF_TRUE and self.code[i+3] == LOAD_GLOBAL: - if names[self.get_argument(i+3)] == 'AssertionError': - self.load_asserts.add(i+3) - - # FIXME: reinstate code - # cf = self.find_jump_targets(self.code) - # # contains (code, [addrRefToCode]) - # last_stmt = self.next_stmt[0] - # i = self.next_stmt[last_stmt] - # replace = {} - # while i < n-1: - # if self.lines[last_stmt].next > i: - # if self.code[last_stmt] == PRINT_ITEM: - # if self.code[i] == PRINT_ITEM: - # replace[i] = 'PRINT_ITEM_CONT' - # elif self.code[i] == PRINT_NEWLINE: - # replace[i] = 'PRINT_NEWLINE_CONT' - # last_stmt = i - # i = self.next_stmt[i] - - # imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR)) - # if len(imports) > 1: - # last_import = imports[0] - # for i in imports[1:]: - # if self.lines[last_import].next > i: - # if self.code[last_import] == IMPORT_NAME == self.code[i]: - # replace[i] = 'IMPORT_NAME_CONT' - # last_import = i - - # Initialize extended arg at 0. When extended arg op is encountered, - # variable preserved for next cycle and added as arg for next op - extended_arg = 0 - free = None - for offset in self.op_range(0, codelen): - # Add jump target tokens - if offset in jump_targets: - jump_idx = 0 - for jump_offset in jump_targets[offset]: - tokens.append(Token('COME_FROM', None, repr(jump_offset), - offset='{}_{}'.format(offset, jump_idx))) - jump_idx += 1 - op = code[offset] - op_name = opname[op] - - # Create token and fill all the fields we can - # w/o touching arguments - current_token = Token(dis.opname[op]) - current_token.offset = offset - - if offset in self.linestarts: - current_token.linestart = self.linestarts[offset] - else: - current_token.linestart = None - - if op >= dis.HAVE_ARGUMENT: - # Calculate op's argument value based on its argument and - # preceding extended argument, if any - oparg = code[offset+1] + code[offset+2]*256 + extended_arg - extended_arg = 0 - if op == dis.EXTENDED_ARG: - extended_arg = oparg * L65536 - - # Fill token's attr/pattr fields - current_token.attr = oparg - if op in dis.hasconst: - current_token.pattr = repr(co.co_consts[oparg]) - elif op in dis.hasname: - current_token.pattr = co.co_names[oparg] - elif op in dis.hasjrel: - current_token.pattr = repr(offset + 3 + oparg) - elif op in dis.haslocal: - current_token.pattr = co.co_varnames[oparg] - elif op in dis.hascompare: - current_token.pattr = dis.cmp_op[oparg] - elif op in dis.hasfree: - if free is None: - free = co.co_cellvars + co.co_freevars - current_token.pattr = free[oparg] - if op == JUMP_ABSOLUTE: - current_token.pattr = current_token.attr = oparg - target = self.get_target(offset) - if target < offset: - if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \ - and offset not in self.not_continue: - op_name = 'CONTINUE' - else: - op_name = 'JUMP_BACK' - current_token.type = op_name - - tokens.append(current_token) - return tokens, customize - - def build_lines_data(self, code_obj): - """ - Generate various line-related helper data. - """ - # Offset: lineno pairs, only for offsets which start line. - # Locally we use list for more convenient iteration using indices - linestarts = list(dis.findlinestarts(code_obj)) - self.linestarts = dict(linestarts) - # Plain set with offsets of first ops on line - self.linestart_offsets = {a for (a, _) in linestarts} - # 'List-map' which shows line number of current op and offset of - # first op on following line, given offset of op as index - self.lines = lines = [] - LineTuple = namedtuple('LineTuple', ['l_no', 'next']) - # Iterate through available linestarts, and fill - # the data for all code offsets encountered until - # last linestart offset - _, prev_line_no = linestarts[0] - offset = 0 - for start_offset, line_no in linestarts[1:]: - while offset < start_offset: - lines.append(LineTuple(prev_line_no, start_offset)) - offset += 1 - prev_line_no = line_no - # Fill remaining offsets with reference to last line number - # and code length as start offset of following non-existing line - codelen = len(self.code) - while offset < codelen: - lines.append(LineTuple(prev_line_no, codelen)) - offset += 1 - - def build_prev_op(self): - """ - Compose 'list-map' which allows to jump to previous - op, given offset of current op as index. - """ - code = self.code - codelen = len(code) - self.prev_op = [0] - for offset in self.op_range(0, codelen): - op = code[offset] - for _ in range(self.op_size(op)): - self.prev_op.append(offset) - - def op_size(self, op): - """ - Return size of operator with its arguments - for given opcode . - """ - if op < dis.HAVE_ARGUMENT: - return 1 - else: - return 3 - - def find_jump_targets(self): - """ - Detect all offsets in a byte code which are jump targets. - - Return the list of offsets. - - This procedure is modelled after dis.findlables(), but here - for each target the number of jumps is counted. - """ - code = self.code - codelen = len(code) - self.structs = [{'type': 'root', - 'start': 0, - 'end': codelen-1}] - - # All loop entry points - # self.loops = [] - # Map fixed jumps to their real destination - self.fixed_jumps = {} - self.ignore_if = set() - self.build_statement_indices() - # Containers filled by detect_structure() - self.not_continue = set() - self.return_end_ifs = set() - - targets = {} - for offset in self.op_range(0, codelen): - op = code[offset] - - # Determine structures and fix jumps for 2.3+ - self.detect_structure(offset) - - if op >= dis.HAVE_ARGUMENT: - label = self.fixed_jumps.get(offset) - oparg = code[offset+1] + code[offset+2] * 256 - - if label is None: - if op in dis.hasjrel and op != FOR_ITER: - label = offset + 3 + oparg - elif op in dis.hasjabs: - if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): - if oparg > offset: - label = oparg - - if label is not None and label != -1: - targets[label] = targets.get(label, []) + [offset] - elif op == END_FINALLY and offset in self.fixed_jumps: - label = self.fixed_jumps[offset] - targets[label] = targets.get(label, []) + [offset] - return targets - - def build_statement_indices(self): - code = self.code - start = 0 - end = codelen = len(code) - - statement_opcodes = { - SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP, - SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH, - POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF, - STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME, - STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR, - RETURN_VALUE, RAISE_VARARGS, POP_TOP, PRINT_EXPR, - JUMP_ABSOLUTE - } - - statement_opcode_sequences = [(POP_JUMP_IF_FALSE, JUMP_FORWARD), (POP_JUMP_IF_FALSE, JUMP_ABSOLUTE), - (POP_JUMP_IF_TRUE, JUMP_FORWARD), (POP_JUMP_IF_TRUE, JUMP_ABSOLUTE)] - - designator_ops = { - STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR, - STORE_SUBSCR, UNPACK_SEQUENCE, JUMP_ABSOLUTE - } - - # Compose preliminary list of indices with statements, - # using plain statement opcodes - prelim = self.all_instr(start, end, statement_opcodes) - - # Initialize final container with statements with - # preliminnary data - stmts = self.stmts = set(prelim) - - # Same for opcode sequences - pass_stmts = set() - for sequence in statement_opcode_sequences: - for i in self.op_range(start, end-(len(sequence)+1)): - match = True - for elem in sequence: - if elem != code[i]: - match = False - break - i += self.op_size(code[i]) - - if match is True: - i = self.prev_op[i] - stmts.add(i) - pass_stmts.add(i) - - # Initialize statement list with the full data we've gathered so far - if pass_stmts: - stmt_offset_list = list(stmts) - stmt_offset_list.sort() - else: - stmt_offset_list = prelim - # 'List-map' which contains offset of start of - # next statement, when op offset is passed as index - self.next_stmt = slist = [] - last_stmt_offset = -1 - i = 0 - # Go through all statement offsets - for stmt_offset in stmt_offset_list: - # Process absolute jumps, but do not remove 'pass' statements - # from the set - if code[stmt_offset] == JUMP_ABSOLUTE and stmt_offset not in pass_stmts: - # If absolute jump occurs in forward direction or it takes off from the - # same line as previous statement, this is not a statement - target = self.get_target(stmt_offset) - if target > stmt_offset or self.lines[last_stmt_offset].l_no == self.lines[stmt_offset].l_no: - stmts.remove(stmt_offset) - continue - # Rewing ops till we encounter non-JA one - j = self.prev_op[stmt_offset] - while code[j] == JUMP_ABSOLUTE: - j = self.prev_op[j] - # If we got here, then it's list comprehension which - # is not a statement too - if code[j] == LIST_APPEND: - stmts.remove(stmt_offset) - continue - # Exclude ROT_TWO + POP_TOP - elif code[stmt_offset] == POP_TOP and code[self.prev_op[stmt_offset]] == ROT_TWO: - stmts.remove(stmt_offset) - continue - # Exclude FOR_ITER + designators - elif code[stmt_offset] in designator_ops: - j = self.prev_op[stmt_offset] - while code[j] in designator_ops: - j = self.prev_op[j] - if code[j] == FOR_ITER: - stmts.remove(stmt_offset) - continue - # Add to list another list with offset of current statement, - # equal to length of previous statement - slist += [stmt_offset] * (stmt_offset-i) - last_stmt_offset = stmt_offset - i = stmt_offset - # Finish filling the list for last statement - slist += [codelen] * (codelen-len(slist)) - - def get_target(self, offset): - """ - Get target offset for op located at given . - """ - op = self.code[offset] - target = self.code[offset+1] + self.code[offset+2] * 256 - if op in dis.hasjrel: - target += offset + 3 - return target - - def detect_structure(self, offset): - """ - Detect structures and their boundaries to fix optimizied jumps - in python2.3+ - """ - code = self.code - op = code[offset] - # Detect parent structure - parent = self.structs[0] - start = parent['start'] - end = parent['end'] - - # Pick inner-most parent for our offset - for struct in self.structs: - curent_start = struct['start'] - curent_end = struct['end'] - if (curent_start <= offset < curent_end) and (curent_start >= start and curent_end <= end): - start = curent_start - end = curent_end - parent = struct - - if op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE): - start = offset + self.op_size(op) - target = self.get_target(offset) - rtarget = self.restrict_to_parent(target, parent) - prev_op = self.prev_op - - # Do not let jump to go out of parent struct bounds - if target != rtarget and parent['type'] == 'and/or': - self.fixed_jumps[offset] = rtarget - return - - # Does this jump to right after another cond jump? - # If so, it's part of a larger conditional - if (code[prev_op[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP, - POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE)) and (target > offset): - self.fixed_jumps[offset] = prev_op[target] - self.structs.append({'type': 'and/or', - 'start': start, - 'end': prev_op[target]}) - return - # Is it an and inside if block - if op == POP_JUMP_IF_FALSE: - # Search for other POP_JUMP_IF_FALSE targetting the same op, - # in current statement, starting from current offset, and filter - # everything inside inner 'or' jumps and midline ifs - match = self.rem_or(start, self.next_stmt[offset], POP_JUMP_IF_FALSE, target) - match = self.remove_mid_line_ifs(match) - # If we still have any offsets in set, start working on it - if match: - if (code[prev_op[rtarget]] in (JUMP_FORWARD, JUMP_ABSOLUTE) and prev_op[rtarget] not in self.stmts and - self.restrict_to_parent(self.get_target(prev_op[rtarget]), parent) == rtarget): - if (code[prev_op[prev_op[rtarget]]] == JUMP_ABSOLUTE and self.remove_mid_line_ifs([offset]) and - target == self.get_target(prev_op[prev_op[rtarget]]) and - (prev_op[prev_op[rtarget]] not in self.stmts or self.get_target(prev_op[prev_op[rtarget]]) > prev_op[prev_op[rtarget]]) and - 1 == len(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target)))): - pass - elif (code[prev_op[prev_op[rtarget]]] == RETURN_VALUE and self.remove_mid_line_ifs([offset]) and - 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], - (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target))) | - set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], - (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE, JUMP_ABSOLUTE), - prev_op[rtarget], True)))))): - pass - else: - fix = None - jump_ifs = self.all_instr(start, self.next_stmt[offset], POP_JUMP_IF_FALSE) - last_jump_good = True - for j in jump_ifs: - if target == self.get_target(j): - if self.lines[j].next == j + 3 and last_jump_good: - fix = j - break - else: - last_jump_good = False - self.fixed_jumps[offset] = fix or match[-1] - return - else: - self.fixed_jumps[offset] = match[-1] - return - # op == POP_JUMP_IF_TRUE - else: - next = self.next_stmt[offset] - if prev_op[next] == offset: - pass - elif code[next] in (JUMP_FORWARD, JUMP_ABSOLUTE) and target == self.get_target(next): - if code[prev_op[next]] == POP_JUMP_IF_FALSE: - if code[next] == JUMP_FORWARD or target != rtarget or code[prev_op[prev_op[rtarget]]] not in (JUMP_ABSOLUTE, RETURN_VALUE): - self.fixed_jumps[offset] = prev_op[next] - return - elif (code[next] == JUMP_ABSOLUTE and code[target] in (JUMP_ABSOLUTE, JUMP_FORWARD) and - self.get_target(target) == self.get_target(next)): - self.fixed_jumps[offset] = prev_op[next] - return - - # Don't add a struct for a while test, it's already taken care of - if offset in self.ignore_if: - return - - if (code[prev_op[rtarget]] == JUMP_ABSOLUTE and prev_op[rtarget] in self.stmts and - prev_op[rtarget] != offset and prev_op[prev_op[rtarget]] != offset and - not (code[rtarget] == JUMP_ABSOLUTE and code[rtarget+3] == POP_BLOCK and code[prev_op[prev_op[rtarget]]] != JUMP_ABSOLUTE)): - rtarget = prev_op[rtarget] - - # Does the if jump just beyond a jump op, then this is probably an if statement - if code[prev_op[rtarget]] in (JUMP_ABSOLUTE, JUMP_FORWARD): - if_end = self.get_target(prev_op[rtarget]) - - # Is this a loop not an if? - if (if_end < prev_op[rtarget]) and (code[prev_op[if_end]] == SETUP_LOOP): - if(if_end > start): - return - - end = self.restrict_to_parent(if_end, parent) - - self.structs.append({'type': 'if-then', - 'start': start, - 'end': prev_op[rtarget]}) - self.not_continue.add(prev_op[rtarget]) - - if rtarget < end: - self.structs.append({'type': 'if-else', - 'start': rtarget, - 'end': end}) - elif code[prev_op[rtarget]] == RETURN_VALUE: - self.structs.append({'type': 'if-then', - 'start': start, - 'end': rtarget}) - self.return_end_ifs.add(prev_op[rtarget]) - - elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): - target = self.get_target(offset) - if target > offset: - unop_target = self.last_instr(offset, target, JUMP_FORWARD, target) - if unop_target and code[unop_target+3] != ROT_TWO: - self.fixed_jumps[offset] = unop_target - else: - self.fixed_jumps[offset] = self.restrict_to_parent(target, parent) - - def restrict_to_parent(self, target, parent): - """Restrict target to parent structure boundaries.""" - if not (parent['start'] < target < parent['end']): - target = parent['end'] - return target - - def rem_or(self, start, end, instr, target=None, include_beyond_target=False): - """ - Find offsets of all requested between and , - optionally ing specified offset, and return list found - offsets which are not within any POP_JUMP_IF_TRUE jumps. - """ - # Find all offsets of requested instructions - instr_offsets = self.all_instr(start, end, instr, target, include_beyond_target) - # Get all POP_JUMP_IF_TRUE (or) offsets - pjit_offsets = self.all_instr(start, end, POP_JUMP_IF_TRUE) - filtered = [] - for pjit_offset in pjit_offsets: - pjit_tgt = self.get_target(pjit_offset) - 3 - for instr_offset in instr_offsets: - if instr_offset <= pjit_offset or instr_offset >= pjit_tgt: - filtered.append(instr_offset) - instr_offsets = filtered - filtered = [] - return instr_offsets - - def remove_mid_line_ifs(self, ifs): - """ - Go through passed offsets, filtering ifs - located somewhere mid-line. - """ - filtered = [] - for if_ in ifs: - # For each offset, if line number of current and next op - # is the same - if self.lines[if_].l_no == self.lines[if_+3].l_no: - # Check if last op on line is PJIT or PJIF, and if it is - skip it - if self.code[self.prev_op[self.lines[if_].next]] in (POP_JUMP_IF_TRUE, POP_JUMP_IF_FALSE): - continue - filtered.append(if_) - return filtered +if __name__ == "__main__": + co = inspect.currentframe().f_code + tokens, customize = Scanner33().disassemble(co) + for t in tokens: + print(t) + pass diff --git a/uncompyle6/scanners/scanner33.py b/uncompyle6/scanners/scanner33.py new file mode 100644 index 00000000..dcadc364 --- /dev/null +++ b/uncompyle6/scanners/scanner33.py @@ -0,0 +1,608 @@ +# Copyright (c) 2015 by Rocky Bernstein +""" +Python 3.3 bytecode scanner/deparser + +This overlaps Python's 3.3's dis module, but it can be run from +Python 2 and other versions of Python. Also, we save token information +for later use in deparsing. +""" + +from __future__ import print_function + +import dis, inspect, marshal +from collections import namedtuple +from array import array + +from uncompyle6.scanner import Token, L65536 + + +# Get all the opcodes into globals +globals().update(dis.opmap) +from uncompyle6.opcodes.opcode_27 import * +import uncompyle6.scanner as scan + + +class Scanner33(scan.Scanner): + def __init__(self): + scan.Scanner.__init__(self, 3.2) # check + + def run(self, bytecode): + code_object = marshal.loads(bytecode) + tokens = self.tokenize(code_object) + return tokens + + def disassemble(self, co, classname=None): + """ + Convert code object into a sequence of tokens. + + The below is based on (an older version?) of Python dis.disassemble_bytes(). + """ + # Container for tokens + tokens = [] + customize = {} + self.code = code = array('B', co.co_code) + codelen = len(code) + self.build_lines_data(co) + self.build_prev_op() + + # self.lines contains (block,addrLastInstr) + if classname: + classname = '_' + classname.lstrip('_') + '__' + + def unmangle(name): + if name.startswith(classname) and name[-2:] != '__': + return name[len(classname) - 2:] + return name + + free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ] + names = [ unmangle(name) for name in co.co_names ] + varnames = [ unmangle(name) for name in co.co_varnames ] + else: + free = co.co_cellvars + co.co_freevars + names = co.co_names + varnames = co.co_varnames + pass + + # Scan for assertions. Later we will + # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those + # assertions + + self.load_asserts = set() + for i in self.op_range(0, codelen): + if self.code[i] == POP_JUMP_IF_TRUE and self.code[i+3] == LOAD_GLOBAL: + if names[self.get_argument(i+3)] == 'AssertionError': + self.load_asserts.add(i+3) + + # Get jump targets + # Format: {target offset: [jump offsets]} + jump_targets = self.find_jump_targets() + + # contains (code, [addrRefToCode]) + last_stmt = self.next_stmt[0] + i = self.next_stmt[last_stmt] + replace = {} + while i < codelen-1: + if self.lines[last_stmt].next > i: + if self.code[last_stmt] == PRINT_ITEM: + if self.code[i] == PRINT_ITEM: + replace[i] = 'PRINT_ITEM_CONT' + elif self.code[i] == PRINT_NEWLINE: + replace[i] = 'PRINT_NEWLINE_CONT' + last_stmt = i + i = self.next_stmt[i] + + imports = self.all_instr(0, codelen, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR)) + if len(imports) > 1: + last_import = imports[0] + for i in imports[1:]: + if self.lines[last_import].next > i: + if self.code[last_import] == IMPORT_NAME == self.code[i]: + replace[i] = 'IMPORT_NAME_CONT' + last_import = i + + # Initialize extended arg at 0. When extended arg op is encountered, + # variable preserved for next cycle and added as arg for next op + extended_arg = 0 + + for offset in self.op_range(0, codelen): + # Add jump target tokens + if offset in jump_targets: + jump_idx = 0 + for jump_offset in jump_targets[offset]: + tokens.append(Token('COME_FROM', None, repr(jump_offset), + offset='{}_{}'.format(offset, jump_idx))) + jump_idx += 1 + pass + pass + + op = code[offset] + op_name = opname[op] + + oparg = None; pattr = None + + if op >= HAVE_ARGUMENT: + oparg = self.get_argument(offset) + extended_arg + extended_arg = 0 + if op == EXTENDED_ARG: + extended_arg = oparg * scan.L65536 + continue + if op in hasconst: + const = co.co_consts[oparg] + if inspect.iscode(const): + oparg = const + if const.co_name == '': + assert op_name == 'LOAD_CONST' + op_name = 'LOAD_LAMBDA' + elif const.co_name == '': + op_name = 'LOAD_GENEXPR' + elif const.co_name == '': + op_name = 'LOAD_DICTCOMP' + elif const.co_name == '': + op_name = 'LOAD_SETCOMP' + # verify() uses 'pattr' for comparison, since 'attr' + # now holds Code(const) and thus can not be used + # for comparison (todo: think about changing this) + # pattr = 'code_object @ 0x%x %s->%s' %\ + # (id(const), const.co_filename, const.co_name) + pattr = '' + else: + pattr = const + elif op in hasname: + pattr = names[oparg] + elif op in hasjrel: + pattr = repr(offset + 3 + oparg) + elif op in hasjabs: + pattr = repr(oparg) + elif op in haslocal: + pattr = varnames[oparg] + elif op in hascompare: + pattr = cmp_op[oparg] + elif op in hasfree: + pattr = free[oparg] + + if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE, + UNPACK_SEQUENCE, + MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE, + CALL_FUNCTION_VAR, CALL_FUNCTION_KW, + CALL_FUNCTION_VAR_KW, RAISE_VARARGS + ): + # As of Python 2.5, values loaded via LOAD_CLOSURE are packed into + # a tuple before calling MAKE_CLOSURE. + if (op == BUILD_TUPLE and + self.code[self.prev_op[offset]] == LOAD_CLOSURE): + continue + else: + # CALL_FUNCTION OP renaming is done as a custom rule in parse3 + if op_name not in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR', + 'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'): + op_name = '%s_%d' % (op_name, oparg) + if op != BUILD_SLICE: + customize[op_name] = oparg + elif op == JUMP_ABSOLUTE: + target = self.get_target(offset) + if target < offset: + if (offset in self.stmts + and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) + and offset not in self.not_continue): + op_name = 'CONTINUE' + else: + op_name = 'JUMP_BACK' + + elif op == LOAD_GLOBAL: + if offset in self.load_asserts: + op_name = 'LOAD_ASSERT' + elif op == RETURN_VALUE: + if offset in self.return_end_ifs: + op_name = 'RETURN_END_IF' + + if offset in self.linestarts: + linestart = self.linestarts[offset] + else: + linestart = None + + if offset not in replace: + tokens.append(Token(op_name, oparg, pattr, offset, linestart)) + else: + tokens.append(Token(replace[offset], oparg, pattr, offset, linestart)) + pass + return tokens, customize + + def build_lines_data(self, code_obj): + """ + Generate various line-related helper data. + """ + # Offset: lineno pairs, only for offsets which start line. + # Locally we use list for more convenient iteration using indices + linestarts = list(dis.findlinestarts(code_obj)) + self.linestarts = dict(linestarts) + # Plain set with offsets of first ops on line + self.linestart_offsets = {a for (a, _) in linestarts} + # 'List-map' which shows line number of current op and offset of + # first op on following line, given offset of op as index + self.lines = lines = [] + LineTuple = namedtuple('LineTuple', ['l_no', 'next']) + # Iterate through available linestarts, and fill + # the data for all code offsets encountered until + # last linestart offset + _, prev_line_no = linestarts[0] + offset = 0 + for start_offset, line_no in linestarts[1:]: + while offset < start_offset: + lines.append(LineTuple(prev_line_no, start_offset)) + offset += 1 + prev_line_no = line_no + # Fill remaining offsets with reference to last line number + # and code length as start offset of following non-existing line + codelen = len(self.code) + while offset < codelen: + lines.append(LineTuple(prev_line_no, codelen)) + offset += 1 + + def build_prev_op(self): + """ + Compose 'list-map' which allows to jump to previous + op, given offset of current op as index. + """ + code = self.code + codelen = len(code) + self.prev_op = [0] + for offset in self.op_range(0, codelen): + op = code[offset] + for _ in range(self.op_size(op)): + self.prev_op.append(offset) + + def op_size(self, op): + """ + Return size of operator with its arguments + for given opcode . + """ + if op < dis.HAVE_ARGUMENT: + return 1 + else: + return 3 + + def find_jump_targets(self): + """ + Detect all offsets in a byte code which are jump targets. + + Return the list of offsets. + + This procedure is modelled after dis.findlables(), but here + for each target the number of jumps is counted. + """ + code = self.code + codelen = len(code) + self.structs = [{'type': 'root', + 'start': 0, + 'end': codelen-1}] + + # All loop entry points + # self.loops = [] + # Map fixed jumps to their real destination + self.fixed_jumps = {} + self.ignore_if = set() + self.build_statement_indices() + # Containers filled by detect_structure() + self.not_continue = set() + self.return_end_ifs = set() + + targets = {} + for offset in self.op_range(0, codelen): + op = code[offset] + + # Determine structures and fix jumps for 2.3+ + self.detect_structure(offset) + + if op >= dis.HAVE_ARGUMENT: + label = self.fixed_jumps.get(offset) + oparg = code[offset+1] + code[offset+2] * 256 + + if label is None: + if op in dis.hasjrel and op != FOR_ITER: + label = offset + 3 + oparg + elif op in dis.hasjabs: + if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): + if oparg > offset: + label = oparg + + if label is not None and label != -1: + targets[label] = targets.get(label, []) + [offset] + elif op == END_FINALLY and offset in self.fixed_jumps: + label = self.fixed_jumps[offset] + targets[label] = targets.get(label, []) + [offset] + return targets + + # FIXME Create and move to scanner3 + def build_statement_indices(self): + code = self.code + start = 0 + end = codelen = len(code) + + statement_opcodes = { + SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP, + SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH, + POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF, + STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME, + STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR, + RETURN_VALUE, RAISE_VARARGS, POP_TOP, PRINT_EXPR, + JUMP_ABSOLUTE + } + + statement_opcode_sequences = [(POP_JUMP_IF_FALSE, JUMP_FORWARD), (POP_JUMP_IF_FALSE, JUMP_ABSOLUTE), + (POP_JUMP_IF_TRUE, JUMP_FORWARD), (POP_JUMP_IF_TRUE, JUMP_ABSOLUTE)] + + designator_ops = { + STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR, + STORE_SUBSCR, UNPACK_SEQUENCE, JUMP_ABSOLUTE + } + + # Compose preliminary list of indices with statements, + # using plain statement opcodes + prelim = self.all_instr(start, end, statement_opcodes) + + # Initialize final container with statements with + # preliminnary data + stmts = self.stmts = set(prelim) + + # Same for opcode sequences + pass_stmts = set() + for sequence in statement_opcode_sequences: + for i in self.op_range(start, end-(len(sequence)+1)): + match = True + for elem in sequence: + if elem != code[i]: + match = False + break + i += self.op_size(code[i]) + + if match is True: + i = self.prev_op[i] + stmts.add(i) + pass_stmts.add(i) + + # Initialize statement list with the full data we've gathered so far + if pass_stmts: + stmt_offset_list = list(stmts) + stmt_offset_list.sort() + else: + stmt_offset_list = prelim + # 'List-map' which contains offset of start of + # next statement, when op offset is passed as index + self.next_stmt = slist = [] + last_stmt_offset = -1 + i = 0 + # Go through all statement offsets + for stmt_offset in stmt_offset_list: + # Process absolute jumps, but do not remove 'pass' statements + # from the set + if code[stmt_offset] == JUMP_ABSOLUTE and stmt_offset not in pass_stmts: + # If absolute jump occurs in forward direction or it takes off from the + # same line as previous statement, this is not a statement + target = self.get_target(stmt_offset) + if target > stmt_offset or self.lines[last_stmt_offset].l_no == self.lines[stmt_offset].l_no: + stmts.remove(stmt_offset) + continue + # Rewing ops till we encounter non-JA one + j = self.prev_op[stmt_offset] + while code[j] == JUMP_ABSOLUTE: + j = self.prev_op[j] + # If we got here, then it's list comprehension which + # is not a statement too + if code[j] == LIST_APPEND: + stmts.remove(stmt_offset) + continue + # Exclude ROT_TWO + POP_TOP + elif code[stmt_offset] == POP_TOP and code[self.prev_op[stmt_offset]] == ROT_TWO: + stmts.remove(stmt_offset) + continue + # Exclude FOR_ITER + designators + elif code[stmt_offset] in designator_ops: + j = self.prev_op[stmt_offset] + while code[j] in designator_ops: + j = self.prev_op[j] + if code[j] == FOR_ITER: + stmts.remove(stmt_offset) + continue + # Add to list another list with offset of current statement, + # equal to length of previous statement + slist += [stmt_offset] * (stmt_offset-i) + last_stmt_offset = stmt_offset + i = stmt_offset + # Finish filling the list for last statement + slist += [codelen] * (codelen-len(slist)) + + # FIXME Create and move to scanner3 + def get_target(self, offset): + """ + Get target offset for op located at given . + """ + op = self.code[offset] + target = self.code[offset+1] + self.code[offset+2] * 256 + if op in dis.hasjrel: + target += offset + 3 + return target + + # FIXME Create and move to scanner3 + def detect_structure(self, offset): + """ + Detect structures and their boundaries to fix optimizied jumps + in python2.3+ + """ + code = self.code + op = code[offset] + # Detect parent structure + parent = self.structs[0] + start = parent['start'] + end = parent['end'] + + # Pick inner-most parent for our offset + for struct in self.structs: + curent_start = struct['start'] + curent_end = struct['end'] + if (curent_start <= offset < curent_end) and (curent_start >= start and curent_end <= end): + start = curent_start + end = curent_end + parent = struct + + if op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE): + start = offset + self.op_size(op) + target = self.get_target(offset) + rtarget = self.restrict_to_parent(target, parent) + prev_op = self.prev_op + + # Do not let jump to go out of parent struct bounds + if target != rtarget and parent['type'] == 'and/or': + self.fixed_jumps[offset] = rtarget + return + + # Does this jump to right after another cond jump? + # If so, it's part of a larger conditional + if (code[prev_op[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP, + POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE)) and (target > offset): + self.fixed_jumps[offset] = prev_op[target] + self.structs.append({'type': 'and/or', + 'start': start, + 'end': prev_op[target]}) + return + # Is it an and inside if block + if op == POP_JUMP_IF_FALSE: + # Search for other POP_JUMP_IF_FALSE targetting the same op, + # in current statement, starting from current offset, and filter + # everything inside inner 'or' jumps and midline ifs + match = self.rem_or(start, self.next_stmt[offset], POP_JUMP_IF_FALSE, target) + match = self.remove_mid_line_ifs(match) + # If we still have any offsets in set, start working on it + if match: + if (code[prev_op[rtarget]] in (JUMP_FORWARD, JUMP_ABSOLUTE) and prev_op[rtarget] not in self.stmts and + self.restrict_to_parent(self.get_target(prev_op[rtarget]), parent) == rtarget): + if (code[prev_op[prev_op[rtarget]]] == JUMP_ABSOLUTE and self.remove_mid_line_ifs([offset]) and + target == self.get_target(prev_op[prev_op[rtarget]]) and + (prev_op[prev_op[rtarget]] not in self.stmts or self.get_target(prev_op[prev_op[rtarget]]) > prev_op[prev_op[rtarget]]) and + 1 == len(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target)))): + pass + elif (code[prev_op[prev_op[rtarget]]] == RETURN_VALUE and self.remove_mid_line_ifs([offset]) and + 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], + (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target))) | + set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], + (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE, JUMP_ABSOLUTE), + prev_op[rtarget], True)))))): + pass + else: + fix = None + jump_ifs = self.all_instr(start, self.next_stmt[offset], POP_JUMP_IF_FALSE) + last_jump_good = True + for j in jump_ifs: + if target == self.get_target(j): + if self.lines[j].next == j + 3 and last_jump_good: + fix = j + break + else: + last_jump_good = False + self.fixed_jumps[offset] = fix or match[-1] + return + else: + self.fixed_jumps[offset] = match[-1] + return + # op == POP_JUMP_IF_TRUE + else: + next = self.next_stmt[offset] + if prev_op[next] == offset: + pass + elif code[next] in (JUMP_FORWARD, JUMP_ABSOLUTE) and target == self.get_target(next): + if code[prev_op[next]] == POP_JUMP_IF_FALSE: + if code[next] == JUMP_FORWARD or target != rtarget or code[prev_op[prev_op[rtarget]]] not in (JUMP_ABSOLUTE, RETURN_VALUE): + self.fixed_jumps[offset] = prev_op[next] + return + elif (code[next] == JUMP_ABSOLUTE and code[target] in (JUMP_ABSOLUTE, JUMP_FORWARD) and + self.get_target(target) == self.get_target(next)): + self.fixed_jumps[offset] = prev_op[next] + return + + # Don't add a struct for a while test, it's already taken care of + if offset in self.ignore_if: + return + + if (code[prev_op[rtarget]] == JUMP_ABSOLUTE and prev_op[rtarget] in self.stmts and + prev_op[rtarget] != offset and prev_op[prev_op[rtarget]] != offset and + not (code[rtarget] == JUMP_ABSOLUTE and code[rtarget+3] == POP_BLOCK and code[prev_op[prev_op[rtarget]]] != JUMP_ABSOLUTE)): + rtarget = prev_op[rtarget] + + # Does the if jump just beyond a jump op, then this is probably an if statement + if code[prev_op[rtarget]] in (JUMP_ABSOLUTE, JUMP_FORWARD): + if_end = self.get_target(prev_op[rtarget]) + + # Is this a loop not an if? + if (if_end < prev_op[rtarget]) and (code[prev_op[if_end]] == SETUP_LOOP): + if(if_end > start): + return + + end = self.restrict_to_parent(if_end, parent) + + self.structs.append({'type': 'if-then', + 'start': start, + 'end': prev_op[rtarget]}) + self.not_continue.add(prev_op[rtarget]) + + if rtarget < end: + self.structs.append({'type': 'if-else', + 'start': rtarget, + 'end': end}) + elif code[prev_op[rtarget]] == RETURN_VALUE: + self.structs.append({'type': 'if-then', + 'start': start, + 'end': rtarget}) + self.return_end_ifs.add(prev_op[rtarget]) + + elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): + target = self.get_target(offset) + if target > offset: + unop_target = self.last_instr(offset, target, JUMP_FORWARD, target) + if unop_target and code[unop_target+3] != ROT_TWO: + self.fixed_jumps[offset] = unop_target + else: + self.fixed_jumps[offset] = self.restrict_to_parent(target, parent) + + # FIXME Create and move to scanner3 + def rem_or(self, start, end, instr, target=None, include_beyond_target=False): + """ + Find offsets of all requested between and , + optionally ing specified offset, and return list found + offsets which are not within any POP_JUMP_IF_TRUE jumps. + """ + # Find all offsets of requested instructions + instr_offsets = self.all_instr(start, end, instr, target, include_beyond_target) + # Get all POP_JUMP_IF_TRUE (or) offsets + pjit_offsets = self.all_instr(start, end, POP_JUMP_IF_TRUE) + filtered = [] + for pjit_offset in pjit_offsets: + pjit_tgt = self.get_target(pjit_offset) - 3 + for instr_offset in instr_offsets: + if instr_offset <= pjit_offset or instr_offset >= pjit_tgt: + filtered.append(instr_offset) + instr_offsets = filtered + filtered = [] + return instr_offsets + + # FIXME Create and move to scanner3 + def remove_mid_line_ifs(self, ifs): + """ + Go through passed offsets, filtering ifs + located somewhere mid-line. + """ + filtered = [] + for if_ in ifs: + # For each offset, if line number of current and next op + # is the same + if self.lines[if_].l_no == self.lines[if_+3].l_no: + # Check if last op on line is PJIT or PJIF, and if it is - skip it + if self.code[self.prev_op[self.lines[if_].next]] in (POP_JUMP_IF_TRUE, POP_JUMP_IF_FALSE): + continue + filtered.append(if_) + return filtered + +if __name__ == "__main__": + co = inspect.currentframe().f_code + tokens, customize = Scanner33().disassemble(co) + for t in tokens: + print(t) + pass diff --git a/uncompyle6/scanners/scanner34.py b/uncompyle6/scanners/scanner34.py index be1ce321..2314eacd 100644 --- a/uncompyle6/scanners/scanner34.py +++ b/uncompyle6/scanners/scanner34.py @@ -1,9 +1,4 @@ -# Copyright (c) 1999 John Aycock -# Copyright (c) 2000-2002 by hartmut Goebel -# Copyright (c) 2005 by Dan Pascu # Copyright (c) 2015 by Rocky Bernstein -# -# See main module for license. """ Python 3.4 bytecode scanner/deparser @@ -30,6 +25,7 @@ globals().update(dis.opmap) from uncompyle6.opcodes.opcode_34 import * import uncompyle6.scanner as scan +import uncompyle6.scanners.scanner33 as scan33 class Scanner34(scan.Scanner): @@ -171,9 +167,11 @@ class Scanner34(scan.Scanner): pass return tokens, {} + # FIXME Create and move to scanner3 def disassemble_cross_version(self, co, classname=None): - return scan.scanner32().disassemble(self, co, classname) + return scan33.Scanner33().disassemble(co, classname) + # FIXME Create and move to scanner3 def build_lines_data(self, code_obj): """ Generate various line-related helper data. @@ -205,6 +203,7 @@ class Scanner34(scan.Scanner): lines.append(LineTuple(prev_line_no, codelen)) offset += 1 + # FIXME Create and move to scanner3 def build_prev_op(self): """ Compose 'list-map' which allows to jump to previous @@ -218,6 +217,7 @@ class Scanner34(scan.Scanner): for _ in range(self.op_size(op)): self.prev_op.append(offset) + # FIXME Create and move to scanner3 def op_size(self, op): """ Return size of operator with its arguments @@ -377,6 +377,7 @@ class Scanner34(scan.Scanner): # Finish filling the list for last statement slist += [codelen] * (codelen-len(slist)) + # FIXME Create and move to scanner3 def get_target(self, offset): """ Get target offset for op located at given . @@ -414,6 +415,7 @@ class Scanner34(scan.Scanner): elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY): count_SETUP_ += 1 + # FIXME Create and move to scanner3 def detect_structure(self, offset): """ Detect structures and their boundaries to fix optimizied jumps @@ -596,12 +598,6 @@ class Scanner34(scan.Scanner): else: self.fixed_jumps[offset] = self.restrict_to_parent(target, parent) - def restrict_to_parent(self, target, parent): - """Restrict target to parent structure boundaries.""" - if not (parent['start'] < target < parent['end']): - target = parent['end'] - return target - def rem_or(self, start, end, instr, target=None, include_beyond_target=False): """ Find offsets of all requested between and , diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 9bf665fe..8c711aba 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -1664,7 +1664,7 @@ if __name__ == '__main__': def deparse_test(co): "This is a docstring" sys_version = sys.version_info.major + (sys.version_info.minor / 10.0) - deparsed = deparse_code(sys_version, co, showasm=False, showast=False) + deparsed = deparse_code(sys_version, co, showasm=True, showast=True) # deparsed = deparse_code(sys_version, co, showasm=False, showast=False, # showgrammar=True) print(deparsed.text)