diff --git a/Makefile b/Makefile index f934a3b9..97629b9c 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ check-short: pytest $(MAKE) -C test check-short #: Tests for Python 2.7, 3.3 and 3.4 -check-2.7 check-3.3 check-3.4: pytest +check-2.6 check-2.7 check-3.3 check-3.4: pytest $(MAKE) -C test $@ #: Tests for Python 3.2 and 3.5 - pytest doesn't work here diff --git a/__pkginfo__.py b/__pkginfo__.py index c800ee15..36886e87 100644 --- a/__pkginfo__.py +++ b/__pkginfo__.py @@ -35,6 +35,7 @@ classifiers = ['Development Status :: 5 - Production/Stable', 'Programming Language :: Python :: 2.5', 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.2', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', diff --git a/admin-tools/pyenv-newer-versions b/admin-tools/pyenv-newer-versions index 321ccc58..1e2fe497 100644 --- a/admin-tools/pyenv-newer-versions +++ b/admin-tools/pyenv-newer-versions @@ -5,4 +5,4 @@ if [[ $0 == ${BASH_SOURCE[0]} ]] ; then echo "This script should be *sourced* rather than run directly through bash" exit 1 fi -export PYVERSIONS='3.5.5 3.6.4 2.6.9 3.3.7 2.7.14 3.4.8' +export PYVERSIONS='3.5.5 3.6.4 2.6.9 3.3.7 2.7.14 3.2.6 3.4.8' diff --git a/pytest/test_fjt.py b/pytest/test_fjt.py index 16bf76fe..2b200127 100644 --- a/pytest/test_fjt.py +++ b/pytest/test_fjt.py @@ -1,8 +1,6 @@ #!/usr/bin/env python from uncompyle6 import PYTHON_VERSION, IS_PYPY from uncompyle6.scanner import get_scanner -from xdis.bytecode import Bytecode -from array import array def bug(state, slotstate): if state: if slotstate is not None: @@ -25,14 +23,7 @@ def test_if_in_for(): code = bug.func_code scan = get_scanner(PYTHON_VERSION) if 2.7 <= PYTHON_VERSION <= 3.0 and not IS_PYPY: - n = scan.setup_code(code) - bytecode = Bytecode(code, scan.opc) - scan.build_lines_data(code, n) - scan.insts = list(bytecode) - scan.offset2inst_index = {} - for i, inst in enumerate(scan.insts): - scan.offset2inst_index[inst.offset] = i - scan.build_prev_op(n) + scan.build_instructions(code) fjt = scan.find_jump_targets(False) ## FIXME: the data below is wrong. @@ -47,14 +38,7 @@ def test_if_in_for(): # {'start': 62, 'end': 63, 'type': 'for-else'}] code = bug_loop.__code__ - n = scan.setup_code(code) - bytecode = Bytecode(code, scan.opc) - scan.build_lines_data(code, n) - scan.insts = list(bytecode) - scan.build_prev_op(n) - scan.offset2inst_index = {} - for i, inst in enumerate(scan.insts): - scan.offset2inst_index[inst.offset] = i + scan.build_instructions(code) fjt = scan.find_jump_targets(False) assert{64: [42], 67: [42, 42], 42: [16, 41], 19: [6]} == fjt assert scan.structs == [ @@ -68,14 +52,7 @@ def test_if_in_for(): {'start': 48, 'end': 67, 'type': 'while-loop'}] elif 3.2 < PYTHON_VERSION <= 3.4: - bytecode = Bytecode(code, scan.opc) - scan.code = array('B', code.co_code) - scan.lines = scan.build_lines_data(code) - scan.build_prev_op() - scan.insts = list(bytecode) - scan.offset2inst_index = {} - for i, inst in enumerate(scan.insts): - scan.offset2inst_index[inst.offset] = i + scan.build_instructions(code) fjt = scan.find_jump_targets(False) assert {69: [66], 63: [18]} == fjt assert scan.structs == \ @@ -85,5 +62,6 @@ def test_if_in_for(): {'end': 59, 'type': 'for-loop', 'start': 31}, {'end': 63, 'type': 'for-else', 'start': 62}] else: - assert True, "FIXME: should note fixed" + print("FIXME: should fix for %s" % PYTHON_VERSION) + assert True return diff --git a/pytest/test_grammar.py b/pytest/test_grammar.py index cd486ea9..aebbd463 100644 --- a/pytest/test_grammar.py +++ b/pytest/test_grammar.py @@ -20,13 +20,17 @@ def test_grammar(): # We have custom rules that create the below expect_lhs = set(['pos_arg', 'get_iter', 'attribute']) - unused_rhs = set(['list', 'mkfunc', 'dict', + unused_rhs = set(['list', 'mkfunc', 'mklambda', 'unpack',]) expect_right_recursive = set([('designList', ('store', 'DUP_TOP', 'designList'))]) - expect_lhs.add('kvlist') - expect_lhs.add('kv3') + + if PYTHON_VERSION > 2.6: + expect_lhs.add('kvlist') + expect_lhs.add('kv3') + unused_rhs.add('dict') + if PYTHON3: expect_lhs.add('load_genexpr') @@ -85,6 +89,8 @@ def test_grammar(): """.split()) if 2.6 <= PYTHON_VERSION <= 2.7: opcode_set = set(s.opc.opname).union(ignore_set) + if PYTHON_VERSION == 2.6: + opcode_set.add("THEN") check_tokens(tokens, opcode_set) elif PYTHON_VERSION == 3.4: ignore_set.add('LOAD_CLASSNAME') diff --git a/pytest/test_single_compile.py b/pytest/test_single_compile.py index 415c7d80..d590b209 100644 --- a/pytest/test_single_compile.py +++ b/pytest/test_single_compile.py @@ -1,6 +1,6 @@ from uncompyle6 import PYTHON_VERSION, deparse_code -if PYTHON_VERSION >= 2.5: +if PYTHON_VERSION >= 2.6: def test_single_mode(): single_expressions = ( 'i = 1', diff --git a/test/bytecode_2.7/10_del.pyc b/test/bytecode_2.7/10_del.pyc deleted file mode 100644 index 1b14c656..00000000 Binary files a/test/bytecode_2.7/10_del.pyc and /dev/null differ diff --git a/test/bytecode_3.5_run/04_call_function.pyc b/test/bytecode_3.5_run/04_call_function.pyc index 99d98c11..25f663e6 100644 Binary files a/test/bytecode_3.5_run/04_call_function.pyc and b/test/bytecode_3.5_run/04_call_function.pyc differ diff --git a/test/run-and-email.sh b/test/run-and-email.sh index d48c9603..9ae2fb45 100755 --- a/test/run-and-email.sh +++ b/test/run-and-email.sh @@ -14,7 +14,7 @@ function displaytime { printf '%d seconds\n' $S } -PYVERSION=${PYVERSION:-"3.5.5 2.7.14 3.4.8 2.6.9"} +PYVERSION=${PYVERSION:-"3.5.5 2.7.14 3.2.6 3.4.8 2.6.9 3.6.4"} # PYVERSION=${PYVERSION:-"3.5.5"} USER=${USER:-rocky} @@ -28,6 +28,10 @@ for VERSION in $PYVERSION ; do if [[ $VERSION == '3.5.5' ]] ; then MAX_TESTS=224 + elif [[ $VERSION == '3.2.6' ]] ; then + MAX_TESTS=75 + elif [[ $VERSION == '3.6.4' ]] ; then + MAX_TESTS=400 else MAX_TESTS=800 fi diff --git a/test/simple_source/bug35/04_call_function.py b/test/simple_source/bug35/04_call_function.py index a5900f6a..9f5b60d5 100644 --- a/test/simple_source/bug35/04_call_function.py +++ b/test/simple_source/bug35/04_call_function.py @@ -58,3 +58,8 @@ def __call__(self, *args, **kwds): # From 3.6.4 shutil def unpack_archive(func, filename, dict, format_info, extract_dir=None): func(filename, extract_dir, **dict(format_info[2])) + +# From 3.5.5 test_xrdrlib.py +import xdrlib +def assertRaisesConversion(self, *args): + self.assertRaises(xdrlib.ConversionError, *args) diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index a32e75d6..d166bf96 100755 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -21,15 +21,22 @@ scanner/ingestion module. From here we call various version-specific scanners, e.g. for Python 2.7 or 3.4. """ +from array import array import sys -from uncompyle6 import PYTHON3, IS_PYPY +from uncompyle6 import PYTHON3, IS_PYPY, PYTHON_VERSION from uncompyle6.scanners.tok import Token import xdis -from xdis.bytecode import instruction_size, extended_arg_val, next_offset +from xdis.bytecode import ( + Bytecode, instruction_size, extended_arg_val, next_offset) from xdis.magics import canonic_python_version from xdis.util import code2num +if PYTHON_VERSION < 2.6: + from xdis.namedtuple24 import namedtuple +else: + from collections import namedtuple + # The byte code versions we support. # Note: these all have to be floats PYTHON_VERSIONS = frozenset((1.5, @@ -88,11 +95,73 @@ class Scanner(object): # FIXME: This weird Python2 behavior is not Python3 self.resetTokenClass() - def opname_for_offset(self, offset): - return self.opc.opname[self.code[offset]] + def build_instructions(self, co): + """ + Create a list of instructions (a structured object rather than + an array of bytes) and store that in self.insts + """ + # FIXME: remove this when all subsidiary functions have been removed. + # We should be able to get everything from the self.insts list. + self.code = array('B', co.co_code) - def op_name(self, op): - return self.opc.opname[op] + bytecode = Bytecode(co, self.opc) + self.build_prev_op() + self.insts = self.remove_extended_args(list(bytecode)) + self.lines = self.build_lines_data(co) + self.offset2inst_index = {} + for i, inst in enumerate(self.insts): + self.offset2inst_index[inst.offset] = i + + return bytecode + + def build_lines_data(self, code_obj): + """ + Generate various line-related helper data. + """ + + # Offset: lineno pairs, only for offsets which start line. + # Locally we use list for more convenient iteration using indices + linestarts = list(self.opc.findlinestarts(code_obj)) + self.linestarts = dict(linestarts) + + # 'List-map' which shows line number of current op and offset of + # first op on following line, given offset of op as index + lines = [] + LineTuple = namedtuple('LineTuple', ['l_no', 'next']) + + # Iterate through available linestarts, and fill + # the data for all code offsets encountered until + # last linestart offset + _, prev_line_no = linestarts[0] + offset = 0 + for start_offset, line_no in linestarts[1:]: + while offset < start_offset: + lines.append(LineTuple(prev_line_no, start_offset)) + offset += 1 + prev_line_no = line_no + + # Fill remaining offsets with reference to last line number + # and code length as start offset of following non-existing line + codelen = len(self.code) + while offset < codelen: + lines.append(LineTuple(prev_line_no, codelen)) + offset += 1 + return lines + + def build_prev_op(self): + """ + Compose 'list-map' which allows to jump to previous + op, given offset of current op as index. + """ + code = self.code + codelen = len(code) + # 2.x uses prev 3.x uses prev_op. Sigh + # Until we get this sorted out. + self.prev = self.prev_op = [0] + for offset in self.op_range(0, codelen): + op = code[offset] + for _ in range(instruction_size(op, self.opc)): + self.prev_op.append(offset) def is_jump_forward(self, offset): """ @@ -330,6 +399,12 @@ class Scanner(object): return result + def opname_for_offset(self, offset): + return self.opc.opname[self.code[offset]] + + def op_name(self, op): + return self.opc.opname[op] + def op_range(self, start, end): """ Iterate through positions of opcodes, skipping @@ -339,11 +414,50 @@ class Scanner(object): yield start start += instruction_size(self.code[start], self.opc) + def remove_extended_args(self, instructions): + """Go through instructions removing extended ARG. + get_instruction_bytes previously adjusted the operand values + to account for these""" + new_instructions = [] + last_was_extarg = False + n = len(instructions) + for i, inst in enumerate(instructions): + if (inst.opname == 'EXTENDED_ARG' and + i+1 < n and instructions[i+1].opname != 'MAKE_FUNCTION'): + last_was_extarg = True + starts_line = inst.starts_line + is_jump_target = inst.is_jump_target + offset = inst.offset + continue + if last_was_extarg: + + # j = self.stmts.index(inst.offset) + # self.lines[j] = offset + + new_inst= inst._replace(starts_line=starts_line, + is_jump_target=is_jump_target, + offset=offset) + inst = new_inst + if i < n: + new_prev = self.prev_op[instructions[i].offset] + j = instructions[i+1].offset + old_prev = self.prev_op[j] + while self.prev_op[j] == old_prev and j < n: + self.prev_op[j] = new_prev + j += 1 + + last_was_extarg = False + new_instructions.append(inst) + return new_instructions + def remove_mid_line_ifs(self, ifs): """ Go through passed offsets, filtering ifs located somewhere mid-line. """ + + # FIXME: this doesn't work for Python 3.6+ + filtered = [] for i in ifs: # For each offset, if line number of current and next op @@ -411,7 +525,7 @@ def get_scanner(version, is_pypy=False, show_asm=None): if __name__ == "__main__": import inspect, uncompyle6 co = inspect.currentframe().f_code - scanner = get_scanner('2.7.13', True) - scanner = get_scanner(sys.version[:5], False) + # scanner = get_scanner('2.7.13', True) + # scanner = get_scanner(sys.version[:5], False) scanner = get_scanner(uncompyle6.PYTHON_VERSION, IS_PYPY, True) - tokens, customize = scanner.ingest(co, {}) + tokens, customize = scanner.ingest(co, {}, show_asm='after') diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 05645a32..8a76ad1e 100644 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -40,16 +40,15 @@ if PYTHON_VERSION < 2.6: else: from collections import namedtuple -from array import array from copy import copy from xdis.code import iscode from xdis.bytecode import ( - Bytecode, op_has_argument, instruction_size, + op_has_argument, instruction_size, _get_const_info) from xdis.util import code2num -from uncompyle6.scanner import Scanner +from uncompyle6.scanner import Scanner, Token class Scanner2(Scanner): def __init__(self, version, show_asm=None, is_pypy=False): @@ -61,6 +60,57 @@ class Scanner2(Scanner): self.genexpr_name = '' self.load_asserts = set([]) + # Create opcode classification sets + # Note: super initilization above initializes self.opc + + # Ops that start SETUP_ ... We will COME_FROM with these names + # Some blocks and END_ statements. And they can start + # a new statement + + self.statement_opcodes = frozenset([ + self.opc.SETUP_LOOP, self.opc.BREAK_LOOP, + self.opc.SETUP_FINALLY, self.opc.END_FINALLY, + self.opc.SETUP_EXCEPT, self.opc.POP_BLOCK, + self.opc.STORE_FAST, self.opc.DELETE_FAST, + self.opc.STORE_DEREF, self.opc.STORE_GLOBAL, + self.opc.DELETE_GLOBAL, self.opc.STORE_NAME, + self.opc.DELETE_NAME, self.opc.STORE_ATTR, + self.opc.DELETE_ATTR, self.opc.STORE_SUBSCR, + self.opc.DELETE_SUBSCR, self.opc.RETURN_VALUE, + self.opc.RAISE_VARARGS, self.opc.POP_TOP, + self.opc.PRINT_EXPR, self.opc.PRINT_ITEM, + self.opc.PRINT_NEWLINE, self.opc.PRINT_ITEM_TO, + self.opc.PRINT_NEWLINE_TO, self.opc.CONTINUE_LOOP, + self.opc.JUMP_ABSOLUTE, self.opc.EXEC_STMT, + ]) + + # Opcodes that can start a "store" non-terminal. + # FIXME: JUMP_ABSOLUTE is weird. What's up with that? + self.designator_ops = frozenset([ + self.opc.STORE_FAST, self.opc.STORE_NAME, + self.opc.STORE_GLOBAL, self.opc.STORE_DEREF, self.opc.STORE_ATTR, + self.opc.STORE_SLICE_0, self.opc.STORE_SLICE_1, self.opc.STORE_SLICE_2, + self.opc.STORE_SLICE_3, self.opc.STORE_SUBSCR, self.opc.UNPACK_SEQUENCE, + self.opc.JUMP_ABSOLUTE + ]) + + # Python 2.7 has POP_JUMP_IF_{TRUE,FALSE}_OR_POP but < 2.7 doesn't + # Add an empty set make processing more uniform. + self.pop_jump_if_or_pop = frozenset([]) + + # opcodes with expect a variable number pushed values whose + # count is in the opcode. For parsing we generally change the + # opcode name to include that number. + self.varargs_ops = frozenset([ + self.opc.BUILD_LIST, self.opc.BUILD_TUPLE, + self.opc.BUILD_SLICE, self.opc.UNPACK_SEQUENCE, + self.opc.MAKE_FUNCTION, self.opc.CALL_FUNCTION, + self.opc.MAKE_CLOSURE, self.opc.CALL_FUNCTION_VAR, + self.opc.CALL_FUNCTION_KW, self.opc.CALL_FUNCTION_VAR_KW, + self.opc.DUP_TOPX, self.opc.RAISE_VARARGS]) + + + @staticmethod def unmangle_name(name, classname): """Remove __ from the end of _name_ if it starts with __classname__ @@ -110,7 +160,8 @@ class Scanner2(Scanner): if not show_asm: show_asm = self.show_asm - bytecode = Bytecode(co, self.opc) + bytecode = self.build_instructions(co) + # show_asm = 'after' if show_asm in ('both', 'before'): for instr in bytecode.get_instructions(co): @@ -121,21 +172,10 @@ class Scanner2(Scanner): # "customize" is in the process of going away here customize = {} - if self.is_pypy: customize['PyPy'] = 0 - Token = self.Token # shortcut - - codelen = self.setup_code(co) - - self.build_lines_data(co, codelen) - self.build_prev_op(codelen) - - self.insts = list(bytecode) - self.offset2inst_index = {} - for i, inst in enumerate(self.insts): - self.offset2inst_index[inst.offset] = i + codelen = len(self.code) free, names, varnames = self.unmangle_code_names(co, classname) self.names = names @@ -146,8 +186,6 @@ class Scanner2(Scanner): self.load_asserts = set() for i in self.op_range(0, codelen): - self.offset2inst_index[inst.offset] = i - # We need to detect the difference between: # raise AssertionError # and @@ -318,7 +356,7 @@ class Scanner2(Scanner): if (offset in self.stmts and self.code[offset+3] not in (self.opc.END_FINALLY, self.opc.POP_BLOCK)): - if ((offset in self.linestartoffsets and + if ((offset in self.linestarts and self.code[self.prev[offset]] == self.opc.JUMP_ABSOLUTE) or self.code[target] == self.opc.FOR_ITER or offset not in self.not_continue): @@ -331,10 +369,7 @@ class Scanner2(Scanner): if offset in self.return_end_ifs: op_name = 'RETURN_END_IF' - if offset in self.linestartoffsets: - linestart = self.linestartoffsets[offset] - else: - linestart = None + linestart = self.linestarts.get(offset, None) if offset not in replace: tokens.append(Token( @@ -353,63 +388,6 @@ class Scanner2(Scanner): print() return tokens, customize - def setup_code(self, co): - """ - Creates Python-independent bytecode structure (byte array) in - self.code and records previous instruction in self.prev - The size of self.code is returned - """ - self.code = array('B', co.co_code) - - n = -1 - for i in self.op_range(0, len(self.code)): - if self.code[i] in (self.opc.RETURN_VALUE, self.opc.END_FINALLY): - n = i + 1 - pass - pass - assert n > -1, "Didn't find RETURN_VALUE or END_FINALLY" - self.code = array('B', co.co_code[:n]) - - return n - - def build_prev_op(self, n): - self.prev = [0] - # mapping addresses of instruction & argument - for i in self.op_range(0, n): - op = self.code[i] - self.prev.append(i) - if op_has_argument(op, self.opc): - self.prev.append(i) - self.prev.append(i) - pass - pass - - def build_lines_data(self, co, n): - """ - Initializes self.lines and self.linesstartoffsets - """ - self.lines = [] - linetuple = namedtuple('linetuple', ['l_no', 'next']) - - # self.linestarts is a tuple of (offset, line number). - # Turn that in a has that we can index - self.linestarts = list(self.opc.findlinestarts(co)) - self.linestartoffsets = {} - for offset, lineno in self.linestarts: - self.linestartoffsets[offset] = lineno - - j = 0 - (prev_start_byte, prev_line_no) = self.linestarts[0] - for (start_byte, line_no) in self.linestarts[1:]: - while j < start_byte: - self.lines.append(linetuple(prev_line_no, start_byte)) - j += 1 - prev_line_no = start_byte - while j < n: - self.lines.append(linetuple(prev_line_no, n)) - j+=1 - return - def build_statement_indices(self): code = self.code start = 0 @@ -976,7 +954,8 @@ class Scanner2(Scanner): 'end': pre_rtarget}) # FIXME: this is yet another case were we need dominators. - if pre_rtarget not in self.linestartoffsets or self.version < 2.7: + if (pre_rtarget not in self.linestarts + or self.version < 2.7): self.not_continue.add(pre_rtarget) if rtarget < end_offset: @@ -1165,6 +1144,19 @@ class Scanner2(Scanner): return targets + def patch_continue(self, tokens, offset, op): + if op in (self.opc.JUMP_FORWARD, self.opc.JUMP_ABSOLUTE): + # FIXME: this is a hack to catch stuff like: + # for ... + # try: ... + # except: continue + # the "continue" is not on a new line. + n = len(tokens) + if (n > 2 and + tokens[-1].kind == 'JUMP_BACK' and + self.code[offset+3] == self.opc.END_FINALLY): + tokens[-1].kind = intern('CONTINUE') + # FIXME: combine with scanner3.py code and put into scanner.py def rem_or(self, start, end, instr, target=None, include_beyond_target=False): """ @@ -1204,3 +1196,17 @@ class Scanner2(Scanner): instr_offsets = filtered filtered = [] return instr_offsets + +if __name__ == "__main__": + from uncompyle6 import PYTHON_VERSION + if 2.0 <= PYTHON_VERSION < 3.0: + import inspect + co = inspect.currentframe().f_code + from uncompyle6 import PYTHON_VERSION + tokens, customize = Scanner2(PYTHON_VERSION).ingest(co) + for t in tokens: + print(t) + else: + print("Need to be Python 2.x to demo; I am %s." % + PYTHON_VERSION) + pass diff --git a/uncompyle6/scanners/scanner26.py b/uncompyle6/scanners/scanner26.py index a494639f..210fab82 100755 --- a/uncompyle6/scanners/scanner26.py +++ b/uncompyle6/scanners/scanner26.py @@ -32,59 +32,21 @@ from uncompyle6.scanner import L65536 # bytecode verification, verify(), uses JUMP_OPs from here from xdis.opcodes import opcode_26 -from xdis.bytecode import Bytecode from xdis.bytecode import _get_const_info +from uncompyle6.scanner import Token + JUMP_OPS = opcode_26.JUMP_OPS class Scanner26(scan.Scanner2): def __init__(self, show_asm=False): super(Scanner26, self).__init__(2.6, show_asm) - self.statement_opcodes = frozenset([ - self.opc.SETUP_LOOP, self.opc.BREAK_LOOP, - self.opc.SETUP_FINALLY, self.opc.END_FINALLY, - self.opc.SETUP_EXCEPT, self.opc.POP_BLOCK, - self.opc.STORE_FAST, self.opc.DELETE_FAST, - self.opc.STORE_DEREF, self.opc.STORE_GLOBAL, - self.opc.DELETE_GLOBAL, self.opc.STORE_NAME, - self.opc.DELETE_NAME, self.opc.STORE_ATTR, - self.opc.DELETE_ATTR, self.opc.STORE_SUBSCR, - self.opc.DELETE_SUBSCR, self.opc.RETURN_VALUE, - self.opc.RAISE_VARARGS, self.opc.POP_TOP, - self.opc.PRINT_EXPR, self.opc.PRINT_ITEM, - self.opc.PRINT_NEWLINE, self.opc.PRINT_ITEM_TO, - self.opc.PRINT_NEWLINE_TO, self.opc.CONTINUE_LOOP, - self.opc.JUMP_ABSOLUTE, self.opc.EXEC_STMT, - ]) # "setup" opcodes self.setup_ops = frozenset([ self.opc.SETUP_EXCEPT, self.opc.SETUP_FINALLY, ]) - # opcodes with expect a variable number pushed values whose - # count is in the opcode. For parsing we generally change the - # opcode name to include that number. - self.varargs_ops = frozenset([ - self.opc.BUILD_LIST, self.opc.BUILD_TUPLE, - self.opc.BUILD_SLICE, self.opc.UNPACK_SEQUENCE, - self.opc.MAKE_FUNCTION, self.opc.CALL_FUNCTION, - self.opc.MAKE_CLOSURE, self.opc.CALL_FUNCTION_VAR, - self.opc.CALL_FUNCTION_KW, self.opc.CALL_FUNCTION_VAR_KW, - self.opc.DUP_TOPX, self.opc.RAISE_VARARGS]) - - # opcodes that store values into a variable - self.designator_ops = frozenset([ - self.opc.STORE_FAST, self.opc.STORE_NAME, - self.opc.STORE_GLOBAL, self.opc.STORE_DEREF, self.opc.STORE_ATTR, - self.opc.STORE_SLICE_0, self.opc.STORE_SLICE_1, self.opc.STORE_SLICE_2, - self.opc.STORE_SLICE_3, self.opc.STORE_SUBSCR, self.opc.UNPACK_SEQUENCE, - self.opc.JUMP_ABSOLUTE - ]) - - # Python 2.7 has POP_JUMP_IF_{TRUE,FALSE}_OR_POP but < 2.7 doesn't - # Add an empty set make processing more uniform. - self.pop_jump_if_or_pop = frozenset([]) return def ingest(self, co, classname=None, code_objects={}, show_asm=None): @@ -106,7 +68,8 @@ class Scanner26(scan.Scanner2): if not show_asm: show_asm = self.show_asm - bytecode = Bytecode(co, self.opc) + bytecode = self.build_instructions(co) + # show_asm = 'after' if show_asm in ('both', 'before'): for instr in bytecode.get_instructions(co): @@ -119,17 +82,7 @@ class Scanner26(scan.Scanner2): if self.is_pypy: customize['PyPy'] = 1 - Token = self.Token # shortcut - - codelen = self.setup_code(co) - - self.build_lines_data(co, codelen) - self.build_prev_op(codelen) - - self.insts = list(bytecode) - self.offset2inst_index = {} - for i, inst in enumerate(self.insts): - self.offset2inst_index[inst.offset] = i + codelen = len(self.code) free, names, varnames = self.unmangle_code_names(co, classname) self.names = names @@ -288,7 +241,7 @@ class Scanner26(scan.Scanner2): if (offset in self.stmts and self.code[offset+3] not in (self.opc.END_FINALLY, self.opc.POP_BLOCK)): - if ((offset in self.linestartoffsets and + if ((offset in self.linestarts and tokens[-1].kind == 'JUMP_BACK') or offset not in self.not_continue): op_name = 'CONTINUE' @@ -309,10 +262,7 @@ class Scanner26(scan.Scanner2): if offset in self.return_end_ifs: op_name = 'RETURN_END_IF' - if offset in self.linestartoffsets: - linestart = self.linestartoffsets[offset] - else: - linestart = None + linestart = self.linestarts.get(offset, None) if offset not in replace: tokens.append(Token( diff --git a/uncompyle6/scanners/scanner27.py b/uncompyle6/scanners/scanner27.py index 0a935929..2e1cdb79 100755 --- a/uncompyle6/scanners/scanner27.py +++ b/uncompyle6/scanners/scanner27.py @@ -23,28 +23,15 @@ class Scanner27(Scanner2): super(Scanner27, self).__init__(2.7, show_asm, is_pypy) # opcodes that start statements - self.statement_opcodes = frozenset([ - self.opc.SETUP_LOOP, self.opc.BREAK_LOOP, - self.opc.SETUP_FINALLY, self.opc.END_FINALLY, - self.opc.SETUP_EXCEPT, - self.opc.POP_BLOCK, self.opc.STORE_FAST, self.opc.DELETE_FAST, - self.opc.STORE_DEREF, self.opc.STORE_GLOBAL, - self.opc.DELETE_GLOBAL, self.opc.STORE_NAME, - self.opc.DELETE_NAME, self.opc.STORE_ATTR, - self.opc.DELETE_ATTR, self.opc.STORE_SUBSCR, - self.opc.DELETE_SUBSCR, self.opc.RETURN_VALUE, - self.opc.RAISE_VARARGS, self.opc.POP_TOP, - self.opc.PRINT_EXPR, self.opc.PRINT_ITEM, - self.opc.PRINT_NEWLINE, self.opc.PRINT_ITEM_TO, - self.opc.PRINT_NEWLINE_TO, self.opc.CONTINUE_LOOP, - self.opc.JUMP_ABSOLUTE, self.opc.EXEC_STMT, - # New in 2.7 - self.opc.SETUP_WITH, - self.opc.STORE_SLICE_0, self.opc.STORE_SLICE_1, - self.opc.STORE_SLICE_2, self.opc.STORE_SLICE_3, - self.opc.DELETE_SLICE_0, self.opc.DELETE_SLICE_1, - self.opc.DELETE_SLICE_2, self.opc.DELETE_SLICE_3, - ]) + self.statement_opcodes = frozenset( + self.statement_opcodes | set([ + # New in 2.7 + self.opc.SETUP_WITH, + self.opc.STORE_SLICE_0, self.opc.STORE_SLICE_1, + self.opc.STORE_SLICE_2, self.opc.STORE_SLICE_3, + self.opc.DELETE_SLICE_0, self.opc.DELETE_SLICE_1, + self.opc.DELETE_SLICE_2, self.opc.DELETE_SLICE_3, + ])) # opcodes which expect a variable number pushed values and whose # count is in the opcode. For parsing we generally change the @@ -83,19 +70,6 @@ class Scanner27(Scanner2): return - def patch_continue(self, tokens, offset, op): - if op in (self.opc.JUMP_FORWARD, self.opc.JUMP_ABSOLUTE): - # FIXME: this is a hack to catch stuff like: - # for ... - # try: ... - # except: continue - # the "continue" is not on a new line. - n = len(tokens) - if (n > 2 and - tokens[-1].kind == 'JUMP_BACK' and - self.code[offset+3] == self.opc.END_FINALLY): - tokens[-1].kind = intern('CONTINUE') - pass if __name__ == "__main__": diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index c10153f5..7ecd4952 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -40,10 +40,8 @@ if PYTHON_VERSION < 2.6: else: from collections import namedtuple -from array import array - from xdis.code import iscode -from xdis.bytecode import Bytecode, instruction_size, _get_const_info +from xdis.bytecode import instruction_size, _get_const_info from uncompyle6.scanner import Token, parse_fn_counts import xdis @@ -104,7 +102,7 @@ class Scanner3(Scanner): self.statement_opcodes = frozenset(statement_opcodes) | self.setup_ops_no_loop - # Opcodes that can start a designator non-terminal. + # Opcodes that can start a "store" non-terminal. # FIXME: JUMP_ABSOLUTE is weird. What's up with that? self.designator_ops = frozenset([ self.opc.STORE_FAST, self.opc.STORE_NAME, self.opc.STORE_GLOBAL, @@ -158,42 +156,6 @@ class Scanner3(Scanner): # FIXME: remove the above in favor of: # self.varargs_ops = frozenset(self.opc.hasvargs) - def remove_extended_args(self, instructions): - """Go through instructions removing extended ARG. - get_instruction_bytes previously adjusted the operand values - to account for these""" - new_instructions = [] - last_was_extarg = False - n = len(instructions) - for i, inst in enumerate(instructions): - if (inst.opname == 'EXTENDED_ARG' and - i+1 < n and instructions[i+1].opname != 'MAKE_FUNCTION'): - last_was_extarg = True - starts_line = inst.starts_line - is_jump_target = inst.is_jump_target - offset = inst.offset - continue - if last_was_extarg: - - # j = self.stmts.index(inst.offset) - # self.lines[j] = offset - - new_inst= inst._replace(starts_line=starts_line, - is_jump_target=is_jump_target, - offset=offset) - inst = new_inst - if i < n: - new_prev = self.prev_op[instructions[i].offset] - j = instructions[i+1].offset - old_prev = self.prev_op[j] - while self.prev_op[j] == old_prev and j < n: - self.prev_op[j] = new_prev - j += 1 - - last_was_extarg = False - new_instructions.append(inst) - return new_instructions - def ingest(self, co, classname=None, code_objects={}, show_asm=None): """ Pick out tokens from an uncompyle6 code object, and transform them, @@ -211,15 +173,12 @@ class Scanner3(Scanner): cause specific rules for the specific number of arguments they take. """ - # FIXME: remove this when all subsidiary functions have been removed. - # We should be able to get everything from the self.insts list. - self.code = array('B', co.co_code) - - bytecode = Bytecode(co, self.opc) if not show_asm: show_asm = self.show_asm - # show_asm = 'both' + bytecode = self.build_instructions(co) + + # show_asm = 'after' if show_asm in ('both', 'before'): for instr in bytecode.get_instructions(co): print(instr.disassemble()) @@ -233,22 +192,14 @@ class Scanner3(Scanner): if self.is_pypy: customize['PyPy'] = 0 - self.lines = self.build_lines_data(co) - self.build_prev_op() - - # FIXME: put as its own method? # Scan for assertions. Later we will # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'. # 'LOAD_ASSERT' is used in assert statements. self.load_asserts = set() - self.insts = self.remove_extended_args(list(bytecode)) - self.offset2inst_index = {} n = len(self.insts) for i, inst in enumerate(self.insts): - self.offset2inst_index[inst.offset] = i - # We need to detect the difference between: # raise AssertionError # and @@ -488,53 +439,6 @@ class Scanner3(Scanner): print() return tokens, customize - def build_lines_data(self, code_obj): - """ - Generate various line-related helper data. - """ - # Offset: lineno pairs, only for offsets which start line. - # Locally we use list for more convenient iteration using indices - linestarts = list(self.opc.findlinestarts(code_obj)) - self.linestarts = dict(linestarts) - # Plain set with offsets of first ops on line - self.linestart_offsets = set(a for (a, _) in linestarts) - # 'List-map' which shows line number of current op and offset of - # first op on following line, given offset of op as index - lines = [] - LineTuple = namedtuple('LineTuple', ['l_no', 'next']) - # Iterate through available linestarts, and fill - # the data for all code offsets encountered until - # last linestart offset - _, prev_line_no = linestarts[0] - offset = 0 - for start_offset, line_no in linestarts[1:]: - while offset < start_offset: - lines.append(LineTuple(prev_line_no, start_offset)) - offset += 1 - prev_line_no = line_no - # Fill remaining offsets with reference to last line number - # and code length as start offset of following non-existing line - codelen = len(self.code) - while offset < codelen: - lines.append(LineTuple(prev_line_no, codelen)) - offset += 1 - return lines - - def build_prev_op(self): - """ - Compose 'list-map' which allows to jump to previous - op, given offset of current op as index. - """ - code = self.code - codelen = len(code) - # 2.x uses prev 3.x uses prev_op. Sigh - # Until we get this sorted out. - self.prev = self.prev_op = [0] - for offset in self.op_range(0, codelen): - op = code[offset] - for _ in range(instruction_size(op, self.opc)): - self.prev_op.append(offset) - def find_jump_targets(self, debug): """ Detect all offsets in a byte code which are jump targets diff --git a/uncompyle6/semantics/customize.py b/uncompyle6/semantics/customize.py index 88d431e2..d26b20bf 100644 --- a/uncompyle6/semantics/customize.py +++ b/uncompyle6/semantics/customize.py @@ -363,9 +363,9 @@ def customize_for_version(self, is_pypy, version): self.template_engine(template, args_node) else: if len(node) - nargs > 3: - template = ('*%c, %C)', 1, (nargs+kwargs+1, -1, ', ')) + template = ('*%c, %C)', nargs+1, (nargs+kwargs+1, -1, ', ')) else: - template = ('*%c)', 1) + template = ('*%c)', nargs+1) self.template_engine(template, node) self.prune()