diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 996ca808..ffbefc5f 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -36,6 +36,7 @@ class Python3Parser(PythonParser): """Add rule to grammar, but only if it hasn't been added previously """ if rule not in self.new_rules: + # print("XXX ", rule) # debug self.new_rules.add(rule) self.addRule(rule, nop_func) customize[opname] = count @@ -506,11 +507,16 @@ class Python3Parser(PythonParser): rule = ('load_closure ::= %s%s' % (('LOAD_CLOSURE ' * v), opname)) self.add_unique_rule(rule, opname, token.attr, customize) - elif self.version >= 3.5 and opname_base == 'BUILD_MAP': + elif opname_base == 'BUILD_MAP': kvlist_n = "kvlist_%s" % token.attr - rule = kvlist_n + ' ::= ' + 'expr ' * (token.attr*2) - self.add_unique_rule(rule, opname, token.attr, customize) - rule = "mapexpr ::= %s %s" % (kvlist_n, opname) + if self.version >= 3.5: + rule = kvlist_n + ' ::= ' + 'expr ' * (token.attr*2) + self.add_unique_rule(rule, opname, token.attr, customize) + rule = "mapexpr ::= %s %s" % (kvlist_n, opname) + else: + rule = kvlist_n + ' ::= ' + 'expr expr STORE_MAP ' * token.attr + self.add_unique_rule(rule, opname, token.attr, customize) + rule = "mapexpr ::= %s %s" % (opname, kvlist_n) self.add_unique_rule(rule, opname, token.attr, customize) elif opname_base in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'): rule = 'unpack ::= ' + opname + ' designator' * token.attr diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index dc076b79..4809de76 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -21,6 +21,8 @@ Finally we save token information. from __future__ import print_function import dis +import uncompyle6.scanners.dis3 as dis3 + from collections import namedtuple from array import array @@ -42,6 +44,142 @@ class Scanner3(scan.Scanner): self.version = version scan.Scanner.__init__(self, version) + def disassemble3(self, co, opnames, classname=None, code_objects={}): + + # import dis; dis.disassemble(co) # DEBUG + + # Container for tokens + tokens = [] + + customize = {} + self.code = array('B', co.co_code) + self.build_lines_data(co) + self.build_prev_op() + + bytecode = dis3.Bytecode(co, opnames) + + # self.lines contains (block,addrLastInstr) + if classname: + classname = '_' + classname.lstrip('_') + '__' + + def unmangle(name): + if name.startswith(classname) and name[-2:] != '__': + return name[len(classname) - 2:] + return name + else: + pass + + # Scan for assertions. 
Later we will + # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those + # assertions + self.load_asserts = set() + bs = list(bytecode) + n = len(bs) + for i in range(n): + inst = bs[i] + + if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n: + next_inst = bs[i+1] + if (next_inst.opname == 'LOAD_GLOBAL' and + next_inst.argval == 'AssertionError'): + self.load_asserts.add(next_inst.offset) + + # Get jump targets + # Format: {target offset: [jump offsets]} + jump_targets = self.find_jump_targets() + + for inst in bytecode: + if inst.offset in jump_targets: + jump_idx = 0 + for jump_offset in jump_targets[inst.offset]: + tokens.append(Token('COME_FROM', None, repr(jump_offset), + offset='%s_%s' % (inst.offset, jump_idx))) + jump_idx += 1 + pass + pass + + pattr = inst.argrepr + opname = inst.opname + + if opname in ['LOAD_CONST']: + const = inst.argval + if iscode(const): + if const.co_name == '': + opname = 'LOAD_LAMBDA' + elif const.co_name == '': + opname = 'LOAD_GENEXPR' + elif const.co_name == '': + opname = 'LOAD_DICTCOMP' + elif const.co_name == '': + opname = 'LOAD_SETCOMP' + elif const.co_name == '': + opname = 'LOAD_LISTCOMP' + # verify() uses 'pattr' for comparison, since 'attr' + # now holds Code(const) and thus can not be used + # for comparison (todo: think about changing this) + # pattr = 'code_object @ 0x%x %s->%s' %\ + # (id(const), const.co_filename, const.co_name) + pattr = '' + else: + pattr = const + pass + elif opname == 'MAKE_FUNCTION': + argc = inst.argval + attr = ((argc & 0xFF), (argc >> 8) & 0xFF, (argc >> 16) & 0x7FFF) + pos_args, name_pair_args, annotate_args = attr + if name_pair_args > 0: + opname = 'MAKE_FUNCTION_N%d' % name_pair_args + pass + if annotate_args > 0: + opname = '%s_A_%d' % [op_name, annotate_args] + pass + opname = '%s_%d' % (opname, pos_args) + pattr = ("%d positional, %d keyword pair, %d annotated" % + (pos_args, name_pair_args, annotate_args)) + tokens.append( + Token( + type_ = opname, + attr = (pos_args, name_pair_args, annotate_args), + pattr = pattr, + offset = inst.offset, + linestart = inst.starts_line) + ) + continue + elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE', + 'BUILD_MAP', 'UNPACK_SEQUENCE', 'MAKE_CLOSURE', + 'RAISE_VARARGS' + ): + pos_args = inst.argval + if inst.opname != 'BUILD_SLICE': + customize[opname] = pos_args + pass + opname = '%s_%d' % (opname, pos_args) + elif opname == 'JUMP_ABSOLUTE': + pattr = inst.argval + target = self.get_target(inst.offset) + if target < inst.offset: + if (inst.offset in self.stmts and + self.code[inst.offset+3] not in (END_FINALLY, POP_BLOCK) + and inst.offset not in self.not_continue): + opname = 'CONTINUE' + else: + opname = 'JUMP_BACK' + + elif inst.offset in self.load_asserts: + opname = 'LOAD_ASSERT' + + tokens.append( + Token( + type_ = opname, + attr = inst.argval, + pattr = pattr, + offset = inst.offset, + linestart = inst.starts_line, + ) + ) + pass + return tokens, {} + def disassemble_generic(self, co, classname=None, code_objects={}): """ Convert code object into a sequence of tokens. diff --git a/uncompyle6/scanners/scanner34.py b/uncompyle6/scanners/scanner34.py index 952a8fe1..b0c7ab45 100644 --- a/uncompyle6/scanners/scanner34.py +++ b/uncompyle6/scanners/scanner34.py @@ -2,180 +2,24 @@ """ Python 3.4 bytecode scanner/deparser -This overlaps Python's 3.4's dis module, and in fact in some cases -we just fall back to that. But the intent is that it can be run from -Python 2 and other versions of Python. Also, we save token information -for later use in deparsing. 
+This sets up opcodes Python's 3.5 and calls a generalized +scanner routine for Python 3. """ from __future__ import print_function -import dis, inspect -from array import array -import uncompyle6.scanners.dis3 as dis3 import uncompyle6.scanners.scanner3 as scan3 from uncompyle6.opcodes.opcode_34 import opname as opnames -from uncompyle6 import PYTHON_VERSION -from uncompyle6.code import iscode -from uncompyle6.scanner import Token - -# Get all the opcodes into globals -globals().update(dis.opmap) - -import uncompyle6.opcodes.opcode_34 -# verify uses JUMP_OPs from here -JUMP_OPs = uncompyle6.opcodes.opcode_34.JUMP_OPs - -from uncompyle6.opcodes.opcode_34 import * +# bytecode verification, verify(), uses JUMP_OPs from here +from uncompyle6.opcodes.opcode_34 import JUMP_OPs class Scanner34(scan3.Scanner3): - - ## FIXME: DRY with scanner35.py - # Note: we can't use built-in disassembly routines, unless - # we do post-processing like we do here. def disassemble(self, co, classname=None, code_objects={}): - - # import dis; dis.disassemble(co) # DEBUG - - # Container for tokens - tokens = [] - - customize = {} - self.code = array('B', co.co_code) - self.build_lines_data(co) - self.build_prev_op() - - bytecode = dis3.Bytecode(co, opnames) - - # self.lines contains (block,addrLastInstr) - if classname: - classname = '_' + classname.lstrip('_') + '__' - - def unmangle(name): - if name.startswith(classname) and name[-2:] != '__': - return name[len(classname) - 2:] - return name - else: - pass - - # Scan for assertions. Later we will - # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those - # assertions - self.load_asserts = set() - bs = list(bytecode) - n = len(bs) - for i in range(n): - inst = bs[i] - - if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n: - next_inst = bs[i+1] - if (next_inst.opname == 'LOAD_GLOBAL' and - next_inst.argval == 'AssertionError'): - self.load_asserts.add(next_inst.offset) - - # Get jump targets - # Format: {target offset: [jump offsets]} - jump_targets = self.find_jump_targets() - - for inst in bytecode: - if inst.offset in jump_targets: - jump_idx = 0 - for jump_offset in jump_targets[inst.offset]: - tokens.append(Token('COME_FROM', None, repr(jump_offset), - offset='%s_%s' % (inst.offset, jump_idx))) - jump_idx += 1 - pass - pass - - pattr = inst.argrepr - opname = inst.opname - - if opname in ['LOAD_CONST']: - const = inst.argval - if iscode(const): - if const.co_name == '': - opname = 'LOAD_LAMBDA' - elif const.co_name == '': - opname = 'LOAD_GENEXPR' - elif const.co_name == '': - opname = 'LOAD_DICTCOMP' - elif const.co_name == '': - opname = 'LOAD_SETCOMP' - elif const.co_name == '': - opname = 'LOAD_LISTCOMP' - # verify() uses 'pattr' for comparison, since 'attr' - # now holds Code(const) and thus can not be used - # for comparison (todo: think about changing this) - # pattr = 'code_object @ 0x%x %s->%s' %\ - # (id(const), const.co_filename, const.co_name) - pattr = '' - else: - pattr = const - pass - elif opname == 'MAKE_FUNCTION': - argc = inst.argval - attr = ((argc & 0xFF), (argc >> 8) & 0xFF, (argc >> 16) & 0x7FFF) - pos_args, name_pair_args, annotate_args = attr - if name_pair_args > 0: - opname = 'MAKE_FUNCTION_N%d' % name_pair_args - pass - if annotate_args > 0: - opname = '%s_A_%d' % [op_name, annotate_args] - pass - opname = '%s_%d' % (opname, pos_args) - pattr = ("%d positional, %d keyword pair, %d annotated" % - (pos_args, name_pair_args, annotate_args)) - tokens.append( - Token( - type_ = opname, - attr = (pos_args, name_pair_args, annotate_args), - pattr = 
pattr, - offset = inst.offset, - linestart = inst.starts_line) - ) - continue - # Note: care is needed in merging this with python3.5 - # and BUILD_MAP and parse3 custom rules. - # BUILD_MAP in 3.4 comes at the beginning and each tuple has STORE_MAP - # in 3.5 it comes at the end and STORE_MAP - # see parse3.py - elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE', - 'UNPACK_SEQUENCE', 'MAKE_CLOSURE', - 'RAISE_VARARGS' - ): - pos_args = inst.argval - if inst.opname != 'BUILD_SLICE': - customize[opname] = pos_args - pass - opname = '%s_%d' % (opname, pos_args) - elif opname == 'JUMP_ABSOLUTE': - pattr = inst.argval - target = self.get_target(inst.offset) - if target < inst.offset: - if (inst.offset in self.stmts and - self.code[inst.offset+3] not in (END_FINALLY, POP_BLOCK) - and inst.offset not in self.not_continue): - opname = 'CONTINUE' - else: - opname = 'JUMP_BACK' - - elif inst.offset in self.load_asserts: - opname = 'LOAD_ASSERT' - - tokens.append( - Token( - type_ = opname, - attr = inst.argval, - pattr = pattr, - offset = inst.offset, - linestart = inst.starts_line, - ) - ) - pass - return tokens, {} + return self.disassemble3(co, opnames, classname, code_objects) if __name__ == "__main__": + import inspect co = inspect.currentframe().f_code tokens, customize = Scanner34(3.4).disassemble(co) for t in tokens: diff --git a/uncompyle6/scanners/scanner35.py b/uncompyle6/scanners/scanner35.py index d74c9da7..b38df656 100644 --- a/uncompyle6/scanners/scanner35.py +++ b/uncompyle6/scanners/scanner35.py @@ -2,171 +2,24 @@ """ Python 3.5 bytecode scanner/deparser -This overlaps Python's 3.5's dis module, and in fact in some cases -we just fall back to that. But the intent is that it can be run from -Python 2 and other versions of Python. Also, we save token information -for later use in deparsing. +This sets up opcodes Python's 3.5 and calls a generalized +scanner routine for Python 3. """ from __future__ import print_function -import inspect -from array import array -import uncompyle6.scanners.dis3 as dis3 import uncompyle6.scanners.scanner3 as scan3 from uncompyle6.opcodes.opcode_35 import opname as opnames -from uncompyle6.code import iscode -from uncompyle6.scanner import Token - -import uncompyle6.opcodes.opcode_35 -# verify uses JUMP_OPs from here -JUMP_OPs = uncompyle6.opcodes.opcode_35.JUMP_OPs - -from uncompyle6.opcodes.opcode_35 import * +# bytecode verification, verify(), uses JUMP_OPs from here +from uncompyle6.opcodes.opcode_35 import JUMP_OPs class Scanner35(scan3.Scanner3): - - ## FIXME: DRY with scanner34.py - # Note: we can't use built-in disassembly routines, unless - # we do post-processing like we do here. def disassemble(self, co, classname=None, code_objects={}): - - # import dis; dis.disassemble(co) # DEBUG - - # Container for tokens - tokens = [] - - customize = {} - self.code = array('B', co.co_code) - self.build_lines_data(co) - self.build_prev_op() - - bytecode = dis3.Bytecode(co, opnames) - - # self.lines contains (block,addrLastInstr) - if classname: - classname = '_' + classname.lstrip('_') + '__' - - def unmangle(name): - if name.startswith(classname) and name[-2:] != '__': - return name[len(classname) - 2:] - return name - else: - pass - - # Scan for assertions. 
Later we will - # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those - # assertions - self.load_asserts = set() - bs = list(bytecode) - n = len(bs) - for i in range(n): - inst = bs[i] - - if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n: - next_inst = bs[i+1] - if (next_inst.opname == 'LOAD_GLOBAL' and - next_inst.argval == 'AssertionError'): - self.load_asserts.add(next_inst.offset) - - # Get jump targets - # Format: {target offset: [jump offsets]} - jump_targets = self.find_jump_targets() - - for inst in bytecode: - if inst.offset in jump_targets: - jump_idx = 0 - for jump_offset in jump_targets[inst.offset]: - tokens.append(Token('COME_FROM', None, repr(jump_offset), - offset='%s_%s' % (inst.offset, jump_idx))) - jump_idx += 1 - pass - pass - - pattr = inst.argrepr - opname = inst.opname - - if opname in ['LOAD_CONST']: - const = inst.argval - if iscode(const): - if const.co_name == '': - opname = 'LOAD_LAMBDA' - elif const.co_name == '': - opname = 'LOAD_GENEXPR' - elif const.co_name == '': - opname = 'LOAD_DICTCOMP' - elif const.co_name == '': - opname = 'LOAD_SETCOMP' - elif const.co_name == '': - opname = 'LOAD_LISTCOMP' - # verify() uses 'pattr' for comparison, since 'attr' - # now holds Code(const) and thus can not be used - # for comparison (todo: think about changing this) - # pattr = 'code_object @ 0x%x %s->%s' %\ - # (id(const), const.co_filename, const.co_name) - pattr = '' - else: - pattr = const - pass - elif opname == 'MAKE_FUNCTION': - argc = inst.argval - attr = ((argc & 0xFF), (argc >> 8) & 0xFF, (argc >> 16) & 0x7FFF) - pos_args, name_pair_args, annotate_args = attr - if name_pair_args > 0: - opname = 'MAKE_FUNCTION_N%d' % name_pair_args - pass - if annotate_args > 0: - opname = '%s_A_%d' % [op_name, annotate_args] - pass - opname = '%s_%d' % (opname, pos_args) - pattr = ("%d positional, %d keyword pair, %d annotated" % - (pos_args, name_pair_args, annotate_args)) - tokens.append( - Token( - type_ = opname, - attr = (pos_args, name_pair_args, annotate_args), - pattr = pattr, - offset = inst.offset, - linestart = inst.starts_line) - ) - continue - elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE', - 'BUILD_MAP', 'UNPACK_SEQUENCE', 'MAKE_CLOSURE', - 'RAISE_VARARGS' - ): - pos_args = inst.argval - if inst.opname != 'BUILD_SLICE': - customize[opname] = pos_args - pass - opname = '%s_%d' % (opname, pos_args) - elif opname == 'JUMP_ABSOLUTE': - pattr = inst.argval - target = self.get_target(inst.offset) - if target < inst.offset: - if (inst.offset in self.stmts and - self.code[inst.offset+3] not in (END_FINALLY, POP_BLOCK) - and inst.offset not in self.not_continue): - opname = 'CONTINUE' - else: - opname = 'JUMP_BACK' - - elif inst.offset in self.load_asserts: - opname = 'LOAD_ASSERT' - - tokens.append( - Token( - type_ = opname, - attr = inst.argval, - pattr = pattr, - offset = inst.offset, - linestart = inst.starts_line, - ) - ) - pass - return tokens, {} + return self.disassemble3(co, opnames, classname, code_objects) if __name__ == "__main__": + import inspect co = inspect.currentframe().f_code tokens, customize = Scanner35(3.5).disassemble(co) for t in tokens: diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index 6306744e..c0a45c72 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -992,33 +992,63 @@ class FragmentsWalker(pysource.SourceWalker, object): """ p = self.prec self.prec = 100 - assert node[-1] == 'kvlist' - kv_node = node[-1] # goto kvlist 
self.indentMore(INDENT_PER_LEVEL) line_seperator = ',\n' + self.indent sep = INDENT_PER_LEVEL[:-1] start = len(self.f.getvalue()) self.write('{') - for kv in kv_node: - assert kv in ('kv', 'kv2', 'kv3') - # kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR - # kv2 ::= DUP_TOP expr expr ROT_THREE STORE_SUBSCR - # kv3 ::= expr expr STORE_MAP - if kv == 'kv': - name = self.traverse(kv[-2], indent='') - kv[1].parent = kv_node - value = self.traverse(kv[1], indent=self.indent+(len(name)+2)*' ') - elif kv == 'kv2': - name = self.traverse(kv[1], indent='') - kv[-3].parent = kv_node - value = self.traverse(kv[-3], indent=self.indent+(len(name)+2)*' ') - elif kv == 'kv3': - name = self.traverse(kv[-2], indent='') - kv[0].parent = kv_node - value = self.traverse(kv[0], indent=self.indent+(len(name)+2)*' ') - self.write(sep, name, ': ', value) - sep = line_seperator + + if node[0].type.startswith('kvlist'): + # Python 3.5+ style key/value list in mapexpr + kv_node = node[0] + l = list(kv_node) + i = 0 + while i < len(l): + l[1].parent = kv_node + l[i+1].parent = kv_node + name = self.traverse(l[i], indent='') + value = self.traverse(l[i+1], indent=self.indent+(len(name)+2)*' ') + self.write(sep, name, ': ', value) + sep = line_seperator + i += 2 + elif node[1].type.startswith('kvlist'): + # Python 3.0..3.4 style key/value list in mapexpr + kv_node = node[1] + l = list(kv_node) + i = 0 + while i < len(l): + l[1].parent = kv_node + l[i+1].parent = kv_node + name = self.traverse(l[i+1], indent='') + value = self.traverse(l[i], indent=self.indent+(len(name)+2)*' ') + self.write(sep, name, ': ', value) + sep = line_seperator + i += 3 + else: + # Python 2 style kvlist + assert node[-1] == 'kvlist' + kv_node = node[-1] # goto kvlist + + for kv in kv_node: + assert kv in ('kv', 'kv2', 'kv3') + # kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR + # kv2 ::= DUP_TOP expr expr ROT_THREE STORE_SUBSCR + # kv3 ::= expr expr STORE_MAP + if kv == 'kv': + name = self.traverse(kv[-2], indent='') + kv[1].parent = kv_node + value = self.traverse(kv[1], indent=self.indent+(len(name)+2)*' ') + elif kv == 'kv2': + name = self.traverse(kv[1], indent='') + kv[-3].parent = kv_node + value = self.traverse(kv[-3], indent=self.indent+(len(name)+2)*' ') + elif kv == 'kv3': + name = self.traverse(kv[-2], indent='') + kv[0].parent = kv_node + value = self.traverse(kv[0], indent=self.indent+(len(name)+2)*' ') + self.write(sep, name, ': ', value) + sep = line_seperator self.write('}') finish = len(self.f.getvalue()) for n in node: diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 4b624c3d..0d25675f 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -1238,7 +1238,7 @@ class SourceWalker(GenericASTTraversal, object): self.write('{') if node[0].type.startswith('kvlist'): - # Python 3.5 style key/value list in mapexpr + # Python 3.5+ style key/value list in mapexpr l = list(node[0]) i = 0 while i < len(l): @@ -1247,11 +1247,22 @@ class SourceWalker(GenericASTTraversal, object): self.write(sep, name, ': ', value) sep = line_seperator i += 2 + elif node[1].type.startswith('kvlist'): + # Python 3.0..3.4 style key/value list in mapexpr + l = list(node[1]) + i = 0 + while i < len(l): + name = self.traverse(l[i+1], indent='') + value = self.traverse(l[i], indent=self.indent+(len(name)+2)*' ') + self.write(sep, name, ': ', value) + sep = line_seperator + i += 3 else: + # Python 2 style kvlist assert node[-1] == 'kvlist' - node = node[-1] # goto kvlist + kv_node = node[-1] # goto 
kvlist

-            for kv in node:
+            for kv in kv_node:
                 assert kv in ('kv', 'kv2', 'kv3')
                 # kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR
                 # kv2 ::= DUP_TOP expr expr ROT_THREE STORE_SUBSCR
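
Background on the BUILD_MAP split handled above: CPython changed how dict displays compile between 3.4 and 3.5, which is what forces the version-specific grammar rules and kvlist walkers. A minimal way to see the difference (not part of the patch; the disassembly in the comments is abridged, with offsets and constant indices dropped, and is only meant to show the opcode ordering on those two releases):

    import dis

    # Disassemble a two-entry dict literal and compare the opcode order
    # across interpreter versions.
    dis.dis(compile("{1: 'a', 2: 'b'}", "<example>", "eval"))

    # CPython 3.4: BUILD_MAP comes first; each pair pushes value, key, STORE_MAP
    #     BUILD_MAP    2
    #     LOAD_CONST   'a';  LOAD_CONST  1;  STORE_MAP
    #     LOAD_CONST   'b';  LOAD_CONST  2;  STORE_MAP
    #     RETURN_VALUE
    #
    # CPython 3.5: key/value pairs are pushed first; BUILD_MAP comes last
    #     LOAD_CONST   1;  LOAD_CONST  'a'
    #     LOAD_CONST   2;  LOAD_CONST  'b'
    #     BUILD_MAP    2
    #     RETURN_VALUE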
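
The grammar consequence, written out for a concrete size (a worked expansion for a two-entry map, i.e. token.attr == 2; the real rules are generated at runtime by add_unique_rule using the customized opname BUILD_MAP_2):

    Python 3.0..3.4 (BUILD_MAP first, one STORE_MAP per pair):
        kvlist_2 ::= expr expr STORE_MAP expr expr STORE_MAP
        mapexpr  ::= BUILD_MAP_2 kvlist_2

    Python 3.5 and later (pairs first, BUILD_MAP last):
        kvlist_2 ::= expr expr expr expr
        mapexpr  ::= kvlist_2 BUILD_MAP_2

This is also why the 3.0..3.4 branches in pysource.py and fragments.py walk the kvlist three children at a stride (value, key, STORE_MAP) while the 3.5 branches walk it two at a stride (key, value).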