From 6e2ca8f53d16c2efe9574c50f40e1cd876b35cd1 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 25 Feb 2018 08:59:12 -0500 Subject: [PATCH 1/2] Add another guard on a test --- uncompyle6/scanners/scanner3.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 218b86d4..3938a29a 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -227,7 +227,8 @@ class Scanner3(Scanner): if op == self.opc.EXTENDED_ARG: # FIXME: The EXTENDED_ARG is used to signal annotation # parameters - if self.insts[i+1].opcode != self.opc.MAKE_FUNCTION: + if (i+1 < n and + self.insts[i+1].opcode != self.opc.MAKE_FUNCTION): continue if inst.offset in jump_targets: From 8c0f256b78d13fbdf130200d6905b41ca3f41f7b Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 25 Feb 2018 09:42:04 -0500 Subject: [PATCH 2/2] Sync python2 and python3 scanner/injest code more --- uncompyle6/scanner.py | 21 ++++++++++------ uncompyle6/scanners/scanner2.py | 18 ++++++++------ uncompyle6/scanners/scanner3.py | 44 ++++++++++----------------------- 3 files changed, 37 insertions(+), 46 deletions(-) diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 5400f006..65cdf753 100755 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -17,8 +17,8 @@ import sys from uncompyle6 import PYTHON3, IS_PYPY from uncompyle6.scanners.tok import Token import xdis -from xdis.bytecode import op_size, extended_arg_val -from xdis.magics import py_str2float, canonic_python_version +from xdis.bytecode import op_size, extended_arg_val, next_offset +from xdis.magics import canonic_python_version from xdis.util import code2num # The byte code versions we support. 
@@ -98,12 +98,17 @@ class Scanner(object): # FIXME 0 isn't always correct return offset < self.get_target(offset, 0) - def get_target(self, pos, op=None): - if op is None: - op = self.code[pos] - target = self.get_argument(pos) - if op in self.opc.JREL_OPS: - target += pos + 3 + def get_target(self, offset, extended_arg=0): + """ + Get next instruction offset for op located at given <offset>. + NOTE: extended_arg is no longer used + """ + inst = self.insts[self.offset2inst_index[offset]] + if inst.opcode in self.opc.JREL_OPS | self.opc.JABS_OPS: + target = inst.argval + else: + # No jump offset, so use fall-through offset + target = next_offset(inst.opcode, self.opc, inst.offset) + return target def get_argument(self, pos): diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 25b1c9cb..c0b4c0f9 100644 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -26,7 +26,7 @@ from collections import namedtuple from array import array from xdis.code import iscode -from xdis.bytecode import Bytecode, op_has_argument, op_size, instruction_size +from xdis.bytecode import Bytecode, op_has_argument, instruction_size from xdis.util import code2num from uncompyle6.scanner import Scanner @@ -72,13 +72,14 @@ class Scanner2(Scanner): def ingest(self, co, classname=None, code_objects={}, show_asm=None): """ Pick out tokens from an uncompyle6 code object, and transform them, - returning a list of uncompyle6 'Token's. + returning a list of uncompyle6 Token's. The transformations are made to assist the deparsing grammar. Specificially: - various types of LOAD_CONST's are categorized in terms of what they load - COME_FROM instructions are added to assist parsing control structures - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments + - some EXTENDED_ARGS instructions are removed Also, when we encounter certain tokens, we add them to a set which will cause custom grammar rules. 
Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST @@ -112,6 +113,7 @@ class Scanner2(Scanner): self.insts = list(bytecode) self.offset2inst_index = {} + n = len(self.insts) for i, inst in enumerate(self.insts): self.offset2inst_index[inst.offset] = i @@ -141,8 +143,10 @@ class Scanner2(Scanner): if names[self.get_argument(i+3)] == 'AssertionError': self.load_asserts.add(i+3) + # Get jump targets + # Format: {target offset: [jump offsets]} jump_targets = self.find_jump_targets(show_asm) - # contains (code, [addrRefToCode]) + # print("XXX2", jump_targets) last_stmt = self.next_stmt[0] i = self.next_stmt[last_stmt] @@ -383,7 +387,7 @@ class Scanner2(Scanner): if elem != code[i]: match = False break - i += op_size(code[i], self.opc) + i += instruction_size(code[i], self.opc) if match: i = self.prev[i] @@ -629,7 +633,7 @@ class Scanner2(Scanner): 'start': jump_back_offset+3, 'end': loop_end_offset}) elif op == self.opc.SETUP_EXCEPT: - start = offset + op_size(op, self.opc) + start = offset + instruction_size(op, self.opc) target = self.get_target(offset, op) end_offset = self.restrict_to_parent(target, parent) if target != end_offset: @@ -653,7 +657,7 @@ class Scanner2(Scanner): setup_except_nest -= 1 elif self.code[end_finally_offset] == self.opc.SETUP_EXCEPT: setup_except_nest += 1 - end_finally_offset += op_size(code[end_finally_offset], self.opc) + end_finally_offset += instruction_size(code[end_finally_offset], self.opc) pass # Add the except blocks @@ -866,7 +870,7 @@ class Scanner2(Scanner): else: # We still have the case in 2.7 that the next instruction # is a jump to a SETUP_LOOP target. 
- next_offset = target + op_size(self.code[target], self.opc) + next_offset = target + instruction_size(self.code[target], self.opc) next_op = self.code[next_offset] if self.op_name(next_op) == 'JUMP_FORWARD': jump_target = self.get_target(next_offset, next_op) diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 3938a29a..b19ab790 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -1,6 +1,6 @@ -# Copyright (c) 2015-2018 by Rocky Bernstein -# Copyright (c) 2005 by Dan Pascu -# Copyright (c) 2000-2002 by hartmut Goebel +# Copyright (c) 2015-2018 by Rocky Bernstein +# Copyright (c) 2005 by Dan Pascu +# Copyright (c) 2000-2002 by hartmut Goebel """ Python 3 Generic bytecode scanner/deparser @@ -25,9 +25,8 @@ from __future__ import print_function from collections import namedtuple from array import array -from uncompyle6.scanner import Scanner from xdis.code import iscode -from xdis.bytecode import Bytecode, instruction_size, next_offset +from xdis.bytecode import Bytecode, instruction_size from uncompyle6.scanner import Token, parse_fn_counts import xdis @@ -35,6 +34,8 @@ import xdis # Get all the opcodes into globals import xdis.opcodes.opcode_33 as op3 +from uncompyle6.scanner import Scanner + import sys from uncompyle6 import PYTHON3 if PYTHON3: @@ -171,11 +172,7 @@ class Scanner3(Scanner): # list of tokens/instructions tokens = [] - # "customize" is a dict whose keys are nonterminals - # and the value is the argument stack entries for that - # nonterminal. The count is a little hoaky. It is mostly - # not used, but sometimes it is. 
- # "customize" is a dict whose keys are nonterminals + # "customize" is in the process of going away here customize = {} if self.is_pypy: @@ -417,7 +414,7 @@ class Scanner3(Scanner): if show_asm in ('both', 'after'): for t in tokens: - print(t) + print(t.format(line_prefix='L.')) print() return tokens, customize @@ -620,34 +617,19 @@ class Scanner3(Scanner): # Finish filling the list for last statement slist += [codelen] * (codelen-len(slist)) - def get_target(self, offset, extended_arg=0): - """ - Get next instruction offset for op located at given <offset>. - NOTE: extended_arg is no longer used - """ - inst = self.insts[self.offset2inst_index[offset]] - if inst.opcode in self.opc.JREL_OPS | self.opc.JABS_OPS: - target = inst.argval - else: - # No jump offset, so use fall-through offset - target = next_offset(inst.opcode, self.opc, inst.offset) - return target - def detect_control_flow(self, offset, targets, inst_index): """ - Detect structures and their boundaries to fix optimized jumps + Detect type of block structures and their boundaries to fix optimized jumps in python2.3+ """ - # TODO: check the struct boundaries more precisely -Dan - code = self.code op = self.insts[inst_index].opcode # Detect parent structure parent = self.structs[0] - start = parent['start'] - end = parent['end'] + start = parent['start'] + end = parent['end'] # Pick inner-most parent for our offset for struct in self.structs: @@ -655,8 +637,8 @@ class Scanner3(Scanner): current_end = struct['end'] if ((current_start <= offset < current_end) and (current_start >= start and current_end <= end)): - start = current_start - end = current_end + start = current_start + end = current_end parent = struct if op == self.opc.SETUP_LOOP: