diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 5400f006..f4f4ae0e 100755 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -17,8 +17,8 @@ import sys from uncompyle6 import PYTHON3, IS_PYPY from uncompyle6.scanners.tok import Token import xdis -from xdis.bytecode import op_size, extended_arg_val -from xdis.magics import py_str2float, canonic_python_version +from xdis.bytecode import instruction_size, extended_arg_val, next_offset +from xdis.magics import canonic_python_version from xdis.util import code2num # The byte code versions we support. @@ -98,12 +98,20 @@ class Scanner(object): # FIXME 0 isn't always correct return offset < self.get_target(offset, 0) - def get_target(self, pos, op=None): - if op is None: - op = self.code[pos] - target = self.get_argument(pos) - if op in self.opc.JREL_OPS: - target += pos + 3 + def get_target(self, offset, extended_arg=0): + """ + Get next instruction offset for op located at the given offset. + NOTE: extended_arg is no longer used + """ + # instructions can get moved as a result of EXTENDED_ARGS removal + if offset not in self.offset2inst_index: + offset -= instruction_size(self.opc.EXTENDED_ARG, self.opc) + inst = self.insts[self.offset2inst_index[offset]] + if inst.opcode in self.opc.JREL_OPS | self.opc.JABS_OPS: + target = inst.argval + else: + # No jump offset, so use fall-through offset + target = next_offset(inst.opcode, self.opc, inst.offset) return target def get_argument(self, pos): @@ -269,7 +277,7 @@ class Scanner(object): """ while start < end: yield start - start += op_size(self.code[start], self.opc) + start += instruction_size(self.code[start], self.opc) def remove_mid_line_ifs(self, ifs): """ diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 25b1c9cb..c0b4c0f9 100644 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -26,7 +26,7 @@ from collections import namedtuple from array import array from xdis.code import iscode -from 
xdis.bytecode import Bytecode, op_has_argument, op_size, instruction_size +from xdis.bytecode import Bytecode, op_has_argument, instruction_size from xdis.util import code2num from uncompyle6.scanner import Scanner @@ -72,13 +72,14 @@ class Scanner2(Scanner): def ingest(self, co, classname=None, code_objects={}, show_asm=None): """ Pick out tokens from an uncompyle6 code object, and transform them, - returning a list of uncompyle6 'Token's. + returning a list of uncompyle6 Token's. The transformations are made to assist the deparsing grammar. Specificially: - various types of LOAD_CONST's are categorized in terms of what they load - COME_FROM instructions are added to assist parsing control structures - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments + - some EXTENDED_ARGS instructions are removed Also, when we encounter certain tokens, we add them to a set which will cause custom grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST @@ -112,6 +113,7 @@ class Scanner2(Scanner): self.insts = list(bytecode) self.offset2inst_index = {} + n = len(self.insts) for i, inst in enumerate(self.insts): self.offset2inst_index[inst.offset] = i @@ -141,8 +143,10 @@ class Scanner2(Scanner): if names[self.get_argument(i+3)] == 'AssertionError': self.load_asserts.add(i+3) + # Get jump targets + # Format: {target offset: [jump offsets]} jump_targets = self.find_jump_targets(show_asm) - # contains (code, [addrRefToCode]) + # print("XXX2", jump_targets) last_stmt = self.next_stmt[0] i = self.next_stmt[last_stmt] @@ -383,7 +387,7 @@ class Scanner2(Scanner): if elem != code[i]: match = False break - i += op_size(code[i], self.opc) + i += instruction_size(code[i], self.opc) if match: i = self.prev[i] @@ -629,7 +633,7 @@ class Scanner2(Scanner): 'start': jump_back_offset+3, 'end': loop_end_offset}) elif op == self.opc.SETUP_EXCEPT: - start = offset + op_size(op, self.opc) + start = offset + instruction_size(op, self.opc) target = 
self.get_target(offset, op) end_offset = self.restrict_to_parent(target, parent) if target != end_offset: @@ -653,7 +657,7 @@ class Scanner2(Scanner): setup_except_nest -= 1 elif self.code[end_finally_offset] == self.opc.SETUP_EXCEPT: setup_except_nest += 1 - end_finally_offset += op_size(code[end_finally_offset], self.opc) + end_finally_offset += instruction_size(code[end_finally_offset], self.opc) pass # Add the except blocks @@ -866,7 +870,7 @@ class Scanner2(Scanner): else: # We still have the case in 2.7 that the next instruction # is a jump to a SETUP_LOOP target. - next_offset = target + op_size(self.code[target], self.opc) + next_offset = target + instruction_size(self.code[target], self.opc) next_op = self.code[next_offset] if self.op_name(next_op) == 'JUMP_FORWARD': jump_target = self.get_target(next_offset, next_op) diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 8798e6a2..499a746d 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -1,6 +1,6 @@ -# Copyright (c) 2015-2018 by Rocky Bernstein -# Copyright (c) 2005 by Dan Pascu -# Copyright (c) 2000-2002 by hartmut Goebel +# Copyright (c) 2015-2018 by Rocky Bernstein +# Copyright (c) 2005 by Dan Pascu +# Copyright (c) 2000-2002 by hartmut Goebel """ Python 3 Generic bytecode scanner/deparser @@ -25,9 +25,8 @@ from __future__ import print_function from collections import namedtuple from array import array -from uncompyle6.scanner import Scanner from xdis.code import iscode -from xdis.bytecode import Bytecode, instruction_size, next_offset +from xdis.bytecode import Bytecode, instruction_size from uncompyle6.scanner import Token, parse_fn_counts import xdis @@ -35,6 +34,8 @@ import xdis # Get all the opcodes into globals import xdis.opcodes.opcode_33 as op3 +from uncompyle6.scanner import Scanner + import sys from uncompyle6 import PYTHON3 if PYTHON3: @@ -42,38 +43,6 @@ if PYTHON3: globals().update(op3.opmap) -def 
remove_extended_args(instructions, prev_op): - """Go through instructions removing extended ARG. - get_instruction_bytes previously adjusted the operand values - to account for these""" - new_instructions = [] - last_was_extarg = False - n = len(instructions) - for i, inst in enumerate(instructions): - if (inst.opname == 'EXTENDED_ARG' and - i+1 < n and instructions[i+1].opname != 'MAKE_FUNCTION'): - last_was_extarg = True - starts_line = inst.starts_line - is_jump_target = inst.is_jump_target - offset = inst.offset - continue - if last_was_extarg: - new_inst= inst._replace(starts_line=starts_line, - is_jump_target=is_jump_target, - offset=offset) - inst = new_inst - if i < n: - j = instructions[i+1].offset - old_prev = prev_op[j] - while prev_op[j] == old_prev and j < n: - prev_op[j] = prev_op[i] - j += 1 - - last_was_extarg = False - new_instructions.append(inst) - return new_instructions - - class Scanner3(Scanner): def __init__(self, version, show_asm=None, is_pypy=False): @@ -172,6 +141,41 @@ class Scanner3(Scanner): # FIXME: remove the above in favor of: # self.varargs_ops = frozenset(self.opc.hasvargs) + def remove_extended_args(self, instructions): + """Go through instructions removing extended ARG. 
+ get_instruction_bytes previously adjusted the operand values + to account for these""" + new_instructions = [] + last_was_extarg = False + n = len(instructions) + for i, inst in enumerate(instructions): + if (inst.opname == 'EXTENDED_ARG' and + i+1 < n and instructions[i+1].opname != 'MAKE_FUNCTION'): + last_was_extarg = True + starts_line = inst.starts_line + is_jump_target = inst.is_jump_target + offset = inst.offset + continue + if last_was_extarg: + + # j = self.stmts.index(inst.offset) + # self.lines[j] = offset + + new_inst= inst._replace(starts_line=starts_line, + is_jump_target=is_jump_target, + offset=offset) + inst = new_inst + if i + 1 < n: # only patch prev_op when a following instruction exists + j = instructions[i+1].offset + old_prev = self.prev_op[j] + while self.prev_op[j] == old_prev and j < n: + self.prev_op[j] = self.prev_op[i] + j += 1 + + last_was_extarg = False + new_instructions.append(inst) + return new_instructions + def ingest(self, co, classname=None, code_objects={}, show_asm=None): """ Pick out tokens from an uncompyle6 code object, and transform them, @@ -203,17 +207,13 @@ class Scanner3(Scanner): # list of tokens/instructions tokens = [] - # "customize" is a dict whose keys are nonterminals - # and the value is the argument stack entries for that - # nonterminal. The count is a little hoaky. It is mostly - # not used, but sometimes it is. - # "customize" is a dict whose keys are nonterminals + # "customize" is in the process of going away here customize = {} if self.is_pypy: customize['PyPy'] = 0 - self.build_lines_data(co) + self.lines = self.build_lines_data(co) self.build_prev_op() # FIXME: put as its own method? @@ -221,7 +221,7 @@ class Scanner3(Scanner): # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'. # 'LOAD_ASSERT' is used in assert statements. 
self.load_asserts = set() - self.insts = remove_extended_args(list(bytecode), self.prev_op) + self.insts = self.remove_extended_args(list(bytecode)) self.offset2inst_index = {} n = len(self.insts) @@ -450,7 +450,7 @@ class Scanner3(Scanner): if show_asm in ('both', 'after'): for t in tokens: - print(t) + print(t.format(line_prefix='L.')) print() return tokens, customize @@ -466,7 +466,7 @@ class Scanner3(Scanner): self.linestart_offsets = set(a for (a, _) in linestarts) # 'List-map' which shows line number of current op and offset of # first op on following line, given offset of op as index - self.lines = lines = [] + lines = [] LineTuple = namedtuple('LineTuple', ['l_no', 'next']) # Iterate through available linestarts, and fill # the data for all code offsets encountered until @@ -484,6 +484,7 @@ class Scanner3(Scanner): while offset < codelen: lines.append(LineTuple(prev_line_no, codelen)) offset += 1 + return lines def build_prev_op(self): """ @@ -653,34 +654,19 @@ class Scanner3(Scanner): # Finish filling the list for last statement slist += [codelen] * (codelen-len(slist)) - def get_target(self, offset, extended_arg=0): - """ - Get next instruction offset for op located at given . 
- NOTE: extended_arg is no longer used - """ - inst = self.insts[self.offset2inst_index[offset]] - if inst.opcode in self.opc.JREL_OPS | self.opc.JABS_OPS: - target = inst.argval - else: - # No jump offset, so use fall-through offset - target = next_offset(inst.opcode, self.opc, inst.offset) - return target - def detect_control_flow(self, offset, targets, inst_index): """ - Detect structures and their boundaries to fix optimized jumps + Detect type of block structures and their boundaries to fix optimized jumps in python2.3+ """ - # TODO: check the struct boundaries more precisely -Dan - code = self.code op = self.insts[inst_index].opcode # Detect parent structure parent = self.structs[0] - start = parent['start'] - end = parent['end'] + start = parent['start'] + end = parent['end'] # Pick inner-most parent for our offset for struct in self.structs: @@ -688,8 +674,8 @@ class Scanner3(Scanner): current_end = struct['end'] if ((current_start <= offset < current_end) and (current_start >= start and current_end <= end)): - start = current_start - end = current_end + start = current_start + end = current_end parent = struct if op == self.opc.SETUP_LOOP: