diff --git a/uncompyle6/bin/pydisassemble.py b/uncompyle6/bin/pydisassemble.py index f4e1446a..dcf74503 100755 --- a/uncompyle6/bin/pydisassemble.py +++ b/uncompyle6/bin/pydisassemble.py @@ -6,7 +6,7 @@ from __future__ import print_function import sys, os, getopt -program = os.path.splitext(os.path.basename(__file__)) +program, ext = os.path.splitext(os.path.basename(__file__)) __doc__ = """ Usage: diff --git a/uncompyle6/scanners/dis35.py b/uncompyle6/scanners/dis3.py similarity index 80% rename from uncompyle6/scanners/dis35.py rename to uncompyle6/scanners/dis3.py index 30ba5a35..304bc8b9 100644 --- a/uncompyle6/scanners/dis35.py +++ b/uncompyle6/scanners/dis3.py @@ -1,15 +1,14 @@ -# This is take from the python 3.5 dis module +# This is taken from the python 3.x dis module """Disassembler of Python byte code into mnemonics.""" +# This part is modified for cross Python compatability +from uncompyle6.opcodes.opcode_3x import * + from dis import findlinestarts import types import collections import io -# This part is modified for cross Python compatability -from uncompyle6.opcodes.opcode_35 import * -from uncompyle6.opcodes.opcode_35 import opname - _have_code = (types.MethodType, types.FunctionType, types.CodeType, type) def _try_compile(source, name): @@ -142,7 +141,7 @@ def show_code(co): _Instruction = collections.namedtuple("_Instruction", "opname opcode arg argval argrepr offset starts_line is_jump_target") -class Instruction(_Instruction): +class Instruction3(_Instruction): """Details for a bytecode operation Defined fields: @@ -192,8 +191,9 @@ class Instruction(_Instruction): fields.append('(' + self.argrepr + ')') return ' '.join(fields).rstrip() + ## FIXME: figure out how to do _disassemble passing in opnames -def get_instructions(x, first_line=None): +def get_instructions(x, opnames, first_line=None): """Iterator for the opcodes in methods, functions or code Generates a series of Instruction named tuples giving the details of @@ -211,39 +211,11 @@ def get_instructions(x, first_line=None): line_offset = first_line - co.co_firstlineno else: line_offset = 0 - return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names, + return _get_instructions_bytes(co.co_code, opnames, co.co_varnames, co.co_names, co.co_consts, cell_names, linestarts, line_offset) -def _get_const_info(const_index, const_list): - """Helper to get optional details about const references - - Returns the dereferenced constant and its repr if the constant - list is defined. - Otherwise returns the constant index and its repr(). - """ - argval = const_index - if const_list is not None: - argval = const_list[const_index] - return argval, repr(argval) - -def _get_name_info(name_index, name_list): - """Helper to get optional details about named references - - Returns the dereferenced name as both value and repr if the name - list is defined. - Otherwise returns the name index and its repr(). - """ - argval = name_index - if name_list is not None: - argval = name_list[name_index] - argrepr = argval - else: - argrepr = repr(argval) - return argval, argrepr - - -def _get_instructions_bytes(code, varnames=None, names=None, constants=None, +def _get_instructions_bytes(code, opnames, varnames=None, names=None, constants=None, cells=None, linestarts=None, line_offset=0): """Iterate over the instructions in a bytecode string. @@ -308,7 +280,105 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None, elif op in hasnargs: argrepr = ("%d positional, %d keyword pair, %d annotated" % (code[i-2], code[i-1], code[i])) - yield Instruction(opname[op_num], op, + yield Instruction(opnames[op_num], op, + arg, argval, argrepr, + offset, starts_line, is_jump_target) + +def _get_const_info(const_index, const_list): + """Helper to get optional details about const references + + Returns the dereferenced constant and its repr if the constant + list is defined. + Otherwise returns the constant index and its repr(). + """ + argval = const_index + if const_list is not None: + argval = const_list[const_index] + return argval, repr(argval) + +def _get_name_info(name_index, name_list): + """Helper to get optional details about named references + + Returns the dereferenced name as both value and repr if the name + list is defined. + Otherwise returns the name index and its repr(). + """ + argval = name_index + if name_list is not None: + argval = name_list[name_index] + argrepr = argval + else: + argrepr = repr(argval) + return argval, argrepr + + +def _get_instructions_bytes(code, opnames, varnames=None, names=None, constants=None, + cells=None, linestarts=None, line_offset=0): + """Iterate over the instructions in a bytecode string. + + Generates a sequence of Instruction namedtuples giving the details of each + opcode. Additional information about the code's runtime environment + (e.g. variable names, constants) can be specified using optional + arguments. + + """ + labels = findlabels(code) + extended_arg = 0 + starts_line = None + # enumerate() is not an option, since we sometimes process + # multiple elements on a single pass through the loop + n = len(code) + i = 0 + while i < n: + op = code[i] + if isinstance(op, str): + op_num = ord(op) + else: + op_num = op + + offset = i + if linestarts is not None: + starts_line = linestarts.get(i, None) + if starts_line is not None: + starts_line += line_offset + is_jump_target = i in labels + i = i+1 + arg = None + argval = None + argrepr = '' + if op >= HAVE_ARGUMENT: + if isinstance(code[i], str): + arg = op_num + ord(code[i+1])*256 + extended_arg + else: + arg = code[i] + code[i+1]*256 + extended_arg + extended_arg = 0 + i = i+2 + if op == EXTENDED_ARG: + extended_arg = arg*65536 + # Set argval to the dereferenced value of the argument when + # availabe, and argrepr to the string representation of argval. + # _disassemble_bytes needs the string repr of the + # raw name index for LOAD_GLOBAL, LOAD_CONST, etc. + argval = arg + if op in hasconst: + argval, argrepr = _get_const_info(arg, constants) + elif op in hasname: + argval, argrepr = _get_name_info(arg, names) + elif op in hasjrel: + argval = i + arg + argrepr = "to " + repr(argval) + elif op in haslocal: + argval, argrepr = _get_name_info(arg, varnames) + elif op in hascompare: + argval = cmp_op[arg] + argrepr = argval + elif op in hasfree: + argval, argrepr = _get_name_info(arg, cells) + elif op in hasnargs: + argrepr = ("%d positional, %d keyword pair, %d annotated" % + (code[i-2], code[i-1], code[i])) + opname = opnames[op_num] + yield Instruction3(opname, op, arg, argval, argrepr, offset, starts_line, is_jump_target) @@ -347,7 +417,7 @@ class Bytecode: Iterating over this yields the bytecode operations as Instruction instances. """ - def __init__(self, x, first_line=None, current_offset=None): + def __init__(self, x, opnames, first_line=None, current_offset=None): self.codeobj = co = _get_code_object(x) if first_line is None: self.first_line = co.co_firstlineno @@ -358,11 +428,12 @@ class Bytecode: self._cell_names = co.co_cellvars + co.co_freevars self._linestarts = dict(findlinestarts(co)) self._original_object = x + self.opnames = opnames self.current_offset = current_offset def __iter__(self): co = self.codeobj - return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names, + return _get_instructions_bytes(co.co_code, self.opnames, co.co_varnames, co.co_names, co.co_consts, self._cell_names, self._linestarts, line_offset=self._line_offset) diff --git a/uncompyle6/scanners/scanner34.py b/uncompyle6/scanners/scanner34.py index e03352ec..952a8fe1 100644 --- a/uncompyle6/scanners/scanner34.py +++ b/uncompyle6/scanners/scanner34.py @@ -12,7 +12,9 @@ from __future__ import print_function import dis, inspect from array import array +import uncompyle6.scanners.dis3 as dis3 import uncompyle6.scanners.scanner3 as scan3 +from uncompyle6.opcodes.opcode_34 import opname as opnames from uncompyle6 import PYTHON_VERSION from uncompyle6.code import iscode @@ -29,24 +31,22 @@ from uncompyle6.opcodes.opcode_34 import * class Scanner34(scan3.Scanner3): + ## FIXME: DRY with scanner35.py + # Note: we can't use built-in disassembly routines, unless + # we do post-processing like we do here. def disassemble(self, co, classname=None, code_objects={}): - fn = self.disassemble_built_in if PYTHON_VERSION == 3.4 \ - else self.disassemble_generic - return fn(co, classname, code_objects=code_objects) - def disassemble_built_in(self, co, classname=None, - code_objects={}): + # import dis; dis.disassemble(co) # DEBUG + # Container for tokens tokens = [] + customize = {} self.code = array('B', co.co_code) self.build_lines_data(co) self.build_prev_op() - # Get jump targets - # Format: {target offset: [jump offsets]} - jump_targets = self.find_jump_targets() - bytecode = dis.Bytecode(co) + bytecode = dis3.Bytecode(co, opnames) # self.lines contains (block,addrLastInstr) if classname: @@ -67,12 +67,17 @@ class Scanner34(scan3.Scanner3): n = len(bs) for i in range(n): inst = bs[i] - if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n: + + if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n: next_inst = bs[i+1] if (next_inst.opname == 'LOAD_GLOBAL' and next_inst.argval == 'AssertionError'): self.load_asserts.add(next_inst.offset) + # Get jump targets + # Format: {target offset: [jump offsets]} + jump_targets = self.find_jump_targets() + for inst in bytecode: if inst.offset in jump_targets: jump_idx = 0 @@ -130,29 +135,27 @@ class Scanner34(scan3.Scanner3): linestart = inst.starts_line) ) continue + # Note: care is needed in merging this with python3.5 + # and BUILD_MAP and parse3 custom rules. + # BUILD_MAP in 3.4 comes at the beginning and each tuple has STORE_MAP + # in 3.5 it comes at the end and STORE_MAP + # see parse3.py elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE', - 'UNPACK_SEQUENCE', - 'MAKE_CLOSURE', + 'UNPACK_SEQUENCE', 'MAKE_CLOSURE', 'RAISE_VARARGS' ): - # if opname == 'BUILD_TUPLE' and \ - # self.code[self.prev[offset]] == LOAD_CLOSURE: - # continue - # else: - # op_name = '%s_%d' % (op_name, oparg) - # if opname != BUILD_SLICE: - # customize[op_name] = oparg - opname = '%s_%d' % (opname, inst.argval) + pos_args = inst.argval if inst.opname != 'BUILD_SLICE': - customize[opname] = inst.argval - + customize[opname] = pos_args + pass + opname = '%s_%d' % (opname, pos_args) elif opname == 'JUMP_ABSOLUTE': pattr = inst.argval target = self.get_target(inst.offset) if target < inst.offset: if (inst.offset in self.stmts and self.code[inst.offset+3] not in (END_FINALLY, POP_BLOCK) - and offset not in self.not_continue): + and inst.offset not in self.not_continue): opname = 'CONTINUE' else: opname = 'JUMP_BACK' diff --git a/uncompyle6/scanners/scanner35.py b/uncompyle6/scanners/scanner35.py index 316e75f9..d74c9da7 100644 --- a/uncompyle6/scanners/scanner35.py +++ b/uncompyle6/scanners/scanner35.py @@ -12,8 +12,9 @@ from __future__ import print_function import inspect from array import array +import uncompyle6.scanners.dis3 as dis3 import uncompyle6.scanners.scanner3 as scan3 -import uncompyle6.scanners.dis35 as dis35 +from uncompyle6.opcodes.opcode_35 import opname as opnames from uncompyle6.code import iscode from uncompyle6.scanner import Token @@ -26,12 +27,12 @@ from uncompyle6.opcodes.opcode_35 import * class Scanner35(scan3.Scanner3): + ## FIXME: DRY with scanner34.py # Note: we can't use built-in disassembly routines, unless # we do post-processing like we do here. - def disassemble(self, co, classname=None, - code_objects={}): + def disassemble(self, co, classname=None, code_objects={}): - # imoprt dis; dis.disassemble(co) # DEBUG + # import dis; dis.disassemble(co) # DEBUG # Container for tokens tokens = [] @@ -41,7 +42,7 @@ class Scanner35(scan3.Scanner3): self.build_lines_data(co) self.build_prev_op() - bytecode = dis35.Bytecode(co) + bytecode = dis3.Bytecode(co, opnames) # self.lines contains (block,addrLastInstr) if classname: