Start to DRY Python 3.4 and 3.5 scanners

This commit is contained in:
rocky
2016-05-14 16:55:44 -04:00
parent 51df8d8cbe
commit 1d9ab4e1d1
4 changed files with 144 additions and 69 deletions

View File

@@ -6,7 +6,7 @@
from __future__ import print_function
import sys, os, getopt
program = os.path.splitext(os.path.basename(__file__))
program, ext = os.path.splitext(os.path.basename(__file__))
__doc__ = """
Usage:

View File

@@ -1,15 +1,14 @@
# This is take from the python 3.5 dis module
# This is taken from the python 3.x dis module
"""Disassembler of Python byte code into mnemonics."""
# This part is modified for cross Python compatibility
from uncompyle6.opcodes.opcode_3x import *
from dis import findlinestarts
import types
import collections
import io
# This part is modified for cross Python compatibility
from uncompyle6.opcodes.opcode_35 import *
from uncompyle6.opcodes.opcode_35 import opname
_have_code = (types.MethodType, types.FunctionType, types.CodeType, type)
def _try_compile(source, name):
@@ -142,7 +141,7 @@ def show_code(co):
_Instruction = collections.namedtuple("_Instruction",
"opname opcode arg argval argrepr offset starts_line is_jump_target")
class Instruction(_Instruction):
class Instruction3(_Instruction):
"""Details for a bytecode operation
Defined fields:
@@ -192,8 +191,9 @@ class Instruction(_Instruction):
fields.append('(' + self.argrepr + ')')
return ' '.join(fields).rstrip()
## FIXME: figure out how to do _disassemble passing in opnames
def get_instructions(x, first_line=None):
def get_instructions(x, opnames, first_line=None):
"""Iterator for the opcodes in methods, functions or code
Generates a series of Instruction named tuples giving the details of
@@ -211,39 +211,11 @@ def get_instructions(x, first_line=None):
line_offset = first_line - co.co_firstlineno
else:
line_offset = 0
return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
return _get_instructions_bytes(co.co_code, opnames, co.co_varnames, co.co_names,
co.co_consts, cell_names, linestarts,
line_offset)
def _get_const_info(const_index, const_list):
"""Helper to get optional details about const references
Returns the dereferenced constant and its repr if the constant
list is defined.
Otherwise returns the constant index and its repr().
"""
argval = const_index
if const_list is not None:
argval = const_list[const_index]
return argval, repr(argval)
def _get_name_info(name_index, name_list):
"""Helper to get optional details about named references
Returns the dereferenced name as both value and repr if the name
list is defined.
Otherwise returns the name index and its repr().
"""
argval = name_index
if name_list is not None:
argval = name_list[name_index]
argrepr = argval
else:
argrepr = repr(argval)
return argval, argrepr
def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
def _get_instructions_bytes(code, opnames, varnames=None, names=None, constants=None,
cells=None, linestarts=None, line_offset=0):
"""Iterate over the instructions in a bytecode string.
@@ -308,7 +280,105 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
elif op in hasnargs:
argrepr = ("%d positional, %d keyword pair, %d annotated" %
(code[i-2], code[i-1], code[i]))
yield Instruction(opname[op_num], op,
yield Instruction(opnames[op_num], op,
arg, argval, argrepr,
offset, starts_line, is_jump_target)
def _get_const_info(const_index, const_list):
"""Helper to get optional details about const references
Returns the dereferenced constant and its repr if the constant
list is defined.
Otherwise returns the constant index and its repr().
"""
argval = const_index
if const_list is not None:
argval = const_list[const_index]
return argval, repr(argval)
def _get_name_info(name_index, name_list):
"""Helper to get optional details about named references
Returns the dereferenced name as both value and repr if the name
list is defined.
Otherwise returns the name index and its repr().
"""
argval = name_index
if name_list is not None:
argval = name_list[name_index]
argrepr = argval
else:
argrepr = repr(argval)
return argval, argrepr
def _get_instructions_bytes(code, opnames, varnames=None, names=None, constants=None,
                            cells=None, linestarts=None, line_offset=0):
    """Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction3 namedtuples giving the details of
    each opcode.  Additional information about the code's runtime
    environment (e.g. variable names, constants) can be specified using
    optional arguments.

    Parameters:
      code        -- the bytecode; indexing it may give ints or
                     one-character strings, both are handled.
      opnames     -- sequence mapping an opcode number to its mnemonic.
      varnames, names, constants, cells
                  -- optional tables used to dereference operands; when
                     one is None the raw index is reported instead.
      linestarts  -- optional mapping of bytecode offset to line number.
      line_offset -- amount added to every line number found in
                     ``linestarts``.
    """
    labels = findlabels(code)
    extended_arg = 0
    starts_line = None
    # enumerate() is not an option, since we sometimes process
    # multiple elements on a single pass through the loop
    n = len(code)
    i = 0
    while i < n:
        op = code[i]
        # Normalise to an int so every comparison and table lookup below
        # behaves the same for str-indexed and int-indexed bytecode.
        op_num = ord(op) if isinstance(op, str) else op
        offset = i
        if linestarts is not None:
            starts_line = linestarts.get(i, None)
            if starts_line is not None:
                starts_line += line_offset
        is_jump_target = i in labels
        i = i+1
        arg = None
        argval = None
        argrepr = ''
        if op_num >= HAVE_ARGUMENT:
            # The operand is the two bytes after the opcode, little-endian.
            low, high = code[i], code[i+1]
            if isinstance(low, str):
                low, high = ord(low), ord(high)
            arg = low + high*256 + extended_arg
            extended_arg = 0
            i = i+2
            if op_num == EXTENDED_ARG:
                # Supplies the high 16 bits of the next instruction's arg.
                extended_arg = arg*65536
            # Set argval to the dereferenced value of the argument when
            # available, and argrepr to the string representation of argval.
            # _disassemble_bytes needs the string repr of the
            # raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
            argval = arg
            if op_num in hasconst:
                argval, argrepr = _get_const_info(arg, constants)
            elif op_num in hasname:
                argval, argrepr = _get_name_info(arg, names)
            elif op_num in hasjrel:
                # Relative jumps are measured from the next instruction.
                argval = i + arg
                argrepr = "to " + repr(argval)
            elif op_num in haslocal:
                argval, argrepr = _get_name_info(arg, varnames)
            elif op_num in hascompare:
                argval = cmp_op[arg]
                argrepr = argval
            elif op_num in hasfree:
                argval, argrepr = _get_name_info(arg, cells)
            elif op_num in hasnargs:
                # Decode the counts from the assembled arg rather than from
                # raw code bytes: the byte after the operand belongs to the
                # next instruction, and the annotation count lives in the
                # bits above the low 16 (i.e. it arrives via EXTENDED_ARG).
                argrepr = ("%d positional, %d keyword pair, %d annotated" %
                           (arg & 0xFF, (arg >> 8) & 0xFF,
                            (arg >> 16) & 0x7FFF))
        yield Instruction3(opnames[op_num], op_num,
                           arg, argval, argrepr,
                           offset, starts_line, is_jump_target)
@@ -347,7 +417,7 @@ class Bytecode:
Iterating over this yields the bytecode operations as Instruction instances.
"""
def __init__(self, x, first_line=None, current_offset=None):
def __init__(self, x, opnames, first_line=None, current_offset=None):
self.codeobj = co = _get_code_object(x)
if first_line is None:
self.first_line = co.co_firstlineno
@@ -358,11 +428,12 @@ class Bytecode:
self._cell_names = co.co_cellvars + co.co_freevars
self._linestarts = dict(findlinestarts(co))
self._original_object = x
self.opnames = opnames
self.current_offset = current_offset
def __iter__(self):
co = self.codeobj
return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
return _get_instructions_bytes(co.co_code, self.opnames, co.co_varnames, co.co_names,
co.co_consts, self._cell_names,
self._linestarts,
line_offset=self._line_offset)

View File

@@ -12,7 +12,9 @@ from __future__ import print_function
import dis, inspect
from array import array
import uncompyle6.scanners.dis3 as dis3
import uncompyle6.scanners.scanner3 as scan3
from uncompyle6.opcodes.opcode_34 import opname as opnames
from uncompyle6 import PYTHON_VERSION
from uncompyle6.code import iscode
@@ -29,24 +31,22 @@ from uncompyle6.opcodes.opcode_34 import *
class Scanner34(scan3.Scanner3):
## FIXME: DRY with scanner35.py
# Note: we can't use built-in disassembly routines, unless
# we do post-processing like we do here.
def disassemble(self, co, classname=None, code_objects={}):
fn = self.disassemble_built_in if PYTHON_VERSION == 3.4 \
else self.disassemble_generic
return fn(co, classname, code_objects=code_objects)
def disassemble_built_in(self, co, classname=None,
code_objects={}):
# import dis; dis.disassemble(co) # DEBUG
# Container for tokens
tokens = []
customize = {}
self.code = array('B', co.co_code)
self.build_lines_data(co)
self.build_prev_op()
# Get jump targets
# Format: {target offset: [jump offsets]}
jump_targets = self.find_jump_targets()
bytecode = dis.Bytecode(co)
bytecode = dis3.Bytecode(co, opnames)
# self.lines contains (block,addrLastInstr)
if classname:
@@ -67,12 +67,17 @@ class Scanner34(scan3.Scanner3):
n = len(bs)
for i in range(n):
inst = bs[i]
if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n:
if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n:
next_inst = bs[i+1]
if (next_inst.opname == 'LOAD_GLOBAL' and
next_inst.argval == 'AssertionError'):
self.load_asserts.add(next_inst.offset)
# Get jump targets
# Format: {target offset: [jump offsets]}
jump_targets = self.find_jump_targets()
for inst in bytecode:
if inst.offset in jump_targets:
jump_idx = 0
@@ -130,29 +135,27 @@ class Scanner34(scan3.Scanner3):
linestart = inst.starts_line)
)
continue
# Note: care is needed in merging this with python3.5
# and BUILD_MAP and parse3 custom rules.
# BUILD_MAP in 3.4 comes at the beginning and each tuple has STORE_MAP
# in 3.5 it comes at the end and STORE_MAP
# see parse3.py
elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE',
'UNPACK_SEQUENCE',
'MAKE_CLOSURE',
'UNPACK_SEQUENCE', 'MAKE_CLOSURE',
'RAISE_VARARGS'
):
# if opname == 'BUILD_TUPLE' and \
# self.code[self.prev[offset]] == LOAD_CLOSURE:
# continue
# else:
# op_name = '%s_%d' % (op_name, oparg)
# if opname != BUILD_SLICE:
# customize[op_name] = oparg
opname = '%s_%d' % (opname, inst.argval)
pos_args = inst.argval
if inst.opname != 'BUILD_SLICE':
customize[opname] = inst.argval
customize[opname] = pos_args
pass
opname = '%s_%d' % (opname, pos_args)
elif opname == 'JUMP_ABSOLUTE':
pattr = inst.argval
target = self.get_target(inst.offset)
if target < inst.offset:
if (inst.offset in self.stmts and
self.code[inst.offset+3] not in (END_FINALLY, POP_BLOCK)
and offset not in self.not_continue):
and inst.offset not in self.not_continue):
opname = 'CONTINUE'
else:
opname = 'JUMP_BACK'

View File

@@ -12,8 +12,9 @@ from __future__ import print_function
import inspect
from array import array
import uncompyle6.scanners.dis3 as dis3
import uncompyle6.scanners.scanner3 as scan3
import uncompyle6.scanners.dis35 as dis35
from uncompyle6.opcodes.opcode_35 import opname as opnames
from uncompyle6.code import iscode
from uncompyle6.scanner import Token
@@ -26,12 +27,12 @@ from uncompyle6.opcodes.opcode_35 import *
class Scanner35(scan3.Scanner3):
## FIXME: DRY with scanner34.py
# Note: we can't use built-in disassembly routines, unless
# we do post-processing like we do here.
def disassemble(self, co, classname=None,
code_objects={}):
def disassemble(self, co, classname=None, code_objects={}):
# imoprt dis; dis.disassemble(co) # DEBUG
# import dis; dis.disassemble(co) # DEBUG
# Container for tokens
tokens = []
@@ -41,7 +42,7 @@ class Scanner35(scan3.Scanner3):
self.build_lines_data(co)
self.build_prev_op()
bytecode = dis35.Bytecode(co)
bytecode = dis3.Bytecode(co, opnames)
# self.lines contains (block,addrLastInstr)
if classname: