You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-03 00:45:53 +08:00
Start to DRY Python 3.4 and 3.5 scanners
This commit is contained in:
@@ -6,7 +6,7 @@
|
|||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
import sys, os, getopt
|
import sys, os, getopt
|
||||||
|
|
||||||
program = os.path.splitext(os.path.basename(__file__))
|
program, ext = os.path.splitext(os.path.basename(__file__))
|
||||||
|
|
||||||
__doc__ = """
|
__doc__ = """
|
||||||
Usage:
|
Usage:
|
||||||
|
@@ -1,15 +1,14 @@
|
|||||||
# This is take from the python 3.5 dis module
|
# This is taken from the python 3.x dis module
|
||||||
"""Disassembler of Python byte code into mnemonics."""
|
"""Disassembler of Python byte code into mnemonics."""
|
||||||
|
|
||||||
|
# This part is modified for cross Python compatibility
|
||||||
|
from uncompyle6.opcodes.opcode_3x import *
|
||||||
|
|
||||||
from dis import findlinestarts
|
from dis import findlinestarts
|
||||||
import types
|
import types
|
||||||
import collections
|
import collections
|
||||||
import io
|
import io
|
||||||
|
|
||||||
# This part is modified for cross Python compatibility
|
|
||||||
from uncompyle6.opcodes.opcode_35 import *
|
|
||||||
from uncompyle6.opcodes.opcode_35 import opname
|
|
||||||
|
|
||||||
_have_code = (types.MethodType, types.FunctionType, types.CodeType, type)
|
_have_code = (types.MethodType, types.FunctionType, types.CodeType, type)
|
||||||
|
|
||||||
def _try_compile(source, name):
|
def _try_compile(source, name):
|
||||||
@@ -142,7 +141,7 @@ def show_code(co):
|
|||||||
_Instruction = collections.namedtuple("_Instruction",
|
_Instruction = collections.namedtuple("_Instruction",
|
||||||
"opname opcode arg argval argrepr offset starts_line is_jump_target")
|
"opname opcode arg argval argrepr offset starts_line is_jump_target")
|
||||||
|
|
||||||
class Instruction(_Instruction):
|
class Instruction3(_Instruction):
|
||||||
"""Details for a bytecode operation
|
"""Details for a bytecode operation
|
||||||
|
|
||||||
Defined fields:
|
Defined fields:
|
||||||
@@ -192,8 +191,9 @@ class Instruction(_Instruction):
|
|||||||
fields.append('(' + self.argrepr + ')')
|
fields.append('(' + self.argrepr + ')')
|
||||||
return ' '.join(fields).rstrip()
|
return ' '.join(fields).rstrip()
|
||||||
|
|
||||||
|
## FIXME: figure out how to do _disassemble passing in opnames
|
||||||
|
|
||||||
def get_instructions(x, first_line=None):
|
def get_instructions(x, opnames, first_line=None):
|
||||||
"""Iterator for the opcodes in methods, functions or code
|
"""Iterator for the opcodes in methods, functions or code
|
||||||
|
|
||||||
Generates a series of Instruction named tuples giving the details of
|
Generates a series of Instruction named tuples giving the details of
|
||||||
@@ -211,39 +211,11 @@ def get_instructions(x, first_line=None):
|
|||||||
line_offset = first_line - co.co_firstlineno
|
line_offset = first_line - co.co_firstlineno
|
||||||
else:
|
else:
|
||||||
line_offset = 0
|
line_offset = 0
|
||||||
return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
|
return _get_instructions_bytes(co.co_code, opnames, co.co_varnames, co.co_names,
|
||||||
co.co_consts, cell_names, linestarts,
|
co.co_consts, cell_names, linestarts,
|
||||||
line_offset)
|
line_offset)
|
||||||
|
|
||||||
def _get_const_info(const_index, const_list):
|
def _get_instructions_bytes(code, opnames, varnames=None, names=None, constants=None,
|
||||||
"""Helper to get optional details about const references
|
|
||||||
|
|
||||||
Returns the dereferenced constant and its repr if the constant
|
|
||||||
list is defined.
|
|
||||||
Otherwise returns the constant index and its repr().
|
|
||||||
"""
|
|
||||||
argval = const_index
|
|
||||||
if const_list is not None:
|
|
||||||
argval = const_list[const_index]
|
|
||||||
return argval, repr(argval)
|
|
||||||
|
|
||||||
def _get_name_info(name_index, name_list):
|
|
||||||
"""Helper to get optional details about named references
|
|
||||||
|
|
||||||
Returns the dereferenced name as both value and repr if the name
|
|
||||||
list is defined.
|
|
||||||
Otherwise returns the name index and its repr().
|
|
||||||
"""
|
|
||||||
argval = name_index
|
|
||||||
if name_list is not None:
|
|
||||||
argval = name_list[name_index]
|
|
||||||
argrepr = argval
|
|
||||||
else:
|
|
||||||
argrepr = repr(argval)
|
|
||||||
return argval, argrepr
|
|
||||||
|
|
||||||
|
|
||||||
def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
|
|
||||||
cells=None, linestarts=None, line_offset=0):
|
cells=None, linestarts=None, line_offset=0):
|
||||||
"""Iterate over the instructions in a bytecode string.
|
"""Iterate over the instructions in a bytecode string.
|
||||||
|
|
||||||
@@ -308,7 +280,105 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
|
|||||||
elif op in hasnargs:
|
elif op in hasnargs:
|
||||||
argrepr = ("%d positional, %d keyword pair, %d annotated" %
|
argrepr = ("%d positional, %d keyword pair, %d annotated" %
|
||||||
(code[i-2], code[i-1], code[i]))
|
(code[i-2], code[i-1], code[i]))
|
||||||
yield Instruction(opname[op_num], op,
|
yield Instruction(opnames[op_num], op,
|
||||||
|
arg, argval, argrepr,
|
||||||
|
offset, starts_line, is_jump_target)
|
||||||
|
|
||||||
|
def _get_const_info(const_index, const_list):
|
||||||
|
"""Helper to get optional details about const references
|
||||||
|
|
||||||
|
Returns the dereferenced constant and its repr if the constant
|
||||||
|
list is defined.
|
||||||
|
Otherwise returns the constant index and its repr().
|
||||||
|
"""
|
||||||
|
argval = const_index
|
||||||
|
if const_list is not None:
|
||||||
|
argval = const_list[const_index]
|
||||||
|
return argval, repr(argval)
|
||||||
|
|
||||||
|
def _get_name_info(name_index, name_list):
|
||||||
|
"""Helper to get optional details about named references
|
||||||
|
|
||||||
|
Returns the dereferenced name as both value and repr if the name
|
||||||
|
list is defined.
|
||||||
|
Otherwise returns the name index and its repr().
|
||||||
|
"""
|
||||||
|
argval = name_index
|
||||||
|
if name_list is not None:
|
||||||
|
argval = name_list[name_index]
|
||||||
|
argrepr = argval
|
||||||
|
else:
|
||||||
|
argrepr = repr(argval)
|
||||||
|
return argval, argrepr
|
||||||
|
|
||||||
|
|
||||||
|
def _get_instructions_bytes(code, opnames, varnames=None, names=None, constants=None,
|
||||||
|
cells=None, linestarts=None, line_offset=0):
|
||||||
|
"""Iterate over the instructions in a bytecode string.
|
||||||
|
|
||||||
|
Generates a sequence of Instruction namedtuples giving the details of each
|
||||||
|
opcode. Additional information about the code's runtime environment
|
||||||
|
(e.g. variable names, constants) can be specified using optional
|
||||||
|
arguments.
|
||||||
|
|
||||||
|
"""
|
||||||
|
labels = findlabels(code)
|
||||||
|
extended_arg = 0
|
||||||
|
starts_line = None
|
||||||
|
# enumerate() is not an option, since we sometimes process
|
||||||
|
# multiple elements on a single pass through the loop
|
||||||
|
n = len(code)
|
||||||
|
i = 0
|
||||||
|
while i < n:
|
||||||
|
op = code[i]
|
||||||
|
if isinstance(op, str):
|
||||||
|
op_num = ord(op)
|
||||||
|
else:
|
||||||
|
op_num = op
|
||||||
|
|
||||||
|
offset = i
|
||||||
|
if linestarts is not None:
|
||||||
|
starts_line = linestarts.get(i, None)
|
||||||
|
if starts_line is not None:
|
||||||
|
starts_line += line_offset
|
||||||
|
is_jump_target = i in labels
|
||||||
|
i = i+1
|
||||||
|
arg = None
|
||||||
|
argval = None
|
||||||
|
argrepr = ''
|
||||||
|
if op >= HAVE_ARGUMENT:
|
||||||
|
if isinstance(code[i], str):
|
||||||
|
arg = op_num + ord(code[i+1])*256 + extended_arg
|
||||||
|
else:
|
||||||
|
arg = code[i] + code[i+1]*256 + extended_arg
|
||||||
|
extended_arg = 0
|
||||||
|
i = i+2
|
||||||
|
if op == EXTENDED_ARG:
|
||||||
|
extended_arg = arg*65536
|
||||||
|
# Set argval to the dereferenced value of the argument when
|
||||||
|
# available, and argrepr to the string representation of argval.
|
||||||
|
# _disassemble_bytes needs the string repr of the
|
||||||
|
# raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
|
||||||
|
argval = arg
|
||||||
|
if op in hasconst:
|
||||||
|
argval, argrepr = _get_const_info(arg, constants)
|
||||||
|
elif op in hasname:
|
||||||
|
argval, argrepr = _get_name_info(arg, names)
|
||||||
|
elif op in hasjrel:
|
||||||
|
argval = i + arg
|
||||||
|
argrepr = "to " + repr(argval)
|
||||||
|
elif op in haslocal:
|
||||||
|
argval, argrepr = _get_name_info(arg, varnames)
|
||||||
|
elif op in hascompare:
|
||||||
|
argval = cmp_op[arg]
|
||||||
|
argrepr = argval
|
||||||
|
elif op in hasfree:
|
||||||
|
argval, argrepr = _get_name_info(arg, cells)
|
||||||
|
elif op in hasnargs:
|
||||||
|
argrepr = ("%d positional, %d keyword pair, %d annotated" %
|
||||||
|
(code[i-2], code[i-1], code[i]))
|
||||||
|
opname = opnames[op_num]
|
||||||
|
yield Instruction3(opname, op,
|
||||||
arg, argval, argrepr,
|
arg, argval, argrepr,
|
||||||
offset, starts_line, is_jump_target)
|
offset, starts_line, is_jump_target)
|
||||||
|
|
||||||
@@ -347,7 +417,7 @@ class Bytecode:
|
|||||||
|
|
||||||
Iterating over this yields the bytecode operations as Instruction instances.
|
Iterating over this yields the bytecode operations as Instruction instances.
|
||||||
"""
|
"""
|
||||||
def __init__(self, x, first_line=None, current_offset=None):
|
def __init__(self, x, opnames, first_line=None, current_offset=None):
|
||||||
self.codeobj = co = _get_code_object(x)
|
self.codeobj = co = _get_code_object(x)
|
||||||
if first_line is None:
|
if first_line is None:
|
||||||
self.first_line = co.co_firstlineno
|
self.first_line = co.co_firstlineno
|
||||||
@@ -358,11 +428,12 @@ class Bytecode:
|
|||||||
self._cell_names = co.co_cellvars + co.co_freevars
|
self._cell_names = co.co_cellvars + co.co_freevars
|
||||||
self._linestarts = dict(findlinestarts(co))
|
self._linestarts = dict(findlinestarts(co))
|
||||||
self._original_object = x
|
self._original_object = x
|
||||||
|
self.opnames = opnames
|
||||||
self.current_offset = current_offset
|
self.current_offset = current_offset
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
co = self.codeobj
|
co = self.codeobj
|
||||||
return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
|
return _get_instructions_bytes(co.co_code, self.opnames, co.co_varnames, co.co_names,
|
||||||
co.co_consts, self._cell_names,
|
co.co_consts, self._cell_names,
|
||||||
self._linestarts,
|
self._linestarts,
|
||||||
line_offset=self._line_offset)
|
line_offset=self._line_offset)
|
@@ -12,7 +12,9 @@ from __future__ import print_function
|
|||||||
|
|
||||||
import dis, inspect
|
import dis, inspect
|
||||||
from array import array
|
from array import array
|
||||||
|
import uncompyle6.scanners.dis3 as dis3
|
||||||
import uncompyle6.scanners.scanner3 as scan3
|
import uncompyle6.scanners.scanner3 as scan3
|
||||||
|
from uncompyle6.opcodes.opcode_34 import opname as opnames
|
||||||
|
|
||||||
from uncompyle6 import PYTHON_VERSION
|
from uncompyle6 import PYTHON_VERSION
|
||||||
from uncompyle6.code import iscode
|
from uncompyle6.code import iscode
|
||||||
@@ -29,24 +31,22 @@ from uncompyle6.opcodes.opcode_34 import *
|
|||||||
|
|
||||||
class Scanner34(scan3.Scanner3):
|
class Scanner34(scan3.Scanner3):
|
||||||
|
|
||||||
|
## FIXME: DRY with scanner35.py
|
||||||
|
# Note: we can't use built-in disassembly routines, unless
|
||||||
|
# we do post-processing like we do here.
|
||||||
def disassemble(self, co, classname=None, code_objects={}):
|
def disassemble(self, co, classname=None, code_objects={}):
|
||||||
fn = self.disassemble_built_in if PYTHON_VERSION == 3.4 \
|
|
||||||
else self.disassemble_generic
|
|
||||||
return fn(co, classname, code_objects=code_objects)
|
|
||||||
|
|
||||||
def disassemble_built_in(self, co, classname=None,
|
# import dis; dis.disassemble(co) # DEBUG
|
||||||
code_objects={}):
|
|
||||||
# Container for tokens
|
# Container for tokens
|
||||||
tokens = []
|
tokens = []
|
||||||
|
|
||||||
customize = {}
|
customize = {}
|
||||||
self.code = array('B', co.co_code)
|
self.code = array('B', co.co_code)
|
||||||
self.build_lines_data(co)
|
self.build_lines_data(co)
|
||||||
self.build_prev_op()
|
self.build_prev_op()
|
||||||
|
|
||||||
# Get jump targets
|
bytecode = dis3.Bytecode(co, opnames)
|
||||||
# Format: {target offset: [jump offsets]}
|
|
||||||
jump_targets = self.find_jump_targets()
|
|
||||||
bytecode = dis.Bytecode(co)
|
|
||||||
|
|
||||||
# self.lines contains (block,addrLastInstr)
|
# self.lines contains (block,addrLastInstr)
|
||||||
if classname:
|
if classname:
|
||||||
@@ -67,12 +67,17 @@ class Scanner34(scan3.Scanner3):
|
|||||||
n = len(bs)
|
n = len(bs)
|
||||||
for i in range(n):
|
for i in range(n):
|
||||||
inst = bs[i]
|
inst = bs[i]
|
||||||
if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n:
|
|
||||||
|
if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n:
|
||||||
next_inst = bs[i+1]
|
next_inst = bs[i+1]
|
||||||
if (next_inst.opname == 'LOAD_GLOBAL' and
|
if (next_inst.opname == 'LOAD_GLOBAL' and
|
||||||
next_inst.argval == 'AssertionError'):
|
next_inst.argval == 'AssertionError'):
|
||||||
self.load_asserts.add(next_inst.offset)
|
self.load_asserts.add(next_inst.offset)
|
||||||
|
|
||||||
|
# Get jump targets
|
||||||
|
# Format: {target offset: [jump offsets]}
|
||||||
|
jump_targets = self.find_jump_targets()
|
||||||
|
|
||||||
for inst in bytecode:
|
for inst in bytecode:
|
||||||
if inst.offset in jump_targets:
|
if inst.offset in jump_targets:
|
||||||
jump_idx = 0
|
jump_idx = 0
|
||||||
@@ -130,29 +135,27 @@ class Scanner34(scan3.Scanner3):
|
|||||||
linestart = inst.starts_line)
|
linestart = inst.starts_line)
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
# Note: care is needed in merging this with python3.5
|
||||||
|
# and BUILD_MAP and parse3 custom rules.
|
||||||
|
# BUILD_MAP in 3.4 comes at the beginning and each tuple has STORE_MAP
|
||||||
|
# in 3.5 it comes at the end and STORE_MAP
|
||||||
|
# see parse3.py
|
||||||
elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE',
|
elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE',
|
||||||
'UNPACK_SEQUENCE',
|
'UNPACK_SEQUENCE', 'MAKE_CLOSURE',
|
||||||
'MAKE_CLOSURE',
|
|
||||||
'RAISE_VARARGS'
|
'RAISE_VARARGS'
|
||||||
):
|
):
|
||||||
# if opname == 'BUILD_TUPLE' and \
|
pos_args = inst.argval
|
||||||
# self.code[self.prev[offset]] == LOAD_CLOSURE:
|
|
||||||
# continue
|
|
||||||
# else:
|
|
||||||
# op_name = '%s_%d' % (op_name, oparg)
|
|
||||||
# if opname != BUILD_SLICE:
|
|
||||||
# customize[op_name] = oparg
|
|
||||||
opname = '%s_%d' % (opname, inst.argval)
|
|
||||||
if inst.opname != 'BUILD_SLICE':
|
if inst.opname != 'BUILD_SLICE':
|
||||||
customize[opname] = inst.argval
|
customize[opname] = pos_args
|
||||||
|
pass
|
||||||
|
opname = '%s_%d' % (opname, pos_args)
|
||||||
elif opname == 'JUMP_ABSOLUTE':
|
elif opname == 'JUMP_ABSOLUTE':
|
||||||
pattr = inst.argval
|
pattr = inst.argval
|
||||||
target = self.get_target(inst.offset)
|
target = self.get_target(inst.offset)
|
||||||
if target < inst.offset:
|
if target < inst.offset:
|
||||||
if (inst.offset in self.stmts and
|
if (inst.offset in self.stmts and
|
||||||
self.code[inst.offset+3] not in (END_FINALLY, POP_BLOCK)
|
self.code[inst.offset+3] not in (END_FINALLY, POP_BLOCK)
|
||||||
and offset not in self.not_continue):
|
and inst.offset not in self.not_continue):
|
||||||
opname = 'CONTINUE'
|
opname = 'CONTINUE'
|
||||||
else:
|
else:
|
||||||
opname = 'JUMP_BACK'
|
opname = 'JUMP_BACK'
|
||||||
|
@@ -12,8 +12,9 @@ from __future__ import print_function
|
|||||||
|
|
||||||
import inspect
|
import inspect
|
||||||
from array import array
|
from array import array
|
||||||
|
import uncompyle6.scanners.dis3 as dis3
|
||||||
import uncompyle6.scanners.scanner3 as scan3
|
import uncompyle6.scanners.scanner3 as scan3
|
||||||
import uncompyle6.scanners.dis35 as dis35
|
from uncompyle6.opcodes.opcode_35 import opname as opnames
|
||||||
|
|
||||||
from uncompyle6.code import iscode
|
from uncompyle6.code import iscode
|
||||||
from uncompyle6.scanner import Token
|
from uncompyle6.scanner import Token
|
||||||
@@ -26,12 +27,12 @@ from uncompyle6.opcodes.opcode_35 import *
|
|||||||
|
|
||||||
class Scanner35(scan3.Scanner3):
|
class Scanner35(scan3.Scanner3):
|
||||||
|
|
||||||
|
## FIXME: DRY with scanner34.py
|
||||||
# Note: we can't use built-in disassembly routines, unless
|
# Note: we can't use built-in disassembly routines, unless
|
||||||
# we do post-processing like we do here.
|
# we do post-processing like we do here.
|
||||||
def disassemble(self, co, classname=None,
|
def disassemble(self, co, classname=None, code_objects={}):
|
||||||
code_objects={}):
|
|
||||||
|
|
||||||
# imoprt dis; dis.disassemble(co) # DEBUG
|
# import dis; dis.disassemble(co) # DEBUG
|
||||||
|
|
||||||
# Container for tokens
|
# Container for tokens
|
||||||
tokens = []
|
tokens = []
|
||||||
@@ -41,7 +42,7 @@ class Scanner35(scan3.Scanner3):
|
|||||||
self.build_lines_data(co)
|
self.build_lines_data(co)
|
||||||
self.build_prev_op()
|
self.build_prev_op()
|
||||||
|
|
||||||
bytecode = dis35.Bytecode(co)
|
bytecode = dis3.Bytecode(co, opnames)
|
||||||
|
|
||||||
# self.lines contains (block,addrLastInstr)
|
# self.lines contains (block,addrLastInstr)
|
||||||
if classname:
|
if classname:
|
||||||
|
Reference in New Issue
Block a user