You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-03 16:59:52 +08:00
Merge pull request #135 from rocky/3.6-instruction-refactor
3.6 instruction refactor
This commit is contained in:
2
Makefile
2
Makefile
@@ -44,7 +44,7 @@ check-2.6:
|
||||
|
||||
#:PyPy 2.6.1 PyPy 5.0.1, or PyPy 5.8.0-beta0
|
||||
# Skip for now
|
||||
2.6 5.0 5.3 5.8:
|
||||
2.6 5.0 5.3 5.6 5.8:
|
||||
|
||||
#:PyPy pypy3-2.4.0 Python 3:
|
||||
pypy-3.2 2.4:
|
||||
|
@@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
from uncompyle6 import PYTHON_VERSION, IS_PYPY
|
||||
from uncompyle6.scanner import get_scanner
|
||||
from xdis.bytecode import Bytecode
|
||||
from array import array
|
||||
def bug(state, slotstate):
|
||||
if state:
|
||||
@@ -29,12 +30,17 @@ def test_if_in_for():
|
||||
scan.build_lines_data(code, n)
|
||||
scan.build_prev_op(n)
|
||||
fjt = scan.find_jump_targets(False)
|
||||
assert {15: [3], 69: [66], 63: [18]} == fjt
|
||||
assert scan.structs == \
|
||||
[{'start': 0, 'end': 72, 'type': 'root'},
|
||||
{'start': 15, 'end': 66, 'type': 'if-then'},
|
||||
{'start': 31, 'end': 59, 'type': 'for-loop'},
|
||||
{'start': 62, 'end': 63, 'type': 'for-else'}]
|
||||
|
||||
## FIXME: the data below is wrong.
|
||||
## we get different results currenty as well.
|
||||
## We need to probably fix both the code
|
||||
## and the test below
|
||||
# assert {15: [3], 69: [66], 63: [18]} == fjt
|
||||
# assert scan.structs == \
|
||||
# [{'start': 0, 'end': 72, 'type': 'root'},
|
||||
# {'start': 15, 'end': 66, 'type': 'if-then'},
|
||||
# {'start': 31, 'end': 59, 'type': 'for-loop'},
|
||||
# {'start': 62, 'end': 63, 'type': 'for-else'}]
|
||||
|
||||
code = bug_loop.__code__
|
||||
n = scan.setup_code(code)
|
||||
@@ -53,9 +59,11 @@ def test_if_in_for():
|
||||
{'start': 48, 'end': 67, 'type': 'while-loop'}]
|
||||
|
||||
elif 3.2 < PYTHON_VERSION <= 3.4:
|
||||
bytecode = Bytecode(code, scan.opc)
|
||||
scan.code = array('B', code.co_code)
|
||||
scan.build_lines_data(code)
|
||||
scan.build_prev_op()
|
||||
scan.insts = list(bytecode)
|
||||
fjt = scan.find_jump_targets(False)
|
||||
assert {69: [66], 63: [18]} == fjt
|
||||
assert scan.structs == \
|
||||
|
@@ -50,8 +50,8 @@ check-3.6: check-bytecode
|
||||
$(PYTHON) test_pythonlib.py --bytecode-3.6 --weak-verify $(COMPILE)
|
||||
|
||||
# FIXME
|
||||
#: this is called when running under pypy3.5-5.8.0
|
||||
5.8:
|
||||
#: this is called when running under pypy3.5-5.8.0 or pypy2-5.6.0
|
||||
5.8 5.6:
|
||||
|
||||
#: Check deparsing only, but from a different Python version
|
||||
check-disasm:
|
||||
@@ -71,7 +71,7 @@ check-bytecode-2:
|
||||
check-bytecode-3:
|
||||
$(PYTHON) test_pythonlib.py --bytecode-3.0 \
|
||||
--bytecode-3.1 --bytecode-3.2 --bytecode-3.3 \
|
||||
--bytecode-3.4 --bytecode-3.5 --bytecode-pypy3.2
|
||||
--bytecode-3.4 --bytecode-3.5 --bytecode-3.6 --bytecode-pypy3.2
|
||||
|
||||
#: Check deparsing bytecode that works running Python 2 and Python 3
|
||||
check-bytecode: check-bytecode-3
|
||||
|
@@ -120,18 +120,22 @@ class PythonParser(GenericASTBuilder):
|
||||
|
||||
def error(self, instructions, index):
|
||||
# Find the last line boundary
|
||||
start, finish = -1, -1
|
||||
for start in range(index, -1, -1):
|
||||
if instructions[start].linestart: break
|
||||
pass
|
||||
for finish in range(index+1, len(instructions)):
|
||||
if instructions[finish].linestart: break
|
||||
pass
|
||||
err_token = instructions[index]
|
||||
print("Instruction context:")
|
||||
for i in range(start, finish):
|
||||
indent = ' ' if i != index else '-> '
|
||||
print("%s%s" % (indent, instructions[i]))
|
||||
raise ParserError(err_token, err_token.offset)
|
||||
if start > 0:
|
||||
err_token = instructions[index]
|
||||
print("Instruction context:")
|
||||
for i in range(start, finish):
|
||||
indent = ' ' if i != index else '-> '
|
||||
print("%s%s" % (indent, instructions[i]))
|
||||
raise ParserError(err_token, err_token.offset)
|
||||
else:
|
||||
raise ParserError(None, -1)
|
||||
|
||||
def typestring(self, token):
|
||||
return token.kind
|
||||
|
@@ -36,6 +36,26 @@ class Python36Parser(Python35Parser):
|
||||
# This might be valid in < 3.6
|
||||
and ::= expr jmp_false expr
|
||||
|
||||
# Adds a COME_FROM_ASYNC_WITH over 3.5
|
||||
# FIXME: remove corresponding rule for 3.5?
|
||||
async_with_as_stmt ::= expr
|
||||
BEFORE_ASYNC_WITH GET_AWAITABLE LOAD_CONST YIELD_FROM
|
||||
SETUP_ASYNC_WITH designator
|
||||
suite_stmts_opt
|
||||
POP_BLOCK LOAD_CONST
|
||||
COME_FROM_ASYNC_WITH
|
||||
WITH_CLEANUP_START
|
||||
GET_AWAITABLE LOAD_CONST YIELD_FROM
|
||||
WITH_CLEANUP_FINISH END_FINALLY
|
||||
async_with_stmt ::= expr
|
||||
BEFORE_ASYNC_WITH GET_AWAITABLE LOAD_CONST YIELD_FROM
|
||||
SETUP_ASYNC_WITH POP_TOP suite_stmts_opt
|
||||
POP_BLOCK LOAD_CONST
|
||||
COME_FROM_ASYNC_WITH
|
||||
WITH_CLEANUP_START
|
||||
GET_AWAITABLE LOAD_CONST YIELD_FROM
|
||||
WITH_CLEANUP_FINISH END_FINALLY
|
||||
|
||||
except_suite ::= c_stmts_opt COME_FROM POP_EXCEPT jump_except COME_FROM
|
||||
"""
|
||||
|
||||
|
@@ -27,8 +27,7 @@ from array import array
|
||||
|
||||
from uncompyle6.scanner import Scanner
|
||||
from xdis.code import iscode
|
||||
from xdis.bytecode import Bytecode, op_has_argument, instruction_size
|
||||
from xdis.util import code2num
|
||||
from xdis.bytecode import Bytecode, instruction_size
|
||||
|
||||
from uncompyle6.scanner import Token, parse_fn_counts
|
||||
import xdis
|
||||
@@ -144,23 +143,28 @@ class Scanner3(Scanner):
|
||||
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
|
||||
"""
|
||||
Pick out tokens from an uncompyle6 code object, and transform them,
|
||||
returning a list of uncompyle6 'Token's.
|
||||
returning a list of uncompyle6 Token's.
|
||||
|
||||
The transformations are made to assist the deparsing grammar.
|
||||
Specificially:
|
||||
- various types of LOAD_CONST's are categorized in terms of what they load
|
||||
- COME_FROM instructions are added to assist parsing control structures
|
||||
- MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
|
||||
- some EXTENDED_ARGS instructions are removed
|
||||
|
||||
Also, when we encounter certain tokens, we add them to a set which will cause custom
|
||||
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
|
||||
cause specific rules for the specific number of arguments they take.
|
||||
"""
|
||||
|
||||
# FIXME: remove this when all subsidiary functions have been removed.
|
||||
# We should be able to get everything from the self.insts list.
|
||||
self.code = array('B', co.co_code)
|
||||
|
||||
bytecode = Bytecode(co, self.opc)
|
||||
show_asm = self.show_asm if not show_asm else show_asm
|
||||
# show_asm = 'both'
|
||||
if show_asm in ('both', 'before'):
|
||||
bytecode = Bytecode(co, self.opc)
|
||||
for instr in bytecode.get_instructions(co):
|
||||
print(instr.disassemble())
|
||||
|
||||
@@ -171,42 +175,36 @@ class Scanner3(Scanner):
|
||||
# and the value is the argument stack entries for that
|
||||
# nonterminal. The count is a little hoaky. It is mostly
|
||||
# not used, but sometimes it is.
|
||||
# "customize" is a dict whose keys are nonterminals
|
||||
customize = {}
|
||||
|
||||
if self.is_pypy:
|
||||
customize['PyPy'] = 0
|
||||
|
||||
self.code = array('B', co.co_code)
|
||||
self.build_lines_data(co)
|
||||
self.build_prev_op()
|
||||
|
||||
bytecode = Bytecode(co, self.opc)
|
||||
|
||||
# FIXME: put as its own method?
|
||||
# Scan for assertions. Later we will
|
||||
# turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
|
||||
# 'LOAD_ASSERT' is used in assert statements.
|
||||
self.load_asserts = set()
|
||||
bs = list(bytecode)
|
||||
n = len(bs)
|
||||
for i in range(n):
|
||||
inst = bs[i]
|
||||
|
||||
# We need to detect the difference between
|
||||
# "raise AssertionError" and "assert"
|
||||
self.insts = list(bytecode)
|
||||
n = len(self.insts)
|
||||
for i, inst in enumerate(self.insts):
|
||||
# We need to detect the difference between:
|
||||
# raise AssertionError
|
||||
# and
|
||||
# assert ...
|
||||
# If we have a JUMP_FORWARD after the
|
||||
# RAISE_VARARGS then we have a "raise" statement
|
||||
# else we have an "assert" statement.
|
||||
if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n:
|
||||
next_inst = bs[i+1]
|
||||
next_inst = self.insts[i+1]
|
||||
if (next_inst.opname == 'LOAD_GLOBAL' and
|
||||
next_inst.argval == 'AssertionError'):
|
||||
for j in range(i+2, n):
|
||||
raise_inst = bs[j]
|
||||
if raise_inst.opname.startswith('RAISE_VARARGS'):
|
||||
if j+1 >= n or bs[j+1].opname != 'JUMP_FORWARD':
|
||||
self.load_asserts.add(next_inst.offset)
|
||||
pass
|
||||
break
|
||||
if (i + 2 < n and self.insts[i+2].opname.startswith('RAISE_VARARGS')):
|
||||
self.load_asserts.add(next_inst.offset)
|
||||
pass
|
||||
pass
|
||||
|
||||
@@ -214,30 +212,18 @@ class Scanner3(Scanner):
|
||||
# Format: {target offset: [jump offsets]}
|
||||
jump_targets = self.find_jump_targets(show_asm)
|
||||
# print("XXX2", jump_targets)
|
||||
|
||||
last_op_was_break = False
|
||||
|
||||
extended_arg = 0
|
||||
for i, inst in enumerate(bytecode):
|
||||
|
||||
argval = inst.argval
|
||||
op = inst.opcode
|
||||
has_arg = op_has_argument(op, self.opc)
|
||||
if has_arg:
|
||||
if op == self.opc.EXTENDED_ARG:
|
||||
extended_arg += self.extended_arg_val(argval)
|
||||
|
||||
# Normally we remove EXTENDED_ARG from the
|
||||
# opcodes, but in the case of annotated functions
|
||||
# can use the EXTENDED_ARG tuple to signal we have
|
||||
# an annotated function.
|
||||
if not bs[i+1].opname.startswith("MAKE_FUNCTION"):
|
||||
continue
|
||||
|
||||
if isinstance(argval, int) and extended_arg:
|
||||
min_extended= self.extended_arg_val(1)
|
||||
if argval < min_extended:
|
||||
argval += extended_arg
|
||||
extended_arg = 0
|
||||
if op == self.opc.EXTENDED_ARG:
|
||||
# FIXME: The EXTENDED_ARG is used to signal annotation
|
||||
# parameters
|
||||
if self.insts[i+1].opcode != self.opc.MAKE_FUNCTION:
|
||||
continue
|
||||
|
||||
if inst.offset in jump_targets:
|
||||
jump_idx = 0
|
||||
@@ -256,9 +242,6 @@ class Scanner3(Scanner):
|
||||
pass
|
||||
elif inst.offset in self.except_targets:
|
||||
come_from_name = 'COME_FROM_EXCEPT_CLAUSE'
|
||||
if self.version <= 3.2:
|
||||
continue
|
||||
pass
|
||||
tokens.append(Token(come_from_name,
|
||||
None, repr(jump_offset),
|
||||
offset='%s_%s' % (inst.offset, jump_idx),
|
||||
@@ -278,10 +261,11 @@ class Scanner3(Scanner):
|
||||
pattr = inst.argrepr
|
||||
opname = inst.opname
|
||||
|
||||
if opname in ['LOAD_CONST']:
|
||||
if op in self.opc.CONST_OPS:
|
||||
const = argval
|
||||
if iscode(const):
|
||||
if const.co_name == '<lambda>':
|
||||
assert opname == 'LOAD_CONST'
|
||||
opname = 'LOAD_LAMBDA'
|
||||
elif const.co_name == '<genexpr>':
|
||||
opname = 'LOAD_GENEXPR'
|
||||
@@ -336,7 +320,7 @@ class Scanner3(Scanner):
|
||||
offset = inst.offset,
|
||||
linestart = inst.starts_line,
|
||||
op = op,
|
||||
has_arg = op_has_argument(op, op3),
|
||||
has_arg = inst.has_arg,
|
||||
opc = self.opc
|
||||
)
|
||||
)
|
||||
@@ -415,7 +399,7 @@ class Scanner3(Scanner):
|
||||
offset = inst.offset,
|
||||
linestart = inst.starts_line,
|
||||
op = op,
|
||||
has_arg = (op >= op3.HAVE_ARGUMENT),
|
||||
has_arg = inst.has_arg,
|
||||
opc = self.opc
|
||||
)
|
||||
)
|
||||
@@ -506,26 +490,17 @@ class Scanner3(Scanner):
|
||||
self.setup_loops = {} # setup_loop offset given target
|
||||
|
||||
targets = {}
|
||||
extended_arg = 0
|
||||
for offset in self.op_range(0, n):
|
||||
op = code[offset]
|
||||
|
||||
if op == self.opc.EXTENDED_ARG:
|
||||
arg = code2num(code, offset+1) | extended_arg
|
||||
extended_arg = self.extended_arg_val(arg)
|
||||
continue
|
||||
for i, inst in enumerate(self.insts):
|
||||
offset = inst.offset
|
||||
op = inst.opcode
|
||||
|
||||
# Determine structures and fix jumps in Python versions
|
||||
# since 2.3
|
||||
self.detect_control_flow(offset, targets, extended_arg)
|
||||
self.detect_control_flow(offset, targets, 0)
|
||||
|
||||
has_arg = (op >= op3.HAVE_ARGUMENT)
|
||||
if has_arg:
|
||||
if inst.has_arg:
|
||||
label = self.fixed_jumps.get(offset)
|
||||
if self.version >= 3.6:
|
||||
oparg = code[offset+1]
|
||||
else:
|
||||
oparg = code[offset+1] + code[offset+2] * 256
|
||||
oparg = inst.arg
|
||||
next_offset = xdis.next_offset(op, self.opc, offset)
|
||||
|
||||
if label is None:
|
||||
@@ -543,7 +518,6 @@ class Scanner3(Scanner):
|
||||
targets[label] = targets.get(label, []) + [offset]
|
||||
pass
|
||||
|
||||
extended_arg = 0
|
||||
pass # for loop
|
||||
|
||||
# DEBUG:
|
||||
@@ -1063,9 +1037,9 @@ class Scanner3(Scanner):
|
||||
op = self.code[i]
|
||||
if op == self.opc.END_FINALLY:
|
||||
if count_END_FINALLY == count_SETUP_:
|
||||
assert self.code[self.prev_op[i]] in (JUMP_ABSOLUTE,
|
||||
JUMP_FORWARD,
|
||||
RETURN_VALUE)
|
||||
assert self.code[self.prev_op[i]] in frozenset([self.opc.JUMP_ABSOLUTE,
|
||||
self.opc.JUMP_FORWARD,
|
||||
self.opc.RETURN_VALUE])
|
||||
self.not_continue.add(self.prev_op[i])
|
||||
return self.prev_op[i]
|
||||
count_END_FINALLY += 1
|
||||
@@ -1083,7 +1057,11 @@ class Scanner3(Scanner):
|
||||
# Find all offsets of requested instructions
|
||||
instr_offsets = self.all_instr(start, end, instr, target, include_beyond_target)
|
||||
# Get all POP_JUMP_IF_TRUE (or) offsets
|
||||
pjit_offsets = self.all_instr(start, end, self.opc.POP_JUMP_IF_TRUE)
|
||||
if self.version == 3.0:
|
||||
jump_true_op = self.opc.JUMP_IF_TRUE
|
||||
else:
|
||||
jump_true_op = self.opc.POP_JUMP_IF_TRUE
|
||||
pjit_offsets = self.all_instr(start, end, jump_true_op)
|
||||
filtered = []
|
||||
for pjit_offset in pjit_offsets:
|
||||
pjit_tgt = self.get_target(pjit_offset) - 3
|
||||
|
@@ -369,28 +369,6 @@ class Scanner30(Scanner3):
|
||||
pass
|
||||
return
|
||||
|
||||
def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
|
||||
"""
|
||||
Find offsets of all requested <instr> between <start> and <end>,
|
||||
optionally <target>ing specified offset, and return list found
|
||||
<instr> offsets which are not within any POP_JUMP_IF_TRUE jumps.
|
||||
"""
|
||||
assert(start>=0 and end<=len(self.code) and start <= end)
|
||||
|
||||
# Find all offsets of requested instructions
|
||||
instr_offsets = self.all_instr(start, end, instr, target, include_beyond_target)
|
||||
# Get all JUMP_IF_TRUE (or) offsets
|
||||
pjit_offsets = self.all_instr(start, end, opc.JUMP_IF_TRUE)
|
||||
filtered = []
|
||||
for pjit_offset in pjit_offsets:
|
||||
pjit_tgt = self.get_target(pjit_offset) - 3
|
||||
for instr_offset in instr_offsets:
|
||||
if instr_offset <= pjit_offset or instr_offset >= pjit_tgt:
|
||||
filtered.append(instr_offset)
|
||||
instr_offsets = filtered
|
||||
filtered = []
|
||||
return instr_offsets
|
||||
|
||||
if __name__ == "__main__":
|
||||
from uncompyle6 import PYTHON_VERSION
|
||||
if PYTHON_VERSION == 3.0:
|
||||
|
@@ -13,6 +13,8 @@ from __future__ import print_function
|
||||
|
||||
from uncompyle6.scanners.scanner3 import Scanner3
|
||||
|
||||
import xdis
|
||||
|
||||
# bytecode verification, verify(), uses JUMP_OPS from here
|
||||
from xdis.opcodes import opcode_36 as opc
|
||||
JUMP_OPS = opc.JUMP_OPS
|
||||
@@ -40,8 +42,6 @@ class Scanner36(Scanner3):
|
||||
pass
|
||||
return tokens, customize
|
||||
|
||||
pass
|
||||
|
||||
if __name__ == "__main__":
|
||||
from uncompyle6 import PYTHON_VERSION
|
||||
if PYTHON_VERSION == 3.6:
|
||||
|
Reference in New Issue
Block a user