You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-02 16:44:46 +08:00
First step towards managing control flow decoding
This commit is contained in:
@@ -31,7 +31,7 @@ def test_grammar():
|
||||
assert expect_right_recursive == right_recursive
|
||||
s = get_scanner(PYTHON_VERSION, IS_PYPY)
|
||||
ignore_set = set(
|
||||
"""JUMP_BACK CONTINUE RETURN_END_IF COME_FROM
|
||||
"""JUMP_BACK CONTINUE RETURN_END_IF COME_FROM COME_FROM_EXCEPT
|
||||
LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP
|
||||
LAMBDA_MARKER RETURN_LAST
|
||||
""".split())
|
||||
|
@@ -169,7 +169,7 @@ class Python3Parser(PythonParser):
|
||||
# COME_FROM targets from the wrong places
|
||||
|
||||
trystmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
|
||||
try_middle _come_from
|
||||
try_middle opt_come_from_except
|
||||
|
||||
# this is nested inside a trystmt
|
||||
tryfinallystmt ::= SETUP_FINALLY suite_stmts_opt
|
||||
@@ -187,8 +187,14 @@ class Python3Parser(PythonParser):
|
||||
|
||||
try_middle ::= jmp_abs COME_FROM except_stmts
|
||||
END_FINALLY
|
||||
try_middle ::= jmp_abs COME_FROM_EXCEPT except_stmts
|
||||
END_FINALLY
|
||||
|
||||
# FIXME: remove this
|
||||
try_middle ::= JUMP_FORWARD COME_FROM except_stmts
|
||||
END_FINALLY COME_FROM
|
||||
try_middle ::= JUMP_FORWARD COME_FROM except_stmts
|
||||
END_FINALLY COME_FROM_EXCEPT
|
||||
|
||||
except_stmts ::= except_stmts except_stmt
|
||||
except_stmts ::= except_stmt
|
||||
@@ -242,17 +248,25 @@ class Python3Parser(PythonParser):
|
||||
|
||||
def p_misc3(self, args):
|
||||
"""
|
||||
try_middle ::= JUMP_FORWARD COME_FROM except_stmts END_FINALLY NOP COME_FROM
|
||||
try_middle ::= JUMP_FORWARD COME_FROM_EXCEPT except_stmts END_FINALLY COME_FROM
|
||||
|
||||
for_block ::= l_stmts
|
||||
iflaststmtl ::= testexpr c_stmts_opt
|
||||
iflaststmt ::= testexpr c_stmts_opt34
|
||||
c_stmts_opt34 ::= JUMP_BACK JUMP_ABSOLUTE c_stmts_opt
|
||||
"""
|
||||
|
||||
def p_come_from3(self, args):
|
||||
"""
|
||||
opt_come_from_except ::= COME_FROM_EXCEPT
|
||||
opt_come_from_except ::= come_froms
|
||||
|
||||
come_froms ::= come_froms COME_FROM
|
||||
come_froms ::=
|
||||
"""
|
||||
|
||||
def p_jump3(self, args):
|
||||
"""
|
||||
come_froms ::= come_froms COME_FROM
|
||||
come_froms ::= COME_FROM
|
||||
jmp_false ::= POP_JUMP_IF_FALSE
|
||||
jmp_true ::= POP_JUMP_IF_TRUE
|
||||
|
||||
|
40
uncompyle6/scanners/controlflow.py
Normal file
40
uncompyle6/scanners/controlflow.py
Normal file
@@ -0,0 +1,40 @@
|
||||
"""
|
||||
Detect control flow as much as possible.
|
||||
The basic idea here is to put in explicit end instructions that make
|
||||
grammar parsing simpler and more precise.
|
||||
"""
|
||||
|
||||
from collections import namedtuple
|
||||
from xdis.bytecode import Bytecode
|
||||
|
||||
# Records stored in the offset -> action mapping built by ControlFlow.
# 'type' holds the string 'start' or 'end'; 'offset' holds a bytecode offset.
control_flow_start = namedtuple('control_flow_start', 'name type offset')
control_flow_end = namedtuple('control_flow_end', 'name type offset')

# A matched (start, end) pair of offsets for one named control-flow block.
control_flow_pair = namedtuple('control_flow_pair',
                               'name start_offset end_offset')
|
||||
|
||||
|
||||
class ControlFlow:
    """Record where SETUP_* control-flow blocks start and end in a code object.

    The opcode table, the set of SETUP_* opcodes and ``op_range`` are taken
    from the scanner handed to the constructor.
    """

    def __init__(self, scanner):
        """Copy what is needed from *scanner* and start with empty results."""
        self.scanner = scanner
        self.opc = scanner.opc
        self.setup_ops = scanner.setup_ops
        self.op_range = scanner.op_range

        # Control-flow nesting
        self.offset_action = {}
        self.cf_end = []

    def detect_control_flow(self, co):
        """Scan code object *co* and return the offset -> action mapping.

        For every instruction whose opcode is in ``self.setup_ops`` two
        entries are written to ``self.offset_action``:

        * at the instruction's own offset, a ``control_flow_start``;
        * at the instruction's ``argval``, a ``control_flow_end``.

        Both records carry the opcode name with its ``SETUP_`` prefix
        removed, and both store the offset of the SETUP instruction itself
        in their ``offset`` field.  A later entry written at an offset that
        is already present replaces the earlier one.

        The disassembled bytecode is kept on ``self.bytecode``; the returned
        mapping is the same dict object as ``self.offset_action``.
        """
        self.bytecode = Bytecode(co, self.opc)
        actions = self.offset_action
        for instr in self.bytecode:
            if instr.opcode not in self.setup_ops:
                continue
            # Use the part of the opcode name after "SETUP_"
            kind = instr.opname[len('SETUP_'):]
            start = instr.offset
            actions[start] = control_flow_start(kind, 'start', start)
            actions[instr.argval] = control_flow_end(kind, 'end', start)
        return actions
|
@@ -29,6 +29,7 @@ from uncompyle6.scanner import Scanner, op_has_argument
|
||||
from xdis.code import iscode
|
||||
from xdis.bytecode import Bytecode
|
||||
from uncompyle6.scanner import Token, parse_fn_counts
|
||||
from uncompyle6.scanners.controlflow import ControlFlow
|
||||
|
||||
# Get all the opcodes into globals
|
||||
import xdis.opcodes.opcode_33 as op3
|
||||
@@ -102,6 +103,11 @@ class Scanner3(Scanner):
|
||||
varargs_ops.add(self.opc.CALL_METHOD)
|
||||
self.varargs_ops = frozenset(varargs_ops)
|
||||
|
||||
self.setup_ops = frozenset([
|
||||
self.opc.SETUP_LOOP,
|
||||
self.opc.SETUP_EXCEPT, self.opc.SETUP_FINALLY,
|
||||
self.opc.SETUP_WITH])
|
||||
|
||||
# Not really a set, but still classification-like
|
||||
self.statement_opcode_sequences = [
|
||||
(self.opc.POP_JUMP_IF_FALSE, self.opc.JUMP_FORWARD),
|
||||
@@ -127,7 +133,7 @@ class Scanner3(Scanner):
|
||||
"""
|
||||
|
||||
show_asm = self.show_asm if not show_asm else show_asm
|
||||
# show_asm = 'both'
|
||||
# show_asm = 'after'
|
||||
if show_asm in ('both', 'before'):
|
||||
bytecode = Bytecode(co, self.opc)
|
||||
for instr in bytecode.get_instructions(co):
|
||||
@@ -179,13 +185,24 @@ class Scanner3(Scanner):
|
||||
# Format: {target offset: [jump offsets]}
|
||||
jump_targets = self.find_jump_targets()
|
||||
|
||||
offset_action = ControlFlow(self).detect_control_flow(co)
|
||||
for inst in bytecode:
|
||||
|
||||
argval = inst.argval
|
||||
if inst.offset in jump_targets:
|
||||
jump_idx = 0
|
||||
for jump_offset in jump_targets[inst.offset]:
|
||||
tokens.append(Token('COME_FROM', None, repr(jump_offset),
|
||||
come_from_name = 'COME_FROM'
|
||||
if (inst.offset in offset_action
|
||||
and offset_action[inst.offset].type == 'end'
|
||||
# Adjust the grammar and remove the below
|
||||
and offset_action[inst.offset].name in ['EXCEPT']
|
||||
):
|
||||
come_from_name = '%s_%s' % (
|
||||
(come_from_name, offset_action[inst.offset].name))
|
||||
pass
|
||||
tokens.append(Token(come_from_name,
|
||||
None, repr(jump_offset),
|
||||
offset='%s_%s' % (inst.offset, jump_idx),
|
||||
has_arg = True, opc=self.opc))
|
||||
jump_idx += 1
|
||||
|
@@ -61,14 +61,7 @@ class Token:
|
||||
if self.pattr:
|
||||
pattr = self.pattr
|
||||
if self.opc:
|
||||
if self.op in self.opc.hasjrel:
|
||||
pattr = "to " + self.pattr
|
||||
elif self.op in self.opc.hasjabs:
|
||||
self.pattr= str(self.pattr)
|
||||
if not self.pattr.startswith('to '):
|
||||
pattr = "to " + str(self.pattr)
|
||||
pass
|
||||
elif self.op in self.opc.hascompare:
|
||||
if self.op in self.opc.hascompare:
|
||||
if isinstance(self.attr, int):
|
||||
pattr = self.opc.cmp_op[self.attr]
|
||||
# And so on. See xdis/bytecode.py get_instructions_bytes
|
||||
|
Reference in New Issue
Block a user