First step towards managing control flow decoding

This commit is contained in:
rocky
2016-09-21 21:30:57 -04:00
parent 4f83a87a00
commit b6dee24289
5 changed files with 79 additions and 15 deletions

View File

@@ -31,7 +31,7 @@ def test_grammar():
assert expect_right_recursive == right_recursive
s = get_scanner(PYTHON_VERSION, IS_PYPY)
ignore_set = set(
"""JUMP_BACK CONTINUE RETURN_END_IF COME_FROM
"""JUMP_BACK CONTINUE RETURN_END_IF COME_FROM COME_FROM_EXCEPT
LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP
LAMBDA_MARKER RETURN_LAST
""".split())

View File

@@ -169,7 +169,7 @@ class Python3Parser(PythonParser):
# COME_FROM targets from the wrong places
trystmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
try_middle _come_from
try_middle opt_come_from_except
# this is nested inside a trystmt
tryfinallystmt ::= SETUP_FINALLY suite_stmts_opt
@@ -187,8 +187,14 @@ class Python3Parser(PythonParser):
try_middle ::= jmp_abs COME_FROM except_stmts
END_FINALLY
try_middle ::= jmp_abs COME_FROM_EXCEPT except_stmts
END_FINALLY
# FIXME: remove this
try_middle ::= JUMP_FORWARD COME_FROM except_stmts
END_FINALLY COME_FROM
try_middle ::= JUMP_FORWARD COME_FROM except_stmts
END_FINALLY COME_FROM_EXCEPT
except_stmts ::= except_stmts except_stmt
except_stmts ::= except_stmt
@@ -242,17 +248,25 @@ class Python3Parser(PythonParser):
def p_misc3(self, args):
"""
try_middle ::= JUMP_FORWARD COME_FROM except_stmts END_FINALLY NOP COME_FROM
try_middle ::= JUMP_FORWARD COME_FROM_EXCEPT except_stmts END_FINALLY COME_FROM
for_block ::= l_stmts
iflaststmtl ::= testexpr c_stmts_opt
iflaststmt ::= testexpr c_stmts_opt34
c_stmts_opt34 ::= JUMP_BACK JUMP_ABSOLUTE c_stmts_opt
"""
def p_come_from3(self, args):
"""
opt_come_from_except ::= COME_FROM_EXCEPT
opt_come_from_except ::= come_froms
come_froms ::= come_froms COME_FROM
come_froms ::=
"""
def p_jump3(self, args):
"""
come_froms ::= come_froms COME_FROM
come_froms ::= COME_FROM
jmp_false ::= POP_JUMP_IF_FALSE
jmp_true ::= POP_JUMP_IF_TRUE

View File

@@ -0,0 +1,40 @@
"""
Detect control flow as much as possible.
The basic idea here is to put in explicit end instructions that make
grammar parsing simpler and more precise.
"""
from collections import namedtuple
from xdis.bytecode import Bytecode
# Record stored in ControlFlow.offset_action under a SETUP_* instruction's own
# offset.  `name` is the opname with the leading 'SETUP_' removed, `type` is
# the literal 'start', and `offset` is the offset of that SETUP_* instruction.
control_flow_start = namedtuple('control_flow_start', ['name', 'type', 'offset'])
# Record stored in ControlFlow.offset_action under a SETUP_* instruction's
# `argval`.  `name` is the same stripped opname, `type` is the literal 'end',
# and `offset` is the offset of the SETUP_* instruction that opened the block
# (not the offset the record is stored under).
control_flow_end = namedtuple('control_flow_end', ['name', 'type', 'offset'])
# NOTE(review): not referenced by the code visible in this module; presumably
# meant to pair a block's start and end offsets -- confirm before relying on it.
control_flow_pair = namedtuple('control_flow_pair', ['name', 'start_offset', 'end_offset'])
class ControlFlow:
    """Record where SETUP_* blocks start and end in a code object.

    The result is a dict keyed by bytecode offset whose values are
    ``control_flow_start`` / ``control_flow_end`` records.  A scanner can
    consult it while emitting tokens to tell block ends apart from other
    jump targets.
    """

    def __init__(self, scanner):
        """Copy the opcode tables needed for detection from *scanner*.

        *scanner* must provide the attributes ``opc``, ``setup_ops`` and
        ``op_range``; they are stored on this object unchanged.
        """
        self.scanner = scanner
        self.opc = scanner.opc
        self.setup_ops = scanner.setup_ops
        self.op_range = scanner.op_range

        # Control-flow nesting
        self.offset_action = {}
        self.cf_end = []

    def detect_control_flow(self, co):
        """Scan code object *co* and return the offset -> action mapping.

        For every instruction whose opcode is in ``self.setup_ops`` two
        entries are written into ``self.offset_action``:

        * at the instruction's own offset, a ``control_flow_start`` record;
        * at the instruction's ``argval`` (for SETUP_* instructions this is
          expected to be the block's target offset as resolved by the
          disassembler), a ``control_flow_end`` record that carries the
          offset of the opening SETUP_* instruction.

        Entries are plain dict assignments, so a later write to an offset
        replaces whatever was stored there before.  The mapping lives on the
        instance and is not cleared between calls.

        Returns ``self.offset_action`` (the same dict object, not a copy).
        """
        self.bytecode = Bytecode(co, self.opc)
        prefix_len = len('SETUP_')

        for inst in self.bytecode:
            if inst.opcode not in self.setup_ops:
                continue

            # Use part after SETUP_, e.g. 'EXCEPT' for SETUP_EXCEPT.
            name = inst.opname[prefix_len:]
            self.offset_action[inst.offset] = control_flow_start(
                name, 'start', inst.offset)
            self.offset_action[inst.argval] = control_flow_end(
                name, 'end', inst.offset)

        return self.offset_action

View File

@@ -29,6 +29,7 @@ from uncompyle6.scanner import Scanner, op_has_argument
from xdis.code import iscode
from xdis.bytecode import Bytecode
from uncompyle6.scanner import Token, parse_fn_counts
from uncompyle6.scanners.controlflow import ControlFlow
# Get all the opcodes into globals
import xdis.opcodes.opcode_33 as op3
@@ -102,6 +103,11 @@ class Scanner3(Scanner):
varargs_ops.add(self.opc.CALL_METHOD)
self.varargs_ops = frozenset(varargs_ops)
self.setup_ops = frozenset([
self.opc.SETUP_LOOP,
self.opc.SETUP_EXCEPT, self.opc.SETUP_FINALLY,
self.opc.SETUP_WITH])
# Not really a set, but still classification-like
self.statement_opcode_sequences = [
(self.opc.POP_JUMP_IF_FALSE, self.opc.JUMP_FORWARD),
@@ -127,7 +133,7 @@ class Scanner3(Scanner):
"""
show_asm = self.show_asm if not show_asm else show_asm
# show_asm = 'both'
# show_asm = 'after'
if show_asm in ('both', 'before'):
bytecode = Bytecode(co, self.opc)
for instr in bytecode.get_instructions(co):
@@ -179,13 +185,24 @@ class Scanner3(Scanner):
# Format: {target offset: [jump offsets]}
jump_targets = self.find_jump_targets()
offset_action = ControlFlow(self).detect_control_flow(co)
for inst in bytecode:
argval = inst.argval
if inst.offset in jump_targets:
jump_idx = 0
for jump_offset in jump_targets[inst.offset]:
tokens.append(Token('COME_FROM', None, repr(jump_offset),
come_from_name = 'COME_FROM'
if (inst.offset in offset_action
and offset_action[inst.offset].type == 'end'
# Adjust the grammar and remove the below
and offset_action[inst.offset].name in ['EXCEPT']
):
come_from_name = '%s_%s' % (
(come_from_name, offset_action[inst.offset].name))
pass
tokens.append(Token(come_from_name,
None, repr(jump_offset),
offset='%s_%s' % (inst.offset, jump_idx),
has_arg = True, opc=self.opc))
jump_idx += 1

View File

@@ -61,14 +61,7 @@ class Token:
if self.pattr:
pattr = self.pattr
if self.opc:
if self.op in self.opc.hasjrel:
pattr = "to " + self.pattr
elif self.op in self.opc.hasjabs:
self.pattr= str(self.pattr)
if not self.pattr.startswith('to '):
pattr = "to " + str(self.pattr)
pass
elif self.op in self.opc.hascompare:
if self.op in self.opc.hascompare:
if isinstance(self.attr, int):
pattr = self.opc.cmp_op[self.attr]
# And so on. See xdis/bytecode.py get_instructions_bytes