You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-03 00:45:53 +08:00
Interval order COME_FROMs in Python3
This bug had possibly caused lots of grammar pollution which may need addressing. We want COME_FROMs to the same offset to be processed in *descending* order so we have the larger range or biggest instruction interval last. (I think they are sorted in increasing order, but for safety we sort them). That way, specific COME_FROM tags will match up properly. For example, a "loop" with an "if" nested in it should have the "loop" tag last so the grammar rule matches that properly. Adjust Python 3 grammar for more COME_FROM -> COME_FROM_LOOP. And remove optional COME_FROM_LOOP where possible. Previously, the optional-ness was a result of inner nestings gobbling up the COME_FROM. We'll probably want to go back and fix this up in Python2.
This commit is contained in:
14
HISTORY.md
14
HISTORY.md
@@ -29,7 +29,9 @@ augmented with pseudo instruction COME_FROM. This code introduced
|
||||
another clever idea: using table-driven semantics routines, using
|
||||
format specifiers.
|
||||
|
||||
The last mention of a release of SPARK from John is around 2002.
|
||||
The last mention of a release of SPARK from John is around 2002. As
|
||||
released, although the Earley algorithm parser was in good shape, this
|
||||
code was woefully lacking as a serious Python deparser.
|
||||
|
||||
In the fall of 2000, Hartmut Goebel
|
||||
[took over maintaining the code](https://groups.google.com/forum/#!searchin/comp.lang.python/hartmut$20goebel/comp.lang.python/35s3mp4-nuY/UZALti6ujnQJ). The
|
||||
@@ -112,12 +114,18 @@ Fenx's uncompyle3 which I used for inspiration for Python3 support.
|
||||
I started working on this late 2015, mostly to add fragment support.
|
||||
In that, I decided to make this runnable on Python 3.2+ and Python 2.6+
|
||||
while handling Python bytecodes from Python versions 2.5+ and
|
||||
3.2+.
|
||||
3.2+. In doing so, it has been expedient to separate this into three
|
||||
projects: bytecode loading and disassembly (xdis), parsing and tree
|
||||
building (spark_parser), and grammar and semantic actions for
|
||||
decompiling (uncompyle6).
|
||||
|
||||
|
||||
Over the many years, code styles and Python features have
|
||||
changed. However brilliant the code was and still is, it hasn't really
|
||||
had a single public active maintainer. And there have been many forks
|
||||
of the code.
|
||||
of the code. I have spent a great deal of time trying to organize and
|
||||
modularize the code so that it can handle more Python versions more
|
||||
gracefully (with still only moderate success).
|
||||
|
||||
That it has been in need of an overhaul has been recognized by
|
||||
Hartmut a decade and a half ago:
|
||||
|
@@ -174,9 +174,33 @@ class PythonParser(GenericASTBuilder):
|
||||
else_suitec ::= c_stmts
|
||||
else_suitec ::= return_stmts
|
||||
|
||||
stmt ::= assert
|
||||
stmt ::= assert2
|
||||
|
||||
stmt ::= classdef
|
||||
stmt ::= call_stmt
|
||||
|
||||
stmt ::= ifstmt
|
||||
stmt ::= ifelsestmt
|
||||
|
||||
stmt ::= whilestmt
|
||||
stmt ::= while1stmt
|
||||
stmt ::= whileelsestmt
|
||||
stmt ::= while1elsestmt
|
||||
stmt ::= forstmt
|
||||
stmt ::= forelsestmt
|
||||
stmt ::= trystmt
|
||||
stmt ::= tryelsestmt
|
||||
stmt ::= tryfinallystmt
|
||||
stmt ::= withstmt
|
||||
stmt ::= withasstmt
|
||||
|
||||
stmt ::= del_stmt
|
||||
del_stmt ::= DELETE_FAST
|
||||
del_stmt ::= DELETE_NAME
|
||||
del_stmt ::= DELETE_GLOBAL
|
||||
|
||||
|
||||
stmt ::= return_stmt
|
||||
return_stmt ::= ret_expr RETURN_VALUE
|
||||
return_stmts ::= return_stmt
|
||||
|
@@ -94,27 +94,6 @@ class Python2Parser(PythonParser):
|
||||
|
||||
stmt ::= exec_stmt
|
||||
|
||||
stmt ::= assert
|
||||
stmt ::= assert2
|
||||
stmt ::= ifstmt
|
||||
stmt ::= ifelsestmt
|
||||
|
||||
stmt ::= whilestmt
|
||||
stmt ::= while1stmt
|
||||
stmt ::= whileelsestmt
|
||||
stmt ::= while1elsestmt
|
||||
stmt ::= forstmt
|
||||
stmt ::= forelsestmt
|
||||
stmt ::= trystmt
|
||||
stmt ::= tryelsestmt
|
||||
stmt ::= tryfinallystmt
|
||||
stmt ::= withstmt
|
||||
stmt ::= withasstmt
|
||||
|
||||
stmt ::= del_stmt
|
||||
del_stmt ::= DELETE_FAST
|
||||
del_stmt ::= DELETE_NAME
|
||||
del_stmt ::= DELETE_GLOBAL
|
||||
del_stmt ::= expr DELETE_SLICE+0
|
||||
del_stmt ::= expr expr DELETE_SLICE+1
|
||||
del_stmt ::= expr expr DELETE_SLICE+2
|
||||
|
@@ -95,27 +95,6 @@ class Python3Parser(PythonParser):
|
||||
raise_stmt2 ::= expr expr RAISE_VARARGS_2
|
||||
raise_stmt3 ::= expr expr expr RAISE_VARARGS_3
|
||||
|
||||
stmt ::= assert
|
||||
stmt ::= assert2
|
||||
stmt ::= ifstmt
|
||||
stmt ::= ifelsestmt
|
||||
|
||||
stmt ::= whilestmt
|
||||
stmt ::= while1stmt
|
||||
stmt ::= whileelsestmt
|
||||
stmt ::= while1elsestmt
|
||||
stmt ::= forstmt
|
||||
stmt ::= forelsestmt
|
||||
stmt ::= trystmt
|
||||
stmt ::= tryelsestmt
|
||||
stmt ::= tryfinallystmt
|
||||
stmt ::= withstmt
|
||||
stmt ::= withasstmt
|
||||
|
||||
stmt ::= del_stmt
|
||||
del_stmt ::= DELETE_FAST
|
||||
del_stmt ::= DELETE_NAME
|
||||
del_stmt ::= DELETE_GLOBAL
|
||||
del_stmt ::= delete_subscr
|
||||
delete_subscr ::= expr expr DELETE_SUBSCR
|
||||
del_stmt ::= expr DELETE_ATTR
|
||||
@@ -301,20 +280,41 @@ class Python3Parser(PythonParser):
|
||||
stmt ::= LOAD_CLOSURE RETURN_VALUE RETURN_LAST
|
||||
stmt ::= whileTruestmt
|
||||
ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD else_suite _come_from
|
||||
"""
|
||||
|
||||
forstmt ::= SETUP_LOOP expr _for designator for_block POP_BLOCK opt_come_from_loop
|
||||
whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK opt_come_from_loop
|
||||
whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK opt_come_from_loop
|
||||
def p_loop_stmt3(self, args):
|
||||
"""
|
||||
forstmt ::= SETUP_LOOP expr _for designator for_block POP_BLOCK
|
||||
opt_come_from_loop
|
||||
forelsestmt ::= SETUP_LOOP expr _for designator for_block POP_BLOCK else_suite
|
||||
COME_FROM_LOOP
|
||||
|
||||
forelselaststmt ::= SETUP_LOOP expr _for designator for_block POP_BLOCK else_suitec
|
||||
COME_FROM_LOOP
|
||||
|
||||
forelselaststmtl ::= SETUP_LOOP expr _for designator for_block POP_BLOCK else_suitel
|
||||
COME_FROM_LOOP
|
||||
whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK
|
||||
COME_FROM_LOOP
|
||||
whileelsestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK
|
||||
else_suite COME_FROM_LOOP
|
||||
whileelselaststmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK
|
||||
else_suitec COME_FROM_LOOP
|
||||
whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK
|
||||
COME_FROM_LOOP
|
||||
|
||||
# Python < 3.5 no POP BLOCK
|
||||
whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK opt_come_from_loop
|
||||
whileTruestmt ::= SETUP_LOOP return_stmts opt_come_from_loop
|
||||
while1stmt ::= SETUP_LOOP l_stmts _come_from JUMP_BACK opt_come_from_loop
|
||||
whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK COME_FROM_LOOP
|
||||
whileTruestmt ::= SETUP_LOOP return_stmts COME_FROM_LOOP
|
||||
while1stmt ::= SETUP_LOOP l_stmts _come_from JUMP_BACK COME_FROM_LOOP
|
||||
|
||||
# FIXME: investigate - can code really produce a NOP?
|
||||
whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK NOP opt_come_from_loop
|
||||
whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK NOP opt_come_from_loop
|
||||
forstmt ::= SETUP_LOOP expr _for designator for_block POP_BLOCK NOP opt_come_from_loop
|
||||
whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK NOP
|
||||
COME_FROM_LOOP
|
||||
whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK NOP
|
||||
COME_FROM_LOOP
|
||||
forstmt ::= SETUP_LOOP expr _for designator for_block POP_BLOCK NOP
|
||||
COME_FROM_LOOP
|
||||
"""
|
||||
|
||||
def p_genexpr3(self, args):
|
||||
|
@@ -198,14 +198,21 @@ class Scanner3(Scanner):
|
||||
argval = inst.argval
|
||||
if inst.offset in jump_targets:
|
||||
jump_idx = 0
|
||||
for jump_offset in jump_targets[inst.offset]:
|
||||
# We want to process COME_FROMs to the same offset to be in *descending*
|
||||
# offset order so we have the larger range or biggest instruction interval
|
||||
# last. (I think they are sorted in increasing order, but for safety
|
||||
# we sort them). That way, specific COME_FROM tags will match up
|
||||
# properly. For example, a "loop" with an "if" nested in it should have the
|
||||
# "loop" tag last so the grammar rule matches that properly.
|
||||
for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
|
||||
come_from_name = 'COME_FROM'
|
||||
if (inst.offset in offset_action):
|
||||
action = offset_action[inst.offset]
|
||||
if (action.type == 'end'
|
||||
# Adjust the grammar and remove the below
|
||||
and (self.opName(jump_offset)[len('SETUP_'):]
|
||||
== action.name)
|
||||
# After the grammar is fully adjusted, remove the below
|
||||
# test
|
||||
and action.name in ['EXCEPT', 'LOOP', 'WITH']):
|
||||
come_from_name = '%s_%s' % (
|
||||
(come_from_name, action.name))
|
||||
|
Reference in New Issue
Block a user