From 1fc8ac470019ab479f71da78bee9a2713c2bb49e Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 26 Sep 2016 08:31:28 -0400 Subject: [PATCH] Interval order COME_FROMs in Python3 This bug had possibly caused lots of grammar pollution which may need addressing. We want to process COME_FROMs to the same offset to be in *descending* order so we have the larger range or biggest instruction interval last. (I think they are sorted in increasing order, but for safety we sort them). That way, specific COME_FROM tags will match up properly. For example, a "loop" with an "if" nested in it should have the "loop" tag last so the grammar rule matches that properly. Adjust Python 3 grammar for more COME_FROM -> COME_FROM_LOOP. And remove optional COME_FROM_LOOP where possible. Previously, the optional-ness was a result of inner nestings gobbling up the COME_FROM. We'll probably want to go back and fix this up in Python2. --- HISTORY.md | 14 ++++++-- uncompyle6/parser.py | 24 +++++++++++++ uncompyle6/parsers/parse2.py | 21 ------------ uncompyle6/parsers/parse3.py | 60 ++++++++++++++++----------------- uncompyle6/scanners/scanner3.py | 11 ++++-- 5 files changed, 74 insertions(+), 56 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 5226a4ac..7fe6696a 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -29,7 +29,9 @@ augmented with pseudo instruction COME_FROM. This code introduced another clever idea: using table-driven semantics routines, using format specifiers. -The last mention of a release of SPARK from John is around 2002. +The last mention of a release of SPARK from John is around 2002. As +released, although the Earley algorithm parser was in good shape, this +code was woefully lacking as a serious Python deparser. In the fall of 2000, Hartmut Goebel [took over maintaining the code](https://groups.google.com/forum/#!searchin/comp.lang.python/hartmut$20goebel/comp.lang.python/35s3mp4-nuY/UZALti6ujnQJ). 
The @@ -112,12 +114,18 @@ Fenx's uncompyle3 which I used for inspiration for Python3 support. I started working on this late 2015, mostly to add fragment support. In that, I decided to make this runnable on Python 3.2+ and Python 2.6+ while, handling Python bytecodes from Python versions 2.5+ and -3.2+. +3.2+. In doing so, it has been expedient to separate this into three +projects: bytecode loading and disassembly (xdis), parsing and tree +building (spark_parser), and grammar and semantic actions for +decompiling (uncompyle6). + Over the many years, code styles and Python features have changed. However brilliant the code was and still is, it hasn't really had a single public active maintainer. And there have been many forks -of the code. +of the code. I have spent a great deal of time trying to organize and +modularize the code so that it can handle more Python versions more +gracefully (with still only moderate success). That it has been in need of an overhaul has been recognized by the Hartmut a decade an a half ago: diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index 271d3d89..90e0118f 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -174,9 +174,33 @@ class PythonParser(GenericASTBuilder): else_suitec ::= c_stmts else_suitec ::= return_stmts + stmt ::= assert + stmt ::= assert2 + stmt ::= classdef stmt ::= call_stmt + stmt ::= ifstmt + stmt ::= ifelsestmt + + stmt ::= whilestmt + stmt ::= while1stmt + stmt ::= whileelsestmt + stmt ::= while1elsestmt + stmt ::= forstmt + stmt ::= forelsestmt + stmt ::= trystmt + stmt ::= tryelsestmt + stmt ::= tryfinallystmt + stmt ::= withstmt + stmt ::= withasstmt + + stmt ::= del_stmt + del_stmt ::= DELETE_FAST + del_stmt ::= DELETE_NAME + del_stmt ::= DELETE_GLOBAL + + stmt ::= return_stmt return_stmt ::= ret_expr RETURN_VALUE return_stmts ::= return_stmt diff --git a/uncompyle6/parsers/parse2.py b/uncompyle6/parsers/parse2.py index 3e222ead..0af84025 100644 --- a/uncompyle6/parsers/parse2.py +++ 
b/uncompyle6/parsers/parse2.py @@ -94,27 +94,6 @@ class Python2Parser(PythonParser): stmt ::= exec_stmt - stmt ::= assert - stmt ::= assert2 - stmt ::= ifstmt - stmt ::= ifelsestmt - - stmt ::= whilestmt - stmt ::= while1stmt - stmt ::= whileelsestmt - stmt ::= while1elsestmt - stmt ::= forstmt - stmt ::= forelsestmt - stmt ::= trystmt - stmt ::= tryelsestmt - stmt ::= tryfinallystmt - stmt ::= withstmt - stmt ::= withasstmt - - stmt ::= del_stmt - del_stmt ::= DELETE_FAST - del_stmt ::= DELETE_NAME - del_stmt ::= DELETE_GLOBAL del_stmt ::= expr DELETE_SLICE+0 del_stmt ::= expr expr DELETE_SLICE+1 del_stmt ::= expr expr DELETE_SLICE+2 diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index e8b9589e..70c46a7e 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -95,27 +95,6 @@ class Python3Parser(PythonParser): raise_stmt2 ::= expr expr RAISE_VARARGS_2 raise_stmt3 ::= expr expr expr RAISE_VARARGS_3 - stmt ::= assert - stmt ::= assert2 - stmt ::= ifstmt - stmt ::= ifelsestmt - - stmt ::= whilestmt - stmt ::= while1stmt - stmt ::= whileelsestmt - stmt ::= while1elsestmt - stmt ::= forstmt - stmt ::= forelsestmt - stmt ::= trystmt - stmt ::= tryelsestmt - stmt ::= tryfinallystmt - stmt ::= withstmt - stmt ::= withasstmt - - stmt ::= del_stmt - del_stmt ::= DELETE_FAST - del_stmt ::= DELETE_NAME - del_stmt ::= DELETE_GLOBAL del_stmt ::= delete_subscr delete_subscr ::= expr expr DELETE_SUBSCR del_stmt ::= expr DELETE_ATTR @@ -301,20 +280,41 @@ class Python3Parser(PythonParser): stmt ::= LOAD_CLOSURE RETURN_VALUE RETURN_LAST stmt ::= whileTruestmt ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD else_suite _come_from + """ - forstmt ::= SETUP_LOOP expr _for designator for_block POP_BLOCK opt_come_from_loop - whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK opt_come_from_loop - whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK opt_come_from_loop + def p_loop_stmt3(self, args): + """ + forstmt ::= 
SETUP_LOOP expr _for designator for_block POP_BLOCK + opt_come_from_loop + forelsestmt ::= SETUP_LOOP expr _for designator for_block POP_BLOCK else_suite + COME_FROM_LOOP + + forelselaststmt ::= SETUP_LOOP expr _for designator for_block POP_BLOCK else_suitec + COME_FROM_LOOP + + forelselaststmtl ::= SETUP_LOOP expr _for designator for_block POP_BLOCK else_suitel + COME_FROM_LOOP + whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK + COME_FROM_LOOP + whileelsestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK + else_suite COME_FROM_LOOP + whileelselaststmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK + else_suitec COME_FROM_LOOP + whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK + COME_FROM_LOOP # Python < 3.5 no POP BLOCK - whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK opt_come_from_loop - whileTruestmt ::= SETUP_LOOP return_stmts opt_come_from_loop - while1stmt ::= SETUP_LOOP l_stmts _come_from JUMP_BACK opt_come_from_loop + whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK COME_FROM_LOOP + whileTruestmt ::= SETUP_LOOP return_stmts COME_FROM_LOOP + while1stmt ::= SETUP_LOOP l_stmts _come_from JUMP_BACK COME_FROM_LOOP # FIXME: investigate - can code really produce a NOP? 
- whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK NOP opt_come_from_loop - whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK NOP opt_come_from_loop - forstmt ::= SETUP_LOOP expr _for designator for_block POP_BLOCK NOP opt_come_from_loop + whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK NOP + COME_FROM_LOOP + whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK NOP + COME_FROM_LOOP + forstmt ::= SETUP_LOOP expr _for designator for_block POP_BLOCK NOP + COME_FROM_LOOP """ def p_genexpr3(self, args): diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index a0fa43f0..556aa5ac 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -198,14 +198,21 @@ class Scanner3(Scanner): argval = inst.argval if inst.offset in jump_targets: jump_idx = 0 - for jump_offset in jump_targets[inst.offset]: + # We want to process COME_FROMs to the same offset to be in *descending* + # offset order so we have the larger range or biggest instruction interval + # last. (I think they are sorted in increasing order, but for safety + # we sort them). That way, specific COME_FROM tags will match up + # properly. For example, a "loop" with an "if" nested in it should have the + # "loop" tag last so the grammar rule matches that properly. + for jump_offset in sorted(jump_targets[inst.offset], reverse=True): come_from_name = 'COME_FROM' if (inst.offset in offset_action): action = offset_action[inst.offset] if (action.type == 'end' - # Adjust the grammar and remove the below and (self.opName(jump_offset)[len('SETUP_'):] == action.name) + # After the grammar is fully adjusted, remove the below + # test and action.name in ['EXCEPT', 'LOOP', 'WITH']): come_from_name = '%s_%s' % ( (come_from_name, action.name))