From 4fcb385dc0b31687b0fef356858f266fa6f7555c Mon Sep 17 00:00:00 2001 From: rocky Date: Tue, 22 Nov 2016 19:59:19 -0500 Subject: [PATCH 01/15] DRY Python3 grammar --- uncompyle6/parsers/parse3.py | 2 -- uncompyle6/parsers/parse32.py | 3 --- uncompyle6/parsers/parse33.py | 1 - uncompyle6/parsers/parse34.py | 2 -- uncompyle6/parsers/parse35.py | 6 ------ 5 files changed, 14 deletions(-) diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 3788a738..a892c80a 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -376,9 +376,7 @@ class Python3Parser(PythonParser): # Python 3.4+ expr ::= LOAD_CLASSDEREF - binary_subscr2 ::= expr expr DUP_TOP_TWO BINARY_SUBSCR # Python3 drops slice0..slice3 - ''' @staticmethod diff --git a/uncompyle6/parsers/parse32.py b/uncompyle6/parsers/parse32.py index 9f343066..24d4e573 100644 --- a/uncompyle6/parsers/parse32.py +++ b/uncompyle6/parsers/parse32.py @@ -10,9 +10,6 @@ from uncompyle6.parsers.parse3 import Python3Parser class Python32Parser(Python3Parser): def p_32to35(self, args): """ - # In Python 3.2+, DUP_TOPX is DUP_TOP_TWO - binary_subscr2 ::= expr expr DUP_TOP_TWO BINARY_SUBSCR - # Store locals is only in Python 3.0 to 3.3 stmt ::= store_locals store_locals ::= LOAD_FAST STORE_LOCALS diff --git a/uncompyle6/parsers/parse33.py b/uncompyle6/parsers/parse33.py index a48066b2..b310af61 100644 --- a/uncompyle6/parsers/parse33.py +++ b/uncompyle6/parsers/parse33.py @@ -19,7 +19,6 @@ class Python33Parser(Python32Parser): # actions that want c_stmts_opt at index 1 iflaststmt ::= testexpr c_stmts_opt33 - iflaststmtl ::= testexpr c_stmts_opt c_stmts_opt33 ::= JUMP_BACK JUMP_ABSOLUTE c_stmts_opt _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD _come_from diff --git a/uncompyle6/parsers/parse34.py b/uncompyle6/parsers/parse34.py index f818836a..fa136f71 100644 --- a/uncompyle6/parsers/parse34.py +++ b/uncompyle6/parsers/parse34.py @@ -17,8 +17,6 @@ class Python34Parser(Python33Parser): """ # 
Python 3.4+ optimizes the trailing two JUMPS away - for_block ::= l_stmts - # Is this 3.4 only? yield_from ::= expr GET_ITER LOAD_CONST YIELD_FROM """ diff --git a/uncompyle6/parsers/parse35.py b/uncompyle6/parsers/parse35.py index 996eec72..032a7e18 100644 --- a/uncompyle6/parsers/parse35.py +++ b/uncompyle6/parsers/parse35.py @@ -45,13 +45,7 @@ class Python35Parser(Python34Parser): # Python 3.3+ also has yield from. 3.5 does it # differently than 3.3, 3.4 - expr ::= yield_from yield_from ::= expr GET_YIELD_FROM_ITER LOAD_CONST YIELD_FROM - - # Python 3.4+ has more loop optimization that removes - # JUMP_FORWARD in some cases, and hence we also don't - # see COME_FROM - _ifstmts_jump ::= c_stmts_opt """ class Python35ParserSingle(Python35Parser, PythonParserSingle): pass From 6aa1531972de83ecab15b4c96b89c873ea5a7458 Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 23 Nov 2016 00:48:38 -0500 Subject: [PATCH 02/15] Circle CI uses 2.7.10 and 2.7.12 is not available --- circle.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/circle.yml b/circle.yml index 4fcb3457..a8bbed58 100644 --- a/circle.yml +++ b/circle.yml @@ -1,6 +1,6 @@ machine: python: - version: 2.7.12 + version: 2.7.10 environment: COMPILE: --compile From df2ca51f4a5827f6a810a2b11c93af5b7b508c2b Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 23 Nov 2016 08:28:10 -0500 Subject: [PATCH 03/15] Note that we now work on 2.4 and 2.5 --- __pkginfo__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/__pkginfo__.py b/__pkginfo__.py index 59df58e3..7652cba5 100644 --- a/__pkginfo__.py +++ b/__pkginfo__.py @@ -16,10 +16,10 @@ classifiers = ['Development Status :: 4 - Beta', 'Intended Audience :: Developers', 'Operating System :: OS Independent', 'Programming Language :: Python', - 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.4', + 'Programming Language :: Python :: 2.5', 'Programming Language :: Python :: 2.6', 'Programming Language :: 
Python :: 2.7', - 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', From cbcfd53dae52c55493389ece5f60ffdd9193241c Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 23 Nov 2016 21:44:53 -0500 Subject: [PATCH 04/15] Python 2.6 grammary bug and.. __pkginfo.py__: Bump spark_parser version for parse_flags 'dups' --- __pkginfo__.py | 2 +- pytest/test_grammar.py | 4 ++-- uncompyle6/parser.py | 4 ++-- uncompyle6/parsers/parse26.py | 3 +++ 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/__pkginfo__.py b/__pkginfo__.py index 7652cba5..7ee929f1 100644 --- a/__pkginfo__.py +++ b/__pkginfo__.py @@ -37,7 +37,7 @@ entry_points={ 'pydisassemble=uncompyle6.bin.pydisassemble:main', ]} ftp_url = None -install_requires = ['spark-parser >= 1.4.0, < 1.5.0', +install_requires = ['spark-parser >= 1.4.3, < 1.5.0', 'xdis >= 3.2.3, < 3.3.0'] license = 'MIT' mailing_list = 'python-debugger@googlegroups.com' diff --git a/pytest/test_grammar.py b/pytest/test_grammar.py index a99da771..13e8159c 100644 --- a/pytest/test_grammar.py +++ b/pytest/test_grammar.py @@ -59,5 +59,5 @@ def test_dup_rule(): python_parser(PYTHON_VERSION, inspect.currentframe().f_code, is_pypy=IS_PYPY, parser_debug={ - 'rules': True, 'transition': False, 'reduce': False, - 'errorstack': None, 'context': True}) + 'dups': True, 'transition': False, 'reduce': False, + 'rules': False, 'errorstack': None, 'context': True}) diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index abe73d5b..ea730dfc 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -709,8 +709,8 @@ def python_parser(version, co, out=sys.stdout, showasm=False, maybe_show_asm(showasm, tokens) # For heavy grammar debugging - parser_debug = {'rules': True, 'transition': True, 'reduce' : True, - 'showstack': 'full'} + # parser_debug = {'rules': True, 'transition': True, 'reduce' : True, + # 'showstack': 'full'} p = 
get_python_parser(version, parser_debug) return parse(p, tokens, customize) diff --git a/uncompyle6/parsers/parse26.py b/uncompyle6/parsers/parse26.py index 47c0ac0b..c485b00f 100644 --- a/uncompyle6/parsers/parse26.py +++ b/uncompyle6/parsers/parse26.py @@ -84,6 +84,7 @@ class Python26Parser(Python2Parser): jb_cont ::= CONTINUE jb_cf_pop ::= JUMP_BACK come_froms POP_TOP + jb_cf_pop ::= JUMP_BACK POP_TOP ja_cf_pop ::= JUMP_ABSOLUTE come_froms POP_TOP jf_cf_pop ::= JUMP_FORWARD come_froms POP_TOP @@ -188,6 +189,8 @@ class Python26Parser(Python2Parser): comp_body ::= gen_comp_body + for_block ::= l_stmts_opt _come_from POP_TOP JUMP_BACK + # Make sure we keep indices the same as 2.7 setup_loop_lf ::= SETUP_LOOP LOAD_FAST genexpr_func ::= setup_loop_lf FOR_ITER designator comp_iter jb_bp_come_from From 8941417a54d59ee11164545d7a7da7e2fbb91b22 Mon Sep 17 00:00:00 2001 From: rocky Date: Thu, 24 Nov 2016 05:33:08 -0500 Subject: [PATCH 05/15] <2.7 "if" detection and dup Python 3 grammar rule --- uncompyle6/parsers/parse3.py | 2 -- uncompyle6/scanners/scanner2.py | 6 ++++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index a892c80a..adc52a86 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -354,8 +354,6 @@ class Python3Parser(PythonParser): COME_FROM_LOOP whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK NOP COME_FROM_LOOP - whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK NOP - COME_FROM_LOOP forstmt ::= SETUP_LOOP expr _for designator for_block POP_BLOCK NOP COME_FROM_LOOP """ diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 0ffb78ef..4675f37d 100644 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -446,10 +446,16 @@ class Scanner2(scan.Scanner): if self.version < 2.7 and self.code[jmp] in self.jump_forward: self.not_continue.add(jmp) jmp = self.get_target(jmp) + prev_offset = 
self.prev[except_match] + # COMPARE_OP argument should be "exception match" or 10 + if (self.code[prev_offset] == self.opc.COMPARE_OP and + self.code[prev_offset+1] != 10): + return None if jmp not in self.pop_jump_if | self.jump_forward: self.ignore_if.add(except_match) return None + self.ignore_if.add(except_match) self.not_continue.add(jmp) return jmp From 8be6369bdf2ea4886701d9040f6afcca2078b30c Mon Sep 17 00:00:00 2001 From: rocky Date: Thu, 24 Nov 2016 10:31:38 -0500 Subject: [PATCH 06/15] Better line number tracking Indent Python 2 list comprehensions, albeit badly. DRY code a little via indent_if_source_nl --- uncompyle6/semantics/pysource.py | 60 ++++++++++++++++---------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index f987e8d1..153ac477 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -487,6 +487,12 @@ class SourceWalker(GenericASTTraversal, object): return + def indent_if_source_nl(self, line_number, indent): + if (line_number != self.line_number): + self.write("\n" + self.indent + INDENT_PER_LEVEL[:-1]) + return self.line_number + + def customize_for_version(self, is_pypy, version): if is_pypy: ######################## @@ -672,9 +678,8 @@ class SourceWalker(GenericASTTraversal, object): None) def set_pos_info(self, node): - if hasattr(node, 'offset'): - if node.offset in self.linestarts: - self.line_number = self.linestarts[node.offset] + if hasattr(node, 'linestart') and node.linestart: + self.line_number = node.linestart def preorder(self, node=None): super(SourceWalker, self).preorder(node) @@ -1129,6 +1134,7 @@ class SourceWalker(GenericASTTraversal, object): assert n == 'lc_body' self.write( '[ ') + if self.version >= 2.7: expr = n[0] list_iter = node[-1] @@ -1141,9 +1147,19 @@ class SourceWalker(GenericASTTraversal, object): # FIXME: use source line numbers for directing line breaks + line_number = self.line_number 
+ last_line = self.f.getvalue().split("\n")[-1] + l = len(last_line) + indent = ' ' * (l-1) + self.preorder(expr) + line_number = self.indent_if_source_nl(line_number, indent) self.preorder(list_iter) - self.write( ' ]') + l2 = self.indent_if_source_nl(line_number, indent) + if l2 != line_number: + self.write(' ' * (len(indent) - len(self.indent) - 1) + ']') + else: + self.write( ' ]') self.prec = p self.prune() # stop recursing @@ -1631,9 +1647,8 @@ class SourceWalker(GenericASTTraversal, object): self.write(sep) name = self.traverse(l[i], indent='') if i > 0: - if (line_number != self.line_number): - self.write("\n" + self.indent + INDENT_PER_LEVEL[:-1]) - pass + line_number = self.indent_if_source_nl(line_number, + self.indent + INDENT_PER_LEVEL[:-1]) line_number = self.line_number self.write(name, ': ') value = self.traverse(l[i+1], indent=self.indent+(len(name)+2)*' ') @@ -1658,9 +1673,8 @@ class SourceWalker(GenericASTTraversal, object): self.write(sep) name = self.traverse(l[i+1], indent='') if i > 0: - if (line_number != self.line_number): - self.write("\n" + self.indent + INDENT_PER_LEVEL[:-1]) - pass + line_number = self.indent_if_source_nl(line_number, + self.indent + INDENT_PER_LEVEL[:-1]) pass line_number = self.line_number self.write(name, ': ') @@ -1689,13 +1703,12 @@ class SourceWalker(GenericASTTraversal, object): # kv3 ::= expr expr STORE_MAP # FIXME: DRY this and the above + indent = self.indent + " " if kv == 'kv': self.write(sep) name = self.traverse(kv[-2], indent='') if first_time: - if (line_number != self.line_number): - self.write("\n" + self.indent + " ") - pass + line_number = self.indent_if_source_nl(line_number, indent) first_time = False pass line_number = self.line_number @@ -1705,9 +1718,7 @@ class SourceWalker(GenericASTTraversal, object): self.write(sep) name = self.traverse(kv[1], indent='') if first_time: - if (line_number != self.line_number): - self.write("\n" + self.indent + " ") - pass + line_number = 
self.indent_if_source_nl(line_number, indent) first_time = False pass line_number = self.line_number @@ -1717,9 +1728,7 @@ class SourceWalker(GenericASTTraversal, object): self.write(sep) name = self.traverse(kv[-2], indent='') if first_time: - if (line_number != self.line_number): - self.write("\n" + self.indent + " ") - pass + line_number = self.indent_if_source_nl(line_number, indent) first_time = False pass line_number = self.line_number @@ -1890,18 +1899,9 @@ class SourceWalker(GenericASTTraversal, object): node[0].attr == 1): self.write(',') elif typ == 'c': - # FIXME: In Python3 sometimes like from - # importfrom - # importlist2 - # import_as - # designator - # STORE_NAME 'load_entry_point' - # POP_TOP '' (2, (0, 1)) - # we get that weird POP_TOP tuple, e.g (2, (0,1)). - # Why? and - # Is there some sort of invalid bounds access going on? if isinstance(entry[arg], int): - self.preorder(node[entry[arg]]) + entry_node = node[entry[arg]] + self.preorder(entry_node) arg += 1 elif typ == 'p': p = self.prec From abecb21671619a4f7aecdea459270bf837f06ad0 Mon Sep 17 00:00:00 2001 From: rocky Date: Thu, 24 Nov 2016 21:41:23 -0500 Subject: [PATCH 07/15] 2.7 grammar bug workaround. 
Fix docstring bug --- test/ok_lib2.7/{cmd.pyc => cmd.pyc-notyet} | Bin .../{codeop.pyc => codeop.pyc_notyet} | Bin test/ok_lib2.7/compiler/syntax.pyc | Bin 2140 -> 2140 bytes test/ok_lib2.7/{dis.pyc-notyet => dis.pyc} | Bin uncompyle6/parsers/parse27.py | 5 ++++- uncompyle6/scanners/scanner2.py | 4 ++++ uncompyle6/semantics/helper.py | 14 ++++++++------ uncompyle6/semantics/pysource.py | 6 +++--- 8 files changed, 19 insertions(+), 10 deletions(-) rename test/ok_lib2.7/{cmd.pyc => cmd.pyc-notyet} (100%) rename test/ok_lib2.7/{codeop.pyc => codeop.pyc_notyet} (100%) rename test/ok_lib2.7/{dis.pyc-notyet => dis.pyc} (100%) diff --git a/test/ok_lib2.7/cmd.pyc b/test/ok_lib2.7/cmd.pyc-notyet similarity index 100% rename from test/ok_lib2.7/cmd.pyc rename to test/ok_lib2.7/cmd.pyc-notyet diff --git a/test/ok_lib2.7/codeop.pyc b/test/ok_lib2.7/codeop.pyc_notyet similarity index 100% rename from test/ok_lib2.7/codeop.pyc rename to test/ok_lib2.7/codeop.pyc_notyet diff --git a/test/ok_lib2.7/compiler/syntax.pyc b/test/ok_lib2.7/compiler/syntax.pyc index a6b2a86035a005a7d326b6158f488b73f2bbc5c4..ba08ba4890630cf617d1f268b24461df7d0602a8 100644 GIT binary patch delta 17 Ycmca3a7TcH`7 Date: Fri, 25 Nov 2016 12:30:42 -0500 Subject: [PATCH 08/15] Start grammar reduction checks --- __pkginfo__.py | 2 +- test/Makefile | 2 +- ...le1.pyc => 02_while1_if_while1.pyc-notyet} | Bin ...while1.pyc => 04_while1_while1.pyc-notyet} | Bin uncompyle6/parser.py | 19 ++++++++++++ uncompyle6/parsers/parse2.py | 15 +++++++-- uncompyle6/parsers/parse3.py | 29 +++++++++++++++--- 7 files changed, 58 insertions(+), 9 deletions(-) rename test/bytecode_3.0/{02_while1_if_while1.pyc => 02_while1_if_while1.pyc-notyet} (100%) rename test/bytecode_3.4/{04_while1_while1.pyc => 04_while1_while1.pyc-notyet} (100%) diff --git a/__pkginfo__.py b/__pkginfo__.py index 7ee929f1..076581bc 100644 --- a/__pkginfo__.py +++ b/__pkginfo__.py @@ -37,7 +37,7 @@ entry_points={ 
'pydisassemble=uncompyle6.bin.pydisassemble:main', ]} ftp_url = None -install_requires = ['spark-parser >= 1.4.3, < 1.5.0', +install_requires = ['spark-parser >= 1.5.0, < 1.6.0', 'xdis >= 3.2.3, < 3.3.0'] license = 'MIT' mailing_list = 'python-debugger@googlegroups.com' diff --git a/test/Makefile b/test/Makefile index 2f8e56ca..343d4c19 100644 --- a/test/Makefile +++ b/test/Makefile @@ -104,7 +104,7 @@ check-bytecode-2.6: #: Check deparsing Python 2.7 check-bytecode-2.7: - $(PYTHON) test_pythonlib.py --bytecode-2.7 + $(PYTHON) test_pythonlib.py --bytecode-2.7 --verify #: Check deparsing Python 3.0 check-bytecode-3.0: diff --git a/test/bytecode_3.0/02_while1_if_while1.pyc b/test/bytecode_3.0/02_while1_if_while1.pyc-notyet similarity index 100% rename from test/bytecode_3.0/02_while1_if_while1.pyc rename to test/bytecode_3.0/02_while1_if_while1.pyc-notyet diff --git a/test/bytecode_3.4/04_while1_while1.pyc b/test/bytecode_3.4/04_while1_while1.pyc-notyet similarity index 100% rename from test/bytecode_3.4/04_while1_while1.pyc rename to test/bytecode_3.4/04_while1_while1.pyc-notyet diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index ea730dfc..ee9abcb1 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -69,6 +69,25 @@ class PythonParser(GenericASTBuilder): for i in dir(self): setattr(self, i, None) + def debug_reduce(self, rule, tokens, parent, i): + """Customized format and print for our kind of tokens + which gets called in debugging grammar reduce rules + """ + prefix = '' + if parent and tokens: + p_token = tokens[parent] + if hasattr(p_token, 'linestart') and p_token.linestart: + prefix = 'L.%3d: ' % p_token.linestart + else: + prefix = ' ' + if hasattr(p_token, 'offset'): + prefix += "%3d " % p_token.offset + prefix += " " + else: + prefix = ' ' + + print("%s%s ::= %s" % (prefix, rule[0], ' '.join(rule[1]))) + def error(self, instructions, index): # Find the last line boundary for start in range(index, -1, -1): diff --git 
a/uncompyle6/parsers/parse2.py b/uncompyle6/parsers/parse2.py index 530c3585..6778f014 100644 --- a/uncompyle6/parsers/parse2.py +++ b/uncompyle6/parsers/parse2.py @@ -241,7 +241,7 @@ class Python2Parser(PythonParser): """ def add_custom_rules(self, tokens, customize): - ''' + """ Special handling for opcodes such as those that take a variable number of arguments -- we add a new rule for each: @@ -260,7 +260,7 @@ class Python2Parser(PythonParser): expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP PyPy adds custom rules here as well - ''' + """ for opname, v in list(customize.items()): opname_base = opname[:opname.rfind('_')] if opname == 'PyPy': @@ -389,6 +389,17 @@ class Python2Parser(PythonParser): else: raise Exception('unknown customize token %s' % opname) self.add_unique_rule(rule, opname_base, v, customize) + pass + self.check_reduce['augassign1'] = 'AST' + self.check_reduce['augassign2'] = 'AST' + return + + def reduce_is_invalid(self, rule, ast, tokens, first, last): + lhs = rule[0] + if lhs in ('augassign1', 'augassign2') and ast[0][0] == 'and': + return True + # Add more stuff, like COME_FROM checking + return False class Python2ParserSingle(Python2Parser, PythonParserSingle): pass diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index adc52a86..0c1e7112 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -146,8 +146,6 @@ class Python3Parser(PythonParser): ifelsestmtr ::= testexpr return_if_stmts return_stmts ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel - ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel JUMP_BACK COME_FROM_LOOP - ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel COME_FROM_LOOP # FIXME: this feels like a hack. 
Is it just 1 or two @@ -335,9 +333,6 @@ class Python3Parser(PythonParser): whilestmt ::= SETUP_LOOP testexpr return_stmts POP_BLOCK COME_FROM_LOOP - whileelsestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK - else_suite COME_FROM_LOOP - while1elsestmt ::= SETUP_LOOP l_stmts JUMP_BACK else_suite @@ -348,6 +343,7 @@ class Python3Parser(PythonParser): # FIXME: Python 3.? starts adding branch optimization? Put this starting there. while1stmt ::= SETUP_LOOP l_stmts + while1stmt ::= SETUP_LOOP l_stmts COME_FROM_LOOP # FIXME: investigate - can code really produce a NOP? whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK NOP @@ -680,8 +676,31 @@ class Python3Parser(PythonParser): rule = ('mkfunc ::= %sload_closure LOAD_CONST %s' % ('expr ' * args_pos, opname)) self.add_unique_rule(rule, opname, token.attr, customize) + pass + self.check_reduce['augassign1'] = 'AST' + self.check_reduce['augassign2'] = 'AST' + self.check_reduce['while1stmt'] = 'noAST' return + def reduce_is_invalid(self, rule, ast, tokens, first, last): + lhs = rule[0] + if lhs in ('augassign1', 'augassign2') and ast[0][0] == 'and': + return True + elif lhs == 'while1stmt': + # Skip COME_FROM tokens + skip = 0 + if tokens[last] != 'COME_FROM_LOOP': + skip = 1 + while last+skip < len(tokens) and isinstance(tokens[last+skip].offset, str): + last += 1 + if last + skip < len(tokens): + offset = tokens[last+skip].offset + assert tokens[first] == 'SETUP_LOOP' + if offset != tokens[first].attr: + return True + return False + return False + class Python30Parser(Python3Parser): def p_30(self, args): From 1e324e0e8db0d97c1ea63c577805a7b00720b83d Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 26 Nov 2016 21:41:45 -0500 Subject: [PATCH 09/15] Misc changes scanner26.py: make scanner2.py and scanner26.py more alike scanner2.py: check that return stmt is last in list. 
(May change) main.py: show filename on verify error test/*: add more --- test/bytecode_2.6/01_boolean.pyc | Bin 157 -> 0 bytes test/bytecode_2.6/03_elif_vs_continue.pyc | Bin 0 -> 534 bytes .../bug26/03_elif_vs_continue.py | 18 ++++++ uncompyle6/main.py | 7 +- uncompyle6/parser.py | 4 +- uncompyle6/parsers/parse2.py | 11 +++- uncompyle6/scanners/scanner2.py | 61 +++++++++--------- uncompyle6/scanners/scanner26.py | 5 +- 8 files changed, 70 insertions(+), 36 deletions(-) delete mode 100644 test/bytecode_2.6/01_boolean.pyc create mode 100644 test/bytecode_2.6/03_elif_vs_continue.pyc create mode 100644 test/simple_source/bug26/03_elif_vs_continue.py diff --git a/test/bytecode_2.6/01_boolean.pyc b/test/bytecode_2.6/01_boolean.pyc deleted file mode 100644 index d65f034709f11eccfeb7e7fd37fcb0f541dc47dc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 157 zcmcckiI?lqw~DZ21}I41TPoKQ%ZAE?LbBsGXV(}MgW{wA6EbX diff --git a/test/bytecode_2.6/03_elif_vs_continue.pyc b/test/bytecode_2.6/03_elif_vs_continue.pyc new file mode 100644 index 0000000000000000000000000000000000000000..74189e25bbcd4c5e18de63398fce25a8dcfe1bb6 GIT binary patch literal 534 zcmbVJO-sW-5Pg%TABGk@6#s!3JXlfiAW{VDO?qe#iYOr_+q%+ZV|G(2Bp3A$`d9n~ z&aMUTE<0~#-p=g2O}@Sd!Fd1tF2Qzr>@V5$E<;L4fltCc2ZVWOPZOV^%Vzx*n{F6t z4-#m6)Ggcd(t`>>+|;y?6KHdT8kjsz@Zh0oqv@b-qb67BgI*6nmhc=gl8xiEbWW>s!vCa%j;m7#rz zv5-8+r5486{>3U@YS)ygRQytMQx%R9nKUc}P=#~xI^+Zn^dU!DX2nWGT2*BtMo-o9 z#x>?U)LA1Tykod5s; literal 0 HcmV?d00001 diff --git a/test/simple_source/bug26/03_elif_vs_continue.py b/test/simple_source/bug26/03_elif_vs_continue.py new file mode 100644 index 00000000..bcc27bfd --- /dev/null +++ b/test/simple_source/bug26/03_elif_vs_continue.py @@ -0,0 +1,18 @@ +# Bug was using continue fouling up 1st elif, by confusing +# the "pass" for "continue" by not recognizing the if jump +# around it. 
We fixed by ignoring what's done in Python 2.7 +# Better is better detection of control structures + +def _compile_charset(charset, flags, code, fixup=None): + # compile charset subprogram + emit = code.append + if fixup is None: + fixup = 1 + for op, av in charset: + if op is flags: + pass + elif op is code: + emit(fixup(av)) + else: + raise RuntimeError + emit(5) diff --git a/uncompyle6/main.py b/uncompyle6/main.py index 9c3de62a..256958a0 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -189,17 +189,16 @@ def main(in_base, out_base, files, codes, outfile=None, print(e) verify_failed_files += 1 os.rename(outfile, outfile + '_unverified') + sys.stderr.write("### Error Verifying %s\n" % filename) + sys.stderr.write(str(e) + "\n") if not outfile: - print("### Error Verifiying %s" % filename, file=sys.stderr) - print(e, file=sys.stderr) if raise_on_error: raise pass pass pass elif do_verify: - print("\n### uncompile successful, but no file to compare against", - file=sys.stderr) + sys.stderr.write("\n### uncompile successful, but no file to compare against\n") pass else: okay_files += 1 diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index ee9abcb1..5909e95e 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -81,7 +81,7 @@ class PythonParser(GenericASTBuilder): else: prefix = ' ' if hasattr(p_token, 'offset'): - prefix += "%3d " % p_token.offset + prefix += "%3s " % str(p_token.offset) prefix += " " else: prefix = ' ' @@ -485,6 +485,8 @@ class PythonParser(GenericASTBuilder): _mklambda ::= load_closure mklambda _mklambda ::= mklambda + # "and" where the first part of the and is true, + # so there is only the 2nd part to evaluate and2 ::= _jump jmp_false COME_FROM expr COME_FROM expr ::= conditional diff --git a/uncompyle6/parsers/parse2.py b/uncompyle6/parsers/parse2.py index 6778f014..00565e4c 100644 --- a/uncompyle6/parsers/parse2.py +++ b/uncompyle6/parsers/parse2.py @@ -392,13 +392,22 @@ class Python2Parser(PythonParser): pass 
self.check_reduce['augassign1'] = 'AST' self.check_reduce['augassign2'] = 'AST' + self.check_reduce['_stmts'] = 'AST' return def reduce_is_invalid(self, rule, ast, tokens, first, last): lhs = rule[0] if lhs in ('augassign1', 'augassign2') and ast[0][0] == 'and': return True - # Add more stuff, like COME_FROM checking + elif lhs == '_stmts': + for i, stmt in enumerate(ast): + if stmt == '_stmts': + stmt = stmt[0] + assert stmt == 'stmt' + if stmt[0] == 'return_stmt': + return i+1 != len(ast) + pass + return False return False class Python2ParserSingle(Python2Parser, PythonParserSingle): diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 389f37ad..1a6e9ec4 100644 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -166,9 +166,9 @@ class Scanner2(scan.Scanner): # continue # last_offset = jump_offset come_from_name = 'COME_FROM' - opname = self.opc.opname[self.code[jump_offset]] - if opname.startswith('SETUP_') and self.version == 2.7: - come_from_type = opname[len('SETUP_'):] + op_name = self.opc.opname[self.code[jump_offset]] + if op_name.startswith('SETUP_') and self.version == 2.7: + come_from_type = op_name[len('SETUP_'):] if come_from_type not in ('LOOP', 'EXCEPT'): come_from_name = 'COME_FROM_%s' % come_from_type pass @@ -179,7 +179,7 @@ class Scanner2(scan.Scanner): jump_idx += 1 op = self.code[offset] - opname = self.opc.opname[op] + op_name = self.opc.opname[op] oparg = None; pattr = None has_arg = op_has_argument(op, self.opc) @@ -194,14 +194,14 @@ class Scanner2(scan.Scanner): if iscode(const): oparg = const if const.co_name == '': - assert opname == 'LOAD_CONST' - opname = 'LOAD_LAMBDA' + assert op_name == 'LOAD_CONST' + op_name = 'LOAD_LAMBDA' elif const.co_name == '': - opname = 'LOAD_GENEXPR' + op_name = 'LOAD_GENEXPR' elif const.co_name == '': - opname = 'LOAD_DICTCOMP' + op_name = 'LOAD_DICTCOMP' elif const.co_name == '': - opname = 'LOAD_SETCOMP' + op_name = 'LOAD_SETCOMP' # verify() uses 
'pattr' for comparison, since 'attr' # now holds Code(const) and thus can not be used # for comparison (todo: think about changing this) @@ -237,20 +237,20 @@ class Scanner2(scan.Scanner): self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE: continue else: - if self.is_pypy and not oparg and opname == 'BUILD_MAP': - opname = 'BUILD_MAP_n' + if self.is_pypy and not oparg and op_name == 'BUILD_MAP': + op_name = 'BUILD_MAP_n' else: - opname = '%s_%d' % (opname, oparg) + op_name = '%s_%d' % (op_name, oparg) if op != self.opc.BUILD_SLICE: - customize[opname] = oparg - elif self.is_pypy and opname in ('LOOKUP_METHOD', + customize[op_name] = oparg + elif self.is_pypy and op_name in ('LOOKUP_METHOD', 'JUMP_IF_NOT_DEBUG', 'SETUP_EXCEPT', 'SETUP_FINALLY'): # The value in the dict is in special cases in semantic actions, such # as CALL_FUNCTION. The value is not used in these cases, so we put # in arbitrary value 0. - customize[opname] = 0 + customize[op_name] = 0 elif op == self.opc.JUMP_ABSOLUTE: # Further classify JUMP_ABSOLUTE into backward jumps # which are used in loops, and "CONTINUE" jumps which @@ -269,16 +269,16 @@ class Scanner2(scan.Scanner): and self.code[offset+3] not in (self.opc.END_FINALLY, self.opc.POP_BLOCK) and offset not in self.not_continue): - opname = 'CONTINUE' + op_name = 'CONTINUE' else: - opname = 'JUMP_BACK' + op_name = 'JUMP_BACK' elif op == self.opc.LOAD_GLOBAL: if offset in self.load_asserts: - opname = 'LOAD_ASSERT' + op_name = 'LOAD_ASSERT' elif op == self.opc.RETURN_VALUE: if offset in self.return_end_ifs: - opname = 'RETURN_END_IF' + op_name = 'RETURN_END_IF' if offset in self.linestartoffsets: linestart = self.linestartoffsets[offset] @@ -287,7 +287,7 @@ class Scanner2(scan.Scanner): if offset not in replace: tokens.append(Token( - opname, oparg, pattr, offset, linestart, op, + op_name, oparg, pattr, offset, linestart, op, has_arg, self.opc)) else: tokens.append(Token( @@ -782,21 +782,23 @@ class Scanner2(scan.Scanner): if offset in 
self.ignore_if: return - if code[pre[rtarget]] == self.opc.JUMP_ABSOLUTE and pre[rtarget] in self.stmts \ - and pre[rtarget] != offset and pre[pre[rtarget]] != offset: - if code[rtarget] == self.opc.JUMP_ABSOLUTE and code[rtarget+3] == self.opc.POP_BLOCK: - if code[pre[pre[rtarget]]] != self.opc.JUMP_ABSOLUTE: - pass - elif self.get_target(pre[pre[rtarget]]) != target: - pass + if self.version == 2.7: + if code[pre[rtarget]] == self.opc.JUMP_ABSOLUTE and pre[rtarget] in self.stmts \ + and pre[rtarget] != offset and pre[pre[rtarget]] != offset: + if code[rtarget] == self.opc.JUMP_ABSOLUTE and code[rtarget+3] == self.opc.POP_BLOCK: + if code[pre[pre[rtarget]]] != self.opc.JUMP_ABSOLUTE: + pass + elif self.get_target(pre[pre[rtarget]]) != target: + pass + else: + rtarget = pre[rtarget] else: rtarget = pre[rtarget] - else: - rtarget = pre[rtarget] # Does the "if" jump just beyond a jump op, then this is probably an if statement pre_rtarget = pre[rtarget] code_pre_rtarget = code[pre_rtarget] + if code_pre_rtarget in self.jump_forward: if_end = self.get_target(pre_rtarget) @@ -824,6 +826,7 @@ class Scanner2(scan.Scanner): self.structs.append({'type': 'if-then', 'start': start-3, 'end': pre_rtarget}) + self.not_continue.add(pre_rtarget) if rtarget < end: diff --git a/uncompyle6/scanners/scanner26.py b/uncompyle6/scanners/scanner26.py index 4f65d9f1..fbd2764a 100755 --- a/uncompyle6/scanners/scanner26.py +++ b/uncompyle6/scanners/scanner26.py @@ -233,7 +233,7 @@ class Scanner26(scan.Scanner2): if op != self.opc.BUILD_SLICE: customize[op_name] = oparg elif op == self.opc.JUMP_ABSOLUTE: - # Further classifhy JUMP_ABSOLUTE into backward jumps + # Further classify JUMP_ABSOLUTE into backward jumps # which are used in loops, and "CONTINUE" jumps which # may appear in a "continue" statement. The loop-type # and continue-type jumps will help us classify loop @@ -254,6 +254,9 @@ class Scanner26(scan.Scanner2): # if x: continue # the "continue" is not on a new line. 
if tokens[-1].type == 'JUMP_BACK': + # We need 'intern' since we have + # already have processed the previous + # token. tokens[-1].type = intern('CONTINUE') elif op == self.opc.LOAD_GLOBAL: From 97576e473d06545f9fbb2f602379b64175734a50 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 27 Nov 2016 07:02:17 -0500 Subject: [PATCH 10/15] Python 3 while/else bug --- test/bytecode_3.3/03_while_else.pyc | Bin 0 -> 383 bytes test/simple_source/bug33/03_while_else.py | 8 ++++++++ uncompyle6/parsers/parse3.py | 17 ++++++++++------- uncompyle6/scanners/scanner3.py | 5 +++-- 4 files changed, 21 insertions(+), 9 deletions(-) create mode 100644 test/bytecode_3.3/03_while_else.pyc create mode 100644 test/simple_source/bug33/03_while_else.py diff --git a/test/bytecode_3.3/03_while_else.pyc b/test/bytecode_3.3/03_while_else.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e772deef2a4591d7469d786671cce3dab94c9171 GIT binary patch literal 383 zcmbQo!^hS>Zq<10!k}p=SBHg@Da0 zWd&-FPtMOv0khl^bBa?-S%6|8MWv}4AdA4f;?$h9QV_2wSp!HifQeESAgeetw;(4q zzBs?MC^=O>sWjc#Sl_@nzC0rnD4Gg1O|PJ`lpUxrF}bibvnUm0NHG_XU|{58 Date: Sun, 27 Nov 2016 14:20:35 -0500 Subject: [PATCH 11/15] Limitations of decompiling control structures. --- README.rst | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/README.rst b/README.rst index b0269ec7..fb9f7be7 100644 --- a/README.rst +++ b/README.rst @@ -97,7 +97,8 @@ Known Bugs/Restrictions ----------------------- The biggest known and possibly fixable (but hard) problem has to do -with handling control flow. In some cases we can detect an erroneous +with handling control flow. All of the Python decomilers I have looked +at have the same problem. In some cases we can detect an erroneous decompilation and report that. About 90% of the decompilation of Python standard library packages in @@ -109,14 +110,17 @@ Other versions drop off in quality too. 
a Python for that bytecode version, and then comparing the bytecode produced by the decompiled/compiled program. Some allowance is made for inessential differences. But other semantically equivalent -differences are not caught. For example ``if x: foo()`` is -equivalent to ``x and foo()`` and decompilation may turn one into the -other. *Weak Verification* on the other hand doesn't check bytecode -for equivalence but does check to see if the resulting decompiled -source is a valid Python program by running the Python -interpreter. Because the Python language has changed so much, for best -results you should use the same Python Version in checking as used in -the bytecode. +differences are not caught. For example ``1 and 0`` is decompiled to +the equivalent ``0``; remnants of the first true evaluation (1) is +lost when Python compiles this. When Python next compiles ``0`` the +resulting code is simpler. + +*Weak Verification* +on the other hand doesn't check bytecode for equivalence but does +check to see if the resulting decompiled source is a valid Python +program by running the Python interpreter. Because the Python language +has changed so much, for best results you should use the same Python +Version in checking as used in the bytecode. Later distributions average about 200 files. There is some work to do on the lower end Python versions which is more difficult for us to From 3c02fa7e36631fe7346ce21f4d6a711dcc978c7f Mon Sep 17 00:00:00 2001 From: "R. Bernstein" Date: Mon, 28 Nov 2016 07:47:18 -0500 Subject: [PATCH 12/15] Update README.rst --- README.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index fb9f7be7..1a5020b0 100644 --- a/README.rst +++ b/README.rst @@ -43,7 +43,8 @@ information. Requirements ------------ -This project requires Python 2.6 or later, PyPy 3-2.4, or PyPy-5.0.1. +This project requires Python 2.6 or later, PyPy 3-2.4, or PyPy-5.0.1. 
+Python versions 2.3-2.7 are supported in the python-2.4 branch. The bytecode files it can read has been tested on Python bytecodes from versions 2.1-2.7, and 3.2-3.6 and the above-mentioned PyPy versions. @@ -97,7 +98,7 @@ Known Bugs/Restrictions ----------------------- The biggest known and possibly fixable (but hard) problem has to do -with handling control flow. All of the Python decomilers I have looked +with handling control flow. All of the Python decompilers I have looked at have the same problem. In some cases we can detect an erroneous decompilation and report that. From a5a0f45ddef2f7136dbbc4cf66c1d061e2985e8c Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 28 Nov 2016 07:53:32 -0500 Subject: [PATCH 13/15] Try new spark 2.5.1 grammar syntax shortcuts This package I now declare stable --- __pkginfo__.py | 4 ++-- uncompyle6/parser.py | 6 ++---- uncompyle6/parsers/parse2.py | 17 +++++++---------- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/__pkginfo__.py b/__pkginfo__.py index 076581bc..eb2edd2b 100644 --- a/__pkginfo__.py +++ b/__pkginfo__.py @@ -12,7 +12,7 @@ copyright = """ Copyright (C) 2015, 2016 Rocky Bernstein . 
""" -classifiers = ['Development Status :: 4 - Beta', +classifiers = ['Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers', 'Operating System :: OS Independent', 'Programming Language :: Python', @@ -37,7 +37,7 @@ entry_points={ 'pydisassemble=uncompyle6.bin.pydisassemble:main', ]} ftp_url = None -install_requires = ['spark-parser >= 1.5.0, < 1.6.0', +install_requires = ['spark-parser >= 1.5.1, < 1.6.0', 'xdis >= 3.2.3, < 3.3.0'] license = 'MIT' mailing_list = 'python-debugger@googlegroups.com' diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index 5909e95e..05b71188 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -157,8 +157,7 @@ class PythonParser(GenericASTBuilder): """ passstmt ::= - _stmts ::= _stmts stmt - _stmts ::= stmt + _stmts ::= stmt+ # statements with continue c_stmts ::= _stmts @@ -270,8 +269,7 @@ class PythonParser(GenericASTBuilder): # Zero or one COME_FROM # And/or expressions have this - come_from_opt ::= COME_FROM - come_from_opt ::= + come_from_opt ::= COME_FROM? """ def p_dictcomp(self, args): diff --git a/uncompyle6/parsers/parse2.py b/uncompyle6/parsers/parse2.py index 00565e4c..b0e440c2 100644 --- a/uncompyle6/parsers/parse2.py +++ b/uncompyle6/parsers/parse2.py @@ -25,20 +25,18 @@ class Python2Parser(PythonParser): self.new_rules = set() def p_print2(self, args): - ''' + """ stmt ::= print_items_stmt stmt ::= print_nl stmt ::= print_items_nl_stmt print_items_stmt ::= expr PRINT_ITEM print_items_opt print_items_nl_stmt ::= expr PRINT_ITEM print_items_opt PRINT_NEWLINE_CONT - print_items_opt ::= print_items - print_items_opt ::= - print_items ::= print_items print_item - print_items ::= print_item - print_item ::= expr PRINT_ITEM_CONT - print_nl ::= PRINT_NEWLINE - ''' + print_items_opt ::= print_items? 
+ print_items ::= print_item+ + print_item ::= expr PRINT_ITEM_CONT + print_nl ::= PRINT_NEWLINE + """ def p_stmt2(self, args): """ @@ -169,8 +167,7 @@ class Python2Parser(PythonParser): try_middle ::= jmp_abs COME_FROM except_stmts END_FINALLY - except_stmts ::= except_stmts except_stmt - except_stmts ::= except_stmt + except_stmts ::= except_stmt+ except_stmt ::= except_cond1 except_suite except_stmt ::= except From 9cc27001601fc5cdd11d43a71c2ee8cb33e490b6 Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 28 Nov 2016 23:49:43 -0500 Subject: [PATCH 14/15] Shorten Python3 grammars with + and * --- uncompyle6/parsers/parse3.py | 3 +-- uncompyle6/parsers/parse36.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 4e3a172a..5d58845c 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -100,8 +100,7 @@ class Python3Parser(PythonParser): del_stmt ::= expr DELETE_ATTR kwarg ::= LOAD_CONST expr - kwargs ::= kwargs kwarg - kwargs ::= + kwargs ::= kwarg* classdef ::= build_class designator diff --git a/uncompyle6/parsers/parse36.py b/uncompyle6/parsers/parse36.py index 47a256db..c9dc2358 100644 --- a/uncompyle6/parsers/parse36.py +++ b/uncompyle6/parsers/parse36.py @@ -17,8 +17,7 @@ class Python36Parser(Python35Parser): def p_36misc(self, args): """ fstring_multi ::= fstring_expr_or_strs BUILD_STRING - fstring_expr_or_strs ::= fstring_expr_or_strs fstring_expr_or_str - fstring_expr_or_strs ::= fstring_expr_or_str + fstring_expr_or_strs ::= fstring_expr_or_str+ """ def add_custom_rules(self, tokens, customize): From d22931cb49f0e28a0fbe48a7c1526b1f170a5b52 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 4 Dec 2016 07:31:34 -0500 Subject: [PATCH 15/15] Get ready for release 2.9.7 Some of the many lint things. Linting is kind of stupid though. 
--- ChangeLog | 101 +++++++++++++++++++++++++- NEWS | 8 ++ uncompyle6/main.py | 2 - uncompyle6/parser.py | 6 +- uncompyle6/parsers/parse26.py | 5 +- uncompyle6/parsers/parse3.py | 17 ++--- uncompyle6/parsers/parse34.py | 4 +- uncompyle6/parsers/parse35.py | 4 +- uncompyle6/scanner.py | 3 +- uncompyle6/scanners/scanner2.py | 1 - uncompyle6/scanners/scanner23.py | 2 +- uncompyle6/scanners/scanner24.py | 2 +- uncompyle6/scanners/scanner3.py | 1 - uncompyle6/scanners/tok.py | 2 +- uncompyle6/semantics/make_function.py | 2 +- uncompyle6/semantics/pysource.py | 2 +- uncompyle6/verify.py | 2 +- uncompyle6/version.py | 2 +- 18 files changed, 134 insertions(+), 32 deletions(-) diff --git a/ChangeLog b/ChangeLog index e1ee5ee0..26f9f2e4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,105 @@ +2016-12-04 rocky + + * uncompyle6/version.py: Get ready for release 2.9.7 + +2016-11-28 rocky + + * uncompyle6/parsers/parse3.py, uncompyle6/parsers/parse36.py: + Shorten Python3 grammars with + and * + +2016-11-28 rocky + + * __pkginfo__.py, uncompyle6/parser.py, + uncompyle6/parsers/parse2.py: Try new spark 2.5.1 grammar syntax + shortcuts This package I now declare stable + +2016-11-28 R. Bernstein + + * README.rst: Update README.rst + +2016-11-27 rocky + + * README.rst: Limitations of decompiling control structures. + +2016-11-27 R. Bernstein + + * : Merge pull request #69 from rocky/ast-reduce-checks AST reduce checks + +2016-11-26 rocky + + * test/simple_source/bug26/03_elif_vs_continue.py, + uncompyle6/main.py, uncompyle6/parser.py, + uncompyle6/parsers/parse2.py, uncompyle6/scanners/scanner2.py, + uncompyle6/scanners/scanner26.py: Misc changes scanner26.py: make scanner2.py and scanner26.py more alike + scanner2.py: check that return stmt is last in list. 
(May change) + main.py: show filename on verify error test/*: add more + +2016-11-25 rocky + + * __pkginfo__.py, test/Makefile, uncompyle6/parser.py, + uncompyle6/parsers/parse2.py, uncompyle6/parsers/parse3.py: Start + grammar reduction checks + +2016-11-24 rocky + + * uncompyle6/parsers/parse27.py, uncompyle6/scanners/scanner2.py, + uncompyle6/semantics/helper.py, uncompyle6/semantics/pysource.py: + 2.7 grammar bug workaround. Fix docstring bug + +2016-11-24 rocky + + * uncompyle6/semantics/pysource.py: Better line number tracking Indent Python 2 list comprehensions, albeit badly. DRY code a + little via indent_if_source_nl + +2016-11-24 rocky + + * uncompyle6/parsers/parse3.py, uncompyle6/scanners/scanner2.py: + <2.7 "if" detection and dup Python 3 grammar rule + +2016-11-23 rocky + + * __pkginfo__.py, pytest/test_grammar.py, uncompyle6/parser.py, + uncompyle6/parsers/parse26.py: Python 2.6 grammary bug and.. __pkginfo.py__: Bump spark_parser version for parse_flags 'dups' + +2016-11-23 rocky + + * __pkginfo__.py: Note that we now work on 2.4 and 2.5 + +2016-11-23 rocky + + * : commit 6aa1531972de83ecab15b4c96b89c873ea5a7458 Author: rocky + Date: Wed Nov 23 00:48:38 2016 -0500 + +2016-11-22 rocky + + * uncompyle6/parsers/parse3.py, uncompyle6/parsers/parse32.py, + uncompyle6/parsers/parse33.py, uncompyle6/parsers/parse34.py, + uncompyle6/parsers/parse35.py: DRY Python3 grammar + +2016-11-22 rocky + + * uncompyle6/parsers/parse2.py, uncompyle6/parsers/parse27.py, + uncompyle6/scanners/scanner2.py: More detailed COME_FROMs For now we only add COME_FROM_FINALLY and COME_FROM_WITH and even + here only on 2.7 + +2016-11-22 rocky + + * circle.yml, pytest/test_grammar.py, tox.ini, + uncompyle6/parser.py, uncompyle6/parsers/parse2.py, + uncompyle6/parsers/parse27.py: Remove redundant 2.7 (and 2.x) + grammar rules + +2016-11-22 rocky + + * pytest/test_docstring.py, uncompyle6/linenumbers.py, + uncompyle6/semantics/fragments.py, uncompyle6/semantics/helper.py, + 
uncompyle6/semantics/make_function.py, + uncompyle6/semantics/pysource.py: Split out print_docstring move from pysource.py to new helper.py + 2016-11-20 rocky - * uncompyle6/version.py: Get ready for release 2.9.6 + * ChangeLog, NEWS, uncompyle6/version.py: Get ready for release + 2.9.6 2016-11-20 R. Bernstein diff --git a/NEWS b/NEWS index 17e3d784..38fd8760 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,11 @@ +uncompyle6 2.9.7 2016-12-04 + +- Shorten Python3 grammars with + and * + this requires spark parser 1.5.1 +- Add some AST reduction checks to improve + decompile accuracy. This too requires + spark parser 1.5.1 + uncompyle6 2.9.6 2016-11-20 - Correct MANIFEST.in diff --git a/uncompyle6/main.py b/uncompyle6/main.py index 256958a0..585166f1 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -49,7 +49,6 @@ def uncompyle( raise pysource.SourceWalkerError(str(e)) - def uncompyle_file(filename, outstream=None, showasm=None, showast=False, showgrammar=False): """ @@ -61,7 +60,6 @@ def uncompyle_file(filename, outstream=None, showasm=None, showast=False, (version, timestamp, magic_int, co, is_pypy, source_size) = load_module(filename, code_objects) - if type(co) == list: for con in co: uncompyle(version, con, outstream, showasm, showast, diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index 05b71188..0ddc7a83 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -136,9 +136,9 @@ class PythonParser(GenericASTBuilder): # print >> sys.stderr, 'resolve', str(list) return GenericASTBuilder.resolve(self, list) - ############################################## - ## Common Python 2 and Python 3 grammar rules - ############################################## + ############################################### + # Common Python 2 and Python 3 grammar rules # + ############################################### def p_start(self, args): ''' # The start or goal symbol diff --git a/uncompyle6/parsers/parse26.py b/uncompyle6/parsers/parse26.py index 
c485b00f..bb6cdffa 100644 --- a/uncompyle6/parsers/parse26.py +++ b/uncompyle6/parsers/parse26.py @@ -13,7 +13,6 @@ class Python26Parser(Python2Parser): super(Python26Parser, self).__init__(debug_parser) self.customized = {} - def p_try_except26(self, args): """ except_stmt ::= except_cond3 except_suite @@ -246,8 +245,8 @@ if __name__ == '__main__': """.split())) remain_tokens = set(tokens) - opcode_set import re - remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens]) - remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens]) + remain_tokens = set([re.sub('_\d+$', '', t) for t in remain_tokens]) + remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens]) remain_tokens = set(remain_tokens) - opcode_set print(remain_tokens) # print(sorted(p.rule2name.items())) diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 5d58845c..ea1c3b7b 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -243,7 +243,6 @@ class Python3Parser(PythonParser): c_stmts_opt34 ::= JUMP_BACK JUMP_ABSOLUTE c_stmts_opt """ - def p_def_annotations3(self, args): """ # Annotated functions @@ -437,10 +436,10 @@ class Python3Parser(PythonParser): args_kw = (token.attr >> 8) & 0xff nak = ( len(opname)-len('CALL_FUNCTION') ) // 3 token.type = self.call_fn_name(token) - rule = ('call_function ::= expr ' - + ('pos_arg ' * args_pos) - + ('kwarg ' * args_kw) - + 'expr ' * nak + token.type) + rule = ('call_function ::= expr ' + + ('pos_arg ' * args_pos) + + ('kwarg ' * args_kw) + + 'expr ' * nak + token.type) self.add_unique_rule(rule, token.type, args_pos, customize) rule = ('classdefdeco2 ::= LOAD_BUILD_CLASS mkfunc %s%s_%d' % (('expr ' * (args_pos-1)), opname, args_pos)) @@ -632,10 +631,10 @@ class Python3Parser(PythonParser): # number of apply equiv arguments: nak = ( len(opname_base)-len('CALL_METHOD') ) // 3 - rule = ('call_function ::= expr ' - + ('pos_arg ' * args_pos) - + ('kwarg ' * args_kw) - + 'expr ' * nak + 
opname) + rule = ('call_function ::= expr ' + + ('pos_arg ' * args_pos) + + ('kwarg ' * args_kw) + + 'expr ' * nak + opname) self.add_unique_rule(rule, opname, token.attr, customize) elif opname.startswith('MAKE_CLOSURE'): # DRY with MAKE_FUNCTION diff --git a/uncompyle6/parsers/parse34.py b/uncompyle6/parsers/parse34.py index fa136f71..c3a93266 100644 --- a/uncompyle6/parsers/parse34.py +++ b/uncompyle6/parsers/parse34.py @@ -40,8 +40,8 @@ if __name__ == '__main__': """.split())) remain_tokens = set(tokens) - opcode_set import re - remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens]) - remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens]) + remain_tokens = set([re.sub('_\d+$', '', t) for t in remain_tokens]) + remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens]) remain_tokens = set(remain_tokens) - opcode_set print(remain_tokens) # print(sorted(p.rule2name.items())) diff --git a/uncompyle6/parsers/parse35.py b/uncompyle6/parsers/parse35.py index 032a7e18..c1852c82 100644 --- a/uncompyle6/parsers/parse35.py +++ b/uncompyle6/parsers/parse35.py @@ -66,8 +66,8 @@ if __name__ == '__main__': """.split())) remain_tokens = set(tokens) - opcode_set import re - remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens]) - remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens]) + remain_tokens = set([re.sub('_\d+$', '', t) for t in remain_tokens]) + remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens]) remain_tokens = set(remain_tokens) - opcode_set print(remain_tokens) # print(sorted(p.rule2name.items())) diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 2d44490b..8f3c0a5d 100755 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -27,7 +27,8 @@ if PYTHON3: intern = sys.intern L65536 = 65536 - def long(l): l + def long(l): + return l else: L65536 = long(65536) # NOQA diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 1a6e9ec4..6ba44029 
100644 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -902,7 +902,6 @@ class Scanner2(scan.Scanner): pass pass - # FIXME: All the < 2.7 conditions are is horrible. We need a better way. if label is not None and label != -1: # In Python < 2.7, the POP_TOP in: diff --git a/uncompyle6/scanners/scanner23.py b/uncompyle6/scanners/scanner23.py index 116c0eb8..0d8de282 100644 --- a/uncompyle6/scanners/scanner23.py +++ b/uncompyle6/scanners/scanner23.py @@ -25,5 +25,5 @@ class Scanner23(scan.Scanner24): # These are the only differences in initialization between # 2.3-2.6 self.version = 2.3 - self.genexpr_name = ''; + self.genexpr_name = '' return diff --git a/uncompyle6/scanners/scanner24.py b/uncompyle6/scanners/scanner24.py index 9ee88378..9f550025 100755 --- a/uncompyle6/scanners/scanner24.py +++ b/uncompyle6/scanners/scanner24.py @@ -25,5 +25,5 @@ class Scanner24(scan.Scanner25): self.opc = opcode_24 self.opname = opcode_24.opname self.version = 2.4 - self.genexpr_name = ''; + self.genexpr_name = '' return diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index b6526cb8..12ced628 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -128,7 +128,6 @@ class Scanner3(Scanner): varargs_ops.add(self.opc.CALL_METHOD) self.varargs_ops = frozenset(varargs_ops) - def opName(self, offset): return self.opc.opname[self.code[offset]] diff --git a/uncompyle6/scanners/tok.py b/uncompyle6/scanners/tok.py index a260a24c..fb1690fc 100644 --- a/uncompyle6/scanners/tok.py +++ b/uncompyle6/scanners/tok.py @@ -29,7 +29,7 @@ class Token: self.pattr = pattr self.offset = offset self.linestart = linestart - if has_arg == False: + if has_arg is False: self.attr = None self.pattr = None self.opc = opc diff --git a/uncompyle6/semantics/make_function.py b/uncompyle6/semantics/make_function.py index 1b61e2d7..0a941af3 100644 --- a/uncompyle6/semantics/make_function.py +++ 
b/uncompyle6/semantics/make_function.py @@ -38,7 +38,7 @@ def find_globals(node, globs): def find_none(node): for n in node: if isinstance(n, AST): - if not n in ('return_stmt', 'return_if_stmt'): + if n not in ('return_stmt', 'return_if_stmt'): if find_none(n): return True elif n.type == 'LOAD_CONST' and n.pattr is None: diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 4127c11c..256ce1b4 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -641,6 +641,7 @@ class SourceWalker(GenericASTTraversal, object): }) FSTRING_CONVERSION_MAP = {1: '!s', 2: '!r', 3: '!a'} + def f_conversion(node): node.conversion = FSTRING_CONVERSION_MAP.get(node.data[1].attr, '') @@ -897,7 +898,6 @@ class SourceWalker(GenericASTTraversal, object): pass self.write(')') - def n_LOAD_CONST(self, node): data = node.pattr; datatype = type(data) if isinstance(datatype, int) and data == minint: diff --git a/uncompyle6/verify.py b/uncompyle6/verify.py index 0063be7b..c65666d4 100755 --- a/uncompyle6/verify.py +++ b/uncompyle6/verify.py @@ -317,7 +317,7 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, i2 += 2 continue elif tokens1[i1].type == 'LOAD_NAME' and tokens2[i2].type == 'LOAD_CONST' \ - and tokens1[i1].pattr == 'None' and tokens2[i2].pattr == None: + and tokens1[i1].pattr == 'None' and tokens2[i2].pattr is None: pass else: raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1], diff --git a/uncompyle6/version.py b/uncompyle6/version.py index e9ee1f29..5b5f9e44 100644 --- a/uncompyle6/version.py +++ b/uncompyle6/version.py @@ -1,3 +1,3 @@ # This file is suitable for sourcing inside bash as # well as importing into Python -VERSION='2.9.6' +VERSION='2.9.7'