diff --git a/test/bytecode_3.6/03_if_elif.pyc b/test/bytecode_3.6/03_if_elif.pyc-notyet similarity index 100% rename from test/bytecode_3.6/03_if_elif.pyc rename to test/bytecode_3.6/03_if_elif.pyc-notyet diff --git a/test/stdlib/runtests.sh b/test/stdlib/runtests.sh index 26badf00..38380cca 100755 --- a/test/stdlib/runtests.sh +++ b/test/stdlib/runtests.sh @@ -241,6 +241,7 @@ case $PYVERSION in SKIP_TESTS=( [test_atexit.py]=1 # [test_bdb.py]=1 # + [test_buffer.py]=1 # parse error [test_builtin.py]=1 # Fails on its own [test_compile.py]=1 [test_contains.py]=1 # Code "while False: yield None" is optimized away in compilation diff --git a/uncompyle6/parsers/__init__.py b/uncompyle6/parsers/__init__.py index e69de29b..6fe5648b 100644 --- a/uncompyle6/parsers/__init__.py +++ b/uncompyle6/parsers/__init__.py @@ -0,0 +1,11 @@ +"""Here we have parser grammars for the different Python versions. +Instead of full grammars, we have full grammars for certain Python versions +and the others indicate differences between a neighboring version. + +For example Python 2.6, 2.7, 3.2, and 3.7 are largely "base" versions +which work off off parse2.py, parse3.py, and parse37base.py. + +Some examples: +Python 3.3 diffs off of 3.2; 3.1 and 3.0 diff off of 3.2; Python 1.0..Python 2.5 diff off of +Python 2.6 and Python 3.8 diff off of 3.7 +""" diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 5cff63ae..b2e2445f 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -1459,6 +1459,7 @@ class Python3Parser(PythonParser): self.check_reduce["while1stmt"] = "noAST" self.check_reduce["while1elsestmt"] = "noAST" self.check_reduce["ifelsestmt"] = "AST" + self.check_reduce["ifstmt"] = "AST" self.check_reduce["annotate_tuple"] = "noAST" if not PYTHON3: self.check_reduce["kwarg"] = "noAST" @@ -1479,50 +1480,24 @@ class Python3Parser(PythonParser): elif lhs == "kwarg": arg = tokens[first].attr return not (isinstance(arg, str) or isinstance(arg, unicode)) - elif lhs == "while1elsestmt": - - n = len(tokens) - if last == n: - # Adjust for fuzziness in parsing + elif rule == ("ifstmt", ("testexpr", "_ifstmts_jump")): + condition_jump = ast[0].last_child() + if condition_jump.kind.startswith("POP_JUMP_IF"): + condition_jump2 = tokens[min(last-1, len(tokens)-1)] + if condition_jump2.kind.startswith("POP_JUMP_IF"): + return condition_jump.attr == condition_jump2.attr + # if condition_jump.attr < condition_jump2.off2int(): + # print("XXX", first, last) + # for t in range(first, last): print(tokens[t]) + # from trepan.api import debug; debug() + # return condition_jump.attr < condition_jump2.off2int() + return False + elif lhs == "ifelsestmt" and rule[1][2] == "jump_forward_else": + last = min(last, len(tokens)-1) + if tokens[last].off2int() == -1: last -= 1 - - if tokens[last] == "COME_FROM_LOOP": - last -= 1 - elif tokens[last - 1] == "COME_FROM_LOOP": - last -= 2 - if tokens[last] in ("JUMP_BACK", "CONTINUE"): - # These indicate inside a loop, but token[last] - # should not be in a loop. - # FIXME: Not quite right: refine by using target - return True - - # if SETUP_LOOP target spans the else part, then this is - # not while1else. Also do for whileTrue? - last += 1 - while last < n and isinstance(tokens[last].offset, str): - last += 1 - if last == n: - return False - # 3.8+ Doesn't have SETUP_LOOP - return self.version < 3.8 and tokens[first].attr > tokens[last].offset - - elif rule == ( - "try_except", - ( - "SETUP_EXCEPT", - "suite_stmts_opt", - "POP_BLOCK", - "except_handler", - "opt_come_from_except", - ), - ): - come_from_except = ast[-1] - if come_from_except[0] == "COME_FROM": - # There should be at last two COME_FROMs, one from an - # exception handler and one from the try. Otherwise - # we have a try/else. - return True - pass + jump_forward_else = ast[2] + return tokens[first].off2int() <= jump_forward_else[0].attr < tokens[last].off2int() elif lhs == "while1stmt": # If there is a fall through to the COME_FROM_LOOP, then this is @@ -1571,6 +1546,50 @@ class Python3Parser(PythonParser): if offset != tokens[first].attr: return True return False + elif lhs == "while1elsestmt": + + n = len(tokens) + if last == n: + # Adjust for fuzziness in parsing + last -= 1 + + if tokens[last] == "COME_FROM_LOOP": + last -= 1 + elif tokens[last - 1] == "COME_FROM_LOOP": + last -= 2 + if tokens[last] in ("JUMP_BACK", "CONTINUE"): + # These indicate inside a loop, but token[last] + # should not be in a loop. + # FIXME: Not quite right: refine by using target + return True + + # if SETUP_LOOP target spans the else part, then this is + # not while1else. Also do for whileTrue? + last += 1 + while last < n and isinstance(tokens[last].offset, str): + last += 1 + if last == n: + return False + # 3.8+ Doesn't have SETUP_LOOP + return self.version < 3.8 and tokens[first].attr > tokens[last].offset + + elif rule == ( + "try_except", + ( + "SETUP_EXCEPT", + "suite_stmts_opt", + "POP_BLOCK", + "except_handler", + "opt_come_from_except", + ), + ): + come_from_except = ast[-1] + if come_from_except[0] == "COME_FROM": + # There should be at last two COME_FROMs, one from an + # exception handler and one from the try. Otherwise + # we have a try/else. + return True + pass elif rule == ( "ifelsestmt", ( @@ -1586,12 +1605,6 @@ class Python3Parser(PythonParser): if not isinstance(come_froms, Token): return tokens[first].offset > come_froms[-1].attr return False - elif lhs == "ifelsestmt" and rule[1][2] == "jump_forward_else": - last = min(last, len(tokens)-1) - if tokens[last].off2int() == -1: - last -= 1 - jump_forward_else = ast[2] - return tokens[first].off2int() <= jump_forward_else[0].attr < tokens[last].off2int() return False diff --git a/uncompyle6/parsers/treenode.py b/uncompyle6/parsers/treenode.py index 32132893..733db58c 100644 --- a/uncompyle6/parsers/treenode.py +++ b/uncompyle6/parsers/treenode.py @@ -60,5 +60,13 @@ class SyntaxTree(spark_AST): child = self[0] if not isinstance(child, SyntaxTree): return child - return self[0].first_child() + return child.first_child() + return self + + def last_child(self): + if len(self) > 0: + child = self[-1] + if not isinstance(child, SyntaxTree): + return child + return child.last_child() return self diff --git a/uncompyle6/scanners/__init__.py b/uncompyle6/scanners/__init__.py index e69de29b..e3f5a061 100644 --- a/uncompyle6/scanners/__init__.py +++ b/uncompyle6/scanners/__init__.py @@ -0,0 +1,29 @@ +"""Here we have "scanners" for the different Python versions. +"scanner" is a compiler-centric term, but it is really a bit different from +a traditional compiler scanner/lexer. + +Here we start out with text disasembly and change that to be more +ameanable to parsing in which we look only at the opcode name, and not +and instruction's operand. + +In some cases this is done by changing the opcode name. For example +"LOAD_CONST" it customized based on the type of its operand into +"LOAD_ASSERT", "LOAD_CODE", "LOAD_STR". + +instructions that take a variable number of arguments will have the argument count +suffixed to the opcode name. "CALL", "MAKE_FUNCTION", "BUILD_TUPLE", "BUILD_LIST", +work this way for example + +We also add pseudo instructions like "COME_FROM" which have an operand + +Instead of full grammars, we have full grammars for certain Python versions +and the others indicate differences between a neighboring version. + +For example Python 2.6, 2.7, 3.2, and 3.7 are largely "base" versions +which work off of scanner2.py, scanner3.py, and scanner37base.py. + +Some examples: +Python 3.3 diffs off of 3.2; 3.1 and 3.0 diff off of 3.2; Python 1.0..Python 2.5 diff off of +Python 2.6 and Python 3.8 diff off of 3.7 + +"""