Merge pull request #69 from rocky/ast-reduce-checks

AST reduce checks
This commit is contained in:
R. Bernstein
2016-11-27 14:12:08 -05:00
committed by GitHub
16 changed files with 139 additions and 44 deletions

View File

@@ -37,7 +37,7 @@ entry_points={
'pydisassemble=uncompyle6.bin.pydisassemble:main',
]}
ftp_url = None
install_requires = ['spark-parser >= 1.4.3, < 1.5.0',
install_requires = ['spark-parser >= 1.5.0, < 1.6.0',
'xdis >= 3.2.3, < 3.3.0']
license = 'MIT'
mailing_list = 'python-debugger@googlegroups.com'

View File

@@ -104,7 +104,7 @@ check-bytecode-2.6:
#: Check deparsing Python 2.7
check-bytecode-2.7:
$(PYTHON) test_pythonlib.py --bytecode-2.7
$(PYTHON) test_pythonlib.py --bytecode-2.7 --verify
#: Check deparsing Python 3.0
check-bytecode-3.0:

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,18 @@
# Bug was using "continue" fouling up the 1st elif, by confusing
# the "pass" for "continue" through not recognizing the "if" jump
# around it. We fixed this by ignoring what's done in Python 2.7.
# A better fix would be improved detection of control structures.
#
# NOTE: minimized decompiler test fixture (adapted from sre_compile's
# _compile_charset); the exact statement layout produces the bytecode
# pattern under test, so the code must not be restructured.
def _compile_charset(charset, flags, code, fixup=None):
    # compile charset subprogram
    emit = code.append
    if fixup is None:
        # Placeholder default; the real sre_compile uses an identity
        # function here. Calling emit(fixup(av)) with fixup == 1 would
        # raise TypeError -- presumably that branch is never executed,
        # since this file only needs to compile/decompile, not run.
        fixup = 1
    for op, av in charset:
        if op is flags:
            pass
        elif op is code:
            emit(fixup(av))
        else:
            raise RuntimeError
    emit(5)

View File

@@ -0,0 +1,8 @@
# Bug from 3.4 threading. Bug is in handling while/else.
#
# NOTE: minimized decompiler test fixture -- the while/else shape is
# exactly what is under test; do not restructure this function.
def acquire(self):
    with self._cond:
        # "else" of a while loop runs when the loop condition becomes
        # false (there is no "break" here), so "rc" is always bound
        # before the return.
        while self:
            rc = False
        else:
            rc = True
        return rc

View File

@@ -189,17 +189,16 @@ def main(in_base, out_base, files, codes, outfile=None,
print(e)
verify_failed_files += 1
os.rename(outfile, outfile + '_unverified')
sys.stderr.write("### Error Verifying %s\n" % filename)
sys.stderr.write(str(e) + "\n")
if not outfile:
print("### Error Verifiying %s" % filename, file=sys.stderr)
print(e, file=sys.stderr)
if raise_on_error:
raise
pass
pass
pass
elif do_verify:
print("\n### uncompile successful, but no file to compare against",
file=sys.stderr)
sys.stderr.write("\n### uncompile successful, but no file to compare against\n")
pass
else:
okay_files += 1

View File

@@ -69,6 +69,25 @@ class PythonParser(GenericASTBuilder):
for i in dir(self):
setattr(self, i, None)
def debug_reduce(self, rule, tokens, parent, i):
    """Customized format and print for our kind of tokens
    which gets called in debugging grammar reduce rules.

    Prints the reduction rule (lhs ::= rhs...), prefixed with the
    parent token's source line number and bytecode offset when the
    token carries those attributes.
    """
    prefix = ''
    if parent and tokens:
        p_token = tokens[parent]
        # Show "L.nnn:" when the parent token starts a source line.
        if hasattr(p_token, 'linestart') and p_token.linestart:
            prefix = 'L.%3d: ' % p_token.linestart
        else:
            # NOTE(review): padding widths in these literal strings may
            # have been collapsed by extraction -- confirm alignment
            # against the repository source.
            prefix = ' '
        # Append the instruction offset when the token has one.
        if hasattr(p_token, 'offset'):
            prefix += "%3s " % str(p_token.offset)
            prefix += " "
    else:
        prefix = ' '
    print("%s%s ::= %s" % (prefix, rule[0], ' '.join(rule[1])))
def error(self, instructions, index):
# Find the last line boundary
for start in range(index, -1, -1):
@@ -466,6 +485,8 @@ class PythonParser(GenericASTBuilder):
_mklambda ::= load_closure mklambda
_mklambda ::= mklambda
# "and" where the first part of the and is true,
# so there is only the 2nd part to evaluate
and2 ::= _jump jmp_false COME_FROM expr COME_FROM
expr ::= conditional

View File

@@ -241,7 +241,7 @@ class Python2Parser(PythonParser):
"""
def add_custom_rules(self, tokens, customize):
'''
"""
Special handling for opcodes such as those that take a variable number
of arguments -- we add a new rule for each:
@@ -260,7 +260,7 @@ class Python2Parser(PythonParser):
expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP
PyPy adds custom rules here as well
'''
"""
for opname, v in list(customize.items()):
opname_base = opname[:opname.rfind('_')]
if opname == 'PyPy':
@@ -389,6 +389,26 @@ class Python2Parser(PythonParser):
else:
raise Exception('unknown customize token %s' % opname)
self.add_unique_rule(rule, opname_base, v, customize)
pass
self.check_reduce['augassign1'] = 'AST'
self.check_reduce['augassign2'] = 'AST'
self.check_reduce['_stmts'] = 'AST'
return
def reduce_is_invalid(self, rule, ast, tokens, first, last):
    """Check whether a proposed grammar reduction should be rejected.

    Called by the parser for nonterminals registered in
    self.check_reduce (here: augassign1, augassign2, _stmts).
    Returns True when the reduction by *rule* to *ast* is invalid.
    """
    lhs = rule[0]
    # Reject augmented-assign reductions whose first expression parsed
    # as "and" -- presumably an artifact of a misparse; TODO confirm
    # against the grammar rules for augassign1/augassign2.
    if lhs in ('augassign1', 'augassign2') and ast[0][0] == 'and':
        return True
    elif lhs == '_stmts':
        # A return_stmt may only be the *last* statement in _stmts;
        # anywhere else the reduction is invalid.
        for i, stmt in enumerate(ast):
            if stmt == '_stmts':
                # Unwrap a nested _stmts node to its first child.
                stmt = stmt[0]
            assert stmt == 'stmt'
            if stmt[0] == 'return_stmt':
                return i+1 != len(ast)
            pass
        return False
    return False
class Python2ParserSingle(Python2Parser, PythonParserSingle):
pass

View File

@@ -146,8 +146,6 @@ class Python3Parser(PythonParser):
ifelsestmtr ::= testexpr return_if_stmts return_stmts
ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel
ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel JUMP_BACK COME_FROM_LOOP
ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel COME_FROM_LOOP
# FIXME: this feels like a hack. Is it just 1 or two
@@ -335,11 +333,12 @@ class Python3Parser(PythonParser):
whilestmt ::= SETUP_LOOP testexpr return_stmts POP_BLOCK
COME_FROM_LOOP
while1elsestmt ::= SETUP_LOOP l_stmts JUMP_BACK
else_suite
whileelsestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK
else_suite COME_FROM_LOOP
while1elsestmt ::= SETUP_LOOP l_stmts JUMP_BACK
else_suite
whileelselaststmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK
else_suitec COME_FROM_LOOP
@@ -348,6 +347,7 @@ class Python3Parser(PythonParser):
# FIXME: Python 3.? starts adding branch optimization? Put this starting there.
while1stmt ::= SETUP_LOOP l_stmts
while1stmt ::= SETUP_LOOP l_stmts COME_FROM_LOOP
# FIXME: investigate - can code really produce a NOP?
whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK NOP
@@ -680,8 +680,30 @@ class Python3Parser(PythonParser):
rule = ('mkfunc ::= %sload_closure LOAD_CONST %s'
% ('expr ' * args_pos, opname))
self.add_unique_rule(rule, opname, token.attr, customize)
pass
self.check_reduce['augassign1'] = 'AST'
self.check_reduce['augassign2'] = 'AST'
self.check_reduce['while1stmt'] = 'noAST'
return
def reduce_is_invalid(self, rule, ast, tokens, first, last):
    """Check whether a proposed grammar reduction should be rejected.

    Called by the parser for nonterminals registered in
    self.check_reduce (here: augassign1, augassign2, while1stmt).
    Returns True when the reduction by *rule* to *ast* is invalid.
    """
    lhs = rule[0]
    # Reject augmented-assign reductions whose first expression parsed
    # as "and" -- presumably an artifact of a misparse; TODO confirm
    # against the grammar rules for augassign1/augassign2.
    if lhs in ('augassign1', 'augassign2') and ast[0][0] == 'and':
        return True
    elif lhs == 'while1stmt':
        if tokens[last] in ('COME_FROM_LOOP', 'JUMP_BACK'):
            # jump_back should be right after SETUP_LOOP. Test?
            last += 1
        # Skip pseudo-tokens whose offset is a string (e.g. COME_FROM
        # markers) to reach the next real instruction.
        while last < len(tokens) and isinstance(tokens[last].offset, str):
            last += 1
        if last < len(tokens):
            offset = tokens[last].offset
            assert tokens[first] == 'SETUP_LOOP'
            # The instruction after the loop body must sit at the
            # SETUP_LOOP's jump target; otherwise this while1stmt
            # span is wrong.
            if offset != tokens[first].attr:
                return True
        return False
    return False
class Python30Parser(Python3Parser):
def p_30(self, args):

View File

@@ -166,9 +166,9 @@ class Scanner2(scan.Scanner):
# continue
# last_offset = jump_offset
come_from_name = 'COME_FROM'
opname = self.opc.opname[self.code[jump_offset]]
if opname.startswith('SETUP_') and self.version == 2.7:
come_from_type = opname[len('SETUP_'):]
op_name = self.opc.opname[self.code[jump_offset]]
if op_name.startswith('SETUP_') and self.version == 2.7:
come_from_type = op_name[len('SETUP_'):]
if come_from_type not in ('LOOP', 'EXCEPT'):
come_from_name = 'COME_FROM_%s' % come_from_type
pass
@@ -179,7 +179,7 @@ class Scanner2(scan.Scanner):
jump_idx += 1
op = self.code[offset]
opname = self.opc.opname[op]
op_name = self.opc.opname[op]
oparg = None; pattr = None
has_arg = op_has_argument(op, self.opc)
@@ -194,14 +194,14 @@ class Scanner2(scan.Scanner):
if iscode(const):
oparg = const
if const.co_name == '<lambda>':
assert opname == 'LOAD_CONST'
opname = 'LOAD_LAMBDA'
assert op_name == 'LOAD_CONST'
op_name = 'LOAD_LAMBDA'
elif const.co_name == '<genexpr>':
opname = 'LOAD_GENEXPR'
op_name = 'LOAD_GENEXPR'
elif const.co_name == '<dictcomp>':
opname = 'LOAD_DICTCOMP'
op_name = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>':
opname = 'LOAD_SETCOMP'
op_name = 'LOAD_SETCOMP'
# verify() uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used
# for comparison (todo: think about changing this)
@@ -237,20 +237,20 @@ class Scanner2(scan.Scanner):
self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE:
continue
else:
if self.is_pypy and not oparg and opname == 'BUILD_MAP':
opname = 'BUILD_MAP_n'
if self.is_pypy and not oparg and op_name == 'BUILD_MAP':
op_name = 'BUILD_MAP_n'
else:
opname = '%s_%d' % (opname, oparg)
op_name = '%s_%d' % (op_name, oparg)
if op != self.opc.BUILD_SLICE:
customize[opname] = oparg
elif self.is_pypy and opname in ('LOOKUP_METHOD',
customize[op_name] = oparg
elif self.is_pypy and op_name in ('LOOKUP_METHOD',
'JUMP_IF_NOT_DEBUG',
'SETUP_EXCEPT',
'SETUP_FINALLY'):
# The value in the dict is in special cases in semantic actions, such
# as CALL_FUNCTION. The value is not used in these cases, so we put
# in arbitrary value 0.
customize[opname] = 0
customize[op_name] = 0
elif op == self.opc.JUMP_ABSOLUTE:
# Further classify JUMP_ABSOLUTE into backward jumps
# which are used in loops, and "CONTINUE" jumps which
@@ -269,16 +269,16 @@ class Scanner2(scan.Scanner):
and self.code[offset+3] not in (self.opc.END_FINALLY,
self.opc.POP_BLOCK)
and offset not in self.not_continue):
opname = 'CONTINUE'
op_name = 'CONTINUE'
else:
opname = 'JUMP_BACK'
op_name = 'JUMP_BACK'
elif op == self.opc.LOAD_GLOBAL:
if offset in self.load_asserts:
opname = 'LOAD_ASSERT'
op_name = 'LOAD_ASSERT'
elif op == self.opc.RETURN_VALUE:
if offset in self.return_end_ifs:
opname = 'RETURN_END_IF'
op_name = 'RETURN_END_IF'
if offset in self.linestartoffsets:
linestart = self.linestartoffsets[offset]
@@ -287,7 +287,7 @@ class Scanner2(scan.Scanner):
if offset not in replace:
tokens.append(Token(
opname, oparg, pattr, offset, linestart, op,
op_name, oparg, pattr, offset, linestart, op,
has_arg, self.opc))
else:
tokens.append(Token(
@@ -782,6 +782,7 @@ class Scanner2(scan.Scanner):
if offset in self.ignore_if:
return
if self.version == 2.7:
if code[pre[rtarget]] == self.opc.JUMP_ABSOLUTE and pre[rtarget] in self.stmts \
and pre[rtarget] != offset and pre[pre[rtarget]] != offset:
if code[rtarget] == self.opc.JUMP_ABSOLUTE and code[rtarget+3] == self.opc.POP_BLOCK:
@@ -797,6 +798,7 @@ class Scanner2(scan.Scanner):
# Does the "if" jump just beyond a jump op, then this is probably an if statement
pre_rtarget = pre[rtarget]
code_pre_rtarget = code[pre_rtarget]
if code_pre_rtarget in self.jump_forward:
if_end = self.get_target(pre_rtarget)
@@ -824,6 +826,7 @@ class Scanner2(scan.Scanner):
self.structs.append({'type': 'if-then',
'start': start-3,
'end': pre_rtarget})
self.not_continue.add(pre_rtarget)
if rtarget < end:

View File

@@ -233,7 +233,7 @@ class Scanner26(scan.Scanner2):
if op != self.opc.BUILD_SLICE:
customize[op_name] = oparg
elif op == self.opc.JUMP_ABSOLUTE:
# Further classifhy JUMP_ABSOLUTE into backward jumps
# Further classify JUMP_ABSOLUTE into backward jumps
# which are used in loops, and "CONTINUE" jumps which
# may appear in a "continue" statement. The loop-type
# and continue-type jumps will help us classify loop
@@ -254,6 +254,9 @@ class Scanner26(scan.Scanner2):
# if x: continue
# the "continue" is not on a new line.
if tokens[-1].type == 'JUMP_BACK':
# We need 'intern' since we have
# already processed the previous
# token.
tokens[-1].type = intern('CONTINUE')
elif op == self.opc.LOAD_GLOBAL:

View File

@@ -324,9 +324,10 @@ class Scanner3(Scanner):
# FIXME: this is a hack to catch stuff like:
# if x: continue
# the "continue" is not on a new line.
# There are other situations were we don't catch
# There are other situations where we don't catch
# CONTINUE as well.
if tokens[-1].type == 'JUMP_BACK':
if tokens[-1].type == 'JUMP_BACK' and tokens[-1].attr <= argval:
# intern is used because we are changing the *previous* token
tokens[-1].type = intern('CONTINUE')
elif op == self.opc.RETURN_VALUE: