Merge branch 'master' into python-2.4

2025-08-04 01:09:52 +08:00 · 2019-05-24 10:37:51 -04:00
parent 60d96b6a5a 47ed0795b2
commit 7f46d8bb2a
17 changed files with 103 additions and 126 deletions
--- a/README.rst
+++ b/README.rst
@@ -93,8 +93,8 @@ This uses setup.py, so it follows the standard Python routine:
 A GNU makefile is also provided so :code:`make install` (possibly as root or
 sudo) will do the steps above.

-Testing
-------
+Running Tests
+-------------

 ::

@@ -133,18 +133,8 @@ You can also cross compare the results with pycdc_ . Since they work
 differently, bugs here often aren't in that, and vice versa.


-Known Bugs/Restrictions
-----------------------
-
-The biggest known and possibly fixable (but hard) problem has to do
-with handling control flow. (Python has probably the most diverse and
-screwy set of compound statements I've ever seen; there
-are "else" clauses on loops and try blocks that I suspect many
-programmers don't know about.)
-
-All of the Python decompilers that I have looked at have problems
-decompiling Python's control flow. In some cases we can detect an
-erroneous decompilation and report that.
+Verification
+------------

 In older versions of Python it was possible to verify bytecode by
 decompiling bytecode, and then compiling using the Python interpreter
@@ -167,6 +157,19 @@ And already Python has a set of programs like this: the test suite
 for the standard library that comes with Python. We have some
 code in `test/stdlib` to facilitate this kind of checking.

+Known Bugs/Restrictions
+-----------------------
+
+The biggest known and possibly fixable (but hard) problem has to do
+with handling control flow. (Python has probably the most diverse and
+screwy set of compound statements I've ever seen; there
+are "else" clauses on loops and try blocks that I suspect many
+programmers don't know about.)
+
+All of the Python decompilers that I have looked at have problems
+decompiling Python's control flow. In some cases we can detect an
+erroneous decompilation and report that.
+
 Python support is strongest in Python 2 for 2.7 and drops off as you
 get further away from that. Support is also probably pretty good for
 python 2.3-2.4 since a lot of the goodness of the early version of the
@@ -222,7 +225,7 @@ See Also
 * https://github.com/zrax/pycdc : purports to support all versions of Python. It is written in C++ and is most accurate for Python versions around 2.7 and 3.3 when the code was more actively developed. Accuracy for more recent versions of Python 3 and early versions of Python is especially lacking. See its `issue tracker <https://github.com/zrax/pycdc/issues>`_ for details. Currently lightly maintained.
 * https://code.google.com/archive/p/unpyc3/ : supports Python 3.2 only. The above projects use a different decompiling technique than what is used here. Currently unmaintained.
 * https://github.com/figment/unpyc3/ : fork of above, but supports Python 3.3 only. Includes some fixes like supporting function annotations. Currently unmaintained.
-* https://github.com/wibiti/uncompyle2 : supports Python 2.7 only, but does that fairly well. There are situtations where `uncompyle6` results are incorrect while `uncompyle2` results are not, but more often uncompyle6 is correct when uncompyle2 is not. Because `uncompyle6` adheres to accuracy over idiomatic Python, `uncompyle2` can produce more natural-looking code when it is correct. Currently `uncompyle2` is lightly maintained. See its issue `tracker <https://github.com/wibiti/uncompyle2/issues>`_ for more details
+* https://github.com/wibiti/uncompyle2 : supports Python 2.7 only, but does that fairly well. There are situations where `uncompyle6` results are incorrect while `uncompyle2` results are not, but more often uncompyle6 is correct when uncompyle2 is not. Because `uncompyle6` adheres to accuracy over idiomatic Python, `uncompyle2` can produce more natural-looking code when it is correct. Currently `uncompyle2` is lightly maintained. See its issue `tracker <https://github.com/wibiti/uncompyle2/issues>`_ for more details.
 * `How to report a bug <https://github.com/rocky/python-uncompyle6/blob/master/HOW-TO-REPORT-A-BUG.md>`_
 * The HISTORY_ file.
 * https://github.com/rocky/python-xdis : Cross Python version disassembler
--- a/pytest/test_docstring.py
+++ b/pytest/test_docstring.py
@@ -1,78 +0,0 @@
-import sys
-from uncompyle6 import PYTHON3
-if PYTHON3:
-    from io import StringIO
-    minint = -sys.maxsize-1
-    maxint = sys.maxsize
-else:
-    from StringIO import StringIO
-    minint = -sys.maxint-1
-    maxint = sys.maxint
-from uncompyle6.semantics.helper import print_docstring
-
-class PrintFake:
-    def __init__(self):
-        self.pending_newlines = 0
-        self.f = StringIO()
-
-    def write(self, *data):
-        if (len(data) == 0) or (len(data) == 1 and data[0] == ''):
-            return
-        out = ''.join((str(j) for j in data))
-        n = 0
-        for i in out:
-            if i == '\n':
-                n += 1
-                if n == len(out):
-                    self.pending_newlines = max(self.pending_newlines, n)
-                    return
-            elif n:
-                self.pending_newlines = max(self.pending_newlines, n)
-                out = out[n:]
-                break
-            else:
-                break
-
-        if self.pending_newlines > 0:
-            self.f.write('\n'*self.pending_newlines)
-            self.pending_newlines = 0
-
-        for i in out[::-1]:
-            if i == '\n':
-                self.pending_newlines += 1
-            else:
-                break
-
-        if self.pending_newlines:
-            out = out[:-self.pending_newlines]
-        self.f.write(out)
-    def println(self, *data):
-        if data and not(len(data) == 1 and data[0] == ''):
-            self.write(*data)
-        self.pending_newlines = max(self.pending_newlines, 1)
-        return
-    pass
-
-def test_docstring():
-
-    for doc, expect in (
-            ("Now is the time",
-             '  """Now is the time"""'),
-           ("""
-Now is the time
-""",
-            '''  """
-  Now is the time
-  """''')
-
-            # (r'''func placeholder - ' and with ("""\nstring\n  """)''',
-            #  """  r'''func placeholder - ' and with (\"\"\"\nstring\n\"\"\")'''"""),
-            # (r"""func placeholder - ' and with ('''\nstring\n''') and \"\"\"\nstring\n\"\"\" """,
-            # """  r\"\"\"func placeholder - ' and with ('''\nstring\n''') and \"\"\"\nstring\n\"\"\" \"\"\"""")
-            ):
-
-        o = PrintFake()
-        # print(doc)
-        # print(expect)
-        print_docstring(o, '  ', doc)
-        assert expect == o.f.getvalue()
--- a/test/Makefile
+++ b/test/Makefile
@@ -177,7 +177,7 @@ grammar-coverage-2.6:
 grammar-coverage-2.7:
 	-rm $(COVER_DIR)/spark-grammar-2.7.cover || true
 	SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-2.7.cover $(PYTHON) test_pythonlib.py --bytecode-2.7
-	SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-2.7.cover $(PYTHON) test_pyenvlib.py --2.7.14 --max=600
+	SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-2.7.cover $(PYTHON) test_pyenvlib.py --2.7.16 --max=600

 #: Get grammar coverage for Python 3.0
 grammar-coverage-3.0:
@@ -220,7 +220,12 @@ grammar-coverage-3.5:
 grammar-coverage-3.6:
 	rm $(COVER_DIR)/spark-grammar-3.6.cover || /bin/true
 	SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-3.6.cover $(PYTHON) test_pythonlib.py --bytecode-3.6
-	SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-3.6.cover $(PYTHON) test_pyenvlib.py --3.6.4 --max=280
+	SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-3.6.cover $(PYTHON) test_pyenvlib.py --3.6.8 --max=280
+
+#: Get grammar coverage for Python 3.7
+grammar-coverage-3.7:
+	rm $(COVER_DIR)/spark-grammar-3.7.cover || /bin/true
+	SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-3.7.cover $(PYTHON) test_pyenvlib.py --3.7.3 --max=500

 #: Check deparsing Python 2.6
 check-bytecode-2.6:
--- a/test/bytecode_2.7/00_docstring.pyc
+++ b/test/bytecode_2.7/00_docstring.pyc
--- a/test/bytecode_2.7_run/00_docstring.pyc
+++ b/test/bytecode_2.7_run/00_docstring.pyc
--- a/test/bytecode_3.7/00_docstring.pyc
+++ b/test/bytecode_3.7/00_docstring.pyc
--- a/test/bytecode_3.7_run/00_docstring.pyc
+++ b/test/bytecode_3.7_run/00_docstring.pyc
--- a/test/grammar-cover/grammar.sh
+++ b/test/grammar-cover/grammar.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # Remake Python grammar statistics

-typeset -A ALL_VERS=([2.4]=2.4.6 [2.5]=2.5.6 [2.6]=2.6.9 [2.7]=2.7.14 [3.2]=3.2.6 [3.3]=3.3.6 [3.4]=3.4.8 [3.5]=3.5.5 [3.6]=3.6.4)
+typeset -A ALL_VERS=([2.4]=2.4.6 [2.5]=2.5.6 [2.6]=2.6.9 [2.7]=2.7.16 [3.2]=3.2.6 [3.3]=3.3.6 [3.4]=3.4.8 [3.5]=3.5.6 [3.6]=3.6.8 [3.7]=3.7.3)

 if (( $# == 0 )); then
    echo 1>&2 "usage: $0 two-digit-version"
--- a/test/simple_source/stmts/00_docstring.py
+++ b/test/simple_source/stmts/00_docstring.py
@@ -1,4 +1,6 @@
 # uncompyle2 bug was not escaping """ properly
+
+# RUNNABLE!
 r'''func placeholder - with ("""\nstring\n""")'''
 def foo():
    r'''func placeholder - ' and with ("""\nstring\n""")'''
@@ -20,3 +22,22 @@ def baz():
        >>> t.rundict(m1.__dict__, 'rundict_test_pvt')  # None are skipped.
        TestResults(failed=0, attempted=8)
    """
+    assert __doc__ == r'''func placeholder - with ("""\nstring\n""")'''
+    assert foo.__doc__ == r'''func placeholder - ' and with ("""\nstring\n""")'''
+    assert bar.__doc__ == r"""func placeholder - ' and with ('''\nstring\n''') and \"\"\"\nstring\n\"\"\" """
+    assert baz.__doc__ == \
+    """
+        ...     '''>>> assert 1 == 1
+        ...     '''
+        ... \"""
+        >>> exec test_data in m1.__dict__
+        >>> exec test_data in m2.__dict__
+        >>> m1.__dict__.update({"f2": m2._f, "g2": m2.g, "h2": m2.H})
+
+        Tests that objects outside m1 are excluded:
+        \"""
+        >>> t.rundict(m1.__dict__, 'rundict_test_pvt')  # None are skipped.
+        TestResults(failed=0, attempted=8)
+    """
+
+baz()
--- a/uncompyle6/bin/uncompile.py
+++ b/uncompyle6/bin/uncompile.py
@@ -80,7 +80,7 @@ def main_bin():
    timestampfmt = "# %Y.%m.%d %H:%M:%S %Z"

    try:
-        opts, pyc_paths = getopt.getopt(sys.argv[1:], 'hac:gtdrVo:p:',
+        opts, pyc_paths = getopt.getopt(sys.argv[1:], 'hac:gtTdrVo:p:',
                                    'help asm compile= grammar linemaps recurse '
                                    'timestamp tree tree+ '
                                    'fragments verify verify-run version '
@@ -114,7 +114,7 @@ def main_bin():
        elif opt in ('--tree', '-t'):
            options['showast'] = True
            options['do_verify'] = None
-        elif opt in ('--tree+',):
+        elif opt in ('--tree+', '-T'):
            options['showast'] = 'Full'
            options['do_verify'] = None
        elif opt in ('--grammar', '-g'):
--- a/uncompyle6/main.py
+++ b/uncompyle6/main.py
@@ -280,6 +280,19 @@ def main(in_base, out_base, compiled_files, source_files, outfile=None,
            sys.stdout.write("\n")
            sys.stderr.write("\nLast file: %s   " % (infile))
            raise
+        except RuntimeError as e:
+            sys.stdout.write("\n%s\n" % str(e))
+            if str(e).startswith('Unsupported Python'):
+                sys.stdout.write("\n")
+                sys.stderr.write("\n# Unsupported bytecode in file %s\n# %s\n" % (infile, e))
+            else:
+                if outfile:
+                    outstream.close()
+                    os.remove(outfile)
+                sys.stdout.write("\n")
+                sys.stderr.write("\nLast file: %s   " % (infile))
+                raise
+
        # except:
        #     failed_files += 1
        #     if current_outfile:
@@ -337,9 +350,9 @@ def main(in_base, out_base, compiled_files, source_files, outfile=None,
                    # mem_usage = __memUsage()
                    print mess, infile
        if current_outfile:
-            sys.stdout.write("%s\r" %
-                             status_msg(do_verify, tot_files, okay_files, failed_files,
-                                        verify_failed_files, do_verify))
+            sys.stdout.write("%s -- %s\r" %
+                             (infile, status_msg(do_verify, tot_files, okay_files, failed_files,
+                                                 verify_failed_files, do_verify)))
            try:
                # FIXME: Something is weird with Pypy here
                sys.stdout.flush()
--- a/uncompyle6/parser.py
+++ b/uncompyle6/parser.py
@@ -801,7 +801,6 @@ def python_parser(version, co, out=sys.stdout, showasm=False,
 if __name__ == '__main__':
    def parse_test(co):
        from uncompyle6 import PYTHON_VERSION, IS_PYPY
-        ast = python_parser('2.7.13', co, showasm=True, is_pypy=True)
        ast = python_parser(PYTHON_VERSION, co, showasm=True, is_pypy=IS_PYPY)
        print(ast)
        return
--- a/uncompyle6/parsers/parse3.py
+++ b/uncompyle6/parsers/parse3.py
@@ -650,10 +650,6 @@ class Python3Parser(PythonParser):
                        # FIXME: Use the attr
                        # so this doesn't run into exponential parsing time.
                        if opname.startswith('BUILD_MAP_UNPACK'):
-                            self.add_unique_rule(rule, opname, token.attr, customize)
-                            rule = 'dict_entry ::= ' + 'expr ' * (token.attr*2)
-                            self.add_unique_rule(rule, opname, token.attr, customize)
-
                            # FIXME: start here. The LHS should be unmap_dict, not dict.
                            # FIXME: really we need a combination of dict_entry-like things.
                            # It just so happens the most common case is not to mix
--- a/uncompyle6/parsers/parse34.py
+++ b/uncompyle6/parsers/parse34.py
@@ -47,7 +47,7 @@ class Python34Parser(Python33Parser):

        # Python 3.4+ optimizes the trailing two JUMPS away

-        # Is this 3.4 only?
+        # This is 3.4 only
        yield_from ::= expr GET_ITER LOAD_CONST YIELD_FROM

        _ifstmts_jump ::= c_stmts_opt JUMP_ABSOLUTE JUMP_FORWARD COME_FROM
@@ -55,6 +55,7 @@ class Python34Parser(Python33Parser):

    def customize_grammar_rules(self, tokens, customize):
        self.remove_rules("""
+        yield_from    ::= expr expr YIELD_FROM
        # 3.4.2 has this. 3.4.4 may now
        # while1stmt ::= SETUP_LOOP l_stmts COME_FROM JUMP_BACK COME_FROM_LOOP
        """)
--- a/uncompyle6/semantics/aligner.py
+++ b/uncompyle6/semantics/aligner.py
@@ -36,7 +36,6 @@ class AligningWalker(SourceWalker, object):
        self.pending_newlines = max(self.pending_newlines, 1)

    def write(self, *data):
-        from trepan.api import debug; debug()
        if (len(data) == 1) and data[0] == self.indent:
            diff = max(self.pending_newlines,
                       self.desired_line_number - self.current_line_number)
--- a/uncompyle6/semantics/helper.py
+++ b/uncompyle6/semantics/helper.py
@@ -99,14 +99,9 @@ def strip_quotes(str):


 def print_docstring(self, indent, docstring):
-    try:
-        if docstring.find('"""') == -1:
    quote = '"""'
-        else:
+    if docstring.find("'''") == -1:
        quote = "'''"
-            docstring = docstring.replace("'''", "\\'''")
-    except:
-        return False
    self.write(indent)
    if not PYTHON3 and not isinstance(docstring, str):
        # Must be unicode in Python2
@@ -132,18 +127,31 @@ def print_docstring(self, indent, docstring):
        and (docstring[-1] != '"'
             or docstring[-2] == '\t')):
        self.write('r') # raw string
-        # restore backslashes unescaped since raw
+        # Restore backslashes unescaped since raw
        docstring = docstring.replace('\t', '\\')
    else:
        # Escape '"' if it's the last character, so it doesn't
        # ruin the ending triple quote
        if len(docstring) and docstring[-1] == '"':
            docstring = docstring[:-1] + '\\"'
-        # Restore escaped backslashes
-        docstring = docstring.replace('\t', '\\\\')
+
        # Escape triple quote when needed
-    if quote == '""""':
-        docstring = docstring.replace('"""', '\\"\\"\\"')
+        if quote == '"""':
+            if self.version > 2.7:
+                replace_str = '\\"""'
+            else:
+                replace_str = '\\"\\"\\"'
+            docstring = docstring.replace(quote, replace_str)
+        else:
+            assert quote == "'''"
+            if self.version > 2.7:
+                replace_str = "\\'''"
+            else:
+                replace_str = "\\'\\'\\'"
+            docstring = docstring.replace(quote, replace_str)
+
+        docstring = docstring.replace('\t', '\\\\')
+
    lines = docstring.split('\n')
    calculate_indent = maxint
    for line in lines[1:]:
@@ -152,6 +160,7 @@ def print_docstring(self, indent, docstring):
            calculate_indent = min(calculate_indent, len(line) - len(stripped))
    calculate_indent = min(calculate_indent, len(lines[-1]) - len(lines[-1].lstrip()))
    # Remove indentation (first line is special):
+
    trimmed = [lines[0]]
    if calculate_indent < maxint:
        trimmed += [line[calculate_indent:] for line in lines[1:]]
@@ -164,7 +173,12 @@ def print_docstring(self, indent, docstring):
    else:
        self.println(trimmed[0])
        for line in trimmed[1:-1]:
+            if line:
                self.println( indent, line )
+            else:
+                self.println( "\n\n" )
+                pass
+            pass
        self.println(indent, trimmed[-1], quote)
    return True

--- a/uncompyle6/semantics/pysource.py
+++ b/uncompyle6/semantics/pysource.py
@@ -1100,6 +1100,9 @@ class SourceWalker(GenericASTTraversal, object):
            comp_store = ast[3]

        have_not = False
+
+        # Iterate to find the innermost store
+        # We'll come back to the list iteration below.
        while n in ('list_iter', 'comp_iter'):
            # iterate one nesting deeper
            if self.version == 3.0 and len(n) == 3:
@@ -1109,7 +1112,7 @@ class SourceWalker(GenericASTTraversal, object):
                n = n[0]

            if n in ('list_for', 'comp_for'):
-                if n[2] == 'store':
+                if n[2] == 'store' and not store:
                    store = n[2]
                n = n[3]
            elif n in ('list_if', 'list_if_not', 'comp_if', 'comp_if_not'):
@@ -1153,11 +1156,12 @@ class SourceWalker(GenericASTTraversal, object):
        self.write(' in ')
        self.preorder(node[-3])

+        # Here is where we handle nested list iterations.
        if ast == 'list_comp' and self.version != 3.0:
            list_iter = ast[1]
            assert list_iter == 'list_iter'
-            if list_iter == 'list_for':
-                self.preorder(list_iter[3])
+            if list_iter[0] == 'list_for':
+                self.preorder(list_iter[0][3])
                self.prec = p
                return
            pass