Merge branch 'master' into python-2.4

2025-08-03 00:45:53 +08:00 · 2019-05-24 10:37:51 -04:00
parent 60d96b6a5a 47ed0795b2
commit 7f46d8bb2a
17 changed files with 103 additions and 126 deletions
--- a/README.rst
+++ b/README.rst
@@ -93,8 +93,8 @@ This uses setup.py, so it follows the standard Python routine:
 A GNU makefile is also provided so :code:`make install` (possibly as root or
 sudo) will do the steps above.

-Testing
-------
+Running Tests
+-------------

 ::

@@ -133,18 +133,8 @@ You can also cross compare the results with pycdc_ . Since they work
 differently, bugs here often aren't in that, and vice versa.


-Known Bugs/Restrictions
-----------------------
-
-The biggest known and possibly fixable (but hard) problem has to do
-with handling control flow. (Python has probably the most diverse and
-screwy set of compound statements I've ever seen; there
-are "else" clauses on loops and try blocks that I suspect many
-programmers don't know about.)
-
-All of the Python decompilers that I have looked at have problems
-decompiling Python's control flow. In some cases we can detect an
-erroneous decompilation and report that.
+Verification
+------------

 In older versions of Python it was possible to verify bytecode by
 decompiling bytecode, and then compiling using the Python interpreter
@@ -167,6 +157,19 @@ And already Python has a set of programs like this: the test suite
 for the standard library that comes with Python. We have some
 code in `test/stdlib` to facilitate this kind of checking.

+Known Bugs/Restrictions
+-----------------------
+
+The biggest known and possibly fixable (but hard) problem has to do
+with handling control flow. (Python has probably the most diverse and
+screwy set of compound statements I've ever seen; there
+are "else" clauses on loops and try blocks that I suspect many
+programmers don't know about.)
+
+All of the Python decompilers that I have looked at have problems
+decompiling Python's control flow. In some cases we can detect an
+erroneous decompilation and report that.
+
 Python support is strongest in Python 2 for 2.7 and drops off as you
 get further away from that. Support is also probably pretty good for
 python 2.3-2.4 since a lot of the goodness of early the version of the
@@ -194,7 +197,7 @@ Between Python 3.5, 3.6 and 3.7 there have been major changes to the

 Currently not all Python magic numbers are supported. Specifically in
 some versions of Python, notably Python 3.6, the magic number has
-changes several times within a version. 
+changed several times within a version.

 **We support only released versions, not candidate versions.** Note however
 that the magic of a released version is usually the same as the *last* candidate version prior to release.
@@ -222,7 +225,7 @@ See Also
 * https://github.com/zrax/pycdc : purports to support all versions of Python. It is written in C++ and is most accurate for Python versions around 2.7 and 3.3 when the code was more actively developed. Accuracy for more recent versions of Python 3 and early versions of Python are especially lacking. See its `issue tracker <https://github.com/zrax/pycdc/issues>`_ for details. Currently lightly maintained.
 * https://code.google.com/archive/p/unpyc3/ : supports Python 3.2 only. The above projects use a different decompiling technique than what is used here. Currently unmaintained.
 * https://github.com/figment/unpyc3/ : fork of above, but supports Python 3.3 only. Includes some fixes like supporting function annotations. Currently unmaintained.
-* https://github.com/wibiti/uncompyle2 : supports Python 2.7 only, but does that fairly well. There are situtations where `uncompyle6` results are incorrect while `uncompyle2` results are not, but more often uncompyle6 is correct when uncompyle2 is not. Because `uncompyle6` adheres to accuracy over idiomatic Python, `uncompyle2` can produce more natural-looking code when it is correct. Currently `uncompyle2` is lightly maintained. See its issue `tracker <https://github.com/wibiti/uncompyle2/issues>`_ for more details
+* https://github.com/wibiti/uncompyle2 : supports Python 2.7 only, but does that fairly well. There are situations where `uncompyle6` results are incorrect while `uncompyle2` results are not, but more often `uncompyle6` is correct when `uncompyle2` is not. Because `uncompyle6` adheres to accuracy over idiomatic Python, `uncompyle2` can produce more natural-looking code when it is correct. Currently `uncompyle2` is lightly maintained. See its issue `tracker <https://github.com/wibiti/uncompyle2/issues>`_ for more details.
 * `How to report a bug <https://github.com/rocky/python-uncompyle6/blob/master/HOW-TO-REPORT-A-BUG.md>`_
 * The HISTORY_ file.
 * https://github.com/rocky/python-xdis : Cross Python version disassembler
--- a/pytest/test_docstring.py
+++ b/pytest/test_docstring.py
@@ -1,78 +0,0 @@
-import sys
-from uncompyle6 import PYTHON3
-if PYTHON3:
-    from io import StringIO
-    minint = -sys.maxsize-1
-    maxint = sys.maxsize
-else:
-    from StringIO import StringIO
-    minint = -sys.maxint-1
-    maxint = sys.maxint
-from uncompyle6.semantics.helper import print_docstring
-
-class PrintFake:
-    def __init__(self):
-        self.pending_newlines = 0
-        self.f = StringIO()
-
-    def write(self, *data):
-        if (len(data) == 0) or (len(data) == 1 and data[0] == ''):
-            return
-        out = ''.join((str(j) for j in data))
-        n = 0
-        for i in out:
-            if i == '\n':
-                n += 1
-                if n == len(out):
-                    self.pending_newlines = max(self.pending_newlines, n)
-                    return
-            elif n:
-                self.pending_newlines = max(self.pending_newlines, n)
-                out = out[n:]
-                break
-            else:
-                break
-
-        if self.pending_newlines > 0:
-            self.f.write('\n'*self.pending_newlines)
-            self.pending_newlines = 0
-
-        for i in out[::-1]:
-            if i == '\n':
-                self.pending_newlines += 1
-            else:
-                break
-
-        if self.pending_newlines:
-            out = out[:-self.pending_newlines]
-        self.f.write(out)
-    def println(self, *data):
-        if data and not(len(data) == 1 and data[0] == ''):
-            self.write(*data)
-        self.pending_newlines = max(self.pending_newlines, 1)
-        return
-    pass
-
-def test_docstring():
-
-    for doc, expect in (
-            ("Now is the time",
-             '  """Now is the time"""'),
-           ("""
-Now is the time
-""",
-            '''  """
-  Now is the time
-  """''')
-
-            # (r'''func placeholder - ' and with ("""\nstring\n  """)''',
-            #  """  r'''func placeholder - ' and with (\"\"\"\nstring\n\"\"\")'''"""),
-            # (r"""func placeholder - ' and with ('''\nstring\n''') and \"\"\"\nstring\n\"\"\" """,
-            # """  r\"\"\"func placeholder - ' and with ('''\nstring\n''') and \"\"\"\nstring\n\"\"\" \"\"\"""")
-            ):
-
-        o = PrintFake()
-        # print(doc)
-        # print(expect)
-        print_docstring(o, '  ', doc)
-        assert expect == o.f.getvalue()
--- a/test/Makefile
+++ b/test/Makefile
@@ -177,7 +177,7 @@ grammar-coverage-2.6:
 grammar-coverage-2.7:
 	-rm $(COVER_DIR)/spark-grammar-2.7.cover || true
 	SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-2.7.cover $(PYTHON) test_pythonlib.py --bytecode-2.7
-	SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-2.7.cover $(PYTHON) test_pyenvlib.py --2.7.14 --max=600
+	SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-2.7.cover $(PYTHON) test_pyenvlib.py --2.7.16 --max=600

 #: Get grammar coverage for Python 3.0
 grammar-coverage-3.0:
@@ -220,7 +220,12 @@ grammar-coverage-3.5:
 grammar-coverage-3.6:
 	rm $(COVER_DIR)/spark-grammar-3.6.cover || /bin/true
 	SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-3.6.cover $(PYTHON) test_pythonlib.py --bytecode-3.6
-	SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-3.6.cover $(PYTHON) test_pyenvlib.py --3.6.4 --max=280
+	SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-3.6.cover $(PYTHON) test_pyenvlib.py --3.6.8 --max=280
+
+#: Get grammar coverage for Python 3.7
+grammar-coverage-3.7:
+	rm $(COVER_DIR)/spark-grammar-3.7.cover || /bin/true
+	SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-3.7.cover $(PYTHON) test_pyenvlib.py --3.7.3 --max=500

 #: Check deparsing Python 2.6
 check-bytecode-2.6:
--- a/test/bytecode_2.7/00_docstring.pyc
+++ b/test/bytecode_2.7/00_docstring.pyc
--- a/test/bytecode_2.7_run/00_docstring.pyc
+++ b/test/bytecode_2.7_run/00_docstring.pyc
--- a/test/bytecode_3.7/00_docstring.pyc
+++ b/test/bytecode_3.7/00_docstring.pyc
--- a/test/bytecode_3.7_run/00_docstring.pyc
+++ b/test/bytecode_3.7_run/00_docstring.pyc
--- a/test/grammar-cover/grammar.sh
+++ b/test/grammar-cover/grammar.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # Remake Python grammar statistics

-typeset -A ALL_VERS=([2.4]=2.4.6 [2.5]=2.5.6 [2.6]=2.6.9 [2.7]=2.7.14 [3.2]=3.2.6 [3.3]=3.3.6 [3.4]=3.4.8 [3.5]=3.5.5 [3.6]=3.6.4)
+typeset -A ALL_VERS=([2.4]=2.4.6 [2.5]=2.5.6 [2.6]=2.6.9 [2.7]=2.7.16 [3.2]=3.2.6 [3.3]=3.3.6 [3.4]=3.4.8 [3.5]=3.5.6 [3.6]=3.6.8 [3.7]=3.7.3)

 if (( $# == 0 )); then
    echo 1>&2 "usage: $0 two-digit-version"
--- a/test/simple_source/stmts/00_docstring.py
+++ b/test/simple_source/stmts/00_docstring.py
@@ -1,4 +1,6 @@
 # uncompyle2 bug was not escaping """ properly
+
+# RUNNABLE!
 r'''func placeholder - with ("""\nstring\n""")'''
 def foo():
    r'''func placeholder - ' and with ("""\nstring\n""")'''
@@ -20,3 +22,22 @@ def baz():
        >>> t.rundict(m1.__dict__, 'rundict_test_pvt')  # None are skipped.
        TestResults(failed=0, attempted=8)
    """
+    assert __doc__ == r'''func placeholder - with ("""\nstring\n""")'''
+    assert foo.__doc__ == r'''func placeholder - ' and with ("""\nstring\n""")'''
+    assert bar.__doc__ == r"""func placeholder - ' and with ('''\nstring\n''') and \"\"\"\nstring\n\"\"\" """
+    assert baz.__doc__ == \
+    """
+        ...     '''>>> assert 1 == 1
+        ...     '''
+        ... \"""
+        >>> exec test_data in m1.__dict__
+        >>> exec test_data in m2.__dict__
+        >>> m1.__dict__.update({"f2": m2._f, "g2": m2.g, "h2": m2.H})
+
+        Tests that objects outside m1 are excluded:
+        \"""
+        >>> t.rundict(m1.__dict__, 'rundict_test_pvt')  # None are skipped.
+        TestResults(failed=0, attempted=8)
+    """
+
+baz()
--- a/uncompyle6/bin/uncompile.py
+++ b/uncompyle6/bin/uncompile.py
@@ -80,7 +80,7 @@ def main_bin():
    timestampfmt = "# %Y.%m.%d %H:%M:%S %Z"

    try:
-        opts, pyc_paths = getopt.getopt(sys.argv[1:], 'hac:gtdrVo:p:',
+        opts, pyc_paths = getopt.getopt(sys.argv[1:], 'hac:gtTdrVo:p:',
                                    'help asm compile= grammar linemaps recurse '
                                    'timestamp tree tree+ '
                                    'fragments verify verify-run version '
@@ -114,7 +114,7 @@ def main_bin():
        elif opt in ('--tree', '-t'):
            options['showast'] = True
            options['do_verify'] = None
-        elif opt in ('--tree+',):
+        elif opt in ('--tree+', '-T'):
            options['showast'] = 'Full'
            options['do_verify'] = None
        elif opt in ('--grammar', '-g'):
--- a/uncompyle6/main.py
+++ b/uncompyle6/main.py
@@ -280,6 +280,19 @@ def main(in_base, out_base, compiled_files, source_files, outfile=None,
            sys.stdout.write("\n")
            sys.stderr.write("\nLast file: %s   " % (infile))
            raise
+        except RuntimeError as e:
+            sys.stdout.write("\n%s\n" % str(e))
+            if str(e).startswith('Unsupported Python'):
+                sys.stdout.write("\n")
+                sys.stderr.write("\n# Unsupported bytecode in file %s\n# %s\n" % (infile, e))
+            else:
+                if outfile:
+                    outstream.close()
+                    os.remove(outfile)
+                sys.stdout.write("\n")
+                sys.stderr.write("\nLast file: %s   " % (infile))
+                raise
+
        # except:
        #     failed_files += 1
        #     if current_outfile:
@@ -337,9 +350,9 @@ def main(in_base, out_base, compiled_files, source_files, outfile=None,
                    # mem_usage = __memUsage()
                    print mess, infile
        if current_outfile:
-            sys.stdout.write("%s\r" %
-                             status_msg(do_verify, tot_files, okay_files, failed_files,
-                                        verify_failed_files, do_verify))
+            sys.stdout.write("%s -- %s\r" %
+                             (infile, status_msg(do_verify, tot_files, okay_files, failed_files,
+                                                 verify_failed_files, do_verify)))
            try:
                # FIXME: Something is weird with Pypy here
                sys.stdout.flush()
--- a/uncompyle6/parser.py
+++ b/uncompyle6/parser.py
@@ -801,7 +801,6 @@ def python_parser(version, co, out=sys.stdout, showasm=False,
 if __name__ == '__main__':
    def parse_test(co):
        from uncompyle6 import PYTHON_VERSION, IS_PYPY
-        ast = python_parser('2.7.13', co, showasm=True, is_pypy=True)
        ast = python_parser(PYTHON_VERSION, co, showasm=True, is_pypy=IS_PYPY)
        print(ast)
        return
--- a/uncompyle6/parsers/parse3.py
+++ b/uncompyle6/parsers/parse3.py
@@ -650,10 +650,6 @@ class Python3Parser(PythonParser):
                        # FIXME: Use the attr
                        # so this doesn't run into exponential parsing time.
                        if opname.startswith('BUILD_MAP_UNPACK'):
-                            self.add_unique_rule(rule, opname, token.attr, customize)
-                            rule = 'dict_entry ::= ' + 'expr ' * (token.attr*2)
-                            self.add_unique_rule(rule, opname, token.attr, customize)
-
                            # FIXME: start here. The LHS should be unmap_dict, not dict.
                            # FIXME: really we need a combination of dict_entry-like things.
                            # It just so happens the most common case is not to mix
--- a/uncompyle6/parsers/parse34.py
+++ b/uncompyle6/parsers/parse34.py
@@ -47,7 +47,7 @@ class Python34Parser(Python33Parser):

        # Python 3.4+ optimizes the trailing two JUMPS away

-        # Is this 3.4 only?
+        # This is 3.4 only
        yield_from ::= expr GET_ITER LOAD_CONST YIELD_FROM

        _ifstmts_jump ::= c_stmts_opt JUMP_ABSOLUTE JUMP_FORWARD COME_FROM
@@ -55,6 +55,7 @@ class Python34Parser(Python33Parser):

    def customize_grammar_rules(self, tokens, customize):
        self.remove_rules("""
+        yield_from    ::= expr expr YIELD_FROM
        # 3.4.2 has this. 3.4.4 may now
        # while1stmt ::= SETUP_LOOP l_stmts COME_FROM JUMP_BACK COME_FROM_LOOP
        """)
--- a/uncompyle6/semantics/aligner.py
+++ b/uncompyle6/semantics/aligner.py
@@ -36,7 +36,6 @@ class AligningWalker(SourceWalker, object):
        self.pending_newlines = max(self.pending_newlines, 1)

    def write(self, *data):
-        from trepan.api import debug; debug()
        if (len(data) == 1) and data[0] == self.indent:
            diff = max(self.pending_newlines,
                       self.desired_line_number - self.current_line_number)
--- a/uncompyle6/semantics/helper.py
+++ b/uncompyle6/semantics/helper.py
@@ -99,14 +99,9 @@ def strip_quotes(str):


 def print_docstring(self, indent, docstring):
-    try:
-        if docstring.find('"""') == -1:
-            quote = '"""'
-        else:
-            quote = "'''"
-            docstring = docstring.replace("'''", "\\'''")
-    except:
-        return False
+    quote = '"""'
+    if docstring.find("'''") == -1:
+        quote = "'''"
    self.write(indent)
    if not PYTHON3 and not isinstance(docstring, str):
        # Must be unicode in Python2
@@ -132,18 +127,31 @@ def print_docstring(self, indent, docstring):
        and (docstring[-1] != '"'
             or docstring[-2] == '\t')):
        self.write('r') # raw string
-        # restore backslashes unescaped since raw
+        # Restore backslashes unescaped since raw
        docstring = docstring.replace('\t', '\\')
    else:
        # Escape '"' if it's the last character, so it doesn't
        # ruin the ending triple quote
        if len(docstring) and docstring[-1] == '"':
            docstring = docstring[:-1] + '\\"'
-        # Restore escaped backslashes
+
+        # Escape triple quote when needed
+        if quote == '"""':
+            if self.version > 2.7:
+                replace_str = '\\"""'
+            else:
+                replace_str = '\\"\\"\\"'
+            docstring = docstring.replace(quote, replace_str)
+        else:
+            assert quote == "'''"
+            if self.version > 2.7:
+                replace_str = "\\'''"
+            else:
+                replace_str = "\\'\\'\\'"
+            docstring = docstring.replace(quote, replace_str)
+
        docstring = docstring.replace('\t', '\\\\')
-    # Escape triple quote when needed
-    if quote == '""""':
-        docstring = docstring.replace('"""', '\\"\\"\\"')
+
    lines = docstring.split('\n')
    calculate_indent = maxint
    for line in lines[1:]:
@@ -152,6 +160,7 @@ def print_docstring(self, indent, docstring):
            calculate_indent = min(calculate_indent, len(line) - len(stripped))
    calculate_indent = min(calculate_indent, len(lines[-1]) - len(lines[-1].lstrip()))
    # Remove indentation (first line is special):
+
    trimmed = [lines[0]]
    if calculate_indent < maxint:
        trimmed += [line[calculate_indent:] for line in lines[1:]]
@@ -164,7 +173,12 @@ def print_docstring(self, indent, docstring):
    else:
        self.println(trimmed[0])
        for line in trimmed[1:-1]:
-            self.println( indent, line )
+            if line:
+                self.println( indent, line )
+            else:
+                self.println( "\n\n" )
+                pass
+            pass
        self.println(indent, trimmed[-1], quote)
    return True

--- a/uncompyle6/semantics/pysource.py
+++ b/uncompyle6/semantics/pysource.py
@@ -1100,6 +1100,9 @@ class SourceWalker(GenericASTTraversal, object):
            comp_store = ast[3]

        have_not = False
+
+        # Iterate to find the innermost store
+        # We'll come back to the list iteration below.
        while n in ('list_iter', 'comp_iter'):
            # iterate one nesting deeper
            if self.version == 3.0 and len(n) == 3:
@@ -1109,7 +1112,7 @@ class SourceWalker(GenericASTTraversal, object):
                n = n[0]

            if n in ('list_for', 'comp_for'):
-                if n[2] == 'store':
+                if n[2] == 'store' and not store:
                    store = n[2]
                n = n[3]
            elif n in ('list_if', 'list_if_not', 'comp_if', 'comp_if_not'):
@@ -1153,11 +1156,12 @@ class SourceWalker(GenericASTTraversal, object):
        self.write(' in ')
        self.preorder(node[-3])

+        # Here is where we handle nested list iterations.
        if ast == 'list_comp' and self.version != 3.0:
            list_iter = ast[1]
            assert list_iter == 'list_iter'
-            if list_iter == 'list_for':
-                self.preorder(list_iter[3])
+            if list_iter[0] == 'list_for':
+                self.preorder(list_iter[0][3])
                self.prec = p
                return
            pass