diff --git a/NEWS.md b/NEWS.md index 37e60cdf..9de9b502 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,18 @@ +3.3.2 2019-05-03 Better Friday +============================== + +As before, lots of decompilation bugs fixed. The focus has primarily +been on Python 3.6. We can now parse the entire 3.6.8 Python library +and verify that without an error. The same is true for 3.5.8. A number +of the bugs fixed though are not confined to these versions. In fact +some span back as far as 2.x + +But as before, many more remain in the 3.7 and 3.8 range which will +get addressed in future releases + +Pypy 3.6 support was started. Pypy 3.x detection fixed (via xdis) + + 3.3.1 2019-04-19 Good Friday ========================== diff --git a/admin-tools/pyenv-newer-versions b/admin-tools/pyenv-newer-versions index a27b2e0a..8155cd0c 100644 --- a/admin-tools/pyenv-newer-versions +++ b/admin-tools/pyenv-newer-versions @@ -5,4 +5,4 @@ if [[ $0 == ${BASH_SOURCE[0]} ]] ; then echo "This script should be *sourced* rather than run directly through bash" exit 1 fi -export PYVERSIONS='3.6.8 3.7.2 2.6.9 3.3.7 2.7.15 3.2.6 3.1.5 3.4.8' +export PYVERSIONS='3.6.8 3.7.3 2.6.9 3.3.7 2.7.16 3.2.6 3.1.5 3.4.8' diff --git a/pytest/test_pysource.py b/pytest/test_pysource.py index 71119a86..5d0ee550 100644 --- a/pytest/test_pysource.py +++ b/pytest/test_pysource.py @@ -123,11 +123,17 @@ def test_tables(): "Full entry: %s" % (name, k, arg, typ, entry[arg], type(entry[arg]), entry) ) - assert len(tup) == 2 + assert 2 <= len(tup) <= 3 for j, x in enumerate(tup): - assert isinstance(x, int), ( - "%s[%s][%d][%d] type '%s' is '%s should be an int but is %s. Full entry: %s" % - (name, k, arg, j, typ, x, type(x), entry) + if len(tup) == 3 and j == 1: + assert isinstance(x, str), ( + "%s[%s][%d][%d] type '%s' is '%s should be an string but is %s. Full entry: %s" % + (name, k, arg, j, typ, x, type(x), entry) + ) + else: + assert isinstance(x, int), ( + "%s[%s][%d][%d] type '%s' is '%s should be an int but is %s. 
Full entry: %s" % + (name, k, arg, j, typ, x, type(x), entry) ) pass arg += 1 diff --git a/test/bytecode_3.6_run/04_subscript.pyc b/test/bytecode_3.6_run/04_subscript.pyc new file mode 100644 index 00000000..6e354680 Binary files /dev/null and b/test/bytecode_3.6_run/04_subscript.pyc differ diff --git a/test/simple_source/bug36/01_fstring.py b/test/simple_source/bug36/01_fstring.py index 1c03b3ae..a15cb516 100644 --- a/test/simple_source/bug36/01_fstring.py +++ b/test/simple_source/bug36/01_fstring.py @@ -30,3 +30,12 @@ chunk2 = 'd' chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \ + b'\r\n' assert chunk == b'3\r\nabc\r\n' + +# From 3.6.8 idlelib/pyshell.py +# Bug was handling ''' +import os +filename = '.' +source = 'foo' +source = (f"__file__ = r'''{os.path.abspath(filename)}'''\n" + + source + "\ndel __file__") +print(source) diff --git a/test/simple_source/expression/04_subscript.py b/test/simple_source/expression/04_subscript.py new file mode 100644 index 00000000..3f2b56ce --- /dev/null +++ b/test/simple_source/expression/04_subscript.py @@ -0,0 +1,15 @@ +# From 3.6.8 idlelib/query.py +# Bug was handling parenthesis around subscript in an assignment. + +# RUNNABLE! 
+a = {'text': 1} +b = {'text': 3} +for widget, entry, expect in ( + (a, b, 1), + (None, b, 3) + ): + assert (widget or entry)['text'] == expect + (widget or entry)['text'] = 'A' + +assert a['text'] == 'A', "a[text] = %s != 'A'" % a['text'] +assert b['text'] == 'A', "a[text] = %s != 'A'" % b['text'] diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index 71edd0e2..ee3214e9 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2018 Rocky Bernstein +# Copyright (c) 2015-2019 Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock @@ -587,14 +587,14 @@ class PythonParser(GenericASTBuilder): ## designLists ::= ## Will need to redo semantic actiion - store ::= STORE_FAST - store ::= STORE_NAME - store ::= STORE_GLOBAL - store ::= STORE_DEREF - store ::= expr STORE_ATTR - store ::= store_subscr - store_subscr ::= expr expr STORE_SUBSCR - store ::= unpack + store ::= STORE_FAST + store ::= STORE_NAME + store ::= STORE_GLOBAL + store ::= STORE_DEREF + store ::= expr STORE_ATTR + store ::= store_subscript + store_subscript ::= expr expr STORE_SUBSCR + store ::= unpack ''' diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index dcfe33be..c0ed747d 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -1215,7 +1215,7 @@ class Python3Parser(PythonParser): pass elif lhs == 'while1stmt': - # If there is a fall through to the COME_FROM_LOOP. then this is + # If there is a fall through to the COME_FROM_LOOP, then this is # not a while 1. So the instruction before should either be a # JUMP_BACK or the instruction before should not be the target of a # jump. 
(Well that last clause i not quite right; that target could be diff --git a/uncompyle6/parsers/parse36.py b/uncompyle6/parsers/parse36.py index ceac64dd..d24eb639 100644 --- a/uncompyle6/parsers/parse36.py +++ b/uncompyle6/parsers/parse36.py @@ -121,9 +121,12 @@ class Python36Parser(Python35Parser): try_except36 ::= SETUP_EXCEPT returns except_handler36 opt_come_from_except try_except36 ::= SETUP_EXCEPT suite_stmts + try_except36 ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK + except_handler36 opt_come_from_except # 3.6 omits END_FINALLY sometimes except_handler36 ::= COME_FROM_EXCEPT except_stmts + except_handler36 ::= JUMP_FORWARD COME_FROM_EXCEPT except_stmts except_handler ::= jmp_abs COME_FROM_EXCEPT except_stmts stmt ::= tryfinally36 @@ -169,6 +172,7 @@ class Python36Parser(Python35Parser): JUMP_ABSOLUTE END_FINALLY COME_FROM for_block pb_ja else_suite COME_FROM_LOOP + """) self.check_reduce['call_kw'] = 'AST' diff --git a/uncompyle6/scanners/pypy36.py b/uncompyle6/scanners/pypy36.py new file mode 100644 index 00000000..158d8b54 --- /dev/null +++ b/uncompyle6/scanners/pypy36.py @@ -0,0 +1,22 @@ +# Copyright (c) 2019 by Rocky Bernstein +""" +Python PyPy 3.6 decompiler scanner. + +Does some additional massaging of xdis-disassembled instructions to +make things easier for decompilation. +""" + +import uncompyle6.scanners.scanner36 as scan + +# bytecode verification, verify(), uses JUMP_OPS from here +from xdis.opcodes import opcode_35 as opc # is this right? 
+JUMP_OPs = opc.JUMP_OPS + +# We base this off of 3.5 +class ScannerPyPy36(scan.Scanner36): + def __init__(self, show_asm): + # There are no differences in initialization between + # pypy 3.6 and 3.6 + scan.Scanner36.__init__(self, show_asm, is_pypy=True) + self.version = 3.6 + return diff --git a/uncompyle6/semantics/consts.py b/uncompyle6/semantics/consts.py index 6183e060..ae6691e1 100644 --- a/uncompyle6/semantics/consts.py +++ b/uncompyle6/semantics/consts.py @@ -27,6 +27,77 @@ else: maxint = sys.maxint +# Operator precedence +# See https://docs.python.org/2/reference/expressions.html +# or https://docs.python.org/3/reference/expressions.html +# for a list. + +# Things at the top of this list below with low-value precedence will +# tend to have parentheses around them. Things at the bottom +# of the list will tend not to have parentheses around them. +PRECEDENCE = { + 'list': 0, + 'dict': 0, + 'unary_convert': 0, + 'dict_comp': 0, + 'set_comp': 0, + 'set_comp_expr': 0, + 'list_comp': 0, + 'generator_exp': 0, + + 'attribute': 2, + 'subscript': 2, + 'subscript2': 2, + 'store_subscript': 2, + 'delete_subscr': 2, + 'slice0': 2, + 'slice1': 2, + 'slice2': 2, + 'slice3': 2, + 'buildslice2': 2, + 'buildslice3': 2, + 'call': 2, + + 'BINARY_POWER': 4, + + 'unary_expr': 6, + + 'BINARY_MULTIPLY': 8, + 'BINARY_DIVIDE': 8, + 'BINARY_TRUE_DIVIDE': 8, + 'BINARY_FLOOR_DIVIDE': 8, + 'BINARY_MODULO': 8, + + 'BINARY_ADD': 10, + 'BINARY_SUBTRACT': 10, + + 'BINARY_LSHIFT': 12, + 'BINARY_RSHIFT': 12, + + 'BINARY_AND': 14, + 'BINARY_XOR': 16, + 'BINARY_OR': 18, + + 'compare': 20, + 'unary_not': 22, + 'and': 24, + 'ret_and': 24, + + 'or': 26, + 'ret_or': 26, + + 'conditional': 28, + 'conditional_lamdba': 28, + 'conditional_not_lamdba': 28, + 'conditionalnot': 28, + 'ret_cond': 28, + + '_mklambda': 30, + + 'yield': 101, + 'yield_from': 101 +} + LINE_LENGTH = 80 # Some parse trees created below are used for comparing code @@ -150,15 +221,17 @@ TABLE_DIRECT = { 'DELETE_FAST': ( '%|del 
%{pattr}\n', ), 'DELETE_NAME': ( '%|del %{pattr}\n', ), 'DELETE_GLOBAL': ( '%|del %{pattr}\n', ), - 'delete_subscr': ( '%|del %c[%c]\n', - (0, 'expr'), (1, 'expr') ), - 'subscript': ( '%c[%p]', - (0, 'expr'), - (1, 100) ), - 'subscript2': ( '%c[%c]', - (0, 'expr'), + 'delete_subscr': ( '%|del %p[%c]\n', + (0, 'expr', PRECEDENCE['subscript']), (1, 'expr') ), + 'subscript': ( '%p[%c]', + (0, 'expr', PRECEDENCE['subscript']), (1, 'expr') ), - 'store_subscr': ( '%c[%c]', 0, 1), + 'subscript2': ( '%p[%c]', + (0, 'expr', PRECEDENCE['subscript']), + (1, 'expr') ), + 'store_subscript': ( '%p[%c]', + (0, 'expr', PRECEDENCE['subscript']), + (1, 'expr') ), 'STORE_FAST': ( '%{pattr}', ), 'STORE_NAME': ( '%{pattr}', ), 'STORE_GLOBAL': ( '%{pattr}', ), @@ -180,12 +253,15 @@ TABLE_DIRECT = { 'list_iter': ( '%c', 0 ), 'list_for': ( ' for %c in %c%c', 2, 0, 3 ), 'list_if': ( ' if %c%c', 0, 2 ), - 'list_if_not': ( ' if not %p%c', (0, 22), 2 ), + 'list_if_not': ( ' if not %p%c', + (0, 'expr', PRECEDENCE['unary_not']), + 2 ), 'lc_body': ( '', ), # ignore when recursing 'comp_iter': ( '%c', 0 ), 'comp_if': ( ' if %c%c', 0, 2 ), - 'comp_if_not': ( ' if not %p%c', (0, 22), 2 ), + 'comp_if_not': ( ' if not %p%c', + (0, 'expr', PRECEDENCE['unary_not']), 2 ), 'comp_body': ( '', ), # ignore when recusing 'set_comp_body': ( '%c', 0 ), 'gen_comp_body': ( '%c', 0 ), @@ -208,8 +284,10 @@ TABLE_DIRECT = { 'conditional': ( '%p if %p else %p', (2, 27), (0, 27), (4, 27) ), 'conditional_true': ( '%p if 1 else %p', (0, 27), (2, 27) ), 'ret_cond': ( '%p if %p else %p', (2, 27), (0, 27), (-1, 27) ), - 'conditional_not': ( '%p if not %p else %p', (2, 27), (0, 22), (4, 27) ), - 'ret_cond_not': ( '%p if not %p else %p', (2, 27), (0, 22), (-1, 27) ), + 'conditional_not': ( '%p if not %p else %p', + (2, 27), + (0, "expr", PRECEDENCE['unary_not']), + (4, 27) ), 'conditional_lambda': ( '%c if %c else %c', (2, 'expr'), 0, 4 ), @@ -257,7 +335,8 @@ TABLE_DIRECT = { 'ifstmt': ( '%|if %c:\n%+%c%-', 0, 1 ), 
'iflaststmt': ( '%|if %c:\n%+%c%-', 0, 1 ), 'iflaststmtl': ( '%|if %c:\n%+%c%-', 0, 1 ), - 'testtrue': ( 'not %p', (0, 22) ), + 'testtrue': ( 'not %p', + (0, PRECEDENCE['unary_not']) ), 'ifelsestmt': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 3 ), 'ifelsestmtc': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 3 ), @@ -337,76 +416,6 @@ MAP = { 'exprlist': MAP_R0, } -# Operator precidence -# See https://docs.python.org/2/reference/expressions.html -# or https://docs.python.org/3/reference/expressions.html -# for a list. - -# Things at the top of this list below with low-value precidence will -# tend to have parenthesis around them. Things at the bottom -# of the list will tend not to have parenthesis around them. -PRECEDENCE = { - 'list': 0, - 'dict': 0, - 'unary_convert': 0, - 'dict_comp': 0, - 'set_comp': 0, - 'set_comp_expr': 0, - 'list_comp': 0, - 'generator_exp': 0, - - 'attribute': 2, - 'subscript': 2, - 'subscript2': 2, - 'slice0': 2, - 'slice1': 2, - 'slice2': 2, - 'slice3': 2, - 'buildslice2': 2, - 'buildslice3': 2, - 'call': 2, - - 'BINARY_POWER': 4, - - 'unary_expr': 6, - - 'BINARY_MULTIPLY': 8, - 'BINARY_DIVIDE': 8, - 'BINARY_TRUE_DIVIDE': 8, - 'BINARY_FLOOR_DIVIDE': 8, - 'BINARY_MODULO': 8, - - 'BINARY_ADD': 10, - 'BINARY_SUBTRACT': 10, - - 'BINARY_LSHIFT': 12, - 'BINARY_RSHIFT': 12, - - 'BINARY_AND': 14, - 'BINARY_XOR': 16, - 'BINARY_OR': 18, - - 'compare': 20, - 'unary_not': 22, - 'and': 24, - 'ret_and': 24, - - 'or': 26, - 'ret_or': 26, - - 'conditional': 28, - 'conditional_lamdba': 28, - 'conditional_not_lamdba': 28, - 'conditionalnot': 28, - 'ret_cond': 28, - 'ret_cond_not': 28, - - '_mklambda': 30, - - 'yield': 101, - 'yield_from': 101 -} - ASSIGN_TUPLE_PARAM = lambda param_name: \ SyntaxTree('expr', [ Token('LOAD_FAST', pattr=param_name) ]) diff --git a/uncompyle6/semantics/customize36.py b/uncompyle6/semantics/customize36.py index 8e144df9..79d185a2 100644 --- a/uncompyle6/semantics/customize36.py +++ b/uncompyle6/semantics/customize36.py @@ -21,6 
+21,11 @@ from uncompyle6.semantics.helper import flatten_list from uncompyle6.semantics.consts import ( INDENT_PER_LEVEL, PRECEDENCE, TABLE_DIRECT, TABLE_R) +def escape_format(s): + return s.replace('\r', '\\r').\ + replace('\n', '\\n').\ + replace("'''", '"""') + ####################### # Python 3.6+ Changes # ####################### @@ -38,20 +43,20 @@ def customize_for_version36(self, version): PRECEDENCE['unmap_dict'] = 0 TABLE_DIRECT.update({ - 'tryfinally36': ( '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', - (1, 'returns'), 3 ), - 'fstring_expr': ( "{%c%{conversion}}", - (0, 'expr') ), + 'tryfinally36': ( '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', + (1, 'returns'), 3 ), + 'fstring_expr': ( "{%c%{conversion}}", + (0, 'expr') ), # FIXME: the below assumes the format strings # don't have ''' in them. Fix this properly - 'fstring_single': ( "f'''{%c%{conversion}}'''", 0), + 'fstring_single': ( "f'''{%c%{conversion}}'''", 0), 'formatted_value_attr': ( "f'''{%c%{conversion}}%{string}'''", (0, 'expr')), - 'fstring_multi': ( "f'''%c'''", 0), - 'func_args36': ( "%c(**", 0), - 'try_except36': ( '%|try:\n%+%c%-%c\n\n', 1, 2 ), - 'except_return': ( '%|except:\n%+%c%-', 3 ), - 'unpack_list': ( '*%c', (0, 'list') ), + 'fstring_multi': ( "f'''%c'''", 0), + 'func_args36': ( "%c(**", 0), + 'try_except36': ( '%|try:\n%+%c%-%c\n\n', 1, -2 ), + 'except_return': ( '%|except:\n%+%c%-', 3 ), + 'unpack_list': ( '*%c', (0, 'list') ), 'tryfinally_return_stmt': ( '%|try:\n%+%c%-%|finally:\n%+%|return%-\n\n', 1 ), @@ -347,7 +352,11 @@ def customize_for_version36(self, version): def n_formatted_value(node): if node[0] == 'LOAD_CONST': - self.write(node[0].attr) + value = node[0].attr + if isinstance(value, tuple): + self.write(node[0].attr) + else: + self.write(escape_format(node[0].attr)) self.prune() else: self.default(node) @@ -375,7 +384,7 @@ def customize_for_version36(self, version): f_conversion(node) fmt_node = node.data[3] if fmt_node == 'expr' and fmt_node[0] == 'LOAD_CONST': - 
node.string = fmt_node[0].attr.replace('\r', '\\r').replace('\n', '\\n') + node.string = escape_format(fmt_node[0].attr) else: node.string = fmt_node diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index b40993e8..2bc5d9a5 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -88,7 +88,8 @@ Python. # # %p like %c but sets the operator precedence. # Its argument then is a tuple indicating the node -# index and the precidence value, an integer. +# index and the precedence value, an integer. If 3 items are given, +# the second item is the nonterminal name and the precedence is given last. # # %C evaluate children recursively, with sibling children separated by the # given string. It needs a 3-tuple: a starting node, the maximimum @@ -616,7 +617,7 @@ class SourceWalker(GenericASTTraversal, object): node[-2][0].kind = 'build_tuple2' self.default(node) - n_store_subscr = n_subscript = n_delete_subscr + n_store_subscript = n_subscript = n_delete_subscr # Note: this node is only in Python 2.x # FIXME: figure out how to get this into customization @@ -1873,7 +1874,18 @@ class SourceWalker(GenericASTTraversal, object): arg += 1 elif typ == 'p': p = self.prec - (index, self.prec) = entry[arg] + tup = entry[arg] + assert isinstance(tup, tuple) + if len(tup) == 3: + (index, nonterm_name, self.prec) = tup + assert node[index] == nonterm_name, ( + "at %s[%d], expected '%s' node; got '%s'" % ( + node.kind, arg, nonterm_name, node[index].kind) + ) + else: + assert len(tup) == 2 + (index, self.prec) = entry[arg] + self.preorder(node[index]) self.prec = p arg += 1 diff --git a/uncompyle6/version.py b/uncompyle6/version.py index efedabf1..36971cbe 100644 --- a/uncompyle6/version.py +++ b/uncompyle6/version.py @@ -12,4 +12,4 @@ # along with this program. If not, see . # This file is suitable for sourcing inside bash as # well as importing into Python -VERSION='3.3.1' # noqa +VERSION='3.3.2' # noqa