From 0dfbb27af5033dce31ba0d922d6964ec7781638e Mon Sep 17 00:00:00 2001 From: rocky Date: Thu, 12 Oct 2017 20:36:24 -0400 Subject: [PATCH 01/41] Administrivia - generalize shell code --- admin-tools/make-dist-newer.sh | 2 +- admin-tools/make-dist-older.sh | 4 ++-- test/bytecode_2.7/05_abc_class.pyc | Bin 502 -> 568 bytes test/bytecode_2.7/06_setif_comprehension.pyc | Bin 393 -> 439 bytes 4 files changed, 3 insertions(+), 3 deletions(-) diff --git a/admin-tools/make-dist-newer.sh b/admin-tools/make-dist-newer.sh index 273d1dd1..ca403b88 100755 --- a/admin-tools/make-dist-newer.sh +++ b/admin-tools/make-dist-newer.sh @@ -32,7 +32,7 @@ for pyversion in $PYVERSIONS; do first_two=$(echo $pyversion | cut -d'.' -f 1-2 | sed -e 's/\.//') rm -fr build python setup.py bdist_egg bdist_wheel - mv -v dist/uncompyle6-$VERSION-{py2.py3,py$first_two}-none-any.whl + mv -v dist/${PACKAGE}-$VERSION-{py2.py3,py$first_two}-none-any.whl done python ./setup.py sdist diff --git a/admin-tools/make-dist-older.sh b/admin-tools/make-dist-older.sh index 83e72ac7..aee0ce92 100755 --- a/admin-tools/make-dist-older.sh +++ b/admin-tools/make-dist-older.sh @@ -33,7 +33,7 @@ done # Tarballs can get created from the above setup, so make sure to remove them since we want # the tarball from master. -tarball=dist/uncompyle6-$VERSION-tar.gz +tarball=dist/${PACKAGE}-$VERSION-tar.gz if [[ -f $tarball ]]; then - rm -v dist/uncompyle6-$VERSION-tar.gz + rm -v dist/${PACKAGE}-$VERSION-tar.gz fi diff --git a/test/bytecode_2.7/05_abc_class.pyc b/test/bytecode_2.7/05_abc_class.pyc index b102a340d9d2d8ae25b273ea9e55bb6b927fd049..936ba1b0baa15a71efc8e19d8aa798078b6c5f88 100644 GIT binary patch delta 77 zcmeyyyn{uI`7oYIcofz%#WCkc;2GJA6d>EM~ChIaXPn;;tev+A?hLPda#LWeaC6oU$ os!sM~3}g*r2dbNVfYC-8%qrmka&2<+Q%ZAE?Kpw#VjyM!0Ek}|*#H0l diff --git a/test/bytecode_2.7/06_setif_comprehension.pyc b/test/bytecode_2.7/06_setif_comprehension.pyc index a5d2ff1a912424040f662239fd413f7ec494f7af..22e70f45091560722105097eeb692bc7211ec7ab 100644 GIT binary patch delta 76 zcmeBV-p;Je{F#@l&Mhk(knkdeZ`(89ow%D`}vk>S)tZ7)WxiDkKrC6jX* TjVFsUhO!2+0ToT2&nN)^6crAn From 2fc38866932d5e255b676ec4684f2426bfa5ed22 Mon Sep 17 00:00:00 2001 From: rocky Date: Fri, 13 Oct 2017 07:52:56 -0400 Subject: [PATCH 02/41] Small changes --- HISTORY.md | 17 ++++++++++++++--- uncompyle6/semantics/pysource.py | 6 +++++- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index cadcc7d3..d8871c82 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -98,9 +98,20 @@ so. Then hamled made a few commits earler on, while Eike Siewertsen made a few commits later on. But mostly wibiti, and Guenther Starnberger got the code to where uncompyle2 was around 2012. -In `uncompyle`, decompilation of python bytecode 2.5 & 2.6 is done by -transforming the byte code into a pseudo-2.7 Python bytecode and is -based on code from Eloi Vanderbeken. +While John Aycock and Hartmut Goebel were well versed in compiler +technology, those that have come afterwards don't seem to have been as +facile in it. Furthermore, documentation or guidance on how the +decompiler code worked, comparison to a conventional compiler +pipeline, how to add new constructs, or debug grammars was weak. Some +of the grammar tracing and error reporting was a bit weak as well. + +Given this, perhaps it is not surprising that subsequent changes +tended to shy away from using the built-in compiler technology +mechanisms and addressed problems and extensions by some other means. + +Specifically, in `uncompyle`, decompilation of python bytecode 2.5 & 2.6 +is done by transforming the byte code into a pseudo-2.7 Python +bytecode and is based on code from Eloi Vanderbeken. This project, `uncompyle6`, abandons that approach for various reasons. However the main reason is that we need offsets in fragment diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index a0a2772a..894bc979 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -1828,7 +1828,11 @@ class SourceWalker(GenericASTTraversal, object): specifications such as %c, %C, and so on. """ - # self.println("----> ", startnode.kind, ', ', entry[0]) + # print("-----") + # print(startnode) + # print(entry[0]) + # print('======') + fmt = entry[0] arg = 1 i = 0 From 9dd881fae1762838067760701965b4c787f93dfe Mon Sep 17 00:00:00 2001 From: rocky Date: Fri, 13 Oct 2017 11:16:10 -0400 Subject: [PATCH 03/41] Start allowing node names in template engine These are now used to assert we have the right node type. Simplify import_from --- pytest/test_pysource.py | 12 ++++- uncompyle6/parser.py | 12 ++--- uncompyle6/parsers/parse24.py | 2 +- uncompyle6/semantics/consts.py | 3 +- uncompyle6/semantics/fragments.py | 12 ++++- uncompyle6/semantics/pysource.py | 86 ++++++++++++++++--------------- 6 files changed, 74 insertions(+), 53 deletions(-) diff --git a/pytest/test_pysource.py b/pytest/test_pysource.py index 09955f77..3373cbce 100644 --- a/pytest/test_pysource.py +++ b/pytest/test_pysource.py @@ -56,10 +56,18 @@ def test_tables(): elif typ in frozenset(['c', 'p', 'P', 'C', 'D']): # One arg - should be int or tuple of int if typ == 'c': - assert isinstance(entry[arg], int), ( + item = entry[arg] + if isinstance(item, tuple): + assert isinstance(item[1], str), ( + "%s[%s][%d] kind %s is '%s' should be str but is %s. " + "Full entry: %s" % + (name, k, arg, typ, item[1], type(item[1]), entry) + ) + item = item[0] + assert isinstance(item, int), ( "%s[%s][%d] kind %s is '%s' should be an int but is %s. " "Full entry: %s" % - (name, k, arg, typ, entry[arg], type(entry[arg]), entry) + (name, k, arg, typ, item, type(item), entry) ) elif typ in frozenset(['C', 'D']): tup = entry[arg] diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index c66c13c2..2a81acfd 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -394,15 +394,15 @@ class PythonParser(GenericASTBuilder): stmt ::= importstar stmt ::= importmultiple - importlist2 ::= importlist2 import_as - importlist2 ::= import_as - import_as ::= IMPORT_NAME designator - import_as ::= IMPORT_NAME load_attrs designator - import_as ::= IMPORT_FROM designator + importlist ::= importlist import_as + importlist ::= import_as + import_as ::= IMPORT_NAME designator + import_as ::= IMPORT_NAME load_attrs designator + import_as ::= IMPORT_FROM designator importstmt ::= LOAD_CONST LOAD_CONST import_as importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME IMPORT_STAR - importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME importlist2 POP_TOP + importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME importlist POP_TOP importmultiple ::= LOAD_CONST LOAD_CONST import_as imports_cont imports_cont ::= imports_cont import_cont diff --git a/uncompyle6/parsers/parse24.py b/uncompyle6/parsers/parse24.py index 98341947..3fdb00ad 100644 --- a/uncompyle6/parsers/parse24.py +++ b/uncompyle6/parsers/parse24.py @@ -27,7 +27,7 @@ class Python24Parser(Python25Parser): # keep positions similar to simplify semantic actions importstmt ::= filler LOAD_CONST import_as - importfrom ::= filler LOAD_CONST IMPORT_NAME importlist2 POP_TOP + importfrom ::= filler LOAD_CONST IMPORT_NAME importlist POP_TOP importstar ::= filler LOAD_CONST IMPORT_NAME IMPORT_STAR importmultiple ::= filler LOAD_CONST import_as imports_cont diff --git a/uncompyle6/semantics/consts.py b/uncompyle6/semantics/consts.py index e85863af..bbea3d27 100644 --- a/uncompyle6/semantics/consts.py +++ b/uncompyle6/semantics/consts.py @@ -258,7 +258,8 @@ TABLE_DIRECT = { 'kv2': ( '%c: %c', 1, 2 ), 'mapexpr': ( '{%[1]C}', (0, maxint, ', ') ), 'importstmt': ( '%|import %c\n', 2), - 'importfrom': ( '%|from %[2]{pattr} import %c\n', 3 ), + 'importfrom': ( '%|from %[2]{pattr} import %c\n', + (3, 'importlist') ), 'importstar': ( '%|from %[2]{pattr} import *\n', ), } diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index 5d453c22..2010f4f8 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -1510,7 +1510,17 @@ class FragmentsWalker(pysource.SourceWalker, object): arg += 1 elif typ == 'c': start = len(self.f.getvalue()) - self.preorder(node[entry[arg]]) + + index = entry[arg] + if isinstance(index, tuple): + assert node[index[0]] == index[1], ( + "at %s[%d], %s vs %s" % ( + node.kind, arg, node[index[0]].kind, index[1]) + ) + index = index[0] + if isinstance(index, int): + self.preorder(node[index]) + finish = len(self.f.getvalue()) # FIXME rocky: figure out how to get this to be table driven diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 894bc979..245f8ca1 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -68,7 +68,10 @@ Python. # Escapes in the format string are: # # %c evaluate the node recursively. Its argument is a single -# integer representing a node index. +# integer or tuple representing a node index. +# If a tuple is given, the first item is the node index while +# the second item is a string giving the node/noterminal name. +# This name will be checked at runtime against the node type. # # %p like %c but sets the operator precedence. # Its argument then is a tuple indicating the node @@ -131,7 +134,7 @@ from uncompyle6.semantics.consts import ( LINE_LENGTH, RETURN_LOCALS, NONE, RETURN_NONE, PASS, ASSIGN_DOC_STRING, NAME_MODULE, TAB, INDENT_PER_LEVEL, TABLE_R, TABLE_DIRECT, MAP_DIRECT, - MAP, PRECEDENCE, ASSIGN_TUPLE_PARAM, escape, maxint, minint) + MAP, PRECEDENCE, ASSIGN_TUPLE_PARAM, escape, minint) from uncompyle6.show import ( @@ -278,47 +281,39 @@ class SourceWalker(GenericASTTraversal, object): 'DELETE_DEREF': ( '%{pattr}', 0 ), }) - if version < 2.0: - TABLE_DIRECT.update({ - 'importlist': ( '%C', (0, maxint, ', ') ), - }) - else: - TABLE_DIRECT.update({ - 'importlist2': ( '%C', (0, maxint, ', ') ), - }) - if version <= 2.4: - if version == 2.3: - TABLE_DIRECT.update({ - 'if1_stmt': ( '%|if 1\n%+%c%-', 5 ) - }) - - global NAME_MODULE - NAME_MODULE = AST('stmt', - [ AST('assign', - [ AST('expr', - [Token('LOAD_GLOBAL', pattr='__name__', - offset=0, has_arg=True)]), - AST('designator', - [ Token('STORE_NAME', pattr='__module__', - offset=3, has_arg=True)]) - ])]) - pass - if version <= 2.3: + if version <= 2.4: + if version == 2.3: TABLE_DIRECT.update({ - 'tryfinallystmt': ( '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', 1, 4 ) + 'if1_stmt': ( '%|if 1\n%+%c%-', 5 ) }) - elif version >= 2.5: - ######################## - # Import style for 2.5+ - ######################## - TABLE_DIRECT.update({ - 'importmultiple': ( '%|import %c%c\n', 2, 3 ), - 'import_cont' : ( ', %c', 2 ), - # With/as is allowed as "from future" thing in 2.5 - 'withstmt': ( '%|with %c:\n%+%c%-', 0, 3), - 'withasstmt': ( '%|with %c as %c:\n%+%c%-', 0, 2, 3), - }) + global NAME_MODULE + NAME_MODULE = AST('stmt', + [ AST('assign', + [ AST('expr', + [Token('LOAD_GLOBAL', pattr='__name__', + offset=0, has_arg=True)]), + AST('designator', + [ Token('STORE_NAME', pattr='__module__', + offset=3, has_arg=True)]) + ])]) + pass + if version <= 2.3: + TABLE_DIRECT.update({ + 'tryfinallystmt': ( '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', 1, 4 ) + }) + + elif version >= 2.5: + ######################## + # Import style for 2.5+ + ######################## + TABLE_DIRECT.update({ + 'importmultiple': ( '%|import %c%c\n', 2, 3 ), + 'import_cont' : ( ', %c', 2 ), + # With/as is allowed as "from future" thing in 2.5 + 'withstmt': ( '%|with %c:\n%+%c%-', 0, 3), + 'withasstmt': ( '%|with %c as %c:\n%+%c%-', 0, 2, 3), + }) ######################################## # Python 2.6+ @@ -1864,8 +1859,15 @@ class SourceWalker(GenericASTTraversal, object): node[0].attr == 1): self.write(',') elif typ == 'c': - entry_node = node[entry[arg]] - self.preorder(entry_node) + index = entry[arg] + if isinstance(index, tuple): + assert node[index[0]] == index[1], ( + "at %s[%d], %s vs %s" % ( + node.kind, arg, node[index[0]].kind, index[1]) + ) + index = index[0] + if isinstance(index, int): + self.preorder(node[index]) arg += 1 elif typ == 'p': p = self.prec From 03d1c48088b6fd4082536a5f9045221257d1588c Mon Sep 17 00:00:00 2001 From: rocky Date: Fri, 13 Oct 2017 11:35:17 -0400 Subject: [PATCH 04/41] More node checking in tables --- uncompyle6/parser.py | 35 +++++------ uncompyle6/semantics/consts.py | 112 ++++++++++++++++++--------------- 2 files changed, 76 insertions(+), 71 deletions(-) diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index 2a81acfd..5939eb0c 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -474,27 +474,24 @@ class PythonParser(GenericASTBuilder): expr ::= buildslice3 expr ::= yield - # Possibly Python < 2.3 - # expr ::= SET_LINENO - binary_expr ::= expr expr binary_op - binary_op ::= BINARY_ADD - binary_op ::= BINARY_MULTIPLY - binary_op ::= BINARY_AND - binary_op ::= BINARY_OR - binary_op ::= BINARY_XOR - binary_op ::= BINARY_SUBTRACT - binary_op ::= BINARY_TRUE_DIVIDE - binary_op ::= BINARY_FLOOR_DIVIDE - binary_op ::= BINARY_MODULO - binary_op ::= BINARY_LSHIFT - binary_op ::= BINARY_RSHIFT - binary_op ::= BINARY_POWER + binary_op ::= BINARY_ADD + binary_op ::= BINARY_MULTIPLY + binary_op ::= BINARY_AND + binary_op ::= BINARY_OR + binary_op ::= BINARY_XOR + binary_op ::= BINARY_SUBTRACT + binary_op ::= BINARY_TRUE_DIVIDE + binary_op ::= BINARY_FLOOR_DIVIDE + binary_op ::= BINARY_MODULO + binary_op ::= BINARY_LSHIFT + binary_op ::= BINARY_RSHIFT + binary_op ::= BINARY_POWER - unary_expr ::= expr unary_op - unary_op ::= UNARY_POSITIVE - unary_op ::= UNARY_NEGATIVE - unary_op ::= UNARY_INVERT + unary_expr ::= expr unary_op + unary_op ::= UNARY_POSITIVE + unary_op ::= UNARY_NEGATIVE + unary_op ::= UNARY_INVERT unary_not ::= expr UNARY_NOT diff --git a/uncompyle6/semantics/consts.py b/uncompyle6/semantics/consts.py index bbea3d27..6041eddb 100644 --- a/uncompyle6/semantics/consts.py +++ b/uncompyle6/semantics/consts.py @@ -74,64 +74,72 @@ TABLE_DIRECT = { 'BINARY_MATRIX_MULTIPLY': ( '@' ,), 'BINARY_TRUE_DIVIDE': ( '/' ,), # Not in <= 2.1 'BINARY_FLOOR_DIVIDE': ( '//' ,), - 'BINARY_MODULO': ( '%%',), - 'BINARY_POWER': ( '**',), - 'BINARY_LSHIFT': ( '<<',), - 'BINARY_RSHIFT': ( '>>',), - 'BINARY_AND': ( '&' ,), - 'BINARY_OR': ( '|' ,), - 'BINARY_XOR': ( '^' ,), - 'INPLACE_ADD': ( '+=' ,), - 'INPLACE_SUBTRACT': ( '-=' ,), - 'INPLACE_MULTIPLY': ( '*=' ,), + 'BINARY_MODULO': ( '%%',), + 'BINARY_POWER': ( '**',), + 'BINARY_LSHIFT': ( '<<',), + 'BINARY_RSHIFT': ( '>>',), + 'BINARY_AND': ( '&' ,), + 'BINARY_OR': ( '|' ,), + 'BINARY_XOR': ( '^' ,), + 'INPLACE_ADD': ( '+=' ,), + 'INPLACE_SUBTRACT': ( '-=' ,), + 'INPLACE_MULTIPLY': ( '*=' ,), 'INPLACE_MATRIX_MULTIPLY': ( '@=' ,), - 'INPLACE_DIVIDE': ( '/=' ,), + 'INPLACE_DIVIDE': ( '/=' ,), 'INPLACE_TRUE_DIVIDE': ( '/=' ,), # Not in <= 2.1; 2.6 generates INPLACE_DIVIDE only? 'INPLACE_FLOOR_DIVIDE': ( '//=' ,), - 'INPLACE_MODULO': ( '%%=',), - 'INPLACE_POWER': ( '**=',), - 'INPLACE_LSHIFT': ( '<<=',), - 'INPLACE_RSHIFT': ( '>>=',), - 'INPLACE_AND': ( '&=' ,), - 'INPLACE_OR': ( '|=' ,), - 'INPLACE_XOR': ( '^=' ,), - 'binary_expr': ( '%c %c %c', 0, -1, 1 ), + 'INPLACE_MODULO': ( '%%=',), + 'INPLACE_POWER': ( '**=',), + 'INPLACE_LSHIFT': ( '<<=',), + 'INPLACE_RSHIFT': ( '>>=',), + 'INPLACE_AND': ( '&=' ,), + 'INPLACE_OR': ( '|=' ,), + 'INPLACE_XOR': ( '^=' ,), + 'binary_expr': ( '%c %c %c', 0, + (-1, 'binary_op'), + ( 1, 'expr' ) ), - 'UNARY_POSITIVE': ( '+',), - 'UNARY_NEGATIVE': ( '-',), - 'UNARY_INVERT': ( '~'), - 'unary_expr': ( '%c%c', 1, 0), + 'UNARY_POSITIVE': ( '+',), + 'UNARY_NEGATIVE': ( '-',), + 'UNARY_INVERT': ( '~'), + 'unary_expr': ( '%c%c', + (1, 'unary_op'), + (0, 'expr') ), - 'unary_not': ( 'not %c', 0 ), - 'unary_convert': ( '`%c`', 0 ), - 'get_iter': ( 'iter(%c)', 0 ), - 'slice0': ( '%c[:]', 0 ), - 'slice1': ( '%c[%p:]', 0, (1, 100) ), - 'slice2': ( '%c[:%p]', 0, (1, 100) ), - 'slice3': ( '%c[%p:%p]', 0, (1, 100), (2, 100) ), + 'unary_not': ( 'not %c', + (0, 'expr' ) ), + 'unary_convert': ( '`%c`', + (0, 'expr' ), ), + 'get_iter': ( 'iter(%c)', 0 ), + 'slice0': ( '%c[:]', 0 ), + 'slice1': ( '%c[%p:]', + 0, (1, 100) ), + 'slice2': ( '%c[:%p]', + 0, (1, 100) ), + 'slice3': ( '%c[%p:%p]', + 0, (1, 100), (2, 100) ), - 'IMPORT_FROM': ( '%{pattr}', ), - 'load_attr': ( '%c.%[1]{pattr}', 0), - 'LOAD_FAST': ( '%{pattr}', ), - 'LOAD_NAME': ( '%{pattr}', ), - 'LOAD_CLASSNAME': ( '%{pattr}', ), - 'LOAD_GLOBAL': ( '%{pattr}', ), - 'LOAD_DEREF': ( '%{pattr}', ), - 'LOAD_LOCALS': ( 'locals()', ), - 'LOAD_ASSERT': ( '%{pattr}', ), -# 'LOAD_CONST': ( '%{pattr}', ), # handled by n_LOAD_CONST - 'DELETE_FAST': ( '%|del %{pattr}\n', ), - 'DELETE_NAME': ( '%|del %{pattr}\n', ), - 'DELETE_GLOBAL': ( '%|del %{pattr}\n', ), - 'delete_subscr': ( '%|del %c[%c]\n', 0, 1,), - 'binary_subscr': ( '%c[%p]', 0, (1, 100)), - 'binary_subscr2': ( '%c[%p]', 0, (1, 100)), - 'store_subscr': ( '%c[%c]', 0, 1), - 'STORE_FAST': ( '%{pattr}', ), - 'STORE_NAME': ( '%{pattr}', ), - 'STORE_GLOBAL': ( '%{pattr}', ), - 'STORE_DEREF': ( '%{pattr}', ), - 'unpack': ( '%C%,', (1, maxint, ', ') ), + 'IMPORT_FROM': ( '%{pattr}', ), + 'load_attr': ( '%c.%[1]{pattr}', 0), + 'LOAD_FAST': ( '%{pattr}', ), + 'LOAD_NAME': ( '%{pattr}', ), + 'LOAD_CLASSNAME': ( '%{pattr}', ), + 'LOAD_GLOBAL': ( '%{pattr}', ), + 'LOAD_DEREF': ( '%{pattr}', ), + 'LOAD_LOCALS': ( 'locals()', ), + 'LOAD_ASSERT': ( '%{pattr}', ), + 'DELETE_FAST': ( '%|del %{pattr}\n', ), + 'DELETE_NAME': ( '%|del %{pattr}\n', ), + 'DELETE_GLOBAL': ( '%|del %{pattr}\n', ), + 'delete_subscr': ( '%|del %c[%c]\n', 0, 1,), + 'binary_subscr': ( '%c[%p]', 0, (1, 100)), + 'binary_subscr2': ( '%c[%p]', 0, (1, 100)), + 'store_subscr': ( '%c[%c]', 0, 1), + 'STORE_FAST': ( '%{pattr}', ), + 'STORE_NAME': ( '%{pattr}', ), + 'STORE_GLOBAL': ( '%{pattr}', ), + 'STORE_DEREF': ( '%{pattr}', ), + 'unpack': ( '%C%,', (1, maxint, ', ') ), # This nonterminal we create on the fly in semantic routines 'unpack_w_parens': ( '(%C%,)', (1, maxint, ', ') ), From 1761ba2581d573b0d7f2d2c4114f59d8c289d342 Mon Sep 17 00:00:00 2001 From: rocky Date: Fri, 13 Oct 2017 15:43:41 -0400 Subject: [PATCH 05/41] Tag more semantic actions with nonterminals --- test/Makefile | 4 ++++ uncompyle6/semantics/consts.py | 17 ++++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/test/Makefile b/test/Makefile index 97f66f6a..1e233a85 100644 --- a/test/Makefile +++ b/test/Makefile @@ -49,6 +49,10 @@ check-3.5: check-bytecode check-3.6: check-bytecode $(PYTHON) test_pythonlib.py --bytecode-3.6 --weak-verify $(COMPILE) +# FIXME +#: this is called when running under pypy3.5-5.8.0 +5.8: + #: Check deparsing only, but from a different Python version check-disasm: $(PYTHON) dis-compare.py diff --git a/uncompyle6/semantics/consts.py b/uncompyle6/semantics/consts.py index 6041eddb..55343656 100644 --- a/uncompyle6/semantics/consts.py +++ b/uncompyle6/semantics/consts.py @@ -67,10 +67,10 @@ TABLE_R0 = { } TABLE_DIRECT = { - 'BINARY_ADD': ( '+' ,), - 'BINARY_SUBTRACT': ( '-' ,), - 'BINARY_MULTIPLY': ( '*' ,), - 'BINARY_DIVIDE': ( '/' ,), + 'BINARY_ADD': ( '+' ,), + 'BINARY_SUBTRACT': ( '-' ,), + 'BINARY_MULTIPLY': ( '*' ,), + 'BINARY_DIVIDE': ( '/' ,), 'BINARY_MATRIX_MULTIPLY': ( '@' ,), 'BINARY_TRUE_DIVIDE': ( '/' ,), # Not in <= 2.1 'BINARY_FLOOR_DIVIDE': ( '//' ,), @@ -110,10 +110,13 @@ TABLE_DIRECT = { (0, 'expr' ) ), 'unary_convert': ( '`%c`', (0, 'expr' ), ), - 'get_iter': ( 'iter(%c)', 0 ), - 'slice0': ( '%c[:]', 0 ), + 'get_iter': ( 'iter(%c)', + (0, 'expr'), ), + 'slice0': ( '%c[:]', + (0, 'expr'), ), 'slice1': ( '%c[%p:]', - 0, (1, 100) ), + (0, 'expr'), + (1, 100) ), 'slice2': ( '%c[:%p]', 0, (1, 100) ), 'slice3': ( '%c[%p:%p]', From 5e7632c33e3e5793bd21dd2a3572f721c5fcd85a Mon Sep 17 00:00:00 2001 From: rocky Date: Tue, 24 Oct 2017 22:56:23 -0400 Subject: [PATCH 06/41] Bump uncompyle. Pypy 5.8.0-beta tolerance --- Makefile | 4 ++-- __pkginfo__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 736e7389..3cd37b33 100644 --- a/Makefile +++ b/Makefile @@ -42,9 +42,9 @@ check-3.7: pytest check-2.6: $(MAKE) -C test $@ -#:PyPy 2.6.1 or PyPy 5.0.1 +#:PyPy 2.6.1 PyPy 5.0.1, or PyPy 5.8.0-beta0 # Skip for now -2.6 5.0 5.3: +2.6 5.0 5.3 5.8: #:PyPy pypy3-2.4.0 Python 3: pypy-3.2 2.4: diff --git a/__pkginfo__.py b/__pkginfo__.py index d341f282..c9b1bc7c 100644 --- a/__pkginfo__.py +++ b/__pkginfo__.py @@ -39,7 +39,7 @@ entry_points = { 'pydisassemble=uncompyle6.bin.pydisassemble:main', ]} ftp_url = None -install_requires = ['spark-parser >= 1.7.0, < 1.8.0', +install_requires = ['spark-parser >= 1.7.1, < 1.8.0', 'xdis >= 3.6.0, < 3.7.0', 'six'] license = 'MIT' mailing_list = 'python-debugger@googlegroups.com' From 5df09540b5af86e36d527d471f4048249c5cf9e7 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 29 Oct 2017 11:46:28 -0400 Subject: [PATCH 07/41] Python 3.6-inspired instruction size cleanup Revise and generalize for Python 3.6+ instructions vs < 3.6 instuctions. Used more of the generalized methods in xdis and remove some (but not all) of the magic numbers. This is a lot of changes, but not all of the refactoring needed. Much crap still remains. Also, there are still bugs in handling 3.6 bytecodes. --- uncompyle6/scanner.py | 33 ++++++++- uncompyle6/scanners/scanner2.py | 25 ++++--- uncompyle6/scanners/scanner3.py | 116 +++++++++++++++++-------------- uncompyle6/scanners/scanner30.py | 46 ++++++------ 4 files changed, 131 insertions(+), 89 deletions(-) diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 58e60863..1347762d 100755 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -18,6 +18,7 @@ from uncompyle6 import PYTHON3, IS_PYPY from uncompyle6.scanners.tok import Token from xdis.bytecode import op_size from xdis.magics import py_str2float +from xdis.util import code2num # The byte code versions we support PYTHON_VERSIONS = (1.5, @@ -85,7 +86,8 @@ class Scanner(object): return True if self.code[offset] != self.opc.JUMP_ABSOLUTE: return False - return offset < self.get_target(offset) + # FIXME 0 isn't always correct + return offset < self.get_target(offset, 0) def get_target(self, pos, op=None): if op is None: @@ -95,6 +97,10 @@ class Scanner(object): target += pos + 3 return target + # FIXME: the below can be removed after xdis version 3.6.1 has been released + def extended_arg_val(self, val): + return val << self.opc.EXTENDED_ARG_SHIFT + def get_argument(self, pos): arg = self.code[pos+1] + self.code[pos+2] * 256 return arg @@ -169,13 +175,20 @@ class Scanner(object): result_offset = None current_distance = len(code) + extended_arg = 0 for offset in self.op_range(start, end): op = code[offset] + + if op == self.opc.EXTENDED_ARG: + arg = code2num(code, offset+1) | extended_arg + extended_arg = self.extended_arg_val(arg) + continue + if op in instr: if target is None: result_offset = offset else: - dest = self.get_target(offset) + dest = self.get_target(offset, extended_arg) if dest == target: current_distance = 0 result_offset = offset @@ -204,17 +217,31 @@ class Scanner(object): instr = [instr] result = [] + extended_arg = 0 for offset in self.op_range(start, end): + op = code[offset] + + if op == self.opc.EXTENDED_ARG: + arg = code2num(code, offset+1) | extended_arg + extended_arg = self.extended_arg_val(arg) + continue + if op in instr: if target is None: result.append(offset) else: - t = self.get_target(offset) + t = self.get_target(offset, extended_arg) if include_beyond_target and t >= target: result.append(offset) elif t == target: result.append(offset) + pass + pass + pass + extended_arg = 0 + pass + return result def op_range(self, start, end): diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 9c2d62b0..538666d6 100644 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -25,9 +25,9 @@ from __future__ import print_function from collections import namedtuple from array import array -from uncompyle6.scanner import L65536 from xdis.code import iscode -from xdis.bytecode import op_has_argument, op_size +from xdis.bytecode import op_has_argument, op_size, instruction_size +from xdis.util import code2num from uncompyle6.scanner import Scanner @@ -193,7 +193,7 @@ class Scanner2(Scanner): oparg = self.get_argument(offset) + extended_arg extended_arg = 0 if op == self.opc.EXTENDED_ARG: - extended_arg = oparg * L65536 + extended_arg += self.extended_arg_val(oparg) continue if op in self.opc.CONST_OPS: const = co.co_consts[oparg] @@ -485,7 +485,7 @@ class Scanner2(Scanner): elif op in self.setup_ops: count_SETUP_ += 1 - def detect_control_flow(self, offset, op): + def detect_control_flow(self, offset, op, extended_arg): """ Detect type of block structures and their boundaries to fix optimized jumps in python2.3+ @@ -509,14 +509,13 @@ class Scanner2(Scanner): parent = struct if op == self.opc.SETUP_LOOP: - # We categorize loop types: 'for', 'while', 'while 1' with # possibly suffixes '-loop' and '-else' # Try to find the jump_back instruction of the loop. # It could be a return instruction. - start = offset+3 - target = self.get_target(offset, op) + start += instruction_size(op, self.opc) + target = self.get_target(offset) + extended_arg end = self.restrict_to_parent(target, parent) self.setup_loop_targets[offset] = target self.setup_loops[target] = offset @@ -988,12 +987,18 @@ class Scanner2(Scanner): self.thens = {} # JUMP_IF's that separate the 'then' part of an 'if' targets = {} + extended_arg = 0 for offset in self.op_range(0, n): op = code[offset] + if op == self.opc.EXTENDED_ARG: + arg = code2num(code, offset+1) | extended_arg + extended_arg += self.extended_arg_val(arg) + continue + # Determine structures and fix jumps in Python versions # since 2.3 - self.detect_control_flow(offset, op) + self.detect_control_flow(offset, op, extended_arg) if op_has_argument(op, self.opc): label = self.fixed_jumps.get(offset) @@ -1043,7 +1048,9 @@ class Scanner2(Scanner): label = self.fixed_jumps[offset] targets[label] = targets.get(label, []) + [offset] pass - pass + + extended_arg = 0 + pass # for loop # DEBUG: if debug in ('both', 'after'): diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 2c052426..d4c66118 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -27,7 +27,9 @@ from array import array from uncompyle6.scanner import Scanner from xdis.code import iscode -from xdis.bytecode import Bytecode, op_has_argument, op_size +from xdis.bytecode import Bytecode, op_has_argument, instruction_size +from xdis.util import code2num + from uncompyle6.scanner import Token, parse_fn_counts import xdis @@ -139,13 +141,6 @@ class Scanner3(Scanner): # FIXME: remove the above in favor of: # self.varargs_ops = frozenset(self.opc.hasvargs) - def extended_arg_val(self, val): - if self.version < 3.6: - return val * (1<<16) - else: - return val * (1<<8) - - def ingest(self, co, classname=None, code_objects={}, show_asm=None): """ Pick out tokens from an uncompyle6 code object, and transform them, @@ -218,6 +213,7 @@ class Scanner3(Scanner): # Get jump targets # Format: {target offset: [jump offsets]} jump_targets = self.find_jump_targets(show_asm) + ## print("XXX2", jump_targets) last_op_was_break = False extended_arg = 0 @@ -378,7 +374,8 @@ class Scanner3(Scanner): # as CONTINUE, but that's okay since we add a grammar # rule for that. pattr = argval - target = self.get_target(inst.offset) + # FIXME: 0 isn't always correct + target = self.get_target(inst.offset, 0) if target <= inst.offset: next_opname = self.opname[self.code[inst.offset+3]] if (inst.offset in self.stmts and @@ -473,7 +470,7 @@ class Scanner3(Scanner): self.prev = self.prev_op = [0] for offset in self.op_range(0, codelen): op = code[offset] - for _ in range(op_size(op, self.opc)): + for _ in range(instruction_size(op, self.opc)): self.prev_op.append(offset) def find_jump_targets(self, debug): @@ -509,12 +506,18 @@ class Scanner3(Scanner): self.setup_loops = {} # setup_loop offset given target targets = {} + extended_arg = 0 for offset in self.op_range(0, n): op = code[offset] + if op == self.opc.EXTENDED_ARG: + arg = code2num(code, offset+1) | extended_arg + extended_arg = self.extended_arg_val(arg) + continue + # Determine structures and fix jumps in Python versions # since 2.3 - self.detect_control_flow(offset, targets) + self.detect_control_flow(offset, targets, extended_arg) has_arg = (op >= op3.HAVE_ARGUMENT) if has_arg: @@ -539,7 +542,10 @@ class Scanner3(Scanner): label = self.fixed_jumps[offset] targets[label] = targets.get(label, []) + [offset] pass - pass + + extended_arg = 0 + pass # for loop + # DEBUG: if debug in ('both', 'after'): import pprint as pp @@ -569,7 +575,7 @@ class Scanner3(Scanner): if elem != code[i]: match = False break - i += op_size(code[i], self.opc) + i += instruction_size(code[i], self.opc) if match is True: i = self.prev_op[i] @@ -595,7 +601,8 @@ class Scanner3(Scanner): and stmt_offset not in pass_stmts): # If absolute jump occurs in forward direction or it takes off from the # same line as previous statement, this is not a statement - target = self.get_target(stmt_offset) + # FIXME: 0 isn't always correct + target = self.get_target(stmt_offset, 0) if target > stmt_offset or self.lines[last_stmt_offset].l_no == self.lines[stmt_offset].l_no: stmts.remove(stmt_offset) continue @@ -629,7 +636,7 @@ class Scanner3(Scanner): # Finish filling the list for last statement slist += [codelen] * (codelen-len(slist)) - def get_target(self, offset): + def get_target(self, offset, extended_arg): """ Get target offset for op located at given . """ @@ -646,10 +653,11 @@ class Scanner3(Scanner): pass pass target += rel_offset + target += extended_arg return target - def detect_control_flow(self, offset, targets): + def detect_control_flow(self, offset, targets, extended_arg): """ Detect structures and their boundaries to fix optimized jumps in python2.3+ @@ -681,23 +689,20 @@ class Scanner3(Scanner): # Try to find the jump_back instruction of the loop. # It could be a return instruction. - if self.version <= 3.5: - start = offset+3 - else: - start = offset+2 - target = self.get_target(offset) + start += instruction_size(op, self.opc) + target = self.get_target(offset, extended_arg) end = self.restrict_to_parent(target, parent) - self.setup_loop_targets[offset] = target self.setup_loops[target] = offset if target != end: self.fixed_jumps[offset] = end + (line_no, next_line_byte) = self.lines[offset] jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE, next_line_byte, False) if jump_back: - jump_forward_offset = jump_back+3 + jump_forward_offset = xdis.next_offset(code[jump_back], self.opc, jump_back) else: jump_forward_offset = None @@ -714,7 +719,7 @@ class Scanner3(Scanner): if not jump_back: return - jump_back += 2 + jump_back += 2 # FIXME ??? if_offset = None if code[self.prev_op[next_line_byte]] not in self.pop_jump_tf: if_offset = self.prev[next_line_byte] @@ -724,20 +729,22 @@ class Scanner3(Scanner): else: loop_type = 'for' target = next_line_byte - end = jump_back + 3 + end = xdis.next_offset(code[jump_back], self.opc, jump_back) else: - if self.get_target(jump_back) >= next_line_byte: + if self.get_target(jump_back, 0) >= next_line_byte: jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE, start, False) if end > jump_back+4 and self.is_jump_forward(end): if self.is_jump_forward(jump_back+4): - if self.get_target(jump_back+4) == self.get_target(end): + if self.get_target(jump_back+4, extended_arg) == self.get_target(end, extended_arg): self.fixed_jumps[offset] = jump_back+4 end = jump_back+4 elif target < offset: self.fixed_jumps[offset] = jump_back+4 end = jump_back+4 - target = self.get_target(jump_back) + # I think 0 right because jump_back has been adjusted for any EXTENDED_ARG + # it encounters + target = self.get_target(jump_back, 0) if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER): loop_type = 'for' @@ -747,23 +754,24 @@ class Scanner3(Scanner): if test == offset: loop_type = 'while 1' - elif self.code[test] in op3.hasjabs+op3.hasjrel: + elif self.code[test] in self.opc.JUMP_OPs: self.ignore_if.add(test) - test_target = self.get_target(test) + test_target = self.get_target(test, extended_arg) if test_target > (jump_back+3): jump_back = test_target self.not_continue.add(jump_back) self.loops.append(target) self.structs.append({'type': loop_type + '-loop', - 'start': target, - 'end': jump_back}) - if jump_back+3 != end: + 'start': target, + 'end': jump_back}) + after_jump_offset = xdis.next_offset(code[jump_back], self.opc, jump_back) + if after_jump_offset != end: self.structs.append({'type': loop_type + '-else', - 'start': jump_back+3, - 'end': end}) + 'start': after_jump_offset, + 'end': end}) elif op in self.pop_jump_tf: - start = offset + op_size(op, self.opc) - target = self.get_target(offset) + start = offset + instruction_size(op, self.opc) + target = self.get_target(offset, extended_arg) rtarget = self.restrict_to_parent(target, parent) prev_op = self.prev_op @@ -806,12 +814,12 @@ class Scanner3(Scanner): if match: is_jump_forward = self.is_jump_forward(pre_rtarget) if (is_jump_forward and pre_rtarget not in self.stmts and - self.restrict_to_parent(self.get_target(pre_rtarget), parent) == rtarget): + self.restrict_to_parent(self.get_target(pre_rtarget, extended_arg), parent) == rtarget): if (code[prev_op[pre_rtarget]] == self.opc.JUMP_ABSOLUTE and self.remove_mid_line_ifs([offset]) and - target == self.get_target(prev_op[pre_rtarget]) and + target == self.get_target(prev_op[pre_rtarget], extended_arg) and (prev_op[pre_rtarget] not in self.stmts or - self.get_target(prev_op[pre_rtarget]) > prev_op[pre_rtarget]) and + self.get_target(prev_op[pre_rtarget], extended_arg) > prev_op[pre_rtarget]) and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget], self.pop_jump_tf, target)))): pass elif (code[prev_op[pre_rtarget]] == self.opc.RETURN_VALUE @@ -830,7 +838,7 @@ class Scanner3(Scanner): self.opc.POP_JUMP_IF_FALSE) last_jump_good = True for j in jump_ifs: - if target == self.get_target(j): + if target == self.get_target(j, extended_arg): if self.lines[j].next == j + 3 and last_jump_good: fix = j break @@ -846,7 +854,7 @@ class Scanner3(Scanner): next = self.next_stmt[offset] if prev_op[next] == offset: pass - elif self.is_jump_forward(next) and target == self.get_target(next): + elif self.is_jump_forward(next) and target == self.get_target(next, extended_arg): if code[prev_op[next]] == self.opc.POP_JUMP_IF_FALSE: if (code[next] == self.opc.JUMP_FORWARD or target != rtarget @@ -855,7 +863,7 @@ class Scanner3(Scanner): self.fixed_jumps[offset] = prev_op[next] return elif (code[next] == self.opc.JUMP_ABSOLUTE and self.is_jump_forward(target) and - self.get_target(target) == self.get_target(next)): + self.get_target(target, extended_arg) == self.get_target(next, extended_arg)): self.fixed_jumps[offset] = prev_op[next] return @@ -887,7 +895,7 @@ class Scanner3(Scanner): # like whether the target is "END_FINALLY" # or if the condition jump is to a forward location if self.is_jump_forward(pre_rtarget): - if_end = self.get_target(pre_rtarget) + if_end = self.get_target(pre_rtarget, 0) # If the jump target is back, we are looping if (if_end < pre_rtarget and @@ -914,7 +922,7 @@ class Scanner3(Scanner): 'start': rtarget, 'end': end}) self.else_start[rtarget] = end - elif self.is_jump_back(pre_rtarget): + elif self.is_jump_back(pre_rtarget, 0): if_end = rtarget self.structs.append({'type': 'if-then', 'start': start, @@ -941,9 +949,9 @@ class Scanner3(Scanner): # not from SETUP_EXCEPT next_op = rtarget if code[next_op] == self.opc.POP_BLOCK: - next_op += op_size(self.code[next_op], self.opc) + next_op += instruction_size(self.code[next_op], self.opc) if code[next_op] == self.opc.JUMP_ABSOLUTE: - next_op += op_size(self.code[next_op], self.opc) + next_op += instruction_size(self.code[next_op], self.opc) if next_op in targets: for try_op in targets[next_op]: come_from_op = code[try_op] @@ -962,12 +970,12 @@ class Scanner3(Scanner): self.fixed_jumps[offset] = rtarget elif op == self.opc.SETUP_EXCEPT: - target = self.get_target(offset) + target = self.get_target(offset, extended_arg) end = self.restrict_to_parent(target, parent) self.fixed_jumps[offset] = end elif op == self.opc.POP_EXCEPT: next_offset = xdis.next_offset(op, self.opc, offset) - target = self.get_target(next_offset) + target = self.get_target(next_offset, extended_arg) if target > next_offset: next_op = code[next_offset] if (self.opc.JUMP_ABSOLUTE == next_op and @@ -976,11 +984,11 @@ class Scanner3(Scanner): self.except_targets[target] = next_offset elif op == self.opc.SETUP_FINALLY: - target = self.get_target(offset) + target = self.get_target(offset, extended_arg) end = self.restrict_to_parent(target, parent) self.fixed_jumps[offset] = end elif op in self.jump_if_pop: - target = self.get_target(offset) + target = self.get_target(offset, extended_arg) if target > offset: unop_target = self.last_instr(offset, target, self.opc.JUMP_FORWARD, target) if unop_target and code[unop_target+3] != self.opc.ROT_TWO: @@ -1004,7 +1012,7 @@ class Scanner3(Scanner): # If we have: # JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x: # then RETURN_VALUE is not RETURN_END_IF - rtarget = self.get_target(offset) + rtarget = self.get_target(offset, extended_arg) rtarget_prev = self.prev[rtarget] if (code[rtarget_prev] == self.opc.RETURN_VALUE and rtarget_prev in self.return_end_ifs): @@ -1017,7 +1025,7 @@ class Scanner3(Scanner): pass return - def is_jump_back(self, offset): + def is_jump_back(self, offset, extended_arg): """ Return True if the code at offset is some sort of jump back. That is, it is ether "JUMP_FORWARD" or an absolute jump that @@ -1025,7 +1033,7 @@ class Scanner3(Scanner): """ if self.code[offset] != self.opc.JUMP_ABSOLUTE: return False - return offset > self.get_target(offset) + return offset > self.get_target(offset, extended_arg) def next_except_jump(self, start): """ diff --git a/uncompyle6/scanners/scanner30.py b/uncompyle6/scanners/scanner30.py index 94c5605c..4193b5cd 100644 --- a/uncompyle6/scanners/scanner30.py +++ b/uncompyle6/scanners/scanner30.py @@ -10,9 +10,8 @@ from __future__ import print_function # bytecode verification, verify(), uses JUMP_OPs from here from xdis.opcodes import opcode_30 as opc -from xdis.bytecode import op_size - -JUMP_OPS = opc.JUMP_OPS +from xdis.bytecode import instruction_size, next_offset +import xdis JUMP_TF = frozenset([opc.JUMP_IF_FALSE, opc.JUMP_IF_TRUE]) @@ -24,7 +23,7 @@ class Scanner30(Scanner3): return pass - def detect_control_flow(self, offset, targets): + def detect_control_flow(self, offset, targets, extended_arg): """ Detect structures and their boundaries to fix optimized jumps Python 3.0 is more like Python 2.6 than it is Python 3.x. @@ -55,8 +54,8 @@ class Scanner30(Scanner3): # Try to find the jump_back instruction of the loop. # It could be a return instruction. - start = offset+3 - target = self.get_target(offset) + start += instruction_size(op, self.opc) + target = self.get_target(offset, extended_arg) end = self.restrict_to_parent(target, parent) self.setup_loop_targets[offset] = target self.setup_loops[target] = offset @@ -69,7 +68,7 @@ class Scanner30(Scanner3): next_line_byte, False) if jump_back: - jump_forward_offset = jump_back+3 + jump_forward_offset = next_offset(code[jump_back], self.opc, jump_back) else: jump_forward_offset = None @@ -99,7 +98,7 @@ class Scanner30(Scanner3): target = next_line_byte end = jump_back + 3 else: - if self.get_target(jump_back) >= next_line_byte: + if self.get_target(jump_back, 0) >= next_line_byte: jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE, start, False) if end > jump_back+4 and self.is_jump_forward(end): if self.is_jump_forward(jump_back+4): @@ -110,7 +109,7 @@ class Scanner30(Scanner3): self.fixed_jumps[offset] = jump_back+4 end = jump_back+4 - target = self.get_target(jump_back) + target = self.get_target(jump_back, 0) if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER): loop_type = 'for' @@ -120,7 +119,7 @@ class Scanner30(Scanner3): if test == offset: loop_type = 'while 1' - elif self.code[test] in opc.JUMP_OPs: + elif self.code[test] in self.opc.JUMP_OPs: self.ignore_if.add(test) test_target = self.get_target(test) if test_target > (jump_back+3): @@ -128,15 +127,16 @@ class Scanner30(Scanner3): self.not_continue.add(jump_back) self.loops.append(target) self.structs.append({'type': loop_type + '-loop', - 'start': target, - 'end': jump_back}) - if jump_back+3 != end: + 'start': target, + 'end': jump_back}) + after_jump_offset = xdis.next_offset(code[jump_back], self.opc, jump_back) + if after_jump_offset != end: self.structs.append({'type': loop_type + '-else', - 'start': jump_back+3, - 'end': end}) - elif op in JUMP_TF: - start = offset + op_size(op, self.opc) - target = self.get_target(offset) + 'start': after_jump_offset, + 'end': end}) + elif op in self.pop_jump_tf: + start = offset + instruction_size(op, self.opc) + target = self.get_target(offset, extended_arg) rtarget = self.restrict_to_parent(target, parent) prev_op = self.prev_op @@ -256,7 +256,7 @@ class Scanner30(Scanner3): # like whether the target is "END_FINALLY" # or if the condition jump is to a forward location if self.is_jump_forward(pre_rtarget): - if_end = self.get_target(pre_rtarget) + if_end = self.get_target(pre_rtarget, 0) # If the jump target is back, we are looping if (if_end < pre_rtarget and @@ -280,7 +280,7 @@ class Scanner30(Scanner3): # 'start': rtarget, # 'end': end}) # self.else_start[rtarget] = end - elif self.is_jump_back(pre_rtarget): + elif self.is_jump_back(pre_rtarget, 0): if_end = rtarget self.structs.append({'type': 'if-then', 'start': start, @@ -307,9 +307,9 @@ class Scanner30(Scanner3): # not from SETUP_EXCEPT next_op = rtarget if code[next_op] == self.opc.POP_BLOCK: - next_op += op_size(self.code[next_op], self.opc) + next_op += instruction_size(self.code[next_op], self.opc) if code[next_op] == self.opc.JUMP_ABSOLUTE: - next_op += op_size(self.code[next_op], self.opc) + next_op += instruction_size(self.code[next_op], self.opc) if next_op in targets: for try_op in targets[next_op]: come_from_op = code[try_op] @@ -329,7 +329,7 @@ class Scanner30(Scanner3): end = self.restrict_to_parent(target, parent) self.fixed_jumps[offset] = end elif op == self.opc.SETUP_FINALLY: - target = self.get_target(offset) + target = self.get_target(offset, extended_arg) end = self.restrict_to_parent(target, parent) self.fixed_jumps[offset] = end elif op in self.jump_if_pop: From 95268cb14e1436f2b6661380dca973b24010f31c Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 29 Oct 2017 21:34:34 -0400 Subject: [PATCH 08/41] In verify, JUMP_BACK is the same as CONTINUE... at least for now. See FIXME in verify --- uncompyle6/verify.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/uncompyle6/verify.py b/uncompyle6/verify.py index f992164d..248653fc 100755 --- a/uncompyle6/verify.py +++ b/uncompyle6/verify.py @@ -308,6 +308,11 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, and int(tokens1[i1].offset) not in targets1: i1 += 1 continue + elif tokens1[i1].kind == 'JUMP_BACK' and tokens2[i2].kind == 'CONTINUE': + # FIXME: should make sure that offset is inside loop, not outside of it + i1 += 2 + i2 += 2 + continue elif tokens1[i1].kind == 'JUMP_FORWARD' and tokens2[i2].kind == 'JUMP_BACK' \ and tokens1[i1+1].kind == 'JUMP_BACK' and tokens2[i2+1].kind == 'JUMP_BACK' \ and int(tokens1[i1].pattr) == int(tokens1[i1].offset) + 3: From b83d6c64edd7346df737a48f87ee41b02c31d2aa Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 29 Oct 2017 23:52:58 -0400 Subject: [PATCH 09/41] Python 3.6 control flow bug... Much more is needed, but it's a start --- test/bytecode_3.6/10_extended_arg_loop.pyc | Bin 0 -> 964 bytes .../bug36/10_extended_arg_loop.py | 49 ++++++++++++++++++ uncompyle6/parsers/parse36.py | 5 ++ uncompyle6/scanners/scanner3.py | 16 ++++-- 4 files changed, 66 insertions(+), 4 deletions(-) create mode 100644 test/bytecode_3.6/10_extended_arg_loop.pyc create mode 100644 test/simple_source/bug36/10_extended_arg_loop.py diff --git a/test/bytecode_3.6/10_extended_arg_loop.pyc b/test/bytecode_3.6/10_extended_arg_loop.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ac0dfa517a34e6a041ef6156bfa743374e38fbb0 GIT binary patch literal 964 zcmYjQ&rcIU6n-;1U6!S-1qlWdlf7ut7zHFT#t>p+IG7kDKmzQ+&C;2+Te?4(nbnrG zCnS3G-|%1BtEYN09z1Z-H(SKc>_YJ~udh;%uOX;SB8jI|ni zgj-uM_%%EpA)|aTG6p?sgzRH$WU04%kSueC7z--K_w3%>B@p1{s>B>#U4`B}pSvUk zX7?Py`F#f3J#*z3f;9y8;ADy|Aog>Lap#tu${)IlE9|ZzsPXxdt!~t;z?gBjcgJM8 z&Ebl!0%|lW99}d1N0kw~hZuo`-u7vnxnGI2XS*Mxf+OGyWe31}tMB@BW8J6E)+JsN zT)4Zbm&vpv!`OEZg^Z&l)PhgS2)nEVy}Tygq=M}ye{e_>H5Us@4OPLiPtjidmf z0Lwo3uFR7_wPciOpS4mc{Dv5aR<5HIaUJSh`SV!S5=y1ABrsvnO5-?8xbj()=yqYn zs#DCpdAB2FDvRoVp6DnR(-rGSN*BzuE1Y)J7fBe4g7wlUDcpl09xL7%M7n9?FL>JV zo&Vb6FiNtaDe;$*RK{UH&^K3!^88AMm$Xqbt8X9|@wnHIRTO7^5vVkmEwOf*cQ&7| zZL9}kphdz3H>VEzX_`IFh6OVXs#vDB(E(+|Msq18l(;wI offset: - self.fixed_jumps[offset] = rtarget + if self.version >= 3.6: + if target > offset: + self.fixed_jumps[offset] = target + pass + else: + # FIXME: This is probably a bug in < 3.6 and we should + # instead use the above code. But until we smoke things + # out we'll stick with it. + if rtarget > offset: + self.fixed_jumps[offset] = rtarget elif op == self.opc.SETUP_EXCEPT: target = self.get_target(offset, extended_arg) From 54332ddffb0cb4cd1cb83aa2dc5781a7f7588ccc Mon Sep 17 00:00:00 2001 From: Mike Mason Date: Fri, 3 Nov 2017 09:05:52 -0500 Subject: [PATCH 10/41] Corrected python3 import from queue --- uncompyle6/bin/uncompile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uncompyle6/bin/uncompile.py b/uncompyle6/bin/uncompile.py index 2f861de6..45828e43 100755 --- a/uncompyle6/bin/uncompile.py +++ b/uncompyle6/bin/uncompile.py @@ -175,7 +175,7 @@ def main_bin(): try: from Queue import Empty except ImportError: - from Queue import Empty + from queue import Empty fqueue = Queue(len(files)+numproc) for f in files: From 6746e5167dbb6a324d2eb012669efb1daf272e33 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 4 Nov 2017 11:13:55 -0400 Subject: [PATCH 11/41] Add Python 3.6.3 scanner lookup --- uncompyle6/scanner.py | 1 + uncompyle6/semantics/fragments.py | 1 + 2 files changed, 2 insertions(+) diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 1347762d..acbe7ed9 100755 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -320,5 +320,6 @@ if __name__ == "__main__": import inspect, uncompyle6 co = inspect.currentframe().f_code scanner = get_scanner('2.7.13', True) + scanner = get_scanner(sys.version[:5], False) scanner = get_scanner(uncompyle6.PYTHON_VERSION, IS_PYPY, True) tokens, customize = scanner.ingest(co, {}) diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index 2010f4f8..1d6fe1f9 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -134,6 +134,7 @@ class FragmentsWalker(pysource.SourceWalker, object): # FIXME: is there a better way? global MAP_DIRECT_FRAGMENT MAP_DIRECT_FRAGMENT = dict(TABLE_DIRECT, **TABLE_DIRECT_FRAGMENT), + return f = property(lambda s: s.params['f'], lambda s, x: s.params.__setitem__('f', x), From 4f0a668b7c7572e926186ce440d9f3ca2ee4a338 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 4 Nov 2017 12:29:27 -0400 Subject: [PATCH 12/41] Add flag to tolerate deparse errors... and keep going. The fragment parser should ignore errors in nested function definitions --- uncompyle6/semantics/fragments.py | 5 +++-- uncompyle6/semantics/make_function.py | 16 ++++++++++------ uncompyle6/semantics/pysource.py | 6 +++++- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index 1d6fe1f9..7c04618c 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -114,11 +114,12 @@ class FragmentsWalker(pysource.SourceWalker, object): def __init__(self, version, scanner, showast=False, debug_parser=PARSER_DEFAULT_DEBUG, - compile_mode='exec', is_pypy=False): + compile_mode='exec', is_pypy=False, tolerate_errors=True): pysource.SourceWalker.__init__(self, version=version, out=StringIO(), scanner=scanner, showast=showast, debug_parser=debug_parser, - compile_mode=compile_mode, is_pypy=is_pypy) + compile_mode=compile_mode, is_pypy=is_pypy, + tolerate_errors=tolerate_errors) # hide_internal suppresses displaying the additional instructions that sometimes # exist in code but but were not written in the source code. diff --git a/uncompyle6/semantics/make_function.py b/uncompyle6/semantics/make_function.py index 99e4a9ec..bcdb798a 100644 --- a/uncompyle6/semantics/make_function.py +++ b/uncompyle6/semantics/make_function.py @@ -8,6 +8,7 @@ from uncompyle6.scanner import Code from uncompyle6.parsers.astnode import AST from uncompyle6 import PYTHON3 from uncompyle6.semantics.parser_error import ParserError +from uncompyle6.parser import ParserError as ParserError2 from uncompyle6.semantics.helper import print_docstring if PYTHON3: @@ -128,9 +129,10 @@ def make_function3_annotate(self, node, isLambda, nested=1, code._customize, isLambda = isLambda, noneInNames = ('None' in code.co_names)) - except ParserError as p: + except (ParserError, ParserError2) as p: self.write(str(p)) - self.ERROR = p + if not self.tolerate_errors: + self.ERROR = p return kw_pairs = args_node.attr[1] @@ -356,9 +358,10 @@ def make_function2(self, node, isLambda, nested=1, codeNode=None): code._customize, isLambda = isLambda, noneInNames = ('None' in code.co_names)) - except ParserError as p: + except (ParserError, ParserError2) as p: self.write(str(p)) - self.ERROR = p + if not self.tolerate_errors: + self.ERROR = p return kw_pairs = args_node.attr[1] if self.version >= 3.0 else 0 @@ -531,9 +534,10 @@ def make_function3(self, node, isLambda, nested=1, codeNode=None): code._customize, isLambda = isLambda, noneInNames = ('None' in code.co_names)) - except ParserError as p: + except (ParserError, ParserError2) as p: self.write(str(p)) - self.ERROR = p + if not self.tolerate_errors: + self.ERROR = p return kw_pairs = args_node.attr[1] if self.version >= 3.0 else 0 diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 245f8ca1..3a99af93 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -166,7 +166,7 @@ class SourceWalker(GenericASTTraversal, object): def __init__(self, version, out, scanner, showast=False, debug_parser=PARSER_DEFAULT_DEBUG, compile_mode='exec', is_pypy=False, - linestarts={}): + linestarts={}, tolerate_errors=False): """version is the Python version (a float) of the Python dialect of both the AST and language we should produce. @@ -214,6 +214,10 @@ class SourceWalker(GenericASTTraversal, object): self.line_number = 0 self.ast_errors = [] + # Sometimes we may want to continue decompiling when there are errors + # and sometimes not + self.tolerate_errors = tolerate_errors + # hide_internal suppresses displaying the additional instructions that sometimes # exist in code but but were not written in the source code. # An example is: From dea17cd7f18f8e0916b085c53ea443a8c8a6223c Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 6 Nov 2017 00:38:22 -0500 Subject: [PATCH 13/41] xdis _disassemble->disassemble --- uncompyle6/scanners/scanner2.py | 2 +- uncompyle6/scanners/scanner26.py | 2 +- uncompyle6/scanners/scanner3.py | 2 +- uncompyle6/scanners/scanner36.py | 341 ++++++++++++++++++++++++++++++- 4 files changed, 343 insertions(+), 4 deletions(-) diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 538666d6..a006088e 100644 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -91,7 +91,7 @@ class Scanner2(Scanner): from xdis.bytecode import Bytecode bytecode = Bytecode(co, self.opc) for instr in bytecode.get_instructions(co): - print(instr._disassemble()) + print(instr.disassemble()) # list of tokens/instructions tokens = [] diff --git a/uncompyle6/scanners/scanner26.py b/uncompyle6/scanners/scanner26.py index 4936273c..b2d49b02 100755 --- a/uncompyle6/scanners/scanner26.py +++ b/uncompyle6/scanners/scanner26.py @@ -93,7 +93,7 @@ class Scanner26(scan.Scanner2): from xdis.bytecode import Bytecode bytecode = Bytecode(co, self.opc) for instr in bytecode.get_instructions(co): - print(instr._disassemble()) + print(instr.disassemble()) # Container for tokens tokens = [] diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index fb531367..e511a925 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -162,7 +162,7 @@ class Scanner3(Scanner): if show_asm in ('both', 'before'): bytecode = Bytecode(co, self.opc) for instr in bytecode.get_instructions(co): - print(instr._disassemble()) + print(instr.disassemble()) # list of tokens/instructions tokens = [] diff --git a/uncompyle6/scanners/scanner36.py b/uncompyle6/scanners/scanner36.py index 08746908..d3b5f862 100644 --- a/uncompyle6/scanners/scanner36.py +++ b/uncompyle6/scanners/scanner36.py @@ -13,6 +13,12 @@ from __future__ import print_function from uncompyle6.scanners.scanner3 import Scanner3 +from uncompyle6.scanner import Token, parse_fn_counts +from xdis.code import iscode +from xdis.bytecode import Bytecode +import xdis +from array import array + # bytecode verification, verify(), uses JUMP_OPS from here from xdis.opcodes import opcode_36 as opc JUMP_OPS = opc.JUMP_OPS @@ -24,7 +30,7 @@ class Scanner36(Scanner3): return def ingest(self, co, classname=None, code_objects={}, show_asm=None): - tokens, customize = Scanner3.ingest(self, co, classname, code_objects, show_asm) + tokens, customize = self.ingest_internal(co, classname, code_objects, show_asm) for t in tokens: # The lowest bit of flags indicates whether the # var-keyword argument is placed at the top of the stack @@ -40,6 +46,339 @@ class Scanner36(Scanner3): pass return tokens, customize + def ingest_internal(self, co, classname=None, code_objects={}, show_asm=None): + """ + Pick out tokens from an uncompyle6 code object, and transform them, + returning a list of uncompyle6 'Token's. + + The transformations are made to assist the deparsing grammar. + Specificially: + - various types of LOAD_CONST's are categorized in terms of what they load + - COME_FROM instructions are added to assist parsing control structures + - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments + + Also, when we encounter certain tokens, we add them to a set which will cause custom + grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST + cause specific rules for the specific number of arguments they take. + """ + + # FIXME: remove this when all subsidiary functions have been removed. + # We should be able to get everything from the self.insts list. + self.code = array('B', co.co_code) + + show_asm = self.show_asm if not show_asm else show_asm + # show_asm = 'both' + if show_asm in ('both', 'before'): + bytecode = Bytecode(co, self.opc) + for instr in bytecode.get_instructions(co): + print(instr.disassemble()) + + # list of tokens/instructions + tokens = [] + + # "customize" is a dict whose keys are nonterminals + # and the value is the argument stack entries for that + # nonterminal. The count is a little hoaky. It is mostly + # not used, but sometimes it is. + customize = {} + if self.is_pypy: + customize['PyPy'] = 0 + + self.build_lines_data(co) + self.build_prev_op() + + bytecode = Bytecode(co, self.opc) + + # FIXME: put as its own method? + # Scan for assertions. Later we will + # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'. + # 'LOAD_ASSERT' is used in assert statements. + self.load_asserts = set() + self.insts = list(bytecode) + n = len(self.insts) + for i, inst in enumerate(self.insts): + # We need to detect the difference between + # "raise AssertionError" and "assert" + # If we have a JUMP_FORWARD after the + # RAISE_VARARGS then we have a "raise" statement + # else we have an "assert" statement. + if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n: + next_inst = self.insts[i+1] + if (next_inst.opname == 'LOAD_GLOBAL' and + next_inst.argval == 'AssertionError'): + if (i + 2 < n and self.insts[i+2].opname.startswith('RAISE_VARARGS')): + self.load_asserts.add(next_inst.offset) + pass + pass + + # Get jump targets + # Format: {target offset: [jump offsets]} + jump_targets = self.find_jump_targets(show_asm) + # print("XXX2", jump_targets) + last_op_was_break = False + + for i, inst in enumerate(bytecode): + + argval = inst.argval + op = inst.opcode + if op == self.opc.EXTENDED_ARG: + continue + + if inst.offset in jump_targets: + jump_idx = 0 + # We want to process COME_FROMs to the same offset to be in *descending* + # offset order so we have the larger range or biggest instruction interval + # last. (I think they are sorted in increasing order, but for safety + # we sort them). That way, specific COME_FROM tags will match up + # properly. For example, a "loop" with an "if" nested in it should have the + # "loop" tag last so the grammar rule matches that properly. + for jump_offset in sorted(jump_targets[inst.offset], reverse=True): + come_from_name = 'COME_FROM' + opname = self.opname_for_offset(jump_offset) + if opname.startswith('SETUP_'): + come_from_type = opname[len('SETUP_'):] + come_from_name = 'COME_FROM_%s' % come_from_type + pass + elif inst.offset in self.except_targets: + come_from_name = 'COME_FROM_EXCEPT_CLAUSE' + tokens.append(Token(come_from_name, + None, repr(jump_offset), + offset='%s_%s' % (inst.offset, jump_idx), + has_arg = True, opc=self.opc)) + jump_idx += 1 + pass + pass + elif inst.offset in self.else_start: + end_offset = self.else_start[inst.offset] + tokens.append(Token('ELSE', + None, repr(end_offset), + offset='%s' % (inst.offset), + has_arg = True, opc=self.opc)) + + pass + + pattr = inst.argrepr + opname = inst.opname + + if opname in ['LOAD_CONST']: + const = argval + if iscode(const): + if const.co_name == '': + opname = 'LOAD_LAMBDA' + elif const.co_name == '': + opname = 'LOAD_GENEXPR' + elif const.co_name == '': + opname = 'LOAD_DICTCOMP' + elif const.co_name == '': + opname = 'LOAD_SETCOMP' + elif const.co_name == '': + opname = 'LOAD_LISTCOMP' + # verify() uses 'pattr' for comparison, since 'attr' + # now holds Code(const) and thus can not be used + # for comparison (todo: think about changing this) + # pattr = 'code_object @ 0x%x %s->%s' %\ + # (id(const), const.co_filename, const.co_name) + pattr = '' + else: + pattr = const + pass + elif opname in ('MAKE_FUNCTION', 'MAKE_CLOSURE'): + if self.version >= 3.6: + # 3.6+ doesn't have MAKE_CLOSURE, so opname == 'MAKE_FUNCTION' + flags = argval + opname = 'MAKE_FUNCTION_%d' % (flags) + attr = [] + for flag in self.MAKE_FUNCTION_FLAGS: + bit = flags & 1 + if bit: + if pattr: + pattr += ", " + flag + else: + pattr += flag + attr.append(bit) + flags >>= 1 + attr = attr[:4] # remove last value: attr[5] == False + else: + pos_args, name_pair_args, annotate_args = parse_fn_counts(inst.argval) + pattr = ("%d positional, %d keyword pair, %d annotated" % + (pos_args, name_pair_args, annotate_args)) + if name_pair_args > 0: + opname = '%s_N%d' % (opname, name_pair_args) + pass + if annotate_args > 0: + opname = '%s_A_%d' % (opname, annotate_args) + pass + opname = '%s_%d' % (opname, pos_args) + attr = (pos_args, name_pair_args, annotate_args) + tokens.append( + Token( + opname = opname, + attr = attr, + pattr = pattr, + offset = inst.offset, + linestart = inst.starts_line, + op = op, + has_arg = inst.has_arg, + opc = self.opc + ) + ) + continue + elif op in self.varargs_ops: + pos_args = argval + if self.is_pypy and not pos_args and opname == 'BUILD_MAP': + opname = 'BUILD_MAP_n' + else: + opname = '%s_%d' % (opname, pos_args) + elif self.is_pypy and opname in ('CALL_METHOD', 'JUMP_IF_NOT_DEBUG'): + # The value in the dict is in special cases in semantic actions, such + # as CALL_FUNCTION. The value is not used in these cases, so we put + # in arbitrary value 0. + customize[opname] = 0 + elif opname == 'UNPACK_EX': + # FIXME: try with scanner and parser by + # changing argval + before_args = argval & 0xFF + after_args = (argval >> 8) & 0xff + pattr = "%d before vararg, %d after" % (before_args, after_args) + argval = (before_args, after_args) + opname = '%s_%d+%d' % (opname, before_args, after_args) + + elif op == self.opc.JUMP_ABSOLUTE: + # Further classify JUMP_ABSOLUTE into backward jumps + # which are used in loops, and "CONTINUE" jumps which + # may appear in a "continue" statement. The loop-type + # and continue-type jumps will help us classify loop + # boundaries The continue-type jumps help us get + # "continue" statements with would otherwise be turned + # into a "pass" statement because JUMPs are sometimes + # ignored in rules as just boundary overhead. In + # comprehensions we might sometimes classify JUMP_BACK + # as CONTINUE, but that's okay since we add a grammar + # rule for that. + pattr = argval + # FIXME: 0 isn't always correct + target = self.get_target(inst.offset, 0) + if target <= inst.offset: + next_opname = self.opname[self.code[inst.offset+3]] + if (inst.offset in self.stmts and + (self.version != 3.0 or (hasattr(inst, 'linestart'))) and + (next_opname not in ('END_FINALLY', 'POP_BLOCK', + # Python 3.0 only uses POP_TOP + 'POP_TOP'))): + opname = 'CONTINUE' + else: + opname = 'JUMP_BACK' + # FIXME: this is a hack to catch stuff like: + # if x: continue + # the "continue" is not on a new line. + # There are other situations where we don't catch + # CONTINUE as well. + if tokens[-1].kind == 'JUMP_BACK' and tokens[-1].attr <= argval: + if tokens[-2].kind == 'BREAK_LOOP': + del tokens[-1] + else: + # intern is used because we are changing the *previous* token + tokens[-1].kind = intern('CONTINUE') + if last_op_was_break and opname == 'CONTINUE': + last_op_was_break = False + continue + elif op == self.opc.RETURN_VALUE: + if inst.offset in self.return_end_ifs: + opname = 'RETURN_END_IF' + elif inst.offset in self.load_asserts: + opname = 'LOAD_ASSERT' + + last_op_was_break = opname == 'BREAK_LOOP' + tokens.append( + Token( + opname = opname, + attr = argval, + pattr = pattr, + offset = inst.offset, + linestart = inst.starts_line, + op = op, + has_arg = inst.has_arg, + opc = self.opc + ) + ) + pass + + if show_asm in ('both', 'after'): + for t in tokens: + print(t) + print() + return tokens, customize + + def find_jump_targets(self, debug): + """ + Detect all offsets in a byte code which are jump targets + where we might insert a COME_FROM instruction. + + Return the list of offsets. + + Return the list of offsets. An instruction can be jumped + to in from multiple instructions. + """ + code = self.code + n = len(code) + self.structs = [{'type': 'root', + 'start': 0, + 'end': n-1}] + + # All loop entry points + self.loops = [] + + # Map fixed jumps to their real destination + self.fixed_jumps = {} + self.except_targets = {} + self.ignore_if = set() + self.build_statement_indices() + self.else_start = {} + + # Containers filled by detect_control_flow() + self.not_continue = set() + self.return_end_ifs = set() + self.setup_loop_targets = {} # target given setup_loop offset + self.setup_loops = {} # setup_loop offset given target + + targets = {} + extended_arg = 0 + for i, inst in enumerate(self.insts): + offset = inst.offset + op = inst.opcode + + self.detect_control_flow(offset, targets, extended_arg) + + if inst.has_arg: + label = self.fixed_jumps.get(offset) + oparg = inst.arg + next_offset = xdis.next_offset(op, self.opc, offset) + + if label is None: + if op in self.opc.hasjrel and op != self.opc.FOR_ITER: + label = next_offset + oparg + elif op in self.opc.hasjabs: + if op in self.jump_if_pop: + if oparg > offset: + label = oparg + + if label is not None and label != -1: + targets[label] = targets.get(label, []) + [offset] + elif op == self.opc.END_FINALLY and offset in self.fixed_jumps: + label = self.fixed_jumps[offset] + targets[label] = targets.get(label, []) + [offset] + pass + + extended_arg = 0 + pass # for loop + + # DEBUG: + if debug in ('both', 'after'): + import pprint as pp + pp.pprint(self.structs) + + return targets + pass if __name__ == "__main__": From 6dbdaedf7abda769daef2a19a0795c177d7b59a4 Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 6 Nov 2017 00:45:04 -0500 Subject: [PATCH 14/41] Revert change that should have been in a branch --- uncompyle6/scanners/scanner36.py | 341 +------------------------------ 1 file changed, 1 insertion(+), 340 deletions(-) diff --git a/uncompyle6/scanners/scanner36.py b/uncompyle6/scanners/scanner36.py index d3b5f862..08746908 100644 --- a/uncompyle6/scanners/scanner36.py +++ b/uncompyle6/scanners/scanner36.py @@ -13,12 +13,6 @@ from __future__ import print_function from uncompyle6.scanners.scanner3 import Scanner3 -from uncompyle6.scanner import Token, parse_fn_counts -from xdis.code import iscode -from xdis.bytecode import Bytecode -import xdis -from array import array - # bytecode verification, verify(), uses JUMP_OPS from here from xdis.opcodes import opcode_36 as opc JUMP_OPS = opc.JUMP_OPS @@ -30,7 +24,7 @@ class Scanner36(Scanner3): return def ingest(self, co, classname=None, code_objects={}, show_asm=None): - tokens, customize = self.ingest_internal(co, classname, code_objects, show_asm) + tokens, customize = Scanner3.ingest(self, co, classname, code_objects, show_asm) for t in tokens: # The lowest bit of flags indicates whether the # var-keyword argument is placed at the top of the stack @@ -46,339 +40,6 @@ class Scanner36(Scanner3): pass return tokens, customize - def ingest_internal(self, co, classname=None, code_objects={}, show_asm=None): - """ - Pick out tokens from an uncompyle6 code object, and transform them, - returning a list of uncompyle6 'Token's. - - The transformations are made to assist the deparsing grammar. - Specificially: - - various types of LOAD_CONST's are categorized in terms of what they load - - COME_FROM instructions are added to assist parsing control structures - - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments - - Also, when we encounter certain tokens, we add them to a set which will cause custom - grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST - cause specific rules for the specific number of arguments they take. - """ - - # FIXME: remove this when all subsidiary functions have been removed. - # We should be able to get everything from the self.insts list. - self.code = array('B', co.co_code) - - show_asm = self.show_asm if not show_asm else show_asm - # show_asm = 'both' - if show_asm in ('both', 'before'): - bytecode = Bytecode(co, self.opc) - for instr in bytecode.get_instructions(co): - print(instr.disassemble()) - - # list of tokens/instructions - tokens = [] - - # "customize" is a dict whose keys are nonterminals - # and the value is the argument stack entries for that - # nonterminal. The count is a little hoaky. It is mostly - # not used, but sometimes it is. - customize = {} - if self.is_pypy: - customize['PyPy'] = 0 - - self.build_lines_data(co) - self.build_prev_op() - - bytecode = Bytecode(co, self.opc) - - # FIXME: put as its own method? - # Scan for assertions. Later we will - # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'. - # 'LOAD_ASSERT' is used in assert statements. - self.load_asserts = set() - self.insts = list(bytecode) - n = len(self.insts) - for i, inst in enumerate(self.insts): - # We need to detect the difference between - # "raise AssertionError" and "assert" - # If we have a JUMP_FORWARD after the - # RAISE_VARARGS then we have a "raise" statement - # else we have an "assert" statement. - if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n: - next_inst = self.insts[i+1] - if (next_inst.opname == 'LOAD_GLOBAL' and - next_inst.argval == 'AssertionError'): - if (i + 2 < n and self.insts[i+2].opname.startswith('RAISE_VARARGS')): - self.load_asserts.add(next_inst.offset) - pass - pass - - # Get jump targets - # Format: {target offset: [jump offsets]} - jump_targets = self.find_jump_targets(show_asm) - # print("XXX2", jump_targets) - last_op_was_break = False - - for i, inst in enumerate(bytecode): - - argval = inst.argval - op = inst.opcode - if op == self.opc.EXTENDED_ARG: - continue - - if inst.offset in jump_targets: - jump_idx = 0 - # We want to process COME_FROMs to the same offset to be in *descending* - # offset order so we have the larger range or biggest instruction interval - # last. (I think they are sorted in increasing order, but for safety - # we sort them). That way, specific COME_FROM tags will match up - # properly. For example, a "loop" with an "if" nested in it should have the - # "loop" tag last so the grammar rule matches that properly. - for jump_offset in sorted(jump_targets[inst.offset], reverse=True): - come_from_name = 'COME_FROM' - opname = self.opname_for_offset(jump_offset) - if opname.startswith('SETUP_'): - come_from_type = opname[len('SETUP_'):] - come_from_name = 'COME_FROM_%s' % come_from_type - pass - elif inst.offset in self.except_targets: - come_from_name = 'COME_FROM_EXCEPT_CLAUSE' - tokens.append(Token(come_from_name, - None, repr(jump_offset), - offset='%s_%s' % (inst.offset, jump_idx), - has_arg = True, opc=self.opc)) - jump_idx += 1 - pass - pass - elif inst.offset in self.else_start: - end_offset = self.else_start[inst.offset] - tokens.append(Token('ELSE', - None, repr(end_offset), - offset='%s' % (inst.offset), - has_arg = True, opc=self.opc)) - - pass - - pattr = inst.argrepr - opname = inst.opname - - if opname in ['LOAD_CONST']: - const = argval - if iscode(const): - if const.co_name == '': - opname = 'LOAD_LAMBDA' - elif const.co_name == '': - opname = 'LOAD_GENEXPR' - elif const.co_name == '': - opname = 'LOAD_DICTCOMP' - elif const.co_name == '': - opname = 'LOAD_SETCOMP' - elif const.co_name == '': - opname = 'LOAD_LISTCOMP' - # verify() uses 'pattr' for comparison, since 'attr' - # now holds Code(const) and thus can not be used - # for comparison (todo: think about changing this) - # pattr = 'code_object @ 0x%x %s->%s' %\ - # (id(const), const.co_filename, const.co_name) - pattr = '' - else: - pattr = const - pass - elif opname in ('MAKE_FUNCTION', 'MAKE_CLOSURE'): - if self.version >= 3.6: - # 3.6+ doesn't have MAKE_CLOSURE, so opname == 'MAKE_FUNCTION' - flags = argval - opname = 'MAKE_FUNCTION_%d' % (flags) - attr = [] - for flag in self.MAKE_FUNCTION_FLAGS: - bit = flags & 1 - if bit: - if pattr: - pattr += ", " + flag - else: - pattr += flag - attr.append(bit) - flags >>= 1 - attr = attr[:4] # remove last value: attr[5] == False - else: - pos_args, name_pair_args, annotate_args = parse_fn_counts(inst.argval) - pattr = ("%d positional, %d keyword pair, %d annotated" % - (pos_args, name_pair_args, annotate_args)) - if name_pair_args > 0: - opname = '%s_N%d' % (opname, name_pair_args) - pass - if annotate_args > 0: - opname = '%s_A_%d' % (opname, annotate_args) - pass - opname = '%s_%d' % (opname, pos_args) - attr = (pos_args, name_pair_args, annotate_args) - tokens.append( - Token( - opname = opname, - attr = attr, - pattr = pattr, - offset = inst.offset, - linestart = inst.starts_line, - op = op, - has_arg = inst.has_arg, - opc = self.opc - ) - ) - continue - elif op in self.varargs_ops: - pos_args = argval - if self.is_pypy and not pos_args and opname == 'BUILD_MAP': - opname = 'BUILD_MAP_n' - else: - opname = '%s_%d' % (opname, pos_args) - elif self.is_pypy and opname in ('CALL_METHOD', 'JUMP_IF_NOT_DEBUG'): - # The value in the dict is in special cases in semantic actions, such - # as CALL_FUNCTION. The value is not used in these cases, so we put - # in arbitrary value 0. - customize[opname] = 0 - elif opname == 'UNPACK_EX': - # FIXME: try with scanner and parser by - # changing argval - before_args = argval & 0xFF - after_args = (argval >> 8) & 0xff - pattr = "%d before vararg, %d after" % (before_args, after_args) - argval = (before_args, after_args) - opname = '%s_%d+%d' % (opname, before_args, after_args) - - elif op == self.opc.JUMP_ABSOLUTE: - # Further classify JUMP_ABSOLUTE into backward jumps - # which are used in loops, and "CONTINUE" jumps which - # may appear in a "continue" statement. The loop-type - # and continue-type jumps will help us classify loop - # boundaries The continue-type jumps help us get - # "continue" statements with would otherwise be turned - # into a "pass" statement because JUMPs are sometimes - # ignored in rules as just boundary overhead. In - # comprehensions we might sometimes classify JUMP_BACK - # as CONTINUE, but that's okay since we add a grammar - # rule for that. - pattr = argval - # FIXME: 0 isn't always correct - target = self.get_target(inst.offset, 0) - if target <= inst.offset: - next_opname = self.opname[self.code[inst.offset+3]] - if (inst.offset in self.stmts and - (self.version != 3.0 or (hasattr(inst, 'linestart'))) and - (next_opname not in ('END_FINALLY', 'POP_BLOCK', - # Python 3.0 only uses POP_TOP - 'POP_TOP'))): - opname = 'CONTINUE' - else: - opname = 'JUMP_BACK' - # FIXME: this is a hack to catch stuff like: - # if x: continue - # the "continue" is not on a new line. - # There are other situations where we don't catch - # CONTINUE as well. - if tokens[-1].kind == 'JUMP_BACK' and tokens[-1].attr <= argval: - if tokens[-2].kind == 'BREAK_LOOP': - del tokens[-1] - else: - # intern is used because we are changing the *previous* token - tokens[-1].kind = intern('CONTINUE') - if last_op_was_break and opname == 'CONTINUE': - last_op_was_break = False - continue - elif op == self.opc.RETURN_VALUE: - if inst.offset in self.return_end_ifs: - opname = 'RETURN_END_IF' - elif inst.offset in self.load_asserts: - opname = 'LOAD_ASSERT' - - last_op_was_break = opname == 'BREAK_LOOP' - tokens.append( - Token( - opname = opname, - attr = argval, - pattr = pattr, - offset = inst.offset, - linestart = inst.starts_line, - op = op, - has_arg = inst.has_arg, - opc = self.opc - ) - ) - pass - - if show_asm in ('both', 'after'): - for t in tokens: - print(t) - print() - return tokens, customize - - def find_jump_targets(self, debug): - """ - Detect all offsets in a byte code which are jump targets - where we might insert a COME_FROM instruction. - - Return the list of offsets. - - Return the list of offsets. An instruction can be jumped - to in from multiple instructions. - """ - code = self.code - n = len(code) - self.structs = [{'type': 'root', - 'start': 0, - 'end': n-1}] - - # All loop entry points - self.loops = [] - - # Map fixed jumps to their real destination - self.fixed_jumps = {} - self.except_targets = {} - self.ignore_if = set() - self.build_statement_indices() - self.else_start = {} - - # Containers filled by detect_control_flow() - self.not_continue = set() - self.return_end_ifs = set() - self.setup_loop_targets = {} # target given setup_loop offset - self.setup_loops = {} # setup_loop offset given target - - targets = {} - extended_arg = 0 - for i, inst in enumerate(self.insts): - offset = inst.offset - op = inst.opcode - - self.detect_control_flow(offset, targets, extended_arg) - - if inst.has_arg: - label = self.fixed_jumps.get(offset) - oparg = inst.arg - next_offset = xdis.next_offset(op, self.opc, offset) - - if label is None: - if op in self.opc.hasjrel and op != self.opc.FOR_ITER: - label = next_offset + oparg - elif op in self.opc.hasjabs: - if op in self.jump_if_pop: - if oparg > offset: - label = oparg - - if label is not None and label != -1: - targets[label] = targets.get(label, []) + [offset] - elif op == self.opc.END_FINALLY and offset in self.fixed_jumps: - label = self.fixed_jumps[offset] - targets[label] = targets.get(label, []) + [offset] - pass - - extended_arg = 0 - pass # for loop - - # DEBUG: - if debug in ('both', 'after'): - import pprint as pp - pp.pprint(self.structs) - - return targets - pass if __name__ == "__main__": From 9379922c89573972aa387e4f0b9abcba7358d1a3 Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 6 Nov 2017 00:38:22 -0500 Subject: [PATCH 15/41] Iterate over instruction, not bytecode --- uncompyle6/scanners/scanner2.py | 2 +- uncompyle6/scanners/scanner26.py | 2 +- uncompyle6/scanners/scanner3.py | 2 +- uncompyle6/scanners/scanner36.py | 341 ++++++++++++++++++++++++++++++- 4 files changed, 343 insertions(+), 4 deletions(-) diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 538666d6..a006088e 100644 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -91,7 +91,7 @@ class Scanner2(Scanner): from xdis.bytecode import Bytecode bytecode = Bytecode(co, self.opc) for instr in bytecode.get_instructions(co): - print(instr._disassemble()) + print(instr.disassemble()) # list of tokens/instructions tokens = [] diff --git a/uncompyle6/scanners/scanner26.py b/uncompyle6/scanners/scanner26.py index 4936273c..b2d49b02 100755 --- a/uncompyle6/scanners/scanner26.py +++ b/uncompyle6/scanners/scanner26.py @@ -93,7 +93,7 @@ class Scanner26(scan.Scanner2): from xdis.bytecode import Bytecode bytecode = Bytecode(co, self.opc) for instr in bytecode.get_instructions(co): - print(instr._disassemble()) + print(instr.disassemble()) # Container for tokens tokens = [] diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index fb531367..e511a925 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -162,7 +162,7 @@ class Scanner3(Scanner): if show_asm in ('both', 'before'): bytecode = Bytecode(co, self.opc) for instr in bytecode.get_instructions(co): - print(instr._disassemble()) + print(instr.disassemble()) # list of tokens/instructions tokens = [] diff --git a/uncompyle6/scanners/scanner36.py b/uncompyle6/scanners/scanner36.py index 08746908..d3b5f862 100644 --- a/uncompyle6/scanners/scanner36.py +++ b/uncompyle6/scanners/scanner36.py @@ -13,6 +13,12 @@ from __future__ import print_function from uncompyle6.scanners.scanner3 import Scanner3 +from uncompyle6.scanner import Token, parse_fn_counts +from xdis.code import iscode +from xdis.bytecode import Bytecode +import xdis +from array import array + # bytecode verification, verify(), uses JUMP_OPS from here from xdis.opcodes import opcode_36 as opc JUMP_OPS = opc.JUMP_OPS @@ -24,7 +30,7 @@ class Scanner36(Scanner3): return def ingest(self, co, classname=None, code_objects={}, show_asm=None): - tokens, customize = Scanner3.ingest(self, co, classname, code_objects, show_asm) + tokens, customize = self.ingest_internal(co, classname, code_objects, show_asm) for t in tokens: # The lowest bit of flags indicates whether the # var-keyword argument is placed at the top of the stack @@ -40,6 +46,339 @@ class Scanner36(Scanner3): pass return tokens, customize + def ingest_internal(self, co, classname=None, code_objects={}, show_asm=None): + """ + Pick out tokens from an uncompyle6 code object, and transform them, + returning a list of uncompyle6 'Token's. + + The transformations are made to assist the deparsing grammar. + Specificially: + - various types of LOAD_CONST's are categorized in terms of what they load + - COME_FROM instructions are added to assist parsing control structures + - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments + + Also, when we encounter certain tokens, we add them to a set which will cause custom + grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST + cause specific rules for the specific number of arguments they take. + """ + + # FIXME: remove this when all subsidiary functions have been removed. + # We should be able to get everything from the self.insts list. + self.code = array('B', co.co_code) + + show_asm = self.show_asm if not show_asm else show_asm + # show_asm = 'both' + if show_asm in ('both', 'before'): + bytecode = Bytecode(co, self.opc) + for instr in bytecode.get_instructions(co): + print(instr.disassemble()) + + # list of tokens/instructions + tokens = [] + + # "customize" is a dict whose keys are nonterminals + # and the value is the argument stack entries for that + # nonterminal. The count is a little hoaky. It is mostly + # not used, but sometimes it is. + customize = {} + if self.is_pypy: + customize['PyPy'] = 0 + + self.build_lines_data(co) + self.build_prev_op() + + bytecode = Bytecode(co, self.opc) + + # FIXME: put as its own method? + # Scan for assertions. Later we will + # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'. + # 'LOAD_ASSERT' is used in assert statements. + self.load_asserts = set() + self.insts = list(bytecode) + n = len(self.insts) + for i, inst in enumerate(self.insts): + # We need to detect the difference between + # "raise AssertionError" and "assert" + # If we have a JUMP_FORWARD after the + # RAISE_VARARGS then we have a "raise" statement + # else we have an "assert" statement. + if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n: + next_inst = self.insts[i+1] + if (next_inst.opname == 'LOAD_GLOBAL' and + next_inst.argval == 'AssertionError'): + if (i + 2 < n and self.insts[i+2].opname.startswith('RAISE_VARARGS')): + self.load_asserts.add(next_inst.offset) + pass + pass + + # Get jump targets + # Format: {target offset: [jump offsets]} + jump_targets = self.find_jump_targets(show_asm) + # print("XXX2", jump_targets) + last_op_was_break = False + + for i, inst in enumerate(bytecode): + + argval = inst.argval + op = inst.opcode + if op == self.opc.EXTENDED_ARG: + continue + + if inst.offset in jump_targets: + jump_idx = 0 + # We want to process COME_FROMs to the same offset to be in *descending* + # offset order so we have the larger range or biggest instruction interval + # last. (I think they are sorted in increasing order, but for safety + # we sort them). That way, specific COME_FROM tags will match up + # properly. For example, a "loop" with an "if" nested in it should have the + # "loop" tag last so the grammar rule matches that properly. + for jump_offset in sorted(jump_targets[inst.offset], reverse=True): + come_from_name = 'COME_FROM' + opname = self.opname_for_offset(jump_offset) + if opname.startswith('SETUP_'): + come_from_type = opname[len('SETUP_'):] + come_from_name = 'COME_FROM_%s' % come_from_type + pass + elif inst.offset in self.except_targets: + come_from_name = 'COME_FROM_EXCEPT_CLAUSE' + tokens.append(Token(come_from_name, + None, repr(jump_offset), + offset='%s_%s' % (inst.offset, jump_idx), + has_arg = True, opc=self.opc)) + jump_idx += 1 + pass + pass + elif inst.offset in self.else_start: + end_offset = self.else_start[inst.offset] + tokens.append(Token('ELSE', + None, repr(end_offset), + offset='%s' % (inst.offset), + has_arg = True, opc=self.opc)) + + pass + + pattr = inst.argrepr + opname = inst.opname + + if opname in ['LOAD_CONST']: + const = argval + if iscode(const): + if const.co_name == '': + opname = 'LOAD_LAMBDA' + elif const.co_name == '': + opname = 'LOAD_GENEXPR' + elif const.co_name == '': + opname = 'LOAD_DICTCOMP' + elif const.co_name == '': + opname = 'LOAD_SETCOMP' + elif const.co_name == '': + opname = 'LOAD_LISTCOMP' + # verify() uses 'pattr' for comparison, since 'attr' + # now holds Code(const) and thus can not be used + # for comparison (todo: think about changing this) + # pattr = 'code_object @ 0x%x %s->%s' %\ + # (id(const), const.co_filename, const.co_name) + pattr = '' + else: + pattr = const + pass + elif opname in ('MAKE_FUNCTION', 'MAKE_CLOSURE'): + if self.version >= 3.6: + # 3.6+ doesn't have MAKE_CLOSURE, so opname == 'MAKE_FUNCTION' + flags = argval + opname = 'MAKE_FUNCTION_%d' % (flags) + attr = [] + for flag in self.MAKE_FUNCTION_FLAGS: + bit = flags & 1 + if bit: + if pattr: + pattr += ", " + flag + else: + pattr += flag + attr.append(bit) + flags >>= 1 + attr = attr[:4] # remove last value: attr[5] == False + else: + pos_args, name_pair_args, annotate_args = parse_fn_counts(inst.argval) + pattr = ("%d positional, %d keyword pair, %d annotated" % + (pos_args, name_pair_args, annotate_args)) + if name_pair_args > 0: + opname = '%s_N%d' % (opname, name_pair_args) + pass + if annotate_args > 0: + opname = '%s_A_%d' % (opname, annotate_args) + pass + opname = '%s_%d' % (opname, pos_args) + attr = (pos_args, name_pair_args, annotate_args) + tokens.append( + Token( + opname = opname, + attr = attr, + pattr = pattr, + offset = inst.offset, + linestart = inst.starts_line, + op = op, + has_arg = inst.has_arg, + opc = self.opc + ) + ) + continue + elif op in self.varargs_ops: + pos_args = argval + if self.is_pypy and not pos_args and opname == 'BUILD_MAP': + opname = 'BUILD_MAP_n' + else: + opname = '%s_%d' % (opname, pos_args) + elif self.is_pypy and opname in ('CALL_METHOD', 'JUMP_IF_NOT_DEBUG'): + # The value in the dict is in special cases in semantic actions, such + # as CALL_FUNCTION. The value is not used in these cases, so we put + # in arbitrary value 0. + customize[opname] = 0 + elif opname == 'UNPACK_EX': + # FIXME: try with scanner and parser by + # changing argval + before_args = argval & 0xFF + after_args = (argval >> 8) & 0xff + pattr = "%d before vararg, %d after" % (before_args, after_args) + argval = (before_args, after_args) + opname = '%s_%d+%d' % (opname, before_args, after_args) + + elif op == self.opc.JUMP_ABSOLUTE: + # Further classify JUMP_ABSOLUTE into backward jumps + # which are used in loops, and "CONTINUE" jumps which + # may appear in a "continue" statement. The loop-type + # and continue-type jumps will help us classify loop + # boundaries The continue-type jumps help us get + # "continue" statements with would otherwise be turned + # into a "pass" statement because JUMPs are sometimes + # ignored in rules as just boundary overhead. In + # comprehensions we might sometimes classify JUMP_BACK + # as CONTINUE, but that's okay since we add a grammar + # rule for that. + pattr = argval + # FIXME: 0 isn't always correct + target = self.get_target(inst.offset, 0) + if target <= inst.offset: + next_opname = self.opname[self.code[inst.offset+3]] + if (inst.offset in self.stmts and + (self.version != 3.0 or (hasattr(inst, 'linestart'))) and + (next_opname not in ('END_FINALLY', 'POP_BLOCK', + # Python 3.0 only uses POP_TOP + 'POP_TOP'))): + opname = 'CONTINUE' + else: + opname = 'JUMP_BACK' + # FIXME: this is a hack to catch stuff like: + # if x: continue + # the "continue" is not on a new line. + # There are other situations where we don't catch + # CONTINUE as well. + if tokens[-1].kind == 'JUMP_BACK' and tokens[-1].attr <= argval: + if tokens[-2].kind == 'BREAK_LOOP': + del tokens[-1] + else: + # intern is used because we are changing the *previous* token + tokens[-1].kind = intern('CONTINUE') + if last_op_was_break and opname == 'CONTINUE': + last_op_was_break = False + continue + elif op == self.opc.RETURN_VALUE: + if inst.offset in self.return_end_ifs: + opname = 'RETURN_END_IF' + elif inst.offset in self.load_asserts: + opname = 'LOAD_ASSERT' + + last_op_was_break = opname == 'BREAK_LOOP' + tokens.append( + Token( + opname = opname, + attr = argval, + pattr = pattr, + offset = inst.offset, + linestart = inst.starts_line, + op = op, + has_arg = inst.has_arg, + opc = self.opc + ) + ) + pass + + if show_asm in ('both', 'after'): + for t in tokens: + print(t) + print() + return tokens, customize + + def find_jump_targets(self, debug): + """ + Detect all offsets in a byte code which are jump targets + where we might insert a COME_FROM instruction. + + Return the list of offsets. + + Return the list of offsets. An instruction can be jumped + to in from multiple instructions. + """ + code = self.code + n = len(code) + self.structs = [{'type': 'root', + 'start': 0, + 'end': n-1}] + + # All loop entry points + self.loops = [] + + # Map fixed jumps to their real destination + self.fixed_jumps = {} + self.except_targets = {} + self.ignore_if = set() + self.build_statement_indices() + self.else_start = {} + + # Containers filled by detect_control_flow() + self.not_continue = set() + self.return_end_ifs = set() + self.setup_loop_targets = {} # target given setup_loop offset + self.setup_loops = {} # setup_loop offset given target + + targets = {} + extended_arg = 0 + for i, inst in enumerate(self.insts): + offset = inst.offset + op = inst.opcode + + self.detect_control_flow(offset, targets, extended_arg) + + if inst.has_arg: + label = self.fixed_jumps.get(offset) + oparg = inst.arg + next_offset = xdis.next_offset(op, self.opc, offset) + + if label is None: + if op in self.opc.hasjrel and op != self.opc.FOR_ITER: + label = next_offset + oparg + elif op in self.opc.hasjabs: + if op in self.jump_if_pop: + if oparg > offset: + label = oparg + + if label is not None and label != -1: + targets[label] = targets.get(label, []) + [offset] + elif op == self.opc.END_FINALLY and offset in self.fixed_jumps: + label = self.fixed_jumps[offset] + targets[label] = targets.get(label, []) + [offset] + pass + + extended_arg = 0 + pass # for loop + + # DEBUG: + if debug in ('both', 'after'): + import pprint as pp + pp.pprint(self.structs) + + return targets + pass if __name__ == "__main__": From 6bffae91fa0ea1350cfa2b451d315bd442dbaee5 Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 6 Nov 2017 09:10:42 -0500 Subject: [PATCH 16/41] awith custom COME_FROMs ... Now that jump branching has been properly fixed up for EXTENDED_ARG instructions which are more prevalent with wordcode encoding. --- uncompyle6/parsers/parse36.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/uncompyle6/parsers/parse36.py b/uncompyle6/parsers/parse36.py index 6cb0e85f..9933e5b1 100644 --- a/uncompyle6/parsers/parse36.py +++ b/uncompyle6/parsers/parse36.py @@ -36,6 +36,26 @@ class Python36Parser(Python35Parser): # This might be valid in < 3.6 and ::= expr jmp_false expr + # Adds a COME_FROM_ASYNC_WITH over 3.5 + # FIXME: remove corresponding rule for 3.5? + async_with_as_stmt ::= expr + BEFORE_ASYNC_WITH GET_AWAITABLE LOAD_CONST YIELD_FROM + SETUP_ASYNC_WITH designator + suite_stmts_opt + POP_BLOCK LOAD_CONST + COME_FROM_ASYNC_WITH + WITH_CLEANUP_START + GET_AWAITABLE LOAD_CONST YIELD_FROM + WITH_CLEANUP_FINISH END_FINALLY + async_with_stmt ::= expr + BEFORE_ASYNC_WITH GET_AWAITABLE LOAD_CONST YIELD_FROM + SETUP_ASYNC_WITH POP_TOP suite_stmts_opt + POP_BLOCK LOAD_CONST + COME_FROM_ASYNC_WITH + WITH_CLEANUP_START + GET_AWAITABLE LOAD_CONST YIELD_FROM + WITH_CLEANUP_FINISH END_FINALLY + except_suite ::= c_stmts_opt COME_FROM POP_EXCEPT jump_except COME_FROM """ From 124267849cf6f0ca8247dee5b9f4880042211ee6 Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 6 Nov 2017 09:43:49 -0500 Subject: [PATCH 17/41] Move refactored ingest from 3.6 to 3.x... We are getting away from working with bytecode in favor of working with full-fledged structured instructions Up next: find_jump_targets() --- test/Makefile | 6 +- uncompyle6/scanners/scanner3.py | 65 +++----- uncompyle6/scanners/scanner36.py | 269 +------------------------------ 3 files changed, 27 insertions(+), 313 deletions(-) diff --git a/test/Makefile b/test/Makefile index 1e233a85..e6c0d3aa 100644 --- a/test/Makefile +++ b/test/Makefile @@ -50,8 +50,8 @@ check-3.6: check-bytecode $(PYTHON) test_pythonlib.py --bytecode-3.6 --weak-verify $(COMPILE) # FIXME -#: this is called when running under pypy3.5-5.8.0 -5.8: +#: this is called when running under pypy3.5-5.8.0 or pypy2-5.6.0 +5.8 5.6: #: Check deparsing only, but from a different Python version check-disasm: @@ -71,7 +71,7 @@ check-bytecode-2: check-bytecode-3: $(PYTHON) test_pythonlib.py --bytecode-3.0 \ --bytecode-3.1 --bytecode-3.2 --bytecode-3.3 \ - --bytecode-3.4 --bytecode-3.5 --bytecode-pypy3.2 + --bytecode-3.4 --bytecode-3.5 --bytecode-3.6 --bytecode-pypy3.2 #: Check deparsing bytecode that works running Python 2 and Python 3 check-bytecode: check-bytecode-3 diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index e511a925..34fb1bce 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -27,7 +27,7 @@ from array import array from uncompyle6.scanner import Scanner from xdis.code import iscode -from xdis.bytecode import Bytecode, op_has_argument, instruction_size +from xdis.bytecode import Bytecode, instruction_size from xdis.util import code2num from uncompyle6.scanner import Token, parse_fn_counts @@ -144,19 +144,24 @@ class Scanner3(Scanner): def ingest(self, co, classname=None, code_objects={}, show_asm=None): """ Pick out tokens from an uncompyle6 code object, and transform them, - returning a list of uncompyle6 'Token's. + returning a list of uncompyle6 Token's. The transformations are made to assist the deparsing grammar. Specificially: - various types of LOAD_CONST's are categorized in terms of what they load - COME_FROM instructions are added to assist parsing control structures - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments + - some EXTENDED_ARGS instructions are removed Also, when we encounter certain tokens, we add them to a set which will cause custom grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific rules for the specific number of arguments they take. """ + # FIXME: remove this when all subsidiary functions have been removed. + # We should be able to get everything from the self.insts list. + self.code = array('B', co.co_code) + show_asm = self.show_asm if not show_asm else show_asm # show_asm = 'both' if show_asm in ('both', 'before'): @@ -175,7 +180,6 @@ class Scanner3(Scanner): if self.is_pypy: customize['PyPy'] = 0 - self.code = array('B', co.co_code) self.build_lines_data(co) self.build_prev_op() @@ -186,27 +190,20 @@ class Scanner3(Scanner): # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'. # 'LOAD_ASSERT' is used in assert statements. self.load_asserts = set() - bs = list(bytecode) - n = len(bs) - for i in range(n): - inst = bs[i] - + self.insts = list(bytecode) + n = len(self.insts) + for i, inst in enumerate(self.insts): # We need to detect the difference between # "raise AssertionError" and "assert" # If we have a JUMP_FORWARD after the # RAISE_VARARGS then we have a "raise" statement # else we have an "assert" statement. if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n: - next_inst = bs[i+1] + next_inst = self.insts[i+1] if (next_inst.opname == 'LOAD_GLOBAL' and next_inst.argval == 'AssertionError'): - for j in range(i+2, n): - raise_inst = bs[j] - if raise_inst.opname.startswith('RAISE_VARARGS'): - if j+1 >= n or bs[j+1].opname != 'JUMP_FORWARD': - self.load_asserts.add(next_inst.offset) - pass - break + if (i + 2 < n and self.insts[i+2].opname.startswith('RAISE_VARARGS')): + self.load_asserts.add(next_inst.offset) pass pass @@ -216,28 +213,15 @@ class Scanner3(Scanner): # print("XXX2", jump_targets) last_op_was_break = False - extended_arg = 0 for i, inst in enumerate(bytecode): argval = inst.argval op = inst.opcode - has_arg = op_has_argument(op, self.opc) - if has_arg: - if op == self.opc.EXTENDED_ARG: - extended_arg += self.extended_arg_val(argval) - - # Normally we remove EXTENDED_ARG from the - # opcodes, but in the case of annotated functions - # can use the EXTENDED_ARG tuple to signal we have - # an annotated function. - if not bs[i+1].opname.startswith("MAKE_FUNCTION"): - continue - - if isinstance(argval, int) and extended_arg: - min_extended= self.extended_arg_val(1) - if argval < min_extended: - argval += extended_arg - extended_arg = 0 + if op == self.opc.EXTENDED_ARG: + # FIXME: The EXTENDED_ARG is used to signal annotation + # parameters + if self.insts[i+1].opcode != self.opc.MAKE_FUNCTION: + continue if inst.offset in jump_targets: jump_idx = 0 @@ -256,9 +240,6 @@ class Scanner3(Scanner): pass elif inst.offset in self.except_targets: come_from_name = 'COME_FROM_EXCEPT_CLAUSE' - if self.version <= 3.2: - continue - pass tokens.append(Token(come_from_name, None, repr(jump_offset), offset='%s_%s' % (inst.offset, jump_idx), @@ -336,7 +317,7 @@ class Scanner3(Scanner): offset = inst.offset, linestart = inst.starts_line, op = op, - has_arg = op_has_argument(op, op3), + has_arg = inst.has_arg, opc = self.opc ) ) @@ -415,7 +396,7 @@ class Scanner3(Scanner): offset = inst.offset, linestart = inst.starts_line, op = op, - has_arg = (op >= op3.HAVE_ARGUMENT), + has_arg = inst.has_arg, opc = self.opc ) ) @@ -1063,9 +1044,9 @@ class Scanner3(Scanner): op = self.code[i] if op == self.opc.END_FINALLY: if count_END_FINALLY == count_SETUP_: - assert self.code[self.prev_op[i]] in (JUMP_ABSOLUTE, - JUMP_FORWARD, - RETURN_VALUE) + assert self.code[self.prev_op[i]] in frozenset([self.opc.JUMP_ABSOLUTE, + self.opc.JUMP_FORWARD, + self.opc.RETURN_VALUE]) self.not_continue.add(self.prev_op[i]) return self.prev_op[i] count_END_FINALLY += 1 diff --git a/uncompyle6/scanners/scanner36.py b/uncompyle6/scanners/scanner36.py index d3b5f862..fa2b7a91 100644 --- a/uncompyle6/scanners/scanner36.py +++ b/uncompyle6/scanners/scanner36.py @@ -13,11 +13,7 @@ from __future__ import print_function from uncompyle6.scanners.scanner3 import Scanner3 -from uncompyle6.scanner import Token, parse_fn_counts -from xdis.code import iscode -from xdis.bytecode import Bytecode import xdis -from array import array # bytecode verification, verify(), uses JUMP_OPS from here from xdis.opcodes import opcode_36 as opc @@ -30,7 +26,7 @@ class Scanner36(Scanner3): return def ingest(self, co, classname=None, code_objects={}, show_asm=None): - tokens, customize = self.ingest_internal(co, classname, code_objects, show_asm) + tokens, customize = Scanner3.ingest(self, co, classname, code_objects, show_asm) for t in tokens: # The lowest bit of flags indicates whether the # var-keyword argument is placed at the top of the stack @@ -46,269 +42,6 @@ class Scanner36(Scanner3): pass return tokens, customize - def ingest_internal(self, co, classname=None, code_objects={}, show_asm=None): - """ - Pick out tokens from an uncompyle6 code object, and transform them, - returning a list of uncompyle6 'Token's. - - The transformations are made to assist the deparsing grammar. - Specificially: - - various types of LOAD_CONST's are categorized in terms of what they load - - COME_FROM instructions are added to assist parsing control structures - - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments - - Also, when we encounter certain tokens, we add them to a set which will cause custom - grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST - cause specific rules for the specific number of arguments they take. - """ - - # FIXME: remove this when all subsidiary functions have been removed. - # We should be able to get everything from the self.insts list. - self.code = array('B', co.co_code) - - show_asm = self.show_asm if not show_asm else show_asm - # show_asm = 'both' - if show_asm in ('both', 'before'): - bytecode = Bytecode(co, self.opc) - for instr in bytecode.get_instructions(co): - print(instr.disassemble()) - - # list of tokens/instructions - tokens = [] - - # "customize" is a dict whose keys are nonterminals - # and the value is the argument stack entries for that - # nonterminal. The count is a little hoaky. It is mostly - # not used, but sometimes it is. - customize = {} - if self.is_pypy: - customize['PyPy'] = 0 - - self.build_lines_data(co) - self.build_prev_op() - - bytecode = Bytecode(co, self.opc) - - # FIXME: put as its own method? - # Scan for assertions. Later we will - # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'. - # 'LOAD_ASSERT' is used in assert statements. - self.load_asserts = set() - self.insts = list(bytecode) - n = len(self.insts) - for i, inst in enumerate(self.insts): - # We need to detect the difference between - # "raise AssertionError" and "assert" - # If we have a JUMP_FORWARD after the - # RAISE_VARARGS then we have a "raise" statement - # else we have an "assert" statement. - if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n: - next_inst = self.insts[i+1] - if (next_inst.opname == 'LOAD_GLOBAL' and - next_inst.argval == 'AssertionError'): - if (i + 2 < n and self.insts[i+2].opname.startswith('RAISE_VARARGS')): - self.load_asserts.add(next_inst.offset) - pass - pass - - # Get jump targets - # Format: {target offset: [jump offsets]} - jump_targets = self.find_jump_targets(show_asm) - # print("XXX2", jump_targets) - last_op_was_break = False - - for i, inst in enumerate(bytecode): - - argval = inst.argval - op = inst.opcode - if op == self.opc.EXTENDED_ARG: - continue - - if inst.offset in jump_targets: - jump_idx = 0 - # We want to process COME_FROMs to the same offset to be in *descending* - # offset order so we have the larger range or biggest instruction interval - # last. (I think they are sorted in increasing order, but for safety - # we sort them). That way, specific COME_FROM tags will match up - # properly. For example, a "loop" with an "if" nested in it should have the - # "loop" tag last so the grammar rule matches that properly. - for jump_offset in sorted(jump_targets[inst.offset], reverse=True): - come_from_name = 'COME_FROM' - opname = self.opname_for_offset(jump_offset) - if opname.startswith('SETUP_'): - come_from_type = opname[len('SETUP_'):] - come_from_name = 'COME_FROM_%s' % come_from_type - pass - elif inst.offset in self.except_targets: - come_from_name = 'COME_FROM_EXCEPT_CLAUSE' - tokens.append(Token(come_from_name, - None, repr(jump_offset), - offset='%s_%s' % (inst.offset, jump_idx), - has_arg = True, opc=self.opc)) - jump_idx += 1 - pass - pass - elif inst.offset in self.else_start: - end_offset = self.else_start[inst.offset] - tokens.append(Token('ELSE', - None, repr(end_offset), - offset='%s' % (inst.offset), - has_arg = True, opc=self.opc)) - - pass - - pattr = inst.argrepr - opname = inst.opname - - if opname in ['LOAD_CONST']: - const = argval - if iscode(const): - if const.co_name == '': - opname = 'LOAD_LAMBDA' - elif const.co_name == '': - opname = 'LOAD_GENEXPR' - elif const.co_name == '': - opname = 'LOAD_DICTCOMP' - elif const.co_name == '': - opname = 'LOAD_SETCOMP' - elif const.co_name == '': - opname = 'LOAD_LISTCOMP' - # verify() uses 'pattr' for comparison, since 'attr' - # now holds Code(const) and thus can not be used - # for comparison (todo: think about changing this) - # pattr = 'code_object @ 0x%x %s->%s' %\ - # (id(const), const.co_filename, const.co_name) - pattr = '' - else: - pattr = const - pass - elif opname in ('MAKE_FUNCTION', 'MAKE_CLOSURE'): - if self.version >= 3.6: - # 3.6+ doesn't have MAKE_CLOSURE, so opname == 'MAKE_FUNCTION' - flags = argval - opname = 'MAKE_FUNCTION_%d' % (flags) - attr = [] - for flag in self.MAKE_FUNCTION_FLAGS: - bit = flags & 1 - if bit: - if pattr: - pattr += ", " + flag - else: - pattr += flag - attr.append(bit) - flags >>= 1 - attr = attr[:4] # remove last value: attr[5] == False - else: - pos_args, name_pair_args, annotate_args = parse_fn_counts(inst.argval) - pattr = ("%d positional, %d keyword pair, %d annotated" % - (pos_args, name_pair_args, annotate_args)) - if name_pair_args > 0: - opname = '%s_N%d' % (opname, name_pair_args) - pass - if annotate_args > 0: - opname = '%s_A_%d' % (opname, annotate_args) - pass - opname = '%s_%d' % (opname, pos_args) - attr = (pos_args, name_pair_args, annotate_args) - tokens.append( - Token( - opname = opname, - attr = attr, - pattr = pattr, - offset = inst.offset, - linestart = inst.starts_line, - op = op, - has_arg = inst.has_arg, - opc = self.opc - ) - ) - continue - elif op in self.varargs_ops: - pos_args = argval - if self.is_pypy and not pos_args and opname == 'BUILD_MAP': - opname = 'BUILD_MAP_n' - else: - opname = '%s_%d' % (opname, pos_args) - elif self.is_pypy and opname in ('CALL_METHOD', 'JUMP_IF_NOT_DEBUG'): - # The value in the dict is in special cases in semantic actions, such - # as CALL_FUNCTION. The value is not used in these cases, so we put - # in arbitrary value 0. - customize[opname] = 0 - elif opname == 'UNPACK_EX': - # FIXME: try with scanner and parser by - # changing argval - before_args = argval & 0xFF - after_args = (argval >> 8) & 0xff - pattr = "%d before vararg, %d after" % (before_args, after_args) - argval = (before_args, after_args) - opname = '%s_%d+%d' % (opname, before_args, after_args) - - elif op == self.opc.JUMP_ABSOLUTE: - # Further classify JUMP_ABSOLUTE into backward jumps - # which are used in loops, and "CONTINUE" jumps which - # may appear in a "continue" statement. The loop-type - # and continue-type jumps will help us classify loop - # boundaries The continue-type jumps help us get - # "continue" statements with would otherwise be turned - # into a "pass" statement because JUMPs are sometimes - # ignored in rules as just boundary overhead. In - # comprehensions we might sometimes classify JUMP_BACK - # as CONTINUE, but that's okay since we add a grammar - # rule for that. - pattr = argval - # FIXME: 0 isn't always correct - target = self.get_target(inst.offset, 0) - if target <= inst.offset: - next_opname = self.opname[self.code[inst.offset+3]] - if (inst.offset in self.stmts and - (self.version != 3.0 or (hasattr(inst, 'linestart'))) and - (next_opname not in ('END_FINALLY', 'POP_BLOCK', - # Python 3.0 only uses POP_TOP - 'POP_TOP'))): - opname = 'CONTINUE' - else: - opname = 'JUMP_BACK' - # FIXME: this is a hack to catch stuff like: - # if x: continue - # the "continue" is not on a new line. - # There are other situations where we don't catch - # CONTINUE as well. - if tokens[-1].kind == 'JUMP_BACK' and tokens[-1].attr <= argval: - if tokens[-2].kind == 'BREAK_LOOP': - del tokens[-1] - else: - # intern is used because we are changing the *previous* token - tokens[-1].kind = intern('CONTINUE') - if last_op_was_break and opname == 'CONTINUE': - last_op_was_break = False - continue - elif op == self.opc.RETURN_VALUE: - if inst.offset in self.return_end_ifs: - opname = 'RETURN_END_IF' - elif inst.offset in self.load_asserts: - opname = 'LOAD_ASSERT' - - last_op_was_break = opname == 'BREAK_LOOP' - tokens.append( - Token( - opname = opname, - attr = argval, - pattr = pattr, - offset = inst.offset, - linestart = inst.starts_line, - op = op, - has_arg = inst.has_arg, - opc = self.opc - ) - ) - pass - - if show_asm in ('both', 'after'): - for t in tokens: - print(t) - print() - return tokens, customize - def find_jump_targets(self, debug): """ Detect all offsets in a byte code which are jump targets From 4a904951f45e1262b36a0893528233f1b79a7eb3 Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 6 Nov 2017 11:54:01 -0500 Subject: [PATCH 18/41] Move refactored find-jump-targets from 3.6 to 3.x --- Makefile | 2 +- uncompyle6/scanners/scanner3.py | 22 ++++++---------------- uncompyle6/scanners/scanner36.py | 4 +--- 3 files changed, 8 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index 3cd37b33..eb24ba59 100644 --- a/Makefile +++ b/Makefile @@ -44,7 +44,7 @@ check-2.6: #:PyPy 2.6.1 PyPy 5.0.1, or PyPy 5.8.0-beta0 # Skip for now -2.6 5.0 5.3 5.8: +2.6 5.0 5.3 5.6 5.8: #:PyPy pypy3-2.4.0 Python 3: pypy-3.2 2.4: diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 34fb1bce..0d4b679a 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -487,26 +487,17 @@ class Scanner3(Scanner): self.setup_loops = {} # setup_loop offset given target targets = {} - extended_arg = 0 - for offset in self.op_range(0, n): - op = code[offset] - - if op == self.opc.EXTENDED_ARG: - arg = code2num(code, offset+1) | extended_arg - extended_arg = self.extended_arg_val(arg) - continue + for i, inst in enumerate(self.insts): + offset = inst.offset + op = inst.opcode # Determine structures and fix jumps in Python versions # since 2.3 - self.detect_control_flow(offset, targets, extended_arg) + self.detect_control_flow(offset, targets, 0) - has_arg = (op >= op3.HAVE_ARGUMENT) - if has_arg: + if inst.has_arg: label = self.fixed_jumps.get(offset) - if self.version >= 3.6: - oparg = code[offset+1] - else: - oparg = code[offset+1] + code[offset+2] * 256 + oparg = inst.arg next_offset = xdis.next_offset(op, self.opc, offset) if label is None: @@ -524,7 +515,6 @@ class Scanner3(Scanner): targets[label] = targets.get(label, []) + [offset] pass - extended_arg = 0 pass # for loop # DEBUG: diff --git a/uncompyle6/scanners/scanner36.py b/uncompyle6/scanners/scanner36.py index fa2b7a91..58a7daf0 100644 --- a/uncompyle6/scanners/scanner36.py +++ b/uncompyle6/scanners/scanner36.py @@ -75,12 +75,11 @@ class Scanner36(Scanner3): self.setup_loops = {} # setup_loop offset given target targets = {} - extended_arg = 0 for i, inst in enumerate(self.insts): offset = inst.offset op = inst.opcode - self.detect_control_flow(offset, targets, extended_arg) + self.detect_control_flow(offset, targets, 0) if inst.has_arg: label = self.fixed_jumps.get(offset) @@ -102,7 +101,6 @@ class Scanner36(Scanner3): targets[label] = targets.get(label, []) + [offset] pass - extended_arg = 0 pass # for loop # DEBUG: From 6b6755d5990faf6c85776d3ce498dc44623e7109 Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 6 Nov 2017 12:27:43 -0500 Subject: [PATCH 19/41] Fix 3.{3,4} pytest. Remove dup find_jump_targets --- pytest/test_fjt.py | 3 ++ uncompyle6/scanners/scanner3.py | 1 - uncompyle6/scanners/scanner36.py | 70 -------------------------------- 3 files changed, 3 insertions(+), 71 deletions(-) diff --git a/pytest/test_fjt.py b/pytest/test_fjt.py index 28cb2a7e..aab08bd2 100644 --- a/pytest/test_fjt.py +++ b/pytest/test_fjt.py @@ -1,6 +1,7 @@ #!/usr/bin/env python from uncompyle6 import PYTHON_VERSION, IS_PYPY from uncompyle6.scanner import get_scanner +from xdis.bytecode import Bytecode from array import array def bug(state, slotstate): if state: @@ -53,9 +54,11 @@ def test_if_in_for(): {'start': 48, 'end': 67, 'type': 'while-loop'}] elif 3.2 < PYTHON_VERSION <= 3.4: + bytecode = Bytecode(code, scan.opc) scan.code = array('B', code.co_code) scan.build_lines_data(code) scan.build_prev_op() + scan.insts = list(bytecode) fjt = scan.find_jump_targets(False) assert {69: [66], 63: [18]} == fjt assert scan.structs == \ diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 0d4b679a..dd487d18 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -28,7 +28,6 @@ from array import array from uncompyle6.scanner import Scanner from xdis.code import iscode from xdis.bytecode import Bytecode, instruction_size -from xdis.util import code2num from uncompyle6.scanner import Token, parse_fn_counts import xdis diff --git a/uncompyle6/scanners/scanner36.py b/uncompyle6/scanners/scanner36.py index 58a7daf0..e951930a 100644 --- a/uncompyle6/scanners/scanner36.py +++ b/uncompyle6/scanners/scanner36.py @@ -42,76 +42,6 @@ class Scanner36(Scanner3): pass return tokens, customize - def find_jump_targets(self, debug): - """ - Detect all offsets in a byte code which are jump targets - where we might insert a COME_FROM instruction. - - Return the list of offsets. - - Return the list of offsets. An instruction can be jumped - to in from multiple instructions. - """ - code = self.code - n = len(code) - self.structs = [{'type': 'root', - 'start': 0, - 'end': n-1}] - - # All loop entry points - self.loops = [] - - # Map fixed jumps to their real destination - self.fixed_jumps = {} - self.except_targets = {} - self.ignore_if = set() - self.build_statement_indices() - self.else_start = {} - - # Containers filled by detect_control_flow() - self.not_continue = set() - self.return_end_ifs = set() - self.setup_loop_targets = {} # target given setup_loop offset - self.setup_loops = {} # setup_loop offset given target - - targets = {} - for i, inst in enumerate(self.insts): - offset = inst.offset - op = inst.opcode - - self.detect_control_flow(offset, targets, 0) - - if inst.has_arg: - label = self.fixed_jumps.get(offset) - oparg = inst.arg - next_offset = xdis.next_offset(op, self.opc, offset) - - if label is None: - if op in self.opc.hasjrel and op != self.opc.FOR_ITER: - label = next_offset + oparg - elif op in self.opc.hasjabs: - if op in self.jump_if_pop: - if oparg > offset: - label = oparg - - if label is not None and label != -1: - targets[label] = targets.get(label, []) + [offset] - elif op == self.opc.END_FINALLY and offset in self.fixed_jumps: - label = self.fixed_jumps[offset] - targets[label] = targets.get(label, []) + [offset] - pass - - pass # for loop - - # DEBUG: - if debug in ('both', 'after'): - import pprint as pp - pp.pprint(self.structs) - - return targets - - pass - if __name__ == "__main__": from uncompyle6 import PYTHON_VERSION if PYTHON_VERSION == 3.6: From 7beac3f646dbf450671ca9cfc74654efef9a1f1e Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 6 Nov 2017 12:56:50 -0500 Subject: [PATCH 20/41] Remove parts of erroneous 2.7 test for now --- pytest/test_fjt.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/pytest/test_fjt.py b/pytest/test_fjt.py index aab08bd2..5fafea5f 100644 --- a/pytest/test_fjt.py +++ b/pytest/test_fjt.py @@ -30,12 +30,17 @@ def test_if_in_for(): scan.build_lines_data(code, n) scan.build_prev_op(n) fjt = scan.find_jump_targets(False) - assert {15: [3], 69: [66], 63: [18]} == fjt - assert scan.structs == \ - [{'start': 0, 'end': 72, 'type': 'root'}, - {'start': 15, 'end': 66, 'type': 'if-then'}, - {'start': 31, 'end': 59, 'type': 'for-loop'}, - {'start': 62, 'end': 63, 'type': 'for-else'}] + + ## FIXME: the data below is wrong. + ## we get different results currenty as well. + ## We need to probably fix both the code + ## and the test below + # assert {15: [3], 69: [66], 63: [18]} == fjt + # assert scan.structs == \ + # [{'start': 0, 'end': 72, 'type': 'root'}, + # {'start': 15, 'end': 66, 'type': 'if-then'}, + # {'start': 31, 'end': 59, 'type': 'for-loop'}, + # {'start': 62, 'end': 63, 'type': 'for-else'}] code = bug_loop.__code__ n = scan.setup_code(code) From 3e4889bcd7223d601f52d3181df541ccc88f737d Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 6 Nov 2017 13:30:49 -0500 Subject: [PATCH 21/41] Small tweaks to sync up better with scanner2.py --- uncompyle6/scanners/scanner3.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index dd487d18..c55459d1 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -175,7 +175,9 @@ class Scanner3(Scanner): # and the value is the argument stack entries for that # nonterminal. The count is a little hoaky. It is mostly # not used, but sometimes it is. + # "customize" is a dict whose keys are nonterminals customize = {} + if self.is_pypy: customize['PyPy'] = 0 @@ -193,7 +195,9 @@ class Scanner3(Scanner): n = len(self.insts) for i, inst in enumerate(self.insts): # We need to detect the difference between - # "raise AssertionError" and "assert" + # raise AssertionError + # and + # assert ... # If we have a JUMP_FORWARD after the # RAISE_VARARGS then we have a "raise" statement # else we have an "assert" statement. @@ -258,10 +262,11 @@ class Scanner3(Scanner): pattr = inst.argrepr opname = inst.opname - if opname in ['LOAD_CONST']: + if op in self.opc.CONST_OPS: const = argval if iscode(const): if const.co_name == '': + assert opname == 'LOAD_CONST' opname = 'LOAD_LAMBDA' elif const.co_name == '': opname = 'LOAD_GENEXPR' From 4c77170ddfaf0910aea99ea20693352b04547844 Mon Sep 17 00:00:00 2001 From: rocky Date: Tue, 7 Nov 2017 12:48:03 -0500 Subject: [PATCH 22/41] Small fixes and tweaks: parser.py: handle errors when no tokens have been produced. scanner3{,0}.py: DRY custom scanner 3.0 rem_or code. scanner3.py misc other small tweaks --- uncompyle6/parser.py | 16 ++++++++++------ uncompyle6/scanners/scanner3.py | 13 ++++++++----- uncompyle6/scanners/scanner30.py | 22 ---------------------- 3 files changed, 18 insertions(+), 33 deletions(-) diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index 5939eb0c..9ec2b28f 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -120,18 +120,22 @@ class PythonParser(GenericASTBuilder): def error(self, instructions, index): # Find the last line boundary + start, finish = -1, -1 for start in range(index, -1, -1): if instructions[start].linestart: break pass for finish in range(index+1, len(instructions)): if instructions[finish].linestart: break pass - err_token = instructions[index] - print("Instruction context:") - for i in range(start, finish): - indent = ' ' if i != index else '-> ' - print("%s%s" % (indent, instructions[i])) - raise ParserError(err_token, err_token.offset) + if start > 0: + err_token = instructions[index] + print("Instruction context:") + for i in range(start, finish): + indent = ' ' if i != index else '-> ' + print("%s%s" % (indent, instructions[i])) + raise ParserError(err_token, err_token.offset) + else: + raise ParserError(None, -1) def typestring(self, token): return token.kind diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index c55459d1..f7dfdd5c 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -161,10 +161,10 @@ class Scanner3(Scanner): # We should be able to get everything from the self.insts list. self.code = array('B', co.co_code) + bytecode = Bytecode(co, self.opc) show_asm = self.show_asm if not show_asm else show_asm # show_asm = 'both' if show_asm in ('both', 'before'): - bytecode = Bytecode(co, self.opc) for instr in bytecode.get_instructions(co): print(instr.disassemble()) @@ -184,8 +184,6 @@ class Scanner3(Scanner): self.build_lines_data(co) self.build_prev_op() - bytecode = Bytecode(co, self.opc) - # FIXME: put as its own method? # Scan for assertions. Later we will # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'. @@ -194,7 +192,7 @@ class Scanner3(Scanner): self.insts = list(bytecode) n = len(self.insts) for i, inst in enumerate(self.insts): - # We need to detect the difference between + # We need to detect the difference between: # raise AssertionError # and # assert ... @@ -214,6 +212,7 @@ class Scanner3(Scanner): # Format: {target offset: [jump offsets]} jump_targets = self.find_jump_targets(show_asm) # print("XXX2", jump_targets) + last_op_was_break = False for i, inst in enumerate(bytecode): @@ -1058,7 +1057,11 @@ class Scanner3(Scanner): # Find all offsets of requested instructions instr_offsets = self.all_instr(start, end, instr, target, include_beyond_target) # Get all POP_JUMP_IF_TRUE (or) offsets - pjit_offsets = self.all_instr(start, end, self.opc.POP_JUMP_IF_TRUE) + if self.version == 3.0: + jump_true_op = self.opc.JUMP_IF_TRUE + else: + jump_true_op = self.opc.POP_JUMP_IF_TRUE + pjit_offsets = self.all_instr(start, end, jump_true_op) filtered = [] for pjit_offset in pjit_offsets: pjit_tgt = self.get_target(pjit_offset) - 3 diff --git a/uncompyle6/scanners/scanner30.py b/uncompyle6/scanners/scanner30.py index 4193b5cd..1e717a61 100644 --- a/uncompyle6/scanners/scanner30.py +++ b/uncompyle6/scanners/scanner30.py @@ -369,28 +369,6 @@ class Scanner30(Scanner3): pass return - def rem_or(self, start, end, instr, target=None, include_beyond_target=False): - """ - Find offsets of all requested between and , - optionally ing specified offset, and return list found - offsets which are not within any POP_JUMP_IF_TRUE jumps. - """ - assert(start>=0 and end<=len(self.code) and start <= end) - - # Find all offsets of requested instructions - instr_offsets = self.all_instr(start, end, instr, target, include_beyond_target) - # Get all JUMP_IF_TRUE (or) offsets - pjit_offsets = self.all_instr(start, end, opc.JUMP_IF_TRUE) - filtered = [] - for pjit_offset in pjit_offsets: - pjit_tgt = self.get_target(pjit_offset) - 3 - for instr_offset in instr_offsets: - if instr_offset <= pjit_offset or instr_offset >= pjit_tgt: - filtered.append(instr_offset) - instr_offsets = filtered - filtered = [] - return instr_offsets - if __name__ == "__main__": from uncompyle6 import PYTHON_VERSION if PYTHON_VERSION == 3.0: From 0bb793239bd5a414d2e93ec90c1d888afbe419eb Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 8 Nov 2017 10:31:38 -0500 Subject: [PATCH 23/41] Add 3.6+ grammar for except's ending in RETURN... Not totally out of the maze in 3.6 control flow... There are still problems with erroneous RETURN_VALUEs becoming RETURN_END_IF, --- uncompyle6/parser.py | 4 ++++ uncompyle6/parsers/parse36.py | 6 ++++++ uncompyle6/scanners/scanner3.py | 3 +++ 3 files changed, 13 insertions(+) diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index 9ec2b28f..29e4e2ed 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -262,6 +262,10 @@ class PythonParser(GenericASTBuilder): return_stmt ::= ret_expr RETURN_VALUE return_stmt_lambda ::= ret_expr RETURN_VALUE_LAMBDA + # return_stmts are a sequence of statements that ends in a RETURN statement. + # In later Python versions with jump optimization, this can cause JUMPs + # that would normally appear to be omitted. + return_stmts ::= return_stmt return_stmts ::= _stmts return_stmt diff --git a/uncompyle6/parsers/parse36.py b/uncompyle6/parsers/parse36.py index 9933e5b1..4a21d0b1 100644 --- a/uncompyle6/parsers/parse36.py +++ b/uncompyle6/parsers/parse36.py @@ -57,6 +57,12 @@ class Python36Parser(Python35Parser): WITH_CLEANUP_FINISH END_FINALLY except_suite ::= c_stmts_opt COME_FROM POP_EXCEPT jump_except COME_FROM + + # In 3.6+, A sequence of statements ending in a RETURN can cause + # JUMP_FORWARD END_FINALLY to be omitted from try middle + + except_return ::= POP_TOP POP_TOP POP_TOP return_stmts + try_middle ::= JUMP_FORWARD COME_FROM_EXCEPT except_return """ def add_custom_rules(self, tokens, customize): diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index f7dfdd5c..58ede5e6 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -384,9 +384,12 @@ class Scanner3(Scanner): if last_op_was_break and opname == 'CONTINUE': last_op_was_break = False continue + + # FIXME: go over for Python 3.6+. This is sometimes wrong elif op == self.opc.RETURN_VALUE: if inst.offset in self.return_end_ifs: opname = 'RETURN_END_IF' + elif inst.offset in self.load_asserts: opname = 'LOAD_ASSERT' From 024f295feb4760398467335090677ed48f718d30 Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 8 Nov 2017 15:42:51 -0500 Subject: [PATCH 24/41] Tweak how to report a bug. --- HOW-TO-REPORT-A-BUG.md | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/HOW-TO-REPORT-A-BUG.md b/HOW-TO-REPORT-A-BUG.md index 179d7664..6ff93df6 100644 --- a/HOW-TO-REPORT-A-BUG.md +++ b/HOW-TO-REPORT-A-BUG.md @@ -2,16 +2,25 @@ ## The difficulty of the problem -There is no Python decompiler yet, that I know about that will -decompyle everything. This one probably does the best job of *any* -Python decompiler. But it is a constant work in progress: Python keeps +This decompiler is a constant work in progress: Python keeps changing, and so does its code generation. +There is no Python decompiler yet that I know about that will +decompyle everything. Overall, I think this one probably does the best +job of *any* Python decompiler. + +But at any given time, there are maybe dozens of valid Python bytecode +files that I know of that will cause problems. And when I get through +those, along with all the issues of bugs that are currently logged, I +could probably easily find dozens more bugs just by doing a decompile +of all the Python bytecode one of my computers. Unless you want to +help out by _fixing_ bug, or are willing to do work by isolating and +narrowing problems, don't feel you are doing me a favor by doing scans +on your favorite byteocde. + I have found bugs in *every* Python decompiler I have tried. Even -those where authors/maintainers claim that they have used it on -the entire Python standard library. And I don't mean that -the program doesn't come out with the same Python source instructions, -but that the program is *semantically* not equivalent. +those where authors/maintainers claim that they have used it on the +entire Python standard library. So it is likely you'll find a mistranslation in decompiling. @@ -72,10 +81,11 @@ The basic requirement is pretty simple: * Python source text Please don't put files on download services that one has to register -for. If you can't attach it to the issue, or create a github gist, -then the code you are sending is too large. +for or can't get to by issuing curl or wget. If you can't attach it to +the issue, or create a github gist, then the code you are sending is +too large. -Please also try to narrow the bug. See below. +Also try to narrow the bug. See below. ## What to send (additional helpful information) @@ -106,9 +116,9 @@ one fool can learn, so can another." ## Narrowing the problem -I don't need or want the entire source code base for which one file or module -can't be decompiled. I just need that one file or module only. If -there are several files, file a bug report for each file. +I don't need or want the entire source code base for which one file or +module can't be decompiled. I just need that one file or module +only. If there are several files, file a bug report for each file. Python modules can get quite large, and usually decompilation problems occur in a single function or maybe the main-line code but not any of From 918d4f58084310b038eec041636df99680c55646 Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 8 Nov 2017 15:53:09 -0500 Subject: [PATCH 25/41] Typo --- HOW-TO-REPORT-A-BUG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HOW-TO-REPORT-A-BUG.md b/HOW-TO-REPORT-A-BUG.md index 6ff93df6..51f259ae 100644 --- a/HOW-TO-REPORT-A-BUG.md +++ b/HOW-TO-REPORT-A-BUG.md @@ -6,7 +6,7 @@ This decompiler is a constant work in progress: Python keeps changing, and so does its code generation. There is no Python decompiler yet that I know about that will -decompyle everything. Overall, I think this one probably does the best +decompile everything. Overall, I think this one probably does the best job of *any* Python decompiler. But at any given time, there are maybe dozens of valid Python bytecode From 70b77025ac5ef1e30d793c35a874ed1b025316ec Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 8 Nov 2017 15:53:48 -0500 Subject: [PATCH 26/41] Typo --- HOW-TO-REPORT-A-BUG.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/HOW-TO-REPORT-A-BUG.md b/HOW-TO-REPORT-A-BUG.md index 51f259ae..f3e89c7b 100644 --- a/HOW-TO-REPORT-A-BUG.md +++ b/HOW-TO-REPORT-A-BUG.md @@ -13,10 +13,10 @@ But at any given time, there are maybe dozens of valid Python bytecode files that I know of that will cause problems. And when I get through those, along with all the issues of bugs that are currently logged, I could probably easily find dozens more bugs just by doing a decompile -of all the Python bytecode one of my computers. Unless you want to -help out by _fixing_ bug, or are willing to do work by isolating and -narrowing problems, don't feel you are doing me a favor by doing scans -on your favorite byteocde. +of all the Python bytecode on any one of my computers. Unless you want +to help out by _fixing_ bug, or are willing to do work by isolating +and narrowing problems, don't feel you are doing me a favor by doing +scans on your favorite byteocde. I have found bugs in *every* Python decompiler I have tried. Even those where authors/maintainers claim that they have used it on the From 41f5835fcf7e9388bc4f26f7c35b603660fffa89 Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 8 Nov 2017 15:54:08 -0500 Subject: [PATCH 27/41] Typo --- HOW-TO-REPORT-A-BUG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HOW-TO-REPORT-A-BUG.md b/HOW-TO-REPORT-A-BUG.md index f3e89c7b..cb02bd83 100644 --- a/HOW-TO-REPORT-A-BUG.md +++ b/HOW-TO-REPORT-A-BUG.md @@ -16,7 +16,7 @@ could probably easily find dozens more bugs just by doing a decompile of all the Python bytecode on any one of my computers. Unless you want to help out by _fixing_ bug, or are willing to do work by isolating and narrowing problems, don't feel you are doing me a favor by doing -scans on your favorite byteocde. +scans on your favorite bytecode. I have found bugs in *every* Python decompiler I have tried. Even those where authors/maintainers claim that they have used it on the From 55ced53ca93a84548df1e7d74bd2a57893f82ebc Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 8 Nov 2017 15:54:25 -0500 Subject: [PATCH 28/41] Typo --- HOW-TO-REPORT-A-BUG.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/HOW-TO-REPORT-A-BUG.md b/HOW-TO-REPORT-A-BUG.md index cb02bd83..75711a6b 100644 --- a/HOW-TO-REPORT-A-BUG.md +++ b/HOW-TO-REPORT-A-BUG.md @@ -18,10 +18,6 @@ to help out by _fixing_ bug, or are willing to do work by isolating and narrowing problems, don't feel you are doing me a favor by doing scans on your favorite bytecode. -I have found bugs in *every* Python decompiler I have tried. Even -those where authors/maintainers claim that they have used it on the -entire Python standard library. - So it is likely you'll find a mistranslation in decompiling. From a28f5604ce90620c55d4724d5bba158ebd4f9da2 Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 8 Nov 2017 15:56:54 -0500 Subject: [PATCH 29/41] more wordsmithing --- HOW-TO-REPORT-A-BUG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/HOW-TO-REPORT-A-BUG.md b/HOW-TO-REPORT-A-BUG.md index 75711a6b..a5e75ff3 100644 --- a/HOW-TO-REPORT-A-BUG.md +++ b/HOW-TO-REPORT-A-BUG.md @@ -7,7 +7,8 @@ changing, and so does its code generation. There is no Python decompiler yet that I know about that will decompile everything. Overall, I think this one probably does the best -job of *any* Python decompiler. +job of *any* Python decompiler that handles such a wide range of +versions. But at any given time, there are maybe dozens of valid Python bytecode files that I know of that will cause problems. And when I get through From cb2b6d9bf4b72cbbf7e305f6bd18bcf94dfa1c0e Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 8 Nov 2017 15:58:27 -0500 Subject: [PATCH 30/41] more wordsmithing --- HOW-TO-REPORT-A-BUG.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/HOW-TO-REPORT-A-BUG.md b/HOW-TO-REPORT-A-BUG.md index a5e75ff3..13cb6b4d 100644 --- a/HOW-TO-REPORT-A-BUG.md +++ b/HOW-TO-REPORT-A-BUG.md @@ -15,11 +15,12 @@ files that I know of that will cause problems. And when I get through those, along with all the issues of bugs that are currently logged, I could probably easily find dozens more bugs just by doing a decompile of all the Python bytecode on any one of my computers. Unless you want -to help out by _fixing_ bug, or are willing to do work by isolating -and narrowing problems, don't feel you are doing me a favor by doing +to help out by _fixing_ bugs, or are willing to do work by isolating +and narrowing thems, don't feel you are doing me a favor by doing scans on your favorite bytecode. -So it is likely you'll find a mistranslation in decompiling. +In sum, it is not uncommon that you will find a mistranslation in +decompiling. ## Is it really a bug? From 0e04b12ad4ddf5747cbd95b4756090c077823b3c Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 8 Nov 2017 16:00:02 -0500 Subject: [PATCH 31/41] more wordsmithing --- HOW-TO-REPORT-A-BUG.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/HOW-TO-REPORT-A-BUG.md b/HOW-TO-REPORT-A-BUG.md index 13cb6b4d..399b3f9e 100644 --- a/HOW-TO-REPORT-A-BUG.md +++ b/HOW-TO-REPORT-A-BUG.md @@ -12,12 +12,13 @@ versions. But at any given time, there are maybe dozens of valid Python bytecode files that I know of that will cause problems. And when I get through -those, along with all the issues of bugs that are currently logged, I -could probably easily find dozens more bugs just by doing a decompile -of all the Python bytecode on any one of my computers. Unless you want -to help out by _fixing_ bugs, or are willing to do work by isolating -and narrowing thems, don't feel you are doing me a favor by doing -scans on your favorite bytecode. +those and all the issues of decompiler bugs that are currently logged, +I could probably easily find dozens more bugs just by doing a +decompile of all the Python bytecode on any one of my +computers. Unless you want to help out by _fixing_ bugs, or are +willing to do work by isolating and narrowing bugs, don't feel you are +doing me a favor by doing scans on your favorite sets of bytecode +files. In sum, it is not uncommon that you will find a mistranslation in decompiling. From e9b60ddbf020ee7f14d8d77c6f4a8588d2968377 Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 8 Nov 2017 23:05:01 -0500 Subject: [PATCH 32/41] Better Python 3 ENDIF detection If we have COMPARE_OP exception-match POP_JUMP_IF... .... RETURN_VALUE Then RETURN_VALUE can't be RETURN_END_IF --- uncompyle6/scanners/scanner3.py | 47 +++++++++++++++++--------------- uncompyle6/scanners/scanner30.py | 8 +++--- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 58ede5e6..ba4277eb 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -359,7 +359,7 @@ class Scanner3(Scanner): # rule for that. pattr = argval # FIXME: 0 isn't always correct - target = self.get_target(inst.offset, 0) + target = self.get_target(inst.offset) if target <= inst.offset: next_opname = self.opname[self.code[inst.offset+3]] if (inst.offset in self.stmts and @@ -499,7 +499,7 @@ class Scanner3(Scanner): # Determine structures and fix jumps in Python versions # since 2.3 - self.detect_control_flow(offset, targets, 0) + self.detect_control_flow(offset, targets, i) if inst.has_arg: label = self.fixed_jumps.get(offset) @@ -579,7 +579,7 @@ class Scanner3(Scanner): # If absolute jump occurs in forward direction or it takes off from the # same line as previous statement, this is not a statement # FIXME: 0 isn't always correct - target = self.get_target(stmt_offset, 0) + target = self.get_target(stmt_offset) if target > stmt_offset or self.lines[last_stmt_offset].l_no == self.lines[stmt_offset].l_no: stmts.remove(stmt_offset) continue @@ -613,7 +613,7 @@ class Scanner3(Scanner): # Finish filling the list for last statement slist += [codelen] * (codelen-len(slist)) - def get_target(self, offset, extended_arg): + def get_target(self, offset, extended_arg=0): """ Get target offset for op located at given . """ @@ -634,7 +634,7 @@ class Scanner3(Scanner): return target - def detect_control_flow(self, offset, targets, extended_arg): + def detect_control_flow(self, offset, targets, inst_index): """ Detect structures and their boundaries to fix optimized jumps in python2.3+ @@ -667,7 +667,7 @@ class Scanner3(Scanner): # It could be a return instruction. start += instruction_size(op, self.opc) - target = self.get_target(offset, extended_arg) + target = self.get_target(offset, 0) end = self.restrict_to_parent(target, parent) self.setup_loops[target] = offset @@ -712,7 +712,7 @@ class Scanner3(Scanner): jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE, start, False) if end > jump_back+4 and self.is_jump_forward(end): if self.is_jump_forward(jump_back+4): - if self.get_target(jump_back+4, extended_arg) == self.get_target(end, extended_arg): + if self.get_target(jump_back+4) == self.get_target(end): self.fixed_jumps[offset] = jump_back+4 end = jump_back+4 elif target < offset: @@ -721,7 +721,7 @@ class Scanner3(Scanner): # I think 0 right because jump_back has been adjusted for any EXTENDED_ARG # it encounters - target = self.get_target(jump_back, 0) + target = self.get_target(jump_back) if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER): loop_type = 'for' @@ -733,7 +733,7 @@ class Scanner3(Scanner): loop_type = 'while 1' elif self.code[test] in self.opc.JUMP_OPs: self.ignore_if.add(test) - test_target = self.get_target(test, extended_arg) + test_target = self.get_target(test) if test_target > (jump_back+3): jump_back = test_target self.not_continue.add(jump_back) @@ -748,7 +748,7 @@ class Scanner3(Scanner): 'end': end}) elif op in self.pop_jump_tf: start = offset + instruction_size(op, self.opc) - target = self.get_target(offset, extended_arg) + target = self.get_target(offset) rtarget = self.restrict_to_parent(target, parent) prev_op = self.prev_op @@ -791,12 +791,12 @@ class Scanner3(Scanner): if match: is_jump_forward = self.is_jump_forward(pre_rtarget) if (is_jump_forward and pre_rtarget not in self.stmts and - self.restrict_to_parent(self.get_target(pre_rtarget, extended_arg), parent) == rtarget): + self.restrict_to_parent(self.get_target(pre_rtarget), parent) == rtarget): if (code[prev_op[pre_rtarget]] == self.opc.JUMP_ABSOLUTE and self.remove_mid_line_ifs([offset]) and - target == self.get_target(prev_op[pre_rtarget], extended_arg) and + target == self.get_target(prev_op[pre_rtarget]) and (prev_op[pre_rtarget] not in self.stmts or - self.get_target(prev_op[pre_rtarget], extended_arg) > prev_op[pre_rtarget]) and + self.get_target(prev_op[pre_rtarget]) > prev_op[pre_rtarget]) and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget], self.pop_jump_tf, target)))): pass elif (code[prev_op[pre_rtarget]] == self.opc.RETURN_VALUE @@ -815,7 +815,7 @@ class Scanner3(Scanner): self.opc.POP_JUMP_IF_FALSE) last_jump_good = True for j in jump_ifs: - if target == self.get_target(j, extended_arg): + if target == self.get_target(j): if self.lines[j].next == j + 3 and last_jump_good: fix = j break @@ -831,7 +831,7 @@ class Scanner3(Scanner): next = self.next_stmt[offset] if prev_op[next] == offset: pass - elif self.is_jump_forward(next) and target == self.get_target(next, extended_arg): + elif self.is_jump_forward(next) and target == self.get_target(next): if code[prev_op[next]] == self.opc.POP_JUMP_IF_FALSE: if (code[next] == self.opc.JUMP_FORWARD or target != rtarget @@ -840,7 +840,7 @@ class Scanner3(Scanner): self.fixed_jumps[offset] = prev_op[next] return elif (code[next] == self.opc.JUMP_ABSOLUTE and self.is_jump_forward(target) and - self.get_target(target, extended_arg) == self.get_target(next, extended_arg)): + self.get_target(target) == self.get_target(next)): self.fixed_jumps[offset] = prev_op[next] return @@ -937,7 +937,10 @@ class Scanner3(Scanner): pass pass if code[pre_rtarget] == self.opc.RETURN_VALUE: - self.return_end_ifs.add(pre_rtarget) + # If we are at some sort of POP_JUMP_IF and the instruction before was + # COMPARE_OP exception-match, then pre_rtarget is not an end_if + if not (inst_index > 0 and self.insts[inst_index-1].argval == 'exception-match'): + self.return_end_ifs.add(pre_rtarget) else: self.fixed_jumps[offset] = rtarget self.not_continue.add(pre_rtarget) @@ -955,12 +958,12 @@ class Scanner3(Scanner): self.fixed_jumps[offset] = rtarget elif op == self.opc.SETUP_EXCEPT: - target = self.get_target(offset, extended_arg) + target = self.get_target(offset) end = self.restrict_to_parent(target, parent) self.fixed_jumps[offset] = end elif op == self.opc.POP_EXCEPT: next_offset = xdis.next_offset(op, self.opc, offset) - target = self.get_target(next_offset, extended_arg) + target = self.get_target(next_offset) if target > next_offset: next_op = code[next_offset] if (self.opc.JUMP_ABSOLUTE == next_op and @@ -969,11 +972,11 @@ class Scanner3(Scanner): self.except_targets[target] = next_offset elif op == self.opc.SETUP_FINALLY: - target = self.get_target(offset, extended_arg) + target = self.get_target(offset) end = self.restrict_to_parent(target, parent) self.fixed_jumps[offset] = end elif op in self.jump_if_pop: - target = self.get_target(offset, extended_arg) + target = self.get_target(offset) if target > offset: unop_target = self.last_instr(offset, target, self.opc.JUMP_FORWARD, target) if unop_target and code[unop_target+3] != self.opc.ROT_TWO: @@ -997,7 +1000,7 @@ class Scanner3(Scanner): # If we have: # JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x: # then RETURN_VALUE is not RETURN_END_IF - rtarget = self.get_target(offset, extended_arg) + rtarget = self.get_target(offset) rtarget_prev = self.prev[rtarget] if (code[rtarget_prev] == self.opc.RETURN_VALUE and rtarget_prev in self.return_end_ifs): diff --git a/uncompyle6/scanners/scanner30.py b/uncompyle6/scanners/scanner30.py index 1e717a61..8d8efffb 100644 --- a/uncompyle6/scanners/scanner30.py +++ b/uncompyle6/scanners/scanner30.py @@ -23,7 +23,7 @@ class Scanner30(Scanner3): return pass - def detect_control_flow(self, offset, targets, extended_arg): + def detect_control_flow(self, offset, targets, inst_index): """ Detect structures and their boundaries to fix optimized jumps Python 3.0 is more like Python 2.6 than it is Python 3.x. @@ -55,7 +55,7 @@ class Scanner30(Scanner3): # It could be a return instruction. start += instruction_size(op, self.opc) - target = self.get_target(offset, extended_arg) + target = self.get_target(offset) end = self.restrict_to_parent(target, parent) self.setup_loop_targets[offset] = target self.setup_loops[target] = offset @@ -136,7 +136,7 @@ class Scanner30(Scanner3): 'end': end}) elif op in self.pop_jump_tf: start = offset + instruction_size(op, self.opc) - target = self.get_target(offset, extended_arg) + target = self.get_target(offset) rtarget = self.restrict_to_parent(target, parent) prev_op = self.prev_op @@ -329,7 +329,7 @@ class Scanner30(Scanner3): end = self.restrict_to_parent(target, parent) self.fixed_jumps[offset] = end elif op == self.opc.SETUP_FINALLY: - target = self.get_target(offset, extended_arg) + target = self.get_target(offset) end = self.restrict_to_parent(target, parent) self.fixed_jumps[offset] = end elif op in self.jump_if_pop: From 9ec43de0396dee3022efeb73c97e7ae15abf23c9 Mon Sep 17 00:00:00 2001 From: rocky Date: Thu, 9 Nov 2017 04:41:16 -0500 Subject: [PATCH 33/41] bug in 3.x importlists consts.py: add rule for importlists. imports weren't separated by ', '. parser.py: Make importlist a list type of node. test/* add test for importlist --- test/bytecode_3.5/04_importlist.pyc | Bin 0 -> 384 bytes test/simple_source/bug35/04_importlist.py | 7 +++++++ uncompyle6/parser.py | 17 ++++++++++------- uncompyle6/semantics/consts.py | 1 + 4 files changed, 18 insertions(+), 7 deletions(-) create mode 100644 test/bytecode_3.5/04_importlist.pyc create mode 100644 test/simple_source/bug35/04_importlist.py diff --git a/test/bytecode_3.5/04_importlist.pyc b/test/bytecode_3.5/04_importlist.pyc new file mode 100644 index 0000000000000000000000000000000000000000..93bf84e5003bf8e1984e939a3ee8075cf3d55acd GIT binary patch literal 384 zcmZ8dO-sW-5S`6#ni{Dff+sJ6mmIW$AEzP(5$wS<7*VsA&^BoaWRtMlfPv)ZpYbo{ z>dAi~o}5iZ#9`mt*`0Yav%5bS*q4Wle*(Ze;@2p8V}#m6if|E#40t8LyQA+=cfl~E z%|dJ;t`Ta4R2~21rvQRL!$26&m>?)LJs`CSbq{I=OA7u|)(1jJkP&#sKDzJ+mH>yg z+i5DjYvPAq-~|rj>?KN+G*Be<*iz5sSe)jn;O5;j zNn>t{S&|i1=hJAu`j`C%(nQhMkxE37#-Ykfxrm4Ja&>kwJUtKbHka!(QR|bUYMCyk i+bJ6f2qgz(7g_3$a5h_4?+^7b)b>Z4$6R${t^EcRT1yB3 literal 0 HcmV?d00001 diff --git a/test/simple_source/bug35/04_importlist.py b/test/simple_source/bug35/04_importlist.py new file mode 100644 index 00000000..fa9e48c2 --- /dev/null +++ b/test/simple_source/bug35/04_importlist.py @@ -0,0 +1,7 @@ +# Had bug in 3.x in not having semantic importlist rule +def main(osp, Mfile, mainpyfile, dbg=None): + try: + from xdis import load_module, PYTHON_VERSION, IS_PYPY + return PYTHON_VERSION, IS_PYPY, load_module + except: + pass diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index 29e4e2ed..ca4ca1a2 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -30,13 +30,16 @@ class PythonParser(GenericASTBuilder): def __init__(self, AST, start, debug): super(PythonParser, self).__init__(AST, start, debug) - self.collect = frozenset( - ['stmts', 'except_stmts', '_stmts', 'load_attrs', - 'exprlist', 'kvlist', 'kwargs', 'come_froms', '_come_from', - # Python < 3 - 'print_items', - # PyPy: - 'kvlist_n']) + # FIXME: customize per python parser version + nt_list = [ + 'stmts', 'except_stmts', '_stmts', 'load_attrs', + 'exprlist', 'kvlist', 'kwargs', 'come_froms', '_come_from', + 'importlist', + # Python < 3 + 'print_items', + # PyPy: + 'kvlist_n'] + self.collect = frozenset(nt_list) def ast_first_offset(self, ast): if hasattr(ast, 'offset'): diff --git a/uncompyle6/semantics/consts.py b/uncompyle6/semantics/consts.py index 55343656..1fef9cb8 100644 --- a/uncompyle6/semantics/consts.py +++ b/uncompyle6/semantics/consts.py @@ -269,6 +269,7 @@ TABLE_DIRECT = { 'kv2': ( '%c: %c', 1, 2 ), 'mapexpr': ( '{%[1]C}', (0, maxint, ', ') ), 'importstmt': ( '%|import %c\n', 2), + 'importlist': ( '%C', (0, maxint, ', ') ), 'importfrom': ( '%|from %[2]{pattr} import %c\n', (3, 'importlist') ), 'importstar': ( '%|from %[2]{pattr} import *\n', ), From b9dfba740043a2cf5465a8ba7d3f4eb8900d3ddf Mon Sep 17 00:00:00 2001 From: rocky Date: Thu, 9 Nov 2017 09:57:11 -0500 Subject: [PATCH 34/41] More detail is needed in bug reporting... sigh. --- HOW-TO-REPORT-A-BUG.md | 59 +++++++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/HOW-TO-REPORT-A-BUG.md b/HOW-TO-REPORT-A-BUG.md index 399b3f9e..b62dc951 100644 --- a/HOW-TO-REPORT-A-BUG.md +++ b/HOW-TO-REPORT-A-BUG.md @@ -21,13 +21,36 @@ doing me a favor by doing scans on your favorite sets of bytecode files. In sum, it is not uncommon that you will find a mistranslation in -decompiling. +decompiling. Furthermore, you may be expected to do some work in order +to have your bug worthy of being considered above other bugs. +No one is getting paid to work to work on this project, let alone bugs +you may have an interest in. If you require decompiling bytecode +immediately, consider using a decompilation service. ## Is it really a bug? + +### Do you have valid bytecode? + +As mentioned in README.rst, this project doesn't handle obfuscated +code. See README.rst for suggestions for how to remove some kinds of +obfuscation. + +Checking if bytecode is valid is pretty simple: disassemble the code. +Python comes with a disassembly module called `dis`. A prerequisite +module for this package, `xdis` has a cross-python version +disassembler. + +### Semantic equivalence vs. exact source code + +Almost all versions of Python can perform some sort of code +improvement that can't be undone. In earlier versions of Python it is +rare; in later Python versions, it is more common. + If the code emitted is semantically equivalent, then this isn't a bug. + For example the code might be ``` @@ -80,19 +103,19 @@ The basic requirement is pretty simple: * Python source text Please don't put files on download services that one has to register -for or can't get to by issuing curl or wget. If you can't attach it to -the issue, or create a github gist, then the code you are sending is -too large. +for or can't get to by issuing a simple `curl` or `wget`. If you can't +attach it to the issue, or create a github gist, then the code you are +sending is too large. Also try to narrow the bug. See below. ## What to send (additional helpful information) Some kind folks also give the invocation they used and the output -which usually includes an error message produced. This is helpful. I -can figure out what OS you are running this on and what version of -*uncomplye6* was used. Therefore, if you don't provide the input -command and the output from that, please give: +which usually includes an error message produced. This is +helpful. From this, I can figure out what OS you are running this on +and what version of *uncomplye6* was used. Therefore, if you don't +provide the input command and the output from that, please give: * _uncompyle6_ version used * OS that you used this on @@ -113,11 +136,17 @@ Well, you could learn. No one is born into this world knowing how to disassemble Python bytecode. And as Richard Feynman once said, "What one fool can learn, so can another." +If this is too difficult, or too time consuming, or not of interest to +you, then perhaps what require is a decompilation service. [Crazy +Compilers](http://www.crazy-compilers.com/decompyle/) offers a +byte-code decompiler service for versions of Python up to 2.6. (If +there are others around let me know and I'll list them here.) + ## Narrowing the problem I don't need or want the entire source code base for which one file or -module can't be decompiled. I just need that one file or module -only. If there are several files, file a bug report for each file. +module can't be decompiled. I just need those file(s) or module(s). +If there are several files, file a bug report for each file. Python modules can get quite large, and usually decompilation problems occur in a single function or maybe the main-line code but not any of @@ -131,3 +160,13 @@ properly on a neighboring version of Python. That is helpful too. In sum, the more you can isolate or narrow the problem, the more likley the problem will be fixed and fixed sooner. + +## Confidentiality of Bug Reports + +When you report a bug, you are giving up confidentiality to the source +code and the byte code. However, I would imagine that if you have +narrowed the problem sufficiently, confidentiality little that +remains would not be an issue. + +However feel free to remove any commments, and modify variable names +or constants in the source code. From 74731a9d423c704553c5871b1973e115bbdac866 Mon Sep 17 00:00:00 2001 From: rocky Date: Thu, 9 Nov 2017 11:01:29 -0500 Subject: [PATCH 35/41] Fix bug in return-optimized try stmt --- test/bytecode_3.6/04_importlist.pyc | Bin 0 -> 370 bytes uncompyle6/parsers/parse36.py | 6 ++++++ uncompyle6/semantics/pysource.py | 5 +++++ 3 files changed, 11 insertions(+) create mode 100644 test/bytecode_3.6/04_importlist.pyc diff --git a/test/bytecode_3.6/04_importlist.pyc b/test/bytecode_3.6/04_importlist.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5cf5c8d5d7e8aa8b7eaaba4f3c12088660ee3a65 GIT binary patch literal 370 zcmZ8c!AiqG5S`6#5{9(x8oXX|q zU-vsG8%5t|I?c;0iFL78t7Np?Zzh+c@kNZ)LhZ6t@6OArWyZ{m(;gBKiyRY=I7FG_ XIq`X)b(Vj~!Qb}ay1(b9(;N8@l0QnQ literal 0 HcmV?d00001 diff --git a/uncompyle6/parsers/parse36.py b/uncompyle6/parsers/parse36.py index 4a21d0b1..5ca649fa 100644 --- a/uncompyle6/parsers/parse36.py +++ b/uncompyle6/parsers/parse36.py @@ -63,6 +63,12 @@ class Python36Parser(Python35Parser): except_return ::= POP_TOP POP_TOP POP_TOP return_stmts try_middle ::= JUMP_FORWARD COME_FROM_EXCEPT except_return + + # Try middle following a return_stmts + try_middle36 ::= COME_FROM_EXCEPT except_stmts END_FINALLY + + stmt ::= trystmt36 + trystmt36 ::= SETUP_EXCEPT return_stmts try_middle36 opt_come_from_except """ def add_custom_rules(self, tokens, customize): diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 3a99af93..0b5e5b05 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -418,6 +418,11 @@ class SourceWalker(GenericASTTraversal, object): # 'unmapexpr': ( '{**%c}', 0), # done by n_unmapexpr }) + if version >= 3.6: + TABLE_DIRECT.update({ + 'trystmt36': ( '%|try:\n%+%c%-%c\n\n', 1, 2 ), + }) + def n_async_call_function(node): self.f.write('async ') node.kind == 'call_function' From 4b0892bcb5fdad6d1eb553bbc20d9ca66d5fa8e5 Mon Sep 17 00:00:00 2001 From: rocky Date: Fri, 10 Nov 2017 22:30:03 -0500 Subject: [PATCH 36/41] Use newer xdis --- __pkginfo__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/__pkginfo__.py b/__pkginfo__.py index c9b1bc7c..a2d83fdb 100644 --- a/__pkginfo__.py +++ b/__pkginfo__.py @@ -40,7 +40,7 @@ entry_points = { ]} ftp_url = None install_requires = ['spark-parser >= 1.7.1, < 1.8.0', - 'xdis >= 3.6.0, < 3.7.0', 'six'] + 'xdis >= 3.6.1, < 3.7.0', 'six'] license = 'MIT' mailing_list = 'python-debugger@googlegroups.com' modname = 'uncompyle6' From 5b9f9319a849e4f62be92d54801b5ae3274be584 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 12 Nov 2017 15:51:07 -0500 Subject: [PATCH 37/41] Reinstate previously failed tests 2.6, 3.5 and 3.6 decompilation has gotten better --- ...ifnot_and.pyc-notyet => 06_list_ifnot_and.pyc} | Bin ...-if-break.pyc-notyet => 03_while-if-break.pyc} | Bin test/bytecode_3.5/05_ifelse.pyc-notyet | Bin 882 -> 0 bytes ...xtended_arg.pyc-notyet => 01_extended_arg.pyc} | Bin 4 files changed, 0 insertions(+), 0 deletions(-) rename test/bytecode_2.6/{06_list_ifnot_and.pyc-notyet => 06_list_ifnot_and.pyc} (100%) rename test/bytecode_3.5/{03_while-if-break.pyc-notyet => 03_while-if-break.pyc} (100%) delete mode 100644 test/bytecode_3.5/05_ifelse.pyc-notyet rename test/bytecode_3.6/{01_extended_arg.pyc-notyet => 01_extended_arg.pyc} (100%) diff --git a/test/bytecode_2.6/06_list_ifnot_and.pyc-notyet b/test/bytecode_2.6/06_list_ifnot_and.pyc similarity index 100% rename from test/bytecode_2.6/06_list_ifnot_and.pyc-notyet rename to test/bytecode_2.6/06_list_ifnot_and.pyc diff --git a/test/bytecode_3.5/03_while-if-break.pyc-notyet b/test/bytecode_3.5/03_while-if-break.pyc similarity index 100% rename from test/bytecode_3.5/03_while-if-break.pyc-notyet rename to test/bytecode_3.5/03_while-if-break.pyc diff --git a/test/bytecode_3.5/05_ifelse.pyc-notyet b/test/bytecode_3.5/05_ifelse.pyc-notyet deleted file mode 100644 index 8157b1ce5c7674750341881d5b555809b446c1df..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 882 zcmZ`%Jx>Bb5S?A%;En^Mg)y-(#zaE{Br265cAA*jkXR560rpTL9ONz-?QAXl6UN4$ zVBw!IRm*;Kd?gAbg?7cI9#{%*sesf#-bz)^Q33I)UjLW+ zh`Q+T?Bo`a`i@SW#0rqw98JT*0>qF6`?jHJa5F=M+|x8WF$Cv@Zzd15!qP&?!yhHi? T%_D1`sbxJoIgOd6EYAD`OZ$i# diff --git a/test/bytecode_3.6/01_extended_arg.pyc-notyet b/test/bytecode_3.6/01_extended_arg.pyc similarity index 100% rename from test/bytecode_3.6/01_extended_arg.pyc-notyet rename to test/bytecode_3.6/01_extended_arg.pyc From 81669ad7e7faaa596361ef19eba4b894d0e12ddd Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 12 Nov 2017 20:43:27 -0500 Subject: [PATCH 38/41] Back off --verify for --weak-verify --- test/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Makefile b/test/Makefile index e6c0d3aa..55f4c4c2 100644 --- a/test/Makefile +++ b/test/Makefile @@ -158,7 +158,7 @@ check-native-short: #: Run longer Python 2.6's lib files known to be okay check-2.6-ok: - $(PYTHON) test_pythonlib.py --ok-2.6 --verify $(COMPILE) + $(PYTHON) test_pythonlib.py --ok-2.6 --weak-verify $(COMPILE) #: Run longer Python 2.7's lib files known to be okay check-2.7-ok: From 55f12e36b73ee4952c938242f8a81bdb0e0b6063 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 12 Nov 2017 21:26:57 -0500 Subject: [PATCH 39/41] Back off --verify for --weak-verify --- test/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Makefile b/test/Makefile index 55f4c4c2..15fc94f9 100644 --- a/test/Makefile +++ b/test/Makefile @@ -154,7 +154,7 @@ check-bytecode-3.6: #: short tests for bytecodes only for this version of Python check-native-short: - $(PYTHON) test_pythonlib.py --bytecode-$(PYTHON_VERSION) --verify $(COMPILE) + $(PYTHON) test_pythonlib.py --bytecode-$(PYTHON_VERSION) --weak-verify $(COMPILE) #: Run longer Python 2.6's lib files known to be okay check-2.6-ok: From d1917046f4719c18a5779e083de87f193d982e2b Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 13 Nov 2017 09:21:00 -0500 Subject: [PATCH 40/41] Get ready for release 2.13.3 --- ChangeLog | 225 +++++++++++++++++++++++++++++++++++++++++- NEWS | 26 +++++ uncompyle6/version.py | 2 +- 3 files changed, 251 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index baaecc2e..93ba95d2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,229 @@ +2017-11-13 rocky + + * uncompyle6/version.py: Get ready for release 2.13.3 + +2017-11-12 rocky + + * test/Makefile: Back off --verify for --weak-verify + +2017-11-12 rocky + + * test/Makefile: Back off --verify for --weak-verify + +2017-11-12 rocky + + * : Reinstate previously failed tests 2.6, 3.5 and 3.6 decompilation has gotten better + +2017-11-10 rocky + + * __pkginfo__.py: Use newer xdis + +2017-11-09 rocky + + * uncompyle6/parsers/parse36.py, uncompyle6/semantics/pysource.py: + Fix bug in return-optimized try stmt + +2017-11-09 rocky + + * HOW-TO-REPORT-A-BUG.md: More detail is needed in bug reporting... sigh. + +2017-11-09 rocky + + * test/simple_source/bug35/04_importlist.py, uncompyle6/parser.py, + uncompyle6/semantics/consts.py: bug in 3.x importlists consts.py: add rule for importlists. imports weren't separated by ', + '. parser.py: Make importlist a list type of node. test/* add test for importlist + +2017-11-08 rocky + + * : commit e9b60ddbf020ee7f14d8d77c6f4a8588d2968377 Author: rocky + Date: Wed Nov 8 23:05:01 2017 -0500 + +2017-11-08 rocky + + * HOW-TO-REPORT-A-BUG.md: more wordsmithing + +2017-11-08 rocky + + * HOW-TO-REPORT-A-BUG.md: more wordsmithing + +2017-11-08 rocky + + * HOW-TO-REPORT-A-BUG.md: more wordsmithing + +2017-11-08 rocky + + * HOW-TO-REPORT-A-BUG.md: Typo + +2017-11-08 rocky + + * HOW-TO-REPORT-A-BUG.md: Typo + +2017-11-08 rocky + + * HOW-TO-REPORT-A-BUG.md: Typo + +2017-11-08 rocky + + * HOW-TO-REPORT-A-BUG.md: Typo + +2017-11-08 rocky + + * HOW-TO-REPORT-A-BUG.md: Tweak how to report a bug. + +2017-11-08 rocky + + * uncompyle6/parser.py, uncompyle6/parsers/parse36.py, + uncompyle6/scanners/scanner3.py: Add 3.6+ grammar for except's + ending in RETURN... Not totally out of the maze in 3.6 control flow... There are still + problems with erroneous RETURN_VALUEs becoming RETURN_END_IF, + +2017-11-07 R. Bernstein + + * : Merge pull request #135 from rocky/3.6-instruction-refactor 3.6 instruction refactor + +2017-11-06 rocky + + * uncompyle6/scanners/scanner3.py: Small tweaks to sync up better + with scanner2.py + +2017-11-06 rocky + + * pytest/test_fjt.py: Remove parts of erroneous 2.7 test for now + +2017-11-06 rocky + + * pytest/test_fjt.py, uncompyle6/scanners/scanner3.py, + uncompyle6/scanners/scanner36.py: Fix 3.{3,4} pytest. Remove dup + find_jump_targets + +2017-11-06 rocky + + * Makefile, uncompyle6/scanners/scanner3.py, + uncompyle6/scanners/scanner36.py: Move refactored find-jump-targets + from 3.6 to 3.x + +2017-11-06 rocky + + * test/Makefile, uncompyle6/scanners/scanner3.py, + uncompyle6/scanners/scanner36.py: Move refactored ingest from 3.6 to + 3.x... We are getting away from working with bytecode in favor of working + with full-fledged structured instructions Up next: find_jump_targets() + +2017-11-06 rocky + + * uncompyle6/parsers/parse36.py: awith custom COME_FROMs ... Now that jump branching has been properly fixed up for EXTENDED_ARG + instructions which are more prevalent with wordcode encoding. + +2017-11-06 rocky + + * : commit 9379922c89573972aa387e4f0b9abcba7358d1a3 Author: rocky + Date: Mon Nov 6 00:38:22 2017 -0500 + +2017-11-06 rocky + + * uncompyle6/scanners/scanner36.py: Revert change that should have + been in a branch + +2017-11-06 rocky + + * uncompyle6/scanners/scanner2.py, + uncompyle6/scanners/scanner26.py, uncompyle6/scanners/scanner3.py, + uncompyle6/scanners/scanner36.py: xdis _disassemble->disassemble + +2017-11-04 rocky + + * uncompyle6/semantics/fragments.py, + uncompyle6/semantics/make_function.py, + uncompyle6/semantics/pysource.py: Add flag to tolerate deparse + errors... and keep going. The fragment parser should ignore errors in nested + function definitions + +2017-11-04 rocky + + * uncompyle6/scanner.py, uncompyle6/semantics/fragments.py: Add + Python 3.6.3 scanner lookup + +2017-11-03 R. Bernstein + + * : Merge pull request #134 from mikemrm/master Corrected python3 import from queue + +2017-10-29 rocky + + * test/simple_source/bug36/10_extended_arg_loop.py, + uncompyle6/parsers/parse36.py, uncompyle6/scanners/scanner3.py: + Python 3.6 control flow bug... Much more is needed, but it's a start + +2017-10-29 rocky + + * uncompyle6/verify.py: In verify, JUMP_BACK is the same as + CONTINUE... at least for now. See FIXME in verify + +2017-10-29 rocky + + * uncompyle6/scanner.py, uncompyle6/scanners/scanner2.py, + uncompyle6/scanners/scanner3.py, uncompyle6/scanners/scanner30.py: + Python 3.6-inspired instruction size cleanup Revise and generalize for Python 3.6+ instructions vs < 3.6 + instuctions. Used more of the generalized methods in xdis and + remove some (but not all) of the magic numbers. This is a lot of changes, but not all of the refactoring needed. + Much crap still remains. Also, there are still bugs in handling 3.6 + bytecodes. + +2017-10-24 rocky + + * Makefile, __pkginfo__.py: Bump uncompyle. Pypy 5.8.0-beta + tolerance + +2017-10-13 rocky + + * test/Makefile, uncompyle6/semantics/consts.py: Tag more semantic + actions with nonterminals + +2017-10-13 rocky + + * uncompyle6/parser.py, uncompyle6/semantics/consts.py: More node + checking in tables + +2017-10-13 rocky + + * pytest/test_pysource.py, uncompyle6/parser.py, + uncompyle6/parsers/parse24.py, uncompyle6/semantics/consts.py, + uncompyle6/semantics/fragments.py, uncompyle6/semantics/pysource.py: + Start allowing node names in template engine These are now used to assert we have the right node type. Simplify import_from + +2017-10-13 rocky + + * HISTORY.md, uncompyle6/semantics/pysource.py: Small changes + 2017-10-12 rocky - * Makefile, admin-tools/how-to-make-a-release.txt, + * admin-tools/make-dist-newer.sh, admin-tools/make-dist-older.sh: + Administrivia - generalize shell code + +2017-10-12 rocky + + * admin-tools/how-to-make-a-release.md: Update install doc + +2017-10-12 rocky + + * admin-tools/how-to-make-a-release.md: Update instructions + +2017-10-12 rocky + + * admin-tools/make-dist-newer.sh, admin-tools/make-dist-older.sh: + Administrivia + +2017-10-12 rocky + + * admin-tools/how-to-make-a-release.md: Minor + +2017-10-12 rocky + + * : commit 491572ed2dd01bf655288638a121bf48c530b303 Author: rocky + Date: Thu Oct 12 06:48:49 2017 -0400 + +2017-10-12 rocky + + * ChangeLog, Makefile, NEWS, admin-tools/how-to-make-a-release.txt, uncompyle6/version.py: Get ready for release 2.13.2 2017-10-11 rocky diff --git a/NEWS b/NEWS index 24684bb2..3611097b 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,29 @@ +uncompyle6 2.13.3 2017-11-13 + +Overall: better 3.6 decompiling and some much needed code refactoring and cleanup + + +- Start noting names in for template-action names; these are + used to check/assert we have the right node type +- Simplify rule +- Pypy 5.80-beta testing tolerance +- Start to clean up instruction mangling phase by using 3.6-style instructions + rather trying to parse the bytecode array. This largely been done in for versions 3.x; + 3.0 custom mangling code has been reduced; + some 2.x conversion has been done, but more is desired. This make it possible to... +- Handle EXTENDED_ARGS better. While relevant to all Python versions it is most noticeable in + version 3.6+ where in switching to wordcodes the size of operands has been reduced from 2**16 + to 2**8. JUMP instruction then often need EXTENDED_ARGS. +- Refactor find_jump_targets() with via working of of instructions rather the bytecode array. +- use --weak-verify more and additional fuzzing on verify() +- fragment parser now ignores errors in nested function definitions; an parameter was + added to assist here. Ignoring errors may be okay because the fragment parser often just needs, + well, *fragments*. +- Distinguish RETURN_VALUE from RETURN_END_IF in exception bodies better in 3.6 +- bug in 3.x language changes: import queue va import Queue +- reinstate some bytecode tests since decompiling has gotten better +- Revise how to report a bug + uncompyle6 2.13.2 2017-10-12 - Re-release using a more automated approach diff --git a/uncompyle6/version.py b/uncompyle6/version.py index 580702e2..81b1ee4d 100644 --- a/uncompyle6/version.py +++ b/uncompyle6/version.py @@ -1,3 +1,3 @@ # This file is suitable for sourcing inside bash as # well as importing into Python -VERSION='2.13.2' +VERSION='2.13.3' From 35e4e034686065609b8b2e55cd37c9a391e16c00 Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 13 Nov 2017 09:53:10 -0500 Subject: [PATCH 41/41] Administrivia --- admin-tools/check-older-versions.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/admin-tools/check-older-versions.sh b/admin-tools/check-older-versions.sh index 0bfb93fb..bd27b65a 100755 --- a/admin-tools/check-older-versions.sh +++ b/admin-tools/check-older-versions.sh @@ -13,6 +13,8 @@ if ! source ./setup-python-2.4.sh ; then exit $? fi +PYVERSIONS='2.7.14 2.6.9 3.3.6 3.4.2 3.5.4 3.6.3' + cd .. for version in $PYVERSIONS; do if ! pyenv local $version ; then