From 7844456e1e947dd13c66439c01e5d127e8e4c387 Mon Sep 17 00:00:00 2001 From: rocky Date: Thu, 31 Aug 2017 10:12:09 -0400 Subject: [PATCH 01/22] Skeletal support for Python 3.7 Largely failing though. --- uncompyle6/scanner.py | 2 +- uncompyle6/scanners/scanner37.py | 38 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 uncompyle6/scanners/scanner37.py diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 698e94f2..88e5bdb9 100755 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -22,7 +22,7 @@ from xdis.magics import py_str2float # The byte code versions we support PYTHON_VERSIONS = (1.5, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, - 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6) + 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7) # FIXME: DRY if PYTHON3: diff --git a/uncompyle6/scanners/scanner37.py b/uncompyle6/scanners/scanner37.py new file mode 100644 index 00000000..84c5c96f --- /dev/null +++ b/uncompyle6/scanners/scanner37.py @@ -0,0 +1,38 @@ +# Copyright (c) 2016-2017 by Rocky Bernstein +""" +Python 3.7 bytecode decompiler scanner + +Does some additional massaging of xdis-disassembled instructions to +make things easier for decompilation. + +This sets up opcodes Python's 3.6 and calls a generalized +scanner routine for Python 3. +""" + +from __future__ import print_function + +from uncompyle6.scanners.scanner3 import Scanner3 + +# bytecode verification, verify(), uses JUMP_OPs from here +from xdis.opcodes import opcode_36 as opc +JUMP_OPs = map(lambda op: opc.opname[op], opc.hasjrel + opc.hasjabs) + +class Scanner37(Scanner3): + + def __init__(self, show_asm=None): + Scanner3.__init__(self, 3.7, show_asm) + return + pass + +if __name__ == "__main__": + from uncompyle6 import PYTHON_VERSION + if PYTHON_VERSION == 3.7: + import inspect + co = inspect.currentframe().f_code + tokens, customize = Scanner37().ingest(co) + for t in tokens: + print(t.format()) + pass + else: + print("Need to be Python 3.7 to demo; I am %s." % + PYTHON_VERSION) From 9b336251a7edd5cc03c95a6e2f462e27ff76b845 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 9 Sep 2017 07:47:21 -0400 Subject: [PATCH 02/22] New-style Python classes only, please. --- uncompyle6/scanners/tok.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/uncompyle6/scanners/tok.py b/uncompyle6/scanners/tok.py index 6fdd9057..8371d101 100644 --- a/uncompyle6/scanners/tok.py +++ b/uncompyle6/scanners/tok.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016 by Rocky Bernstein +# Copyright (c) 2016-2017 by Rocky Bernstein # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock @@ -8,7 +8,7 @@ from uncompyle6 import PYTHON3 if PYTHON3: intern = sys.intern -class Token: +class Token(): """ Class representing a byte-code instruction. From 7cdf0abb4383b9fc9665e2ab056d09f02127ee0a Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 9 Sep 2017 08:03:04 -0400 Subject: [PATCH 03/22] Revert last change --- uncompyle6/scanners/tok.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/uncompyle6/scanners/tok.py b/uncompyle6/scanners/tok.py index 8371d101..6fdd9057 100644 --- a/uncompyle6/scanners/tok.py +++ b/uncompyle6/scanners/tok.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2017 by Rocky Bernstein +# Copyright (c) 2016 by Rocky Bernstein # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock @@ -8,7 +8,7 @@ from uncompyle6 import PYTHON3 if PYTHON3: intern = sys.intern -class Token(): +class Token: """ Class representing a byte-code instruction. 
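The class Token: / class Token() toggle in the two commits above (and the revert-of-the-revert that follows) only has teeth on Python 2, where a class becomes new-style solely by inheriting from object; empty parentheses change nothing. A minimal sketch of the distinction, assuming it is run under Python 2 (under Python 3 every class is new-style regardless); the class names are illustrative, not from the patch:

    class Classic:             # Python 2: old-style ("classic") class
        pass

    class StillClassic():      # empty parentheses: still old-style on Python 2
        pass

    class NewStyle(object):    # inheriting from object is what makes it new-style
        pass

    print(type(Classic()))     # Python 2 prints: <type 'instance'>
    print(type(NewStyle()))    # Python 2 prints: <class '__main__.NewStyle'>
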
From 4e1467adc80f15433295c588513d00df553743d7 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 9 Sep 2017 08:08:40 -0400 Subject: [PATCH 04/22] Revert last revert --- uncompyle6/scanners/tok.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/uncompyle6/scanners/tok.py b/uncompyle6/scanners/tok.py index 6fdd9057..8371d101 100644 --- a/uncompyle6/scanners/tok.py +++ b/uncompyle6/scanners/tok.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016 by Rocky Bernstein +# Copyright (c) 2016-2017 by Rocky Bernstein # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock @@ -8,7 +8,7 @@ from uncompyle6 import PYTHON3 if PYTHON3: intern = sys.intern -class Token: +class Token(): """ Class representing a byte-code instruction. From 5bef5683e4bd2a346395f29fdb9dba3d9c4ed340 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 10 Sep 2017 00:48:54 -0400 Subject: [PATCH 05/22] Match Python 3.4's terms a little names better --- uncompyle6/scanners/scanner3.py | 4 ++-- uncompyle6/scanners/tok.py | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index cb30e8fc..5beedb0e 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -330,7 +330,7 @@ class Scanner3(Scanner): attr = (pos_args, name_pair_args, annotate_args) tokens.append( Token( - type_ = opname, + opname = opname, attr = attr, pattr = pattr, offset = inst.offset, @@ -408,7 +408,7 @@ class Scanner3(Scanner): last_op_was_break = opname == 'BREAK_LOOP' tokens.append( Token( - type_ = opname, + opname = opname, attr = argval, pattr = pattr, offset = inst.offset, diff --git a/uncompyle6/scanners/tok.py b/uncompyle6/scanners/tok.py index 8371d101..d49d8eb3 100644 --- a/uncompyle6/scanners/tok.py +++ b/uncompyle6/scanners/tok.py @@ -16,13 +16,12 @@ class Token(): the contents of one line as output by dis.dis(). """ # FIXME: match Python 3.4's terms: - # type_ should be opname # linestart = starts_line # attr = argval # pattr = argrepr - def __init__(self, type_, attr=None, pattr=None, offset=-1, + def __init__(self, opname, attr=None, pattr=None, offset=-1, linestart=None, op=None, has_arg=None, opc=None): - self.type = intern(type_) + self.type = intern(opname) self.op = op self.has_arg = has_arg self.attr = attr From f017acce21a8082ff02c1fec77def2fa145e23d3 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 10 Sep 2017 02:56:47 -0400 Subject: [PATCH 06/22] More semantic action cleanup --- uncompyle6/semantics/fragments.py | 11 ----------- uncompyle6/semantics/pysource.py | 7 +++---- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index 9e70a5df..7b8e326f 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -1514,17 +1514,6 @@ class FragmentsWalker(pysource.SourceWalker, object): start = len(self.f.getvalue()) self.preorder(node[entry[arg]]) finish = len(self.f.getvalue()) - - # FIXME rocky: figure out how to get this to be table driven - # for loops have two positions that correspond to a single text - # location. In "for i in ..." 
there is the initialization "i" code as well - # as the iteration code with "i" - match = re.search(r'^for', startnode.type) - if match and entry[arg] == 3: - self.set_pos_info(node[0], start, finish) - for n in node[2]: - self.set_pos_info(n, start, finish) - self.set_pos_info(node, start, finish) arg += 1 elif typ == 'p': diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 6780c8ba..e269edbc 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -1777,10 +1777,9 @@ class SourceWalker(GenericASTTraversal, object): node[0].attr == 1): self.write(',') elif typ == 'c': - if isinstance(entry[arg], int): - entry_node = node[entry[arg]] - self.preorder(entry_node) - arg += 1 + entry_node = node[entry[arg]] + self.preorder(entry_node) + arg += 1 elif typ == 'p': p = self.prec (index, self.prec) = entry[arg] From 51ad3fb36eee492b4a27ac7740757711329daac0 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 10 Sep 2017 03:01:19 -0400 Subject: [PATCH 07/22] Revert one of the changes pending a better fix --- uncompyle6/semantics/fragments.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index 7b8e326f..9e70a5df 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -1514,6 +1514,17 @@ class FragmentsWalker(pysource.SourceWalker, object): start = len(self.f.getvalue()) self.preorder(node[entry[arg]]) finish = len(self.f.getvalue()) + + # FIXME rocky: figure out how to get this to be table driven + # for loops have two positions that correspond to a single text + # location. In "for i in ..." there is the initialization "i" code as well + # as the iteration code with "i" + match = re.search(r'^for', startnode.type) + if match and entry[arg] == 3: + self.set_pos_info(node[0], start, finish) + for n in node[2]: + self.set_pos_info(n, start, finish) + self.set_pos_info(node, start, finish) arg += 1 elif typ == 'p': From 19d6dedcf57aa33d461094d6a73c93701af2f912 Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 13 Sep 2017 01:09:04 -0400 Subject: [PATCH 08/22] Need weak-verification on 3.4 for now --- test/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Makefile b/test/Makefile index 16aabd92..80433ae0 100644 --- a/test/Makefile +++ b/test/Makefile @@ -39,7 +39,7 @@ check-3.3: check-bytecode #: Run working tests from Python 3.4 check-3.4: check-bytecode check-3.4-ok check-2.7-ok - $(PYTHON) test_pythonlib.py --bytecode-3.4 --verify $(COMPILE) + $(PYTHON) test_pythonlib.py --bytecode-3.4 --weak-verify $(COMPILE) #: Run working tests from Python 3.5 check-3.5: check-bytecode From 3003070acbc4149ff4dad39cf2530ed8dfd6f24c Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 17 Sep 2017 11:56:51 -0400 Subject: [PATCH 09/22] engine -> template_engine --- uncompyle6/semantics/fragments.py | 8 ++++---- uncompyle6/semantics/pysource.py | 28 +++++++++++++++++----------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index 9e70a5df..5e209b88 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015, 2016 by Rocky Bernstein +# Copyright (c) 2015-2017 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock @@ -1458,10 +1458,10 @@ class FragmentsWalker(pysource.SourceWalker, object): 
self.prec = p self.prune() - def engine(self, entry, startnode): + def template_engine(self, entry, startnode): """The format template interpetation engine. See the comment at the - beginning of this module for the how we interpret format specifications such as - %c, %C, and so on. + beginning of this module for the how we interpret format + specifications such as %c, %C, and so on. """ # print("-----") diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index e269edbc..1ffca4b9 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -26,8 +26,8 @@ These uses a printf-like syntax to direct substitution from attributes of the nonterminal and its children.. The rest of the below describes how table-driven semantic actions work -and gives a list of the format specifiers. The default() and engine() -methods implement most of the below. +and gives a list of the format specifiers. The default() and +template_engine() methods implement most of the below. Step 1 determines a table (T) and a path to a table key (K) from the node type (N) (other nodes are shown as O): @@ -64,8 +64,10 @@ methods implement most of the below. * indicates an argument (A) required. - The '%' may optionally be followed by a number (C) in square brackets, which - makes the engine walk down to N[C] before evaluating the escape code. + The '%' may optionally be followed by a number (C) in square + brackets, which makes the template_engine walk down to N[C] before + evaluating the escape code. + """ from __future__ import print_function @@ -361,7 +363,8 @@ class SourceWalker(GenericASTTraversal, object): node.type == 'call_function' p = self.prec self.prec = 80 - self.engine(('%c(%P)', 0, (1, -4, ', ', 100)), node) + self.template_engine(('%c(%P)', 0, + (1, -4, ', ', 100)), node) self.prec = p self.prune() self.n_async_call_function = n_async_call_function @@ -402,9 +405,11 @@ class SourceWalker(GenericASTTraversal, object): is_code = hasattr(code_node, 'attr') and iscode(code_node.attr) if (is_code and (code_node.attr.co_flags & COMPILER_FLAG_BIT['COROUTINE'])): - self.engine(('\n\n%|async def %c\n', -2), node) + self.template_engine(('\n\n%|async def %c\n', + -2), node) else: - self.engine(('\n\n%|def %c\n', -2), node) + self.template_engine(('\n\n%|def %c\n', -2), + node) self.prune() self.n_funcdef = n_funcdef @@ -1740,11 +1745,12 @@ class SourceWalker(GenericASTTraversal, object): node[-2][0].type = 'unpack_w_parens' self.default(node) - def engine(self, entry, startnode): + def template_engine(self, entry, startnode): """The format template interpetation engine. See the comment at the - beginning of this module for the how we interpret format specifications such as - %c, %C, and so on. + beginning of this module for the how we interpret format + specifications such as %c, %C, and so on. """ + # self.println("----> ", startnode.type, ', ', entry[0]) fmt = entry[0] arg = 1 @@ -1846,7 +1852,7 @@ class SourceWalker(GenericASTTraversal, object): pass if key.type in table: - self.engine(table[key.type], node) + self.template_engine(table[key.type], node) self.prune() def customize(self, customize): From c7b9e54e59c4f29c6aa7b818ec79063efada3259 Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 20 Sep 2017 00:06:50 -0400 Subject: [PATCH 10/22] Update Table-driven info... Start a pysource unit test. 
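A table entry handed to template_engine() is a tuple whose first element is a format string and whose remaining elements are consumed, left to right, by the escapes in that string; the escape codes themselves are spelled out in the comment block this patch rewrites. Read that way, the entry used for async calls in the previous patch, ('%c(%P)', 0, (1, -4, ', ', 100)), works roughly as sketched below; the call-node layout is assumed for illustration, not taken from the patch:

    entry = ('%c(%P)', 0, (1, -4, ', ', 100))
    # %c with argument 0           -> expand child node[0] recursively (the callee)
    # literal '(' and ')'          -> written through unchanged
    # %P with (1, -4, ', ', 100)   -> expand children node[1:-4], joined by ', ',
    #                                 while setting operator precedence to 100

so a call node whose children are the callee followed by its argument expressions (plus trailing bookkeeping nodes) renders along the lines of "f(a, b)".
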
--- pytest/test_pysource.py | 20 ++++ uncompyle6/semantics/pysource.py | 157 ++++++++++++++++++++----------- 2 files changed, 120 insertions(+), 57 deletions(-) create mode 100644 pytest/test_pysource.py diff --git a/pytest/test_pysource.py b/pytest/test_pysource.py new file mode 100644 index 00000000..aacefdf3 --- /dev/null +++ b/pytest/test_pysource.py @@ -0,0 +1,20 @@ +from uncompyle6 import PYTHON3 +from uncompyle6.semantics.consts import ( + NONE, + # RETURN_NONE, PASS, RETURN_LOCALS +) + +if PYTHON3: + from io import StringIO +else: + from StringIO import StringIO + +from uncompyle6.semantics.pysource import SourceWalker as SourceWalker + +def test_template_engine(): + s = StringIO() + sw = SourceWalker(2.7, s, None) + sw.ast = NONE + sw.template_engine(('--%c--', 0), NONE) + print(sw.f.getvalue()) + assert sw.f.getvalue() == '--None--' diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 1ffca4b9..02770c90 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -11,64 +11,84 @@ and what they mean). Upper levels of the grammar is a more-or-less conventional grammar for Python. - -Semantic action rules for nonterminal symbols can be specified here by -creating a method prefaced with "n_" for that nonterminal. For -example, "n_exec_stmt" handles the semantic actions for the -"exec_smnt" nonterminal symbol. Similarly if a method with the name -of the nonterminal is suffixed with "_exit" it will be called after -all of its children are called. - -Another other way to specify a semantic rule for a nonterminal is via -rule given in one of the tables MAP_R0, MAP_R, or MAP_DIRECT. - -These uses a printf-like syntax to direct substitution from attributes -of the nonterminal and its children.. - -The rest of the below describes how table-driven semantic actions work -and gives a list of the format specifiers. The default() and -template_engine() methods implement most of the below. - - Step 1 determines a table (T) and a path to a - table key (K) from the node type (N) (other nodes are shown as O): - - N N N&K - / | ... \ / | ... \ / | ... \ - O O O O O K O O O - | - K - - MAP_R0 (TABLE_R0) MAP_R (TABLE_R) MAP_DIRECT (TABLE_DIRECT) - - The default is a direct mapping. The key K is then extracted from the - subtree and used to find a table entry T[K], if any. The result is a - format string and arguments (a la printf()) for the formatting engine. - Escapes in the format string are: - - %c evaluate children N[A] recursively* - %C evaluate children N[A[0]]..N[A[1]-1] recursively, separate by A[2]* - %P same as %C but sets operator precedence - %D same as %C but is for left-recursive lists like kwargs which - goes to epsilon at the beginning. Using %C an extra separator - with an epsilon appears at the beginning - %, print ',' if last %C only printed one item. This is mostly for tuples - on the LHS of an assignment statement since BUILD_TUPLE_n pretty-prints - other tuples. - %| tab to current indentation level - %+ increase current indentation level - %- decrease current indentation level - %{...} evaluate ... in context of N - %% literal '%' - %p evaluate N setting precedence - - - * indicates an argument (A) required. - - The '%' may optionally be followed by a number (C) in square - brackets, which makes the template_engine walk down to N[C] before - evaluating the escape code. - """ + +# The below is a bit long, but still it is somehwat abbreviated. 
+# See https://github.com/rocky/python-uncompyle6/wiki/Table-driven-semantic-actions. +# for a more complete explanation, nicely marked up and with examples. +# +# +# Semantic action rules for nonterminal symbols can be specified here by +# creating a method prefaced with "n_" for that nonterminal. For +# example, "n_exec_stmt" handles the semantic actions for the +# "exec_stmt" nonterminal symbol. Similarly if a method with the name +# of the nonterminal is suffixed with "_exit" it will be called after +# all of its children are called. +# +# However if this were done for all of the rules, this file would be even longer +# than it is already. +# +# Another more compact way to specify a semantic rule for a nonterminal is via +# rule given in one of the tables MAP_R0, MAP_R, or MAP_DIRECT. +# +# These uses a printf-like syntax to direct substitution from attributes +# of the nonterminal and its children.. +# +# The rest of the below describes how table-driven semantic actions work +# and gives a list of the format specifiers. The default() and +# template_engine() methods implement most of the below. +# +# Step 1 determines a table (T) and a path to a +# table key (K) from the node type (N) (other nodes are shown as O): +# +# N N N&K +# / | ... \ / | ... \ / | ... \ +# O O O O O K O O O +# | +# K +# +# MAP_R0 (TABLE_R0) MAP_R (TABLE_R) MAP_DIRECT (TABLE_DIRECT) +# +# The default is a direct mapping. The key K is then extracted from the +# subtree and used to find a table entry T[K], if any. The result is a +# format string and arguments (a la printf()) for the formatting engine. +# Escapes in the format string are: +# +# %c evaluate the node recursively. Its argument is a single +# integer representing a node index. +# %p like %c but sets the operator precedence. +# Its argument then is a tuple indicating the node +# index and the precidence value, an integer. +# +# %C evaluate children recursively, with sibling children separated by the +# given string. It needs a tuple of 3 items, a starting node, the maximimum +# value of an end node, and a string to be inserted between sibling children +# +# %, Append ',' if last %C only printed one item. This is mostly for tuples +# on the LHS of an assignment statement since BUILD_TUPLE_n pretty-prints +# other tuples. The specifier takes no arguments +# +# %P same as %C but sets operator precedence. +# +# %D Same as `%C` this is for left-recursive lists like kwargs where +# goes to epsilon at the beginning. If we were to use `%C` an extra separator +# with an epsilon would appear at the beginning. +# +# %| Insert spaces to the current indentation level. Takes no arguments. +# +# %+ increase current indentation level. Takes no arguments. +# +# %- decrease current indentation level. Takes no arguments. +# +# %{...} evaluate ... in context of N +# +# %% literal '%'. Takes no arguments. +# +# +# The '%' may optionally be followed by a number (C) in square +# brackets, which makes the template_engine walk down to N[C] before +# evaluating the escape code. + from __future__ import print_function import sys @@ -126,6 +146,29 @@ class SourceWalker(GenericASTTraversal, object): debug_parser=PARSER_DEFAULT_DEBUG, compile_mode='exec', is_pypy=False, linestarts={}): + """version is the Python version (a float) of the Python dialect + + of both the AST and language we should produce. + + out is IO-like file pointer to where the output should go. It + whould have a getvalue() method. + + scanner is a method to call when we need to scan tokens. 
Sometimes + in producing output we will run across further tokens that need + to be scaned. + + If showast is True, we print the AST tree. + + compile_mode is is either 'exec' or 'single'. It isthe compile + mode that was used to create the AST and specifies a gramar variant within + a Python version to use. + + is_pypy should be True if the AST was generated for PyPy. + + linestarts is a dictionary of line number to bytecode offset. This + can sometimes assist in determinte which kind of source-code construct + to use when there is ambiguity. + """ GenericASTTraversal.__init__(self, ast=None) self.scanner = scanner params = { From d7b12f4da14edff75c775a1be4461de912867005 Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 20 Sep 2017 02:49:14 -0400 Subject: [PATCH 11/22] More small doc changes --- uncompyle6/semantics/consts.py | 2 +- uncompyle6/semantics/pysource.py | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/uncompyle6/semantics/consts.py b/uncompyle6/semantics/consts.py index 9673906e..fd3ba6f2 100644 --- a/uncompyle6/semantics/consts.py +++ b/uncompyle6/semantics/consts.py @@ -276,7 +276,7 @@ MAP = { } # Operator precidence -# See https://docs.python.org/3/reference/expressions.html +# See https://docs.python.org/2/reference/expressions.html # or https://docs.python.org/3/reference/expressions.html # for a list. PRECEDENCE = { diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 02770c90..082b68eb 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -41,15 +41,14 @@ Python. # Step 1 determines a table (T) and a path to a # table key (K) from the node type (N) (other nodes are shown as O): # -# N N N&K -# / | ... \ / | ... \ / | ... \ -# O O O O O K O O O -# | -# K +# N&K N N +# / | ... \ / | ... \ / | ... \ +# O O O O O K O O O +# | +# K +# TABLE_DIRECT TABLE_R TABLE_R0 # -# MAP_R0 (TABLE_R0) MAP_R (TABLE_R) MAP_DIRECT (TABLE_DIRECT) -# -# The default is a direct mapping. The key K is then extracted from the +# The default is a "TABLE_DIRECT" mapping. The key K is then extracted from the # subtree and used to find a table entry T[K], if any. The result is a # format string and arguments (a la printf()) for the formatting engine. 
# Escapes in the format string are: From 147b6e1cfe5afeaae83dbf3df563e40395da3b88 Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 20 Sep 2017 11:27:01 -0400 Subject: [PATCH 12/22] Small fixes test_pyenvlib.py: it is sys.exit(), not exit() pysource.py: reinstate nod type of async_func_call --- test/test_pythonlib.py | 6 +++--- uncompyle6/semantics/pysource.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/test/test_pythonlib.py b/test/test_pythonlib.py index b541ca47..8e250664 100755 --- a/test/test_pythonlib.py +++ b/test/test_pythonlib.py @@ -169,13 +169,13 @@ def do_tests(src_dir, obj_patterns, target_dir, opts): main(src_dir, target_dir, files, [], do_verify=opts['do_verify']) if failed_files != 0: - exit(2) + sys.exit(2) elif failed_verify != 0: - exit(3) + sys.exit(3) except (KeyboardInterrupt, OSError): print() - exit(1) + sys.exit(1) if test_opts['rmtree']: parent_dir = os.path.dirname(target_dir) print("Everything good, removing %s" % parent_dir) diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 082b68eb..c31890ab 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -408,6 +408,7 @@ class SourceWalker(GenericASTTraversal, object): self.template_engine(('%c(%P)', 0, (1, -4, ', ', 100)), node) self.prec = p + node.type == 'async_call_function' self.prune() self.n_async_call_function = n_async_call_function self.n_build_list_unpack = self.n_build_list From 96ca68a6feebbdcae826c68e84dde0203e6813da Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 20 Sep 2017 17:47:56 -0400 Subject: [PATCH 13/22] Tidy/regularize table entry formatting --- uncompyle6/semantics/consts.py | 203 ++++++++++++++++----------------- 1 file changed, 101 insertions(+), 102 deletions(-) diff --git a/uncompyle6/semantics/consts.py b/uncompyle6/semantics/consts.py index fd3ba6f2..a2194595 100644 --- a/uncompyle6/semantics/consts.py +++ b/uncompyle6/semantics/consts.py @@ -1,5 +1,5 @@ # Copyright (c) 2017 by Rocky Bernstein -"""Constants used in pysource.py""" +"""Constants and initial table values used in pysource.py and fragments.py""" import re, sys from uncompyle6.parsers.astnode import AST @@ -57,9 +57,7 @@ INDENT_PER_LEVEL = ' ' # additional intent per pretty-print level TABLE_R = { 'STORE_ATTR': ( '%c.%[1]{pattr}', 0), -# 'STORE_SUBSCR': ( '%c[%c]', 0, 1 ), 'DELETE_ATTR': ( '%|del %c.%[-1]{pattr}\n', 0 ), -# 'EXEC_STMT': ( '%|exec %c in %[1]C\n', 0, (0,maxint,', ') ), } TABLE_R0 = { @@ -67,8 +65,9 @@ TABLE_R0 = { # 'BUILD_TUPLE': ( '(%C)', (0,-1,', ') ), # 'CALL_FUNCTION': ( '%c(%P)', 0, (1,-1,', ') ), } + TABLE_DIRECT = { - 'BINARY_ADD': ( '+' ,), + 'BINARY_ADD': ( '+' ,), 'BINARY_SUBTRACT': ( '-' ,), 'BINARY_MULTIPLY': ( '*' ,), 'BINARY_DIVIDE': ( '/' ,), @@ -76,13 +75,13 @@ TABLE_DIRECT = { 'BINARY_TRUE_DIVIDE': ( '/' ,), # Not in <= 2.1 'BINARY_FLOOR_DIVIDE': ( '//' ,), 'BINARY_MODULO': ( '%%',), - 'BINARY_POWER': ( '**',), + 'BINARY_POWER': ( '**',), 'BINARY_LSHIFT': ( '<<',), 'BINARY_RSHIFT': ( '>>',), - 'BINARY_AND': ( '&' ,), - 'BINARY_OR': ( '|' ,), - 'BINARY_XOR': ( '^' ,), - 'INPLACE_ADD': ( '+=' ,), + 'BINARY_AND': ( '&' ,), + 'BINARY_OR': ( '|' ,), + 'BINARY_XOR': ( '^' ,), + 'INPLACE_ADD': ( '+=' ,), 'INPLACE_SUBTRACT': ( '-=' ,), 'INPLACE_MULTIPLY': ( '*=' ,), 'INPLACE_MATRIX_MULTIPLY': ( '@=' ,), @@ -93,125 +92,125 @@ TABLE_DIRECT = { 'INPLACE_POWER': ( '**=',), 'INPLACE_LSHIFT': ( '<<=',), 'INPLACE_RSHIFT': ( '>>=',), - 'INPLACE_AND': ( '&=' ,), - 'INPLACE_OR': ( '|=' ,), - 'INPLACE_XOR': ( '^=' ,), - 
'binary_expr': ( '%c %c %c', 0, -1, 1 ), + 'INPLACE_AND': ( '&=' ,), + 'INPLACE_OR': ( '|=' ,), + 'INPLACE_XOR': ( '^=' ,), + 'binary_expr': ( '%c %c %c', 0, -1, 1 ), 'UNARY_POSITIVE': ( '+',), 'UNARY_NEGATIVE': ( '-',), - 'UNARY_INVERT': ( '~%c'), - 'unary_expr': ( '%c%c', 1, 0), + 'UNARY_INVERT': ( '~%c'), + 'unary_expr': ( '%c%c', 1, 0), - 'unary_not': ( 'not %c', 0 ), + 'unary_not': ( 'not %c', 0 ), 'unary_convert': ( '`%c`', 0 ), - 'get_iter': ( 'iter(%c)', 0 ), - 'slice0': ( '%c[:]', 0 ), - 'slice1': ( '%c[%p:]', 0, (1, 100) ), - 'slice2': ( '%c[:%p]', 0, (1, 100) ), - 'slice3': ( '%c[%p:%p]', 0, (1, 100), (2, 100) ), + 'get_iter': ( 'iter(%c)', 0 ), + 'slice0': ( '%c[:]', 0 ), + 'slice1': ( '%c[%p:]', 0, (1, 100) ), + 'slice2': ( '%c[:%p]', 0, (1, 100) ), + 'slice3': ( '%c[%p:%p]', 0, (1, 100), (2, 100) ), - 'IMPORT_FROM': ( '%{pattr}', ), - 'load_attr': ( '%c.%[1]{pattr}', 0), - 'LOAD_FAST': ( '%{pattr}', ), - 'LOAD_NAME': ( '%{pattr}', ), + 'IMPORT_FROM': ( '%{pattr}', ), + 'load_attr': ( '%c.%[1]{pattr}', 0), + 'LOAD_FAST': ( '%{pattr}', ), + 'LOAD_NAME': ( '%{pattr}', ), 'LOAD_CLASSNAME': ( '%{pattr}', ), - 'LOAD_GLOBAL': ( '%{pattr}', ), - 'LOAD_DEREF': ( '%{pattr}', ), - 'LOAD_LOCALS': ( 'locals()', ), - 'LOAD_ASSERT': ( '%{pattr}', ), + 'LOAD_GLOBAL': ( '%{pattr}', ), + 'LOAD_DEREF': ( '%{pattr}', ), + 'LOAD_LOCALS': ( 'locals()', ), + 'LOAD_ASSERT': ( '%{pattr}', ), # 'LOAD_CONST': ( '%{pattr}', ), # handled by n_LOAD_CONST - 'DELETE_FAST': ( '%|del %{pattr}\n', ), - 'DELETE_NAME': ( '%|del %{pattr}\n', ), + 'DELETE_FAST': ( '%|del %{pattr}\n', ), + 'DELETE_NAME': ( '%|del %{pattr}\n', ), 'DELETE_GLOBAL': ( '%|del %{pattr}\n', ), 'delete_subscr': ( '%|del %c[%c]\n', 0, 1,), 'binary_subscr': ( '%c[%p]', 0, (1, 100)), 'binary_subscr2': ( '%c[%p]', 0, (1, 100)), - 'store_subscr': ( '%c[%c]', 0, 1), - 'STORE_FAST': ( '%{pattr}', ), - 'STORE_NAME': ( '%{pattr}', ), - 'STORE_GLOBAL': ( '%{pattr}', ), - 'STORE_DEREF': ( '%{pattr}', ), - 'unpack': ( '%C%,', (1, maxint, ', ') ), + 'store_subscr': ( '%c[%c]', 0, 1), + 'STORE_FAST': ( '%{pattr}', ), + 'STORE_NAME': ( '%{pattr}', ), + 'STORE_GLOBAL': ( '%{pattr}', ), + 'STORE_DEREF': ( '%{pattr}', ), + 'unpack': ( '%C%,', (1, maxint, ', ') ), # This nonterminal we create on the fly in semantic routines 'unpack_w_parens': ( '(%C%,)', (1, maxint, ', ') ), - 'unpack_list': ( '[%C]', (1, maxint, ', ') ), - 'build_tuple2': ( '%P', (0, -1, ', ', 100) ), + 'unpack_list': ( '[%C]', (1, maxint, ', ') ), + 'build_tuple2': ( '%P', (0, -1, ', ', 100) ), # 'list_compr': ( '[ %c ]', -2), # handled by n_list_compr - 'list_iter': ( '%c', 0), - 'list_for': ( ' for %c in %c%c', 2, 0, 3 ), - 'list_if': ( ' if %c%c', 0, 2 ), + 'list_iter': ( '%c', 0 ), + 'list_for': ( ' for %c in %c%c', 2, 0, 3 ), + 'list_if': ( ' if %c%c', 0, 2 ), 'list_if_not': ( ' if not %p%c', (0, 22), 2 ), - 'lc_body': ( '', ), # ignore when recusing + 'lc_body': ( '', ), # ignore when recusing - 'comp_iter': ( '%c', 0), - 'comp_if': ( ' if %c%c', 0, 2 ), - 'comp_ifnot': ( ' if not %p%c', (0, 22), 2 ), - 'comp_body': ( '', ), # ignore when recusing + 'comp_iter': ( '%c', 0 ), + 'comp_if': ( ' if %c%c', 0, 2 ), + 'comp_ifnot': ( ' if not %p%c', (0, 22), 2 ), + 'comp_body': ( '', ), # ignore when recusing 'set_comp_body': ( '%c', 0 ), 'gen_comp_body': ( '%c', 0 ), 'dict_comp_body': ( '%c:%c', 1, 0 ), - 'assign': ( '%|%c = %p\n', -1, (0, 200) ), + 'assign': ( '%|%c = %p\n', -1, (0, 200) ), # The 2nd parameter should have a = suffix. 
# There is a rule with a 4th parameter "designator" # which we don't use here. - 'augassign1': ( '%|%c %c %c\n', 0, 2, 1), + 'augassign1': ( '%|%c %c %c\n', 0, 2, 1), - 'augassign2': ( '%|%c.%[2]{pattr} %c %c\n', 0, -3, -4), - 'designList': ( '%c = %c', 0, -1 ), + 'augassign2': ( '%|%c.%[2]{pattr} %c %c\n', 0, -3, -4 ), + 'designList': ( '%c = %c', 0, -1 ), 'and': ( '%c and %c', 0, 2 ), 'ret_and': ( '%c and %c', 0, 2 ), 'and2': ( '%c', 3 ), 'or': ( '%c or %c', 0, 2 ), - 'ret_or': ( '%c or %c', 0, 2 ), - 'conditional': ( '%p if %p else %p', (2, 27), (0, 27), (4, 27)), - 'conditionalTrue': ( '%p if 1 else %p', (0, 27), (2, 27)), - 'ret_cond': ( '%p if %p else %p', (2, 27), (0, 27), (-1, 27)), - 'conditionalnot': ( '%p if not %p else %p', (2, 27), (0, 22), (4, 27)), - 'ret_cond_not': ( '%p if not %p else %p', (2, 27), (0, 22), (-1, 27)), + 'ret_or': ( '%c or %c', 0, 2 ), + 'conditional': ( '%p if %p else %p', (2, 27), (0, 27), (4, 27) ), + 'conditionalTrue': ( '%p if 1 else %p', (0, 27), (2, 27) ), + 'ret_cond': ( '%p if %p else %p', (2, 27), (0, 27), (-1, 27) ), + 'conditionalnot': ( '%p if not %p else %p', (2, 27), (0, 22), (4, 27) ), + 'ret_cond_not': ( '%p if not %p else %p', (2, 27), (0, 22), (-1, 27) ), 'conditional_lambda': ( '(%c if %c else %c)', 2, 0, 3), 'return_lambda': ('%c', 0), - 'compare': ( '%p %[-1]{pattr.replace("-", " ")} %p', (0, 19), (1, 19) ), - 'cmp_list': ( '%p %p', (0, 29), (1, 30)), - 'cmp_list1': ( '%[3]{pattr} %p %p', (0, 19), (-2, 19)), - 'cmp_list2': ( '%[1]{pattr} %p', (0, 19)), + 'compare': ( '%p %[-1]{pattr.replace("-", " ")} %p', (0, 19), (1, 19) ), + 'cmp_list': ( '%p %p', (0, 29), (1, 30)), + 'cmp_list1': ( '%[3]{pattr} %p %p', (0, 19), (-2, 19)), + 'cmp_list2': ( '%[1]{pattr} %p', (0, 19)), # 'classdef': (), # handled by n_classdef() - 'funcdef': ( '\n\n%|def %c\n', -2), # -2 to handle closures + 'funcdef': ( '\n\n%|def %c\n', -2), # -2 to handle closures 'funcdefdeco': ( '\n\n%c', 0), - 'mkfuncdeco': ( '%|@%c\n%c', 0, 1), + 'mkfuncdeco': ( '%|@%c\n%c', 0, 1), 'mkfuncdeco0': ( '%|def %c\n', 0), 'classdefdeco': ( '\n\n%c', 0), 'classdefdeco1': ( '%|@%c\n%c', 0, 1), - 'kwarg': ( '%[0]{pattr}=%c', 1), - 'kwargs': ( '%D', (0, maxint, ', ') ), + 'kwarg': ( '%[0]{pattr}=%c', 1), + 'kwargs': ( '%D', (0, maxint, ', ') ), - 'assert_expr_or': ( '%c or %c', 0, 2 ), - 'assert_expr_and': ( '%c and %c', 0, 2 ), - 'print_items_stmt': ( '%|print %c%c,\n', 0, 2), # Python 2 only - 'print_items_nl_stmt': ( '%|print %c%c\n', 0, 2), - 'print_item': ( ', %c', 0), - 'print_nl': ( '%|print\n', ), - 'print_to': ( '%|print >> %c, %c,\n', 0, 1 ), - 'print_to_nl': ( '%|print >> %c, %c\n', 0, 1 ), - 'print_nl_to': ( '%|print >> %c\n', 0 ), + 'assert_expr_or': ( '%c or %c', 0, 2 ), + 'assert_expr_and': ( '%c and %c', 0, 2 ), + 'print_items_stmt': ( '%|print %c%c,\n', 0, 2 ), # Python 2 only + 'print_items_nl_stmt': ( '%|print %c%c\n', 0, 2 ), + 'print_item': ( ', %c', 0), + 'print_nl': ( '%|print\n', ), + 'print_to': ( '%|print >> %c, %c,\n', 0, 1 ), + 'print_to_nl': ( '%|print >> %c, %c\n', 0, 1 ), + 'print_nl_to': ( '%|print >> %c\n', 0 ), 'print_to_items': ( '%C', (0, 2, ', ') ), - 'call_stmt': ( '%|%p\n', (0, 200)), - 'break_stmt': ( '%|break\n', ), + 'call_stmt': ( '%|%p\n', (0, 200)), + 'break_stmt': ( '%|break\n', ), 'continue_stmt': ( '%|continue\n', ), - 'raise_stmt0': ( '%|raise\n', ), - 'raise_stmt1': ( '%|raise %c\n', 0), - 'raise_stmt3': ( '%|raise %c, %c, %c\n', 0, 1, 2), + 'raise_stmt0': ( '%|raise\n', ), + 'raise_stmt1': ( '%|raise %c\n', 0), + 'raise_stmt3': ( 
'%|raise %c, %c, %c\n', 0, 1, 2), # 'yield': ( 'yield %c', 0), # 'return_stmt': ( '%|return %c\n', 0), - 'ifstmt': ( '%|if %c:\n%+%c%-', 0, 1 ), + 'ifstmt': ( '%|if %c:\n%+%c%-', 0, 1 ), 'iflaststmt': ( '%|if %c:\n%+%c%-', 0, 1 ), 'iflaststmtl': ( '%|if %c:\n%+%c%-', 0, 1 ), 'testtrue': ( 'not %p', (0, 22) ), @@ -229,37 +228,37 @@ TABLE_DIRECT = { 'elifelsestmtr2': ( '%|elif %c:\n%+%c%-%|else:\n%+%c%-\n\n', 0, 1, 3 ), # has COME_FROM 'whileTruestmt': ( '%|while True:\n%+%c%-\n\n', 1 ), - 'whilestmt': ( '%|while %c:\n%+%c%-\n\n', 1, 2 ), - 'while1stmt': ( '%|while 1:\n%+%c%-\n\n', 1 ), - 'while1elsestmt': ( '%|while 1:\n%+%c%-%|else:\n%+%c%-\n\n', 1, -2 ), + 'whilestmt': ( '%|while %c:\n%+%c%-\n\n', 1, 2 ), + 'while1stmt': ( '%|while 1:\n%+%c%-\n\n', 1 ), + 'while1elsestmt': ( '%|while 1:\n%+%c%-%|else:\n%+%c%-\n\n', 1, -2 ), 'whileelsestmt': ( '%|while %c:\n%+%c%-%|else:\n%+%c%-\n\n', 1, 2, -2 ), 'whileelselaststmt': ( '%|while %c:\n%+%c%-%|else:\n%+%c%-', 1, 2, -2 ), - 'forstmt': ( '%|for %c in %c:\n%+%c%-\n\n', 3, 1, 4 ), - 'forelsestmt': ( - '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n', 3, 1, 4, -2), + 'forstmt': ( '%|for %c in %c:\n%+%c%-\n\n', 3, 1, 4 ), + 'forelsestmt': ( + '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n', 3, 1, 4, -2 ), 'forelselaststmt': ( - '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-', 3, 1, 4, -2), + '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-', 3, 1, 4, -2 ), 'forelselaststmtl': ( - '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n', 3, 1, 4, -2), - 'trystmt': ( '%|try:\n%+%c%-%c\n\n', 1, 3 ), - 'tryelsestmt': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-\n\n', 1, 3, 4 ), - 'tryelsestmtc': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-', 1, 3, 4 ), - 'tryelsestmtl': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-', 1, 3, 4 ), - 'tf_trystmt': ( '%c%-%c%+', 1, 3 ), + '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n', 3, 1, 4, -2 ), + 'trystmt': ( '%|try:\n%+%c%-%c\n\n', 1, 3 ), + 'tryelsestmt': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-\n\n', 1, 3, 4 ), + 'tryelsestmtc': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-', 1, 3, 4 ), + 'tryelsestmtl': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-', 1, 3, 4 ), + 'tf_trystmt': ( '%c%-%c%+', 1, 3 ), 'tf_tryelsestmt': ( '%c%-%c%|else:\n%+%c', 1, 3, 4 ), 'tryfinallystmt': ( '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', 1, 5 ), 'except': ( '%|except:\n%+%c%-', 3 ), - 'except_cond1': ( '%|except %c:\n', 1 ), + 'except_cond1': ( '%|except %c:\n', 1 ), 'except_suite': ( '%+%c%-%C', 0, (1, maxint, '') ), 'except_suite_finalize': ( '%+%c%-%C', 1, (3, maxint, '') ), - 'passstmt': ( '%|pass\n', ), - 'STORE_FAST': ( '%{pattr}', ), - 'kv': ( '%c: %c', 3, 1 ), - 'kv2': ( '%c: %c', 1, 2 ), - 'mapexpr': ( '{%[1]C}', (0, maxint, ', ') ), - 'importstmt': ( '%|import %c\n', 2), - 'importfrom': ( '%|from %[2]{pattr} import %c\n', 3 ), - 'importstar': ( '%|from %[2]{pattr} import *\n', ), + 'passstmt': ( '%|pass\n', ), + 'STORE_FAST': ( '%{pattr}', ), + 'kv': ( '%c: %c', 3, 1 ), + 'kv2': ( '%c: %c', 1, 2 ), + 'mapexpr': ( '{%[1]C}', (0, maxint, ', ') ), + 'importstmt': ( '%|import %c\n', 2), + 'importfrom': ( '%|from %[2]{pattr} import %c\n', 3 ), + 'importstar': ( '%|from %[2]{pattr} import *\n', ), } From da7421da1cf6096810163c9b03f46a729c0d38a5 Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 20 Sep 2017 19:02:56 -0400 Subject: [PATCH 14/22] Tidy pysource and fragments a little more --- uncompyle6/semantics/fragments.py | 42 ++++++++++++++-------------- uncompyle6/semantics/pysource.py | 46 +++++++++++++++---------------- 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/uncompyle6/semantics/fragments.py 
b/uncompyle6/semantics/fragments.py index 5e209b88..f8fdb9d5 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -8,8 +8,8 @@ Creates Python source code from an uncompyle6 abstract syntax tree, and indexes fragments which can be accessed by instruction offset address. -See the comments in pysource for information on the abstract sytax tree -and how semantic actions are written. +See https://github.com/rocky/python-uncompyle6/wiki/Table-driven-semantic-actions. +for a more complete explanation, nicely marked up and with examples. We add some format specifiers here not used in pysource @@ -421,10 +421,10 @@ class FragmentsWalker(pysource.SourceWalker, object): self.write(self.indent, 'if ') self.preorder(node[0]) self.println(':') - self.indentMore() + self.indent_more() node[1].parent = node self.preorder(node[1]) - self.indentLess() + self.indent_less() if_ret_at_end = False if len(node[2][0]) >= 3: @@ -443,17 +443,17 @@ class FragmentsWalker(pysource.SourceWalker, object): prev_stmt_is_if_ret = False if not past_else and not if_ret_at_end: self.println(self.indent, 'else:') - self.indentMore() + self.indent_more() past_else = True n.parent = node self.preorder(n) if not past_else or if_ret_at_end: self.println(self.indent, 'else:') - self.indentMore() + self.indent_more() node[2][1].parent = node self.preorder(node[2][1]) self.set_pos_info(node, start, len(self.f.getvalue())) - self.indentLess() + self.indent_less() self.prune() def n_elifelsestmtr(self, node): @@ -470,20 +470,20 @@ class FragmentsWalker(pysource.SourceWalker, object): node[0].parent = node self.preorder(node[0]) self.println(':') - self.indentMore() + self.indent_more() node[1].parent = node self.preorder(node[1]) - self.indentLess() + self.indent_less() for n in node[2][0]: n[0].type = 'elifstmt' n.parent = node self.preorder(n) self.println(self.indent, 'else:') - self.indentMore() + self.indent_more() node[2][1].parent = node self.preorder(node[2][1]) - self.indentLess() + self.indent_less() self.set_pos_info(node, start, len(self.f.getvalue())) self.prune() @@ -527,7 +527,7 @@ class FragmentsWalker(pysource.SourceWalker, object): self.write(func_name) self.set_pos_info(code_node, start, len(self.f.getvalue())) - self.indentMore() + self.indent_more() start = len(self.f.getvalue()) self.make_function(node, isLambda=False, codeNode=code_node) @@ -537,7 +537,7 @@ class FragmentsWalker(pysource.SourceWalker, object): self.write('\n\n') else: self.write('\n\n\n') - self.indentLess() + self.indent_less() self.prune() # stop recursing def n_list_compr(self, node): @@ -977,9 +977,9 @@ class FragmentsWalker(pysource.SourceWalker, object): self.println(':') # class body - self.indentMore() + self.indent_more() self.build_class(subclass) - self.indentLess() + self.indent_less() self.currentclass = cclass self.set_pos_info(node, start, len(self.f.getvalue())) @@ -1316,7 +1316,7 @@ class FragmentsWalker(pysource.SourceWalker, object): p = self.prec self.prec = 100 - self.indentMore(INDENT_PER_LEVEL) + self.indent_more(INDENT_PER_LEVEL) line_seperator = ',\n' + self.indent sep = INDENT_PER_LEVEL[:-1] start = len(self.f.getvalue()) @@ -1393,7 +1393,7 @@ class FragmentsWalker(pysource.SourceWalker, object): n.parent = node self.set_pos_info(n, start, finish) self.set_pos_info(node, start, finish) - self.indentLess(INDENT_PER_LEVEL) + self.indent_less(INDENT_PER_LEVEL) self.prec = p self.prune() @@ -1429,7 +1429,7 @@ class FragmentsWalker(pysource.SourceWalker, object): else: 
flat_elems.append(elem) - self.indentMore(INDENT_PER_LEVEL) + self.indent_more(INDENT_PER_LEVEL) if len(node) > 3: line_separator = ',\n' + self.indent else: @@ -1454,7 +1454,7 @@ class FragmentsWalker(pysource.SourceWalker, object): n.parent = node.parent self.set_pos_info(n, start, finish) self.set_pos_info(node, start, finish) - self.indentLess(INDENT_PER_LEVEL) + self.indent_less(INDENT_PER_LEVEL) self.prec = p self.prune() @@ -1498,8 +1498,8 @@ class FragmentsWalker(pysource.SourceWalker, object): self.write('%') self.set_pos_info(node, start, len(self.f.getvalue())) - elif typ == '+': self.indentMore() - elif typ == '-': self.indentLess() + elif typ == '+': self.indent_more() + elif typ == '-': self.indent_less() elif typ == '|': self.write(self.indent) # no longer used, since BUILD_TUPLE_n is pretty printed: elif typ == 'r': recurse_node = True diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index c31890ab..0c38be13 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -350,7 +350,7 @@ class SourceWalker(GenericASTTraversal, object): # MAKE_FUNCTION .. code = node[-3] - self.indentMore() + self.indent_more() for annotate_last in range(len(node)-1, -1, -1): if node[annotate_last] == 'annotate_tuple': break @@ -370,7 +370,7 @@ class SourceWalker(GenericASTTraversal, object): self.write('\n\n') else: self.write('\n\n\n') - self.indentLess() + self.indent_less() self.prune() # stop recursing self.n_mkfunc_annotate = n_mkfunc_annotate @@ -550,10 +550,10 @@ class SourceWalker(GenericASTTraversal, object): super(SourceWalker, self).preorder(node) self.set_pos_info(node) - def indentMore(self, indent=TAB): + def indent_more(self, indent=TAB): self.indent += indent - def indentLess(self, indent=TAB): + def indent_less(self, indent=TAB): self.indent = self.indent[:-len(indent)] def traverse(self, node, indent=None, isLambda=False): @@ -871,9 +871,9 @@ class SourceWalker(GenericASTTraversal, object): self.write(self.indent, 'if ') self.preorder(node[0]) self.println(':') - self.indentMore() + self.indent_more() self.preorder(node[1]) - self.indentLess() + self.indent_less() if_ret_at_end = False if len(return_stmts_node[0]) >= 3: @@ -892,14 +892,14 @@ class SourceWalker(GenericASTTraversal, object): prev_stmt_is_if_ret = False if not past_else and not if_ret_at_end: self.println(self.indent, 'else:') - self.indentMore() + self.indent_more() past_else = True self.preorder(n) if not past_else or if_ret_at_end: self.println(self.indent, 'else:') - self.indentMore() + self.indent_more() self.preorder(return_stmts_node[1]) - self.indentLess() + self.indent_less() self.prune() n_ifelsestmtr2 = n_ifelsestmtr @@ -921,17 +921,17 @@ class SourceWalker(GenericASTTraversal, object): self.write(self.indent, 'elif ') self.preorder(node[0]) self.println(':') - self.indentMore() + self.indent_more() self.preorder(node[1]) - self.indentLess() + self.indent_less() for n in return_stmts_node[0]: n[0].type = 'elifstmt' self.preorder(n) self.println(self.indent, 'else:') - self.indentMore() + self.indent_more() self.preorder(return_stmts_node[1]) - self.indentLess() + self.indent_less() self.prune() def n_import_as(self, node): @@ -972,14 +972,14 @@ class SourceWalker(GenericASTTraversal, object): func_name = code_node.attr.co_name self.write(func_name) - self.indentMore() + self.indent_more() self.make_function(node, isLambda=False, codeNode=code_node) if len(self.param_stack) > 1: self.write('\n\n') else: self.write('\n\n\n') - self.indentLess() 
+ self.indent_less() self.prune() # stop recursing def make_function(self, node, isLambda, nested=1, @@ -1450,9 +1450,9 @@ class SourceWalker(GenericASTTraversal, object): self.println(':') # class body - self.indentMore() + self.indent_more() self.build_class(subclass_code) - self.indentLess() + self.indent_less() self.currentclass = cclass if len(self.param_stack) > 1: @@ -1523,7 +1523,7 @@ class SourceWalker(GenericASTTraversal, object): p = self.prec self.prec = 100 - self.indentMore(INDENT_PER_LEVEL) + self.indent_more(INDENT_PER_LEVEL) sep = INDENT_PER_LEVEL[:-1] self.write('{') line_number = self.line_number @@ -1661,7 +1661,7 @@ class SourceWalker(GenericASTTraversal, object): if sep.startswith(",\n"): self.write(sep[1:]) self.write('}') - self.indentLess(INDENT_PER_LEVEL) + self.indent_less(INDENT_PER_LEVEL) self.prec = p self.prune() @@ -1712,7 +1712,7 @@ class SourceWalker(GenericASTTraversal, object): else: flat_elems.append(elem) - self.indentMore(INDENT_PER_LEVEL) + self.indent_more(INDENT_PER_LEVEL) sep = '' for elem in flat_elems: @@ -1737,7 +1737,7 @@ class SourceWalker(GenericASTTraversal, object): if lastnode.attr == 1 and lastnodetype.startswith('BUILD_TUPLE'): self.write(',') self.write(endchar) - self.indentLess(INDENT_PER_LEVEL) + self.indent_less(INDENT_PER_LEVEL) self.prec = p self.prune() @@ -1812,10 +1812,10 @@ class SourceWalker(GenericASTTraversal, object): if typ == '%': self.write('%') elif typ == '+': self.line_number += 1 - self.indentMore() + self.indent_more() elif typ == '-': self.line_number += 1 - self.indentLess() + self.indent_less() elif typ == '|': self.line_number += 1 self.write(self.indent) From aadea7224d9f5150a335a0c3d717f59625c49f69 Mon Sep 17 00:00:00 2001 From: rocky Date: Thu, 21 Sep 2017 11:25:51 -0400 Subject: [PATCH 15/22] Unit test for format-specifiers And in the process we catch some small bugs --- pytest/test_pysource.py | 142 +++++++++++++++++++++++++++++- uncompyle6/semantics/consts.py | 2 +- uncompyle6/semantics/fragments.py | 5 +- uncompyle6/semantics/pysource.py | 16 ++-- 4 files changed, 155 insertions(+), 10 deletions(-) diff --git a/pytest/test_pysource.py b/pytest/test_pysource.py index aacefdf3..d7f4e776 100644 --- a/pytest/test_pysource.py +++ b/pytest/test_pysource.py @@ -1,6 +1,6 @@ from uncompyle6 import PYTHON3 from uncompyle6.semantics.consts import ( - NONE, + escape, NONE, # RETURN_NONE, PASS, RETURN_LOCALS ) @@ -18,3 +18,143 @@ def test_template_engine(): sw.template_engine(('--%c--', 0), NONE) print(sw.f.getvalue()) assert sw.f.getvalue() == '--None--' + # FIXME: and so on... + +from uncompyle6.semantics.consts import ( + TABLE_R, TABLE_DIRECT, + ) + +from uncompyle6.semantics.fragments import ( + TABLE_DIRECT_FRAGMENT, + ) + +def test_tables(): + for t, name, fragment in ( + (TABLE_DIRECT, 'TABLE_DIRECT', False), + (TABLE_R, 'TABLE_R', False), + (TABLE_DIRECT_FRAGMENT, 'TABLE_DIRECT_FRAGMENT', True)): + for k, entry in t.iteritems(): + fmt = entry[0] + arg = 1 + i = 0 + m = escape.search(fmt) + print("%s[%s]" % (name, k)) + while m: + i = m.end() + typ = m.group('type') or '{' + if typ in frozenset(['%', '+', '-', '|', ',', '{']): + # No args + pass + elif typ in frozenset(['c', 'p', 'P', 'C', 'D']): + # One arg - should be int or tuple of int + if typ == 'c': + assert isinstance(entry[arg], int), ( + "%s[%s][%d] type %s is '%s' should be an int but is %s. 
" + "Full entry: %s" % + (name, k, arg, typ, entry[arg], type(entry[arg]), entry) + ) + elif typ in frozenset(['C', 'D']): + tup = entry[arg] + assert isinstance(tup, tuple), ( + "%s[%s][%d] type %s is %s should be an tuple but is %s. " + "Full entry: %s" % + (name, k, arg, typ, entry[arg], type(entry[arg]), entry) + ) + assert len(tup) == 3 + for j, x in enumerate(tup[:-1]): + assert isinstance(x, int), ( + "%s[%s][%d][%d] type %s is %s should be an tuple but is %s. " + "Full entry: %s" % + (name, k, arg, j, typ, x, type(x), entry) + ) + assert isinstance(tup[-1], str) or tup[-1] is None, ( + "%s[%s][%d][%d] sep type %s is %s should be an string but is %s. " + "Full entry: %s" % + (name, k, arg, j, typ, tup[-1], type(x), entry) + ) + + elif typ == 'P': + tup = entry[arg] + assert isinstance(tup, tuple), ( + "%s[%s][%d] type %s is %s should be an tuple but is %s. " + "Full entry: %s" % + (name, k, arg, typ, entry[arg], type(entry[arg]), entry) + ) + assert len(tup) == 4 + for j, x in enumerate(tup[:-2]): + assert isinstance(x, int), ( + "%s[%s][%d][%d] type %s is '%s' should be an tuple but is %s. " + "Full entry: %s" % + (name, k, arg, j, typ, x, type(x), entry) + ) + assert isinstance(tup[-2], str), ( + "%s[%s][%d][%d] sep type %s is '%s' should be an string but is %s. " + "Full entry: %s" % + (name, k, arg, j, typ, x, type(x), entry) + ) + assert isinstance(tup[1], int), ( + "%s[%s][%d][%d] prec type %s is '%s' should be an int but is %s. " + "Full entry: %s" % + (name, k, arg, j, typ, x, type(x), entry) + ) + + else: + # Should be a tuple which contains only ints + tup = entry[arg] + assert isinstance(tup, tuple), ( + "%s[%s][%d] type %s is '%s' should be an tuple but is %s. " + "Full entry: %s" % + (name, k, arg, typ, entry[arg], type(entry[arg]), entry) + ) + assert len(tup) == 2 + for j, x in enumerate(tup): + assert isinstance(x, int), ( + "%s[%s][%d][%d] type '%s' is '%s should be an int but is %s. Full entry: %s" % + (name, k, arg, j, typ, x, type(x), entry) + ) + pass + arg += 1 + elif typ in frozenset(['r']) and fragment: + pass + elif typ == 'b' and fragment: + assert isinstance(entry[arg], int), ( + "%s[%s][%d] type %s is '%s' should be an int but is %s. " + "Full entry: %s" % + (name, k, arg, typ, entry[arg], type(entry[arg]), entry) + ) + arg += 1 + elif typ == 'x' and fragment: + tup = entry[arg] + assert isinstance(tup, tuple), ( + "%s[%s][%d] type %s is '%s' should be an tuple but is %s. " + "Full entry: %s" % + (name, k, arg, typ, entry[arg], type(entry[arg]), entry) + ) + assert len(tup) == 2 + assert isinstance(tup[0], int), ( + "%s[%s][%d] source type %s is '%s' should be an int but is %s. " + "Full entry: %s" % + (name, k, arg, typ, entry[arg], type(entry[arg]), entry) + ) + assert isinstance(tup[1], tuple), ( + "%s[%s][%d] dest type %s is '%s' should be an tuple but is %s. " + "Full entry: %s" % + (name, k, arg, typ, entry[arg], type(entry[arg]), entry) + ) + for j, x in enumerate(tup[1]): + assert isinstance(x, int), ( + "%s[%s][%d][%d] type %s is %s should be an int but is %s. Full entry: %s" % + (name, k, arg, j, typ, x, type(x), entry) + ) + arg += 1 + pass + else: + assert False, ( + "%s[%s][%d] type %s is not known. Full entry: %s" % + (name, k, arg, typ, entry) + ) + m = escape.search(fmt, i) + pass + assert arg == len(entry), ( + "%s[%s] arg %d should be length of entry %d. 
Full entry: %s" % + (name, k, arg, len(entry), entry)) diff --git a/uncompyle6/semantics/consts.py b/uncompyle6/semantics/consts.py index a2194595..b5d49644 100644 --- a/uncompyle6/semantics/consts.py +++ b/uncompyle6/semantics/consts.py @@ -99,7 +99,7 @@ TABLE_DIRECT = { 'UNARY_POSITIVE': ( '+',), 'UNARY_NEGATIVE': ( '-',), - 'UNARY_INVERT': ( '~%c'), + 'UNARY_INVERT': ( '~'), 'unary_expr': ( '%c%c', 1, 0), 'unary_not': ( 'not %c', 0 ), diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index f8fdb9d5..35768a17 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -40,7 +40,8 @@ do it recursively which is where offsets are probably located. 2. %b ----- - %b associates the text from the previous start node up to what we have now + %b associates the text from the specified index to what we have now. + it takes an integer argument. For example in: 'importmultiple': ( '%|import%b %c%c\n', 0, 2, 3 ), @@ -95,7 +96,7 @@ TABLE_DIRECT_FRAGMENT = { 'list_for': (' for %c%x in %c%c', 2, (2, (1, )), 0, 3 ), 'forstmt': ( '%|for%b %c%x in %c:\n%+%c%-\n\n', 0, 3, (3, (2, )), 1, 4 ), 'forelsestmt': ( - '%|for %c in %c%x:\n%+%c%-%|else:\n%+%c%-\n\n', 3, (3, (2,)), 1, 4, -2), + '%|for %c%x in %c:\n%+%c%-%|else:\n%+%c%-\n\n', 3, (3, (2,)), 1, 4, -2), 'forelselaststmt': ( '%|for %c%x in %c:\n%+%c%-%|else:\n%+%c%-', 3, (3, (2,)), 1, 4, -2), 'forelselaststmtl': ( diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 0c38be13..0d7914e3 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -60,18 +60,22 @@ Python. # index and the precidence value, an integer. # # %C evaluate children recursively, with sibling children separated by the -# given string. It needs a tuple of 3 items, a starting node, the maximimum +# given string. It needs a 3-tuple: a starting node, the maximimum # value of an end node, and a string to be inserted between sibling children # # %, Append ',' if last %C only printed one item. This is mostly for tuples # on the LHS of an assignment statement since BUILD_TUPLE_n pretty-prints # other tuples. The specifier takes no arguments # -# %P same as %C but sets operator precedence. +# %P same as %C but sets operator precedence. Its argument is a 4-tuple: +# the node low and high indices, the separator, a string the precidence +# value, an integer. # -# %D Same as `%C` this is for left-recursive lists like kwargs where -# goes to epsilon at the beginning. If we were to use `%C` an extra separator -# with an epsilon would appear at the beginning. +# %D Same as `%C` this is for left-recursive lists like kwargs where goes +# to epsilon at the beginning. It needs a 3-tuple: a starting node, the +# maximimum value of an end node, and a string to be inserted between +# sibling children. If we were to use `%C` an extra separator with an +# epsilon would appear at the beginning. # # %| Insert spaces to the current indentation level. Takes no arguments. # @@ -1919,7 +1923,7 @@ class SourceWalker(GenericASTTraversal, object): 'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'): if v == 0: str = '%c(%C' # '%C' is a dummy here ... - p2 = (0, 0, None) # .. because of this + p2 = (0, 0, None) # .. 
because of the None in this else: str = '%c(%C, ' p2 = (1, -2, ', ') From 8b67f2ccd05e12a2d0ac9ea158a396e8bb748126 Mon Sep 17 00:00:00 2001 From: rocky Date: Thu, 21 Sep 2017 11:47:42 -0400 Subject: [PATCH 16/22] Python 3 compatibility --- pytest/test_pysource.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pytest/test_pysource.py b/pytest/test_pysource.py index d7f4e776..f9834fa6 100644 --- a/pytest/test_pysource.py +++ b/pytest/test_pysource.py @@ -6,8 +6,12 @@ from uncompyle6.semantics.consts import ( if PYTHON3: from io import StringIO + def iteritems(d): + return d.items() else: from StringIO import StringIO + def iteritems(d): + return d.iteritems() from uncompyle6.semantics.pysource import SourceWalker as SourceWalker @@ -21,19 +25,23 @@ def test_template_engine(): # FIXME: and so on... from uncompyle6.semantics.consts import ( - TABLE_R, TABLE_DIRECT, + TABLE_DIRECT, TABLE_R, ) from uncompyle6.semantics.fragments import ( TABLE_DIRECT_FRAGMENT, ) +skip_for_now = "DELETE_DEREF".split() + def test_tables(): for t, name, fragment in ( (TABLE_DIRECT, 'TABLE_DIRECT', False), (TABLE_R, 'TABLE_R', False), (TABLE_DIRECT_FRAGMENT, 'TABLE_DIRECT_FRAGMENT', True)): - for k, entry in t.iteritems(): + for k, entry in iteritems(t): + if k in skip_for_now: + continue fmt = entry[0] arg = 1 i = 0 From dfbd60231bf6e2d208a399f04a3a494a74199c12 Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 20 Sep 2017 19:02:56 -0400 Subject: [PATCH 17/22] Get ready for release 2.12.0 --- ChangeLog | 75 ++++++++++++++++++++++++++++++- NEWS | 11 +++++ __pkginfo__.py | 2 +- uncompyle6/semantics/fragments.py | 42 ++++++++--------- uncompyle6/semantics/pysource.py | 46 +++++++++---------- uncompyle6/version.py | 2 +- 6 files changed, 131 insertions(+), 47 deletions(-) diff --git a/ChangeLog b/ChangeLog index 62c6c8c2..3c10484a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,79 @@ +2017-09-20 rocky + + * uncompyle6/semantics/fragments.py, + uncompyle6/semantics/pysource.py: Tidy pysource and fragments a + little more + +2017-09-20 rocky + + * uncompyle6/semantics/consts.py: Tidy/regularize table entry + formatting + +2017-09-20 rocky + + * test/test_pythonlib.py, uncompyle6/semantics/pysource.py: Small + fixes test_pyenvlib.py: it is sys.exit(), not exit() pysource.py: + reinstate nod type of async_func_call + +2017-09-20 rocky + + * uncompyle6/semantics/consts.py, uncompyle6/semantics/pysource.py: + More small doc changes + +2017-09-20 rocky + + * pytest/test_pysource.py, uncompyle6/semantics/pysource.py: Update + Table-driven info... Start a pysource unit test. + +2017-09-17 rocky + + * uncompyle6/semantics/fragments.py, + uncompyle6/semantics/pysource.py: engine -> template_engine + +2017-09-13 rocky + + * test/Makefile: Need weak-verification on 3.4 for now + +2017-09-10 rocky + + * uncompyle6/semantics/fragments.py: Revert one of the changes + pending a better fix + +2017-09-10 rocky + + * uncompyle6/semantics/fragments.py, + uncompyle6/semantics/pysource.py: More semantic action cleanup + +2017-09-10 rocky + + * uncompyle6/scanners/scanner3.py, uncompyle6/scanners/tok.py: Match + Python 3.4's terms a little names better + +2017-09-09 rocky + + * uncompyle6/scanners/tok.py: Revert last revert + +2017-09-09 rocky + + * uncompyle6/scanners/tok.py: Revert last change + +2017-09-09 rocky + + * uncompyle6/scanners/tok.py: New-style Python classes only, please. 
+ 2017-08-31 rocky - * NEWS, README.rst, uncompyle6/parsers/parse37.py, + * uncompyle6/scanner.py, uncompyle6/scanners/scanner37.py: Skeletal + support for Python 3.7 Largely failing though. + +2017-08-31 rocky + + * README.rst: Remove python versions tag I think it's messing up Pypi's very fussy formatting + +2017-08-31 rocky + + * ChangeLog, NEWS, README.rst, __pkginfo__.py, + uncompyle6/parsers/parse37.py, uncompyle6/semantics/make_function.py, uncompyle6/version.py: Get ready for release 2.11.5 diff --git a/NEWS b/NEWS index d81228c6..25859e85 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,14 @@ +uncompyle6 2.12.0 2017-09-25 + +- Use xdis 3.6.0 or greater now +- Small semantic table cleanups +- Python 3.4's terms a little names better +- Slightly more Python 3.7, but still failing a lot + +uncompyle6 2.11.5 2017-08-31 + +- Skeletal support for Python 3.7 + uncompyle6 2.11.4 2017-08-15 * scanner and parser now allow 3-part version string lookups, diff --git a/__pkginfo__.py b/__pkginfo__.py index a6ea6a67..c46b4023 100644 --- a/__pkginfo__.py +++ b/__pkginfo__.py @@ -40,7 +40,7 @@ entry_points = { ]} ftp_url = None install_requires = ['spark-parser >= 1.6.1, < 1.7.0', - 'xdis >= 3.5.5, < 3.6.0', 'six'] + 'xdis >= 3.6.0, < 3.7.0', 'six'] license = 'MIT' mailing_list = 'python-debugger@googlegroups.com' modname = 'uncompyle6' diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index 5e209b88..f8fdb9d5 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -8,8 +8,8 @@ Creates Python source code from an uncompyle6 abstract syntax tree, and indexes fragments which can be accessed by instruction offset address. -See the comments in pysource for information on the abstract sytax tree -and how semantic actions are written. +See https://github.com/rocky/python-uncompyle6/wiki/Table-driven-semantic-actions. +for a more complete explanation, nicely marked up and with examples. 
We add some format specifiers here not used in pysource @@ -421,10 +421,10 @@ class FragmentsWalker(pysource.SourceWalker, object): self.write(self.indent, 'if ') self.preorder(node[0]) self.println(':') - self.indentMore() + self.indent_more() node[1].parent = node self.preorder(node[1]) - self.indentLess() + self.indent_less() if_ret_at_end = False if len(node[2][0]) >= 3: @@ -443,17 +443,17 @@ class FragmentsWalker(pysource.SourceWalker, object): prev_stmt_is_if_ret = False if not past_else and not if_ret_at_end: self.println(self.indent, 'else:') - self.indentMore() + self.indent_more() past_else = True n.parent = node self.preorder(n) if not past_else or if_ret_at_end: self.println(self.indent, 'else:') - self.indentMore() + self.indent_more() node[2][1].parent = node self.preorder(node[2][1]) self.set_pos_info(node, start, len(self.f.getvalue())) - self.indentLess() + self.indent_less() self.prune() def n_elifelsestmtr(self, node): @@ -470,20 +470,20 @@ class FragmentsWalker(pysource.SourceWalker, object): node[0].parent = node self.preorder(node[0]) self.println(':') - self.indentMore() + self.indent_more() node[1].parent = node self.preorder(node[1]) - self.indentLess() + self.indent_less() for n in node[2][0]: n[0].type = 'elifstmt' n.parent = node self.preorder(n) self.println(self.indent, 'else:') - self.indentMore() + self.indent_more() node[2][1].parent = node self.preorder(node[2][1]) - self.indentLess() + self.indent_less() self.set_pos_info(node, start, len(self.f.getvalue())) self.prune() @@ -527,7 +527,7 @@ class FragmentsWalker(pysource.SourceWalker, object): self.write(func_name) self.set_pos_info(code_node, start, len(self.f.getvalue())) - self.indentMore() + self.indent_more() start = len(self.f.getvalue()) self.make_function(node, isLambda=False, codeNode=code_node) @@ -537,7 +537,7 @@ class FragmentsWalker(pysource.SourceWalker, object): self.write('\n\n') else: self.write('\n\n\n') - self.indentLess() + self.indent_less() self.prune() # stop recursing def n_list_compr(self, node): @@ -977,9 +977,9 @@ class FragmentsWalker(pysource.SourceWalker, object): self.println(':') # class body - self.indentMore() + self.indent_more() self.build_class(subclass) - self.indentLess() + self.indent_less() self.currentclass = cclass self.set_pos_info(node, start, len(self.f.getvalue())) @@ -1316,7 +1316,7 @@ class FragmentsWalker(pysource.SourceWalker, object): p = self.prec self.prec = 100 - self.indentMore(INDENT_PER_LEVEL) + self.indent_more(INDENT_PER_LEVEL) line_seperator = ',\n' + self.indent sep = INDENT_PER_LEVEL[:-1] start = len(self.f.getvalue()) @@ -1393,7 +1393,7 @@ class FragmentsWalker(pysource.SourceWalker, object): n.parent = node self.set_pos_info(n, start, finish) self.set_pos_info(node, start, finish) - self.indentLess(INDENT_PER_LEVEL) + self.indent_less(INDENT_PER_LEVEL) self.prec = p self.prune() @@ -1429,7 +1429,7 @@ class FragmentsWalker(pysource.SourceWalker, object): else: flat_elems.append(elem) - self.indentMore(INDENT_PER_LEVEL) + self.indent_more(INDENT_PER_LEVEL) if len(node) > 3: line_separator = ',\n' + self.indent else: @@ -1454,7 +1454,7 @@ class FragmentsWalker(pysource.SourceWalker, object): n.parent = node.parent self.set_pos_info(n, start, finish) self.set_pos_info(node, start, finish) - self.indentLess(INDENT_PER_LEVEL) + self.indent_less(INDENT_PER_LEVEL) self.prec = p self.prune() @@ -1498,8 +1498,8 @@ class FragmentsWalker(pysource.SourceWalker, object): self.write('%') self.set_pos_info(node, start, len(self.f.getvalue())) - elif typ == 
'+': self.indentMore() - elif typ == '-': self.indentLess() + elif typ == '+': self.indent_more() + elif typ == '-': self.indent_less() elif typ == '|': self.write(self.indent) # no longer used, since BUILD_TUPLE_n is pretty printed: elif typ == 'r': recurse_node = True diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index c31890ab..0c38be13 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -350,7 +350,7 @@ class SourceWalker(GenericASTTraversal, object): # MAKE_FUNCTION .. code = node[-3] - self.indentMore() + self.indent_more() for annotate_last in range(len(node)-1, -1, -1): if node[annotate_last] == 'annotate_tuple': break @@ -370,7 +370,7 @@ class SourceWalker(GenericASTTraversal, object): self.write('\n\n') else: self.write('\n\n\n') - self.indentLess() + self.indent_less() self.prune() # stop recursing self.n_mkfunc_annotate = n_mkfunc_annotate @@ -550,10 +550,10 @@ class SourceWalker(GenericASTTraversal, object): super(SourceWalker, self).preorder(node) self.set_pos_info(node) - def indentMore(self, indent=TAB): + def indent_more(self, indent=TAB): self.indent += indent - def indentLess(self, indent=TAB): + def indent_less(self, indent=TAB): self.indent = self.indent[:-len(indent)] def traverse(self, node, indent=None, isLambda=False): @@ -871,9 +871,9 @@ class SourceWalker(GenericASTTraversal, object): self.write(self.indent, 'if ') self.preorder(node[0]) self.println(':') - self.indentMore() + self.indent_more() self.preorder(node[1]) - self.indentLess() + self.indent_less() if_ret_at_end = False if len(return_stmts_node[0]) >= 3: @@ -892,14 +892,14 @@ class SourceWalker(GenericASTTraversal, object): prev_stmt_is_if_ret = False if not past_else and not if_ret_at_end: self.println(self.indent, 'else:') - self.indentMore() + self.indent_more() past_else = True self.preorder(n) if not past_else or if_ret_at_end: self.println(self.indent, 'else:') - self.indentMore() + self.indent_more() self.preorder(return_stmts_node[1]) - self.indentLess() + self.indent_less() self.prune() n_ifelsestmtr2 = n_ifelsestmtr @@ -921,17 +921,17 @@ class SourceWalker(GenericASTTraversal, object): self.write(self.indent, 'elif ') self.preorder(node[0]) self.println(':') - self.indentMore() + self.indent_more() self.preorder(node[1]) - self.indentLess() + self.indent_less() for n in return_stmts_node[0]: n[0].type = 'elifstmt' self.preorder(n) self.println(self.indent, 'else:') - self.indentMore() + self.indent_more() self.preorder(return_stmts_node[1]) - self.indentLess() + self.indent_less() self.prune() def n_import_as(self, node): @@ -972,14 +972,14 @@ class SourceWalker(GenericASTTraversal, object): func_name = code_node.attr.co_name self.write(func_name) - self.indentMore() + self.indent_more() self.make_function(node, isLambda=False, codeNode=code_node) if len(self.param_stack) > 1: self.write('\n\n') else: self.write('\n\n\n') - self.indentLess() + self.indent_less() self.prune() # stop recursing def make_function(self, node, isLambda, nested=1, @@ -1450,9 +1450,9 @@ class SourceWalker(GenericASTTraversal, object): self.println(':') # class body - self.indentMore() + self.indent_more() self.build_class(subclass_code) - self.indentLess() + self.indent_less() self.currentclass = cclass if len(self.param_stack) > 1: @@ -1523,7 +1523,7 @@ class SourceWalker(GenericASTTraversal, object): p = self.prec self.prec = 100 - self.indentMore(INDENT_PER_LEVEL) + self.indent_more(INDENT_PER_LEVEL) sep = INDENT_PER_LEVEL[:-1] self.write('{') 
         line_number = self.line_number
@@ -1661,7 +1661,7 @@ class SourceWalker(GenericASTTraversal, object):
             if sep.startswith(",\n"):
                 self.write(sep[1:])
         self.write('}')
-        self.indentLess(INDENT_PER_LEVEL)
+        self.indent_less(INDENT_PER_LEVEL)
         self.prec = p
         self.prune()
 
@@ -1712,7 +1712,7 @@ class SourceWalker(GenericASTTraversal, object):
             else:
                 flat_elems.append(elem)
 
-        self.indentMore(INDENT_PER_LEVEL)
+        self.indent_more(INDENT_PER_LEVEL)
         sep = ''
 
         for elem in flat_elems:
@@ -1737,7 +1737,7 @@ class SourceWalker(GenericASTTraversal, object):
         if lastnode.attr == 1 and lastnodetype.startswith('BUILD_TUPLE'):
             self.write(',')
         self.write(endchar)
-        self.indentLess(INDENT_PER_LEVEL)
+        self.indent_less(INDENT_PER_LEVEL)
         self.prec = p
         self.prune()
 
@@ -1812,10 +1812,10 @@ class SourceWalker(GenericASTTraversal, object):
             if typ == '%': self.write('%')
             elif typ == '+':
                 self.line_number += 1
-                self.indentMore()
+                self.indent_more()
             elif typ == '-':
                 self.line_number += 1
-                self.indentLess()
+                self.indent_less()
             elif typ == '|':
                 self.line_number += 1
                 self.write(self.indent)
diff --git a/uncompyle6/version.py b/uncompyle6/version.py
index 5d5aeedc..f6cb5567 100644
--- a/uncompyle6/version.py
+++ b/uncompyle6/version.py
@@ -1,3 +1,3 @@
 # This file is suitable for sourcing inside bash as
 # well as importing into Python
-VERSION='2.11.5'
+VERSION='2.12.0'

From 114f9795550a791b25ad4d7f6e887b9fbed543b0 Mon Sep 17 00:00:00 2001
From: rocky
Date: Tue, 26 Sep 2017 09:31:04 -0400
Subject: [PATCH 18/22] Python 3.1 Annotation args can be unicode?

---
 uncompyle6/parsers/parse3.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py
index 7df06841..f9ead4fc 100644
--- a/uncompyle6/parsers/parse3.py
+++ b/uncompyle6/parsers/parse3.py
@@ -889,7 +889,8 @@ class Python3Parser(PythonParser):
         elif lhs == 'annotate_tuple':
             return not isinstance(tokens[first].attr, tuple)
         elif lhs == 'kwarg':
-            return not isinstance(tokens[first].attr, str)
+            arg = tokens[first].attr
+            return not (isinstance(arg, str) or isinstance(arg, unicode))
         elif lhs == 'while1elsestmt':
             # if SETUP_LOOP target spans the else part, then this is
             # not while1else. Also do for whileTrue?

From f73f0ba41ca955b76378f1a0f86747f3e7536aff Mon Sep 17 00:00:00 2001
From: rocky
Date: Tue, 26 Sep 2017 09:43:01 -0400
Subject: [PATCH 19/22] No unicode in Python 3, but we need it in Python 2.
 The bug was probably introduced as a result of recent Python code type
 interoperability canonicalization

---
 uncompyle6/parsers/parse3.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py
index f9ead4fc..91cc304a 100644
--- a/uncompyle6/parsers/parse3.py
+++ b/uncompyle6/parsers/parse3.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 from uncompyle6.parser import PythonParser, PythonParserSingle, nop_func
 from uncompyle6.parsers.astnode import AST
 from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
+from xdis import PYTHON3
 
 class Python3Parser(PythonParser):
 
@@ -890,7 +891,10 @@ class Python3Parser(PythonParser):
             return not isinstance(tokens[first].attr, tuple)
         elif lhs == 'kwarg':
             arg = tokens[first].attr
-            return not (isinstance(arg, str) or isinstance(arg, unicode))
+            if PYTHON3:
+                return not isinstance(arg, str)
+            else:
+                return not (isinstance(arg, str) or isinstance(arg, unicode))
         elif lhs == 'while1elsestmt':
             # if SETUP_LOOP target spans the else part, then this is
             # not while1else. Also do for whileTrue?

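The PYTHON3 guard added in the patch above matters because `unicode` is a builtin name only on Python 2; on Python 3 merely evaluating `isinstance(arg, unicode)` raises NameError. A minimal standalone sketch of the same cross-version check (illustrative only, not code from this patch; the `string_types` helper name is an assumption):

    import sys

    PYTHON3 = sys.version_info >= (3, 0)

    # On Python 2 a keyword-argument name may arrive as str or unicode;
    # on Python 3 there is only str and the name "unicode" is undefined.
    if PYTHON3:
        string_types = (str,)
    else:
        string_types = (str, unicode)  # noqa: F821 -- evaluated only on Python 2

    def is_kwarg_name(attr):
        # Mirrors the intent of the 'kwarg' check above: the reduction is
        # rejected when the token attribute is not a native string type.
        return isinstance(attr, string_types)

The `six` package (already in install_requires) offers an equivalent check via six.string_types; the hand-rolled version is shown only to make the NameError hazard explicit.
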
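Looking back at the %c/%C/%P format-specifier comments in pysource.py and the pytest/test_pysource.py sanity check added earlier in this series, the table convention they describe can be illustrated with a small standalone checker. This is an approximation: `check_table_entry` is a hypothetical helper, and the real template engine handles more specifiers and escapes than this regex does.

    import re

    def check_table_entry(name, entry):
        """Verify that a semantic-action table entry supplies one argument
        for every format specifier in its template that consumes one."""
        fmt = entry[0]
        # Roughly: %c, %p, %C, %P, %D, %x and %b consume a table argument,
        # while %%, %|, %+, %- and %, do not.
        consuming = re.findall(r"%[cpCPDxb]", fmt)
        assert len(consuming) == len(entry) - 1, (
            "%s: %d specifiers need args but %d args given. Full entry: %s"
            % (name, len(consuming), len(entry) - 1, entry))

    check_table_entry("unary_expr", ("%c%c", 1, 0))
    check_table_entry("forelselaststmt",
                      ("%|for %c%x in %c:\n%+%c%-%|else:\n%+%c%-",
                       3, (3, (2,)), 1, 4, -2))
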
From b51039ac1e55ad24a6ec9b6c6ca3756d3c53b05d Mon Sep 17 00:00:00 2001
From: rocky
Date: Tue, 26 Sep 2017 09:59:55 -0400
Subject: [PATCH 20/22] Get ready for release 2.12.0

---
 ChangeLog | 24 ++++++++++++++++++++++++
 NEWS      |  2 +-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/ChangeLog b/ChangeLog
index 3c10484a..de318594 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,27 @@
+2017-09-26  rocky
+
+	* uncompyle6/parsers/parse3.py: No unicode in Python 3, but we need it in Python 2. The bug was probably introduced as a
+	result of recent Python code type interoperability canonicalization
+
+2017-09-26  rocky
+
+	* uncompyle6/parsers/parse3.py: Python 3.1 Annotation args can be
+	unicode?
+
+2017-09-25  rocky
+
+	* : Adjust for xdis opcode JUMP_OPS. release 2.12.0
+
+2017-09-21  rocky
+
+	* pytest/test_pysource.py: Python 3 compatibility
+
+2017-09-21  rocky
+
+	* pytest/test_pysource.py, uncompyle6/semantics/consts.py,
+	uncompyle6/semantics/fragments.py, uncompyle6/semantics/pysource.py:
+	Unit test for format-specifiers. And in the process we catch some small bugs
+
 2017-09-20  rocky
 
 	* uncompyle6/semantics/fragments.py,
diff --git a/NEWS b/NEWS
index 25859e85..624d9543 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,4 @@
-uncompyle6 2.12.0 2017-09-25
+uncompyle6 2.12.0 2017-09-26
 
 - Use xdis 3.6.0 or greater now
 - Small semantic table cleanups

From e7778f83f2527547fda187cac16b22f358399568 Mon Sep 17 00:00:00 2001
From: rocky
Date: Tue, 26 Sep 2017 10:35:00 -0400
Subject: [PATCH 21/22] Word hacking

---
 README.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.rst b/README.rst
index 4cd0f1e1..5f957167 100644
--- a/README.rst
+++ b/README.rst
@@ -4,7 +4,7 @@ uncompyle6
 ==========
 
 A native Python cross-version Decompiler and Fragment Decompiler.
-Follows in the tradition of decompyle, uncompyle, and uncompyle2.
+The successor to decompyle, uncompyle, and uncompyle2.
 
 
 Introduction

From 1d7a3c6444eab5a02d899f789f2a57cfdcbc5a84 Mon Sep 17 00:00:00 2001
From: rocky
Date: Sat, 30 Sep 2017 18:02:35 -0400
Subject: [PATCH 22/22] Document hacky customize arg count better.

---
 uncompyle6/parser.py            | 18 ++++++++++++------
 uncompyle6/scanners/scanner2.py | 10 ++++++++--
 uncompyle6/scanners/scanner3.py |  8 ++++++--
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py
index 1cb57371..ee18b737 100644
--- a/uncompyle6/parser.py
+++ b/uncompyle6/parser.py
@@ -44,21 +44,25 @@ class PythonParser(GenericASTBuilder):
         else:
             return self.ast_first_offset(ast[0])
 
-    def add_unique_rule(self, rule, opname, count, customize):
+    def add_unique_rule(self, rule, opname, arg_count, customize):
         """Add rule to grammar, but only if it hasn't been added previously
-           opname and count are used in the customize() semantic the actions
-           to add the semantic action rule. Often, count is not used.
+           opname and arg_count are used in the customize() semantic
+           actions to add the semantic action rule. arg_count is
+           used in custom opcodes like MAKE_FUNCTION to indicate how
+           many arguments it has. Often it is not used.
         """
         if rule not in self.new_rules:
             # print("XXX ", rule) # debug
             self.new_rules.add(rule)
             self.addRule(rule, nop_func)
-            customize[opname] = count
+            customize[opname] = arg_count
             pass
         return
 
     def add_unique_rules(self, rules, customize):
-        """Add rules (a list of string) to grammar
+        """Add rules (a list of string) to grammar. Note that
+        the rules must not be those that set arg_count in the
+        custom dictionary.
""" for rule in rules: if len(rule) == 0: @@ -68,7 +72,9 @@ class PythonParser(GenericASTBuilder): return def add_unique_doc_rules(self, rules_str, customize): - """Add rules (a docstring-like list of rules) to grammar + """Add rules (a docstring-like list of rules) to grammar. + Note that the rules must not be those that set arg_count in the + custom dictionary. """ rules = [r.strip() for r in rules_str.split("\n")] self.add_unique_rules(rules, customize) diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 66957a47..9c2d62b0 100644 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -93,12 +93,18 @@ class Scanner2(Scanner): for instr in bytecode.get_instructions(co): print(instr._disassemble()) - # Container for tokens + # list of tokens/instructions tokens = [] + # "customize" is a dict whose keys are nonterminals + # and the value is the argument stack entries for that + # nonterminal. The count is a little hoaky. It is mostly + # not used, but sometimes it is. + # "customize" is a dict whose keys are nonterminals customize = {} + if self.is_pypy: - customize['PyPy'] = 1 + customize['PyPy'] = 0 Token = self.Token # shortcut diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 5beedb0e..c006dfc4 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -169,12 +169,16 @@ class Scanner3(Scanner): for instr in bytecode.get_instructions(co): print(instr._disassemble()) - # Container for tokens + # list of tokens/instructions tokens = [] + # "customize" is a dict whose keys are nonterminals + # and the value is the argument stack entries for that + # nonterminal. The count is a little hoaky. It is mostly + # not used, but sometimes it is. customize = {} if self.is_pypy: - customize['PyPy'] = 1 + customize['PyPy'] = 0 self.code = array('B', co.co_code) self.build_lines_data(co)