From eba0d37d0f0804a448a2f8cafff42a3c7804ec87 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 30 Apr 2022 05:00:49 -0400 Subject: [PATCH] Improve Python 1.x decompiling Still has bugs, but is much better. --- test/simple_source/bug14/test_builtin.py | 8 + uncompyle6/disas.py | 19 ++- uncompyle6/parser.py | 2 + uncompyle6/parsers/parse14.py | 21 ++- uncompyle6/parsers/parse26.py | 1 + uncompyle6/scanners/scanner26.py | 3 +- uncompyle6/semantics/consts.py | 10 +- uncompyle6/semantics/customize.py | 27 +++- uncompyle6/semantics/make_function1.py | 191 +++++++++++++++++++++++ uncompyle6/semantics/pysource.py | 9 +- 10 files changed, 267 insertions(+), 24 deletions(-) create mode 100644 test/simple_source/bug14/test_builtin.py create mode 100644 uncompyle6/semantics/make_function1.py diff --git a/test/simple_source/bug14/test_builtin.py b/test/simple_source/bug14/test_builtin.py new file mode 100644 index 00000000..81f8abda --- /dev/null +++ b/test/simple_source/bug14/test_builtin.py @@ -0,0 +1,8 @@ +from test_support import * +print '4. 
Built-in functions' +print 'test_b1' +unload('test_b1') +import test_b1 +print 'test_b2' +unload('test_b2') +import test_b2 diff --git a/uncompyle6/disas.py b/uncompyle6/disas.py index cb8cc7e3..60a21143 100644 --- a/uncompyle6/disas.py +++ b/uncompyle6/disas.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2016, 2818-2021 by Rocky Bernstein +# Copyright (c) 2015-2016, 2018-2022 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock @@ -60,11 +60,18 @@ def disco_loop(disasm, queue, real_out): while len(queue) > 0: co = queue.popleft() if co.co_name != "": - print( - "\n# %s line %d of %s" - % (co.co_name, co.co_firstlineno, co.co_filename), - file=real_out, - ) + if hasattr(co, "co_firstlineno"): + print( + "\n# %s line %d of %s" + % (co.co_name, co.co_firstlineno, co.co_filename), + file=real_out, + ) + else: + print( + "\n# %s of %s" + % (co.co_name, co.co_filename), + file=real_out, + ) tokens, customize = disasm(co) for t in tokens: if iscode(t.pattr): diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index 785b0a45..d1027f4e 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -75,6 +75,8 @@ class PythonParser(GenericASTBuilder): "come_from_loops", # Python 3.7+ "importlist37", + # Python < 1.4 + "args_store", ] self.collect = frozenset(nt_list) diff --git a/uncompyle6/parsers/parse14.py b/uncompyle6/parsers/parse14.py index 250f37f4..86bc2ef6 100644 --- a/uncompyle6/parsers/parse14.py +++ b/uncompyle6/parsers/parse14.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 Rocky Bernstein +# Copyright (c) 2018, 2022 Rocky Bernstein from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG from uncompyle6.parser import PythonParserSingle @@ -8,11 +8,24 @@ class Python14Parser(Python15Parser): def p_misc14(self, args): """ - # Not much here yet, but will probably need to add UNARY_CALL, BINARY_CALL, - # RAISE_EXCEPTION, BUILD_FUNCTION, UNPACK_ARG, UNPACK_VARARG, LOAD_LOCAL, -# 
SET_FUNC_ARGS, and RESERVE_FAST + # Not much here yet, but will probably need to add UNARY_CALL, + # LOAD_LOCAL, SET_FUNC_ARGS + + call ::= expr tuple BINARY_CALL + expr ::= call + kv ::= DUP_TOP expr ROT_TWO LOAD_CONST STORE_SUBSCR + mkfunc ::= LOAD_CODE BUILD_FUNCTION + print_expr_stmt ::= expr PRINT_EXPR + raise_stmt2 ::= expr expr RAISE_EXCEPTION + star_args ::= RESERVE_FAST UNPACK_VARARG_1 args_store + args ::= RESERVE_FAST UNPACK_ARG args_store + stmt ::= print_expr_stmt + args_store ::= STORE_FAST+ + stmt ::= args + stmt ::= star_args # Not strictly needed, but tidies up output + stmt ::= doc_junk doc_junk ::= LOAD_CONST POP_TOP diff --git a/uncompyle6/parsers/parse26.py b/uncompyle6/parsers/parse26.py index 2d95a5d8..d353c571 100644 --- a/uncompyle6/parsers/parse26.py +++ b/uncompyle6/parsers/parse26.py @@ -428,6 +428,7 @@ class Python26Parser(Python2Parser): # since the operand can be a relative offset rather than # an absolute offset. setup_inst = self.insts[self.offset2inst_index[tokens[first].offset]] + last = min(len(tokens)-1, last) if self.version <= (2, 2) and tokens[last] == "COME_FROM": last += 1 return tokens[last-1].off2int() > setup_inst.argval diff --git a/uncompyle6/scanners/scanner26.py b/uncompyle6/scanners/scanner26.py index e1317aa8..6e040c2e 100755 --- a/uncompyle6/scanners/scanner26.py +++ b/uncompyle6/scanners/scanner26.py @@ -228,7 +228,8 @@ class Scanner26(scan.Scanner2): elif op in self.opc.JABS_OPS: pattr = repr(oparg) elif op in self.opc.LOCAL_OPS: - pattr = varnames[oparg] + if oparg in varnames: + pattr = varnames[oparg] elif op in self.opc.COMPARE_OPS: pattr = self.opc.cmp_op[oparg] elif op in self.opc.FREE_OPS: diff --git a/uncompyle6/semantics/consts.py b/uncompyle6/semantics/consts.py index e81cae1b..43ebbf5a 100644 --- a/uncompyle6/semantics/consts.py +++ b/uncompyle6/semantics/consts.py @@ -199,6 +199,9 @@ TABLE_DIRECT = { "BINARY_AND": ("&",), "BINARY_OR": ("|",), "BINARY_XOR": ("^",), + "DELETE_FAST": ("%|del 
%{pattr}\n",), + "DELETE_NAME": ("%|del %{pattr}\n",), + "DELETE_GLOBAL": ("%|del %{pattr}\n",), "INPLACE_ADD": ("+=",), "INPLACE_SUBTRACT": ("-=",), "INPLACE_MULTIPLY": ("*=",), @@ -215,8 +218,6 @@ TABLE_DIRECT = { "INPLACE_XOR": ("^=",), # bin_op (formerly "binary_expr") is the Python AST BinOp "bin_op": ("%c %c %c", 0, (-1, "binary_operator"), (1, "expr")), - "UNARY_POSITIVE": ("+",), - "UNARY_NEGATIVE": ("-",), "UNARY_INVERT": ("~"), # unary_op (formerly "unary_expr") is the Python AST UnaryOp "unary_op": ("%c%c", (1, "unary_operator"), (0, "expr")), @@ -238,9 +239,6 @@ TABLE_DIRECT = { "LOAD_DEREF": ("%{pattr}",), "LOAD_LOCALS": ("locals()",), "LOAD_ASSERT": ("%{pattr}",), - "DELETE_FAST": ("%|del %{pattr}\n",), - "DELETE_NAME": ("%|del %{pattr}\n",), - "DELETE_GLOBAL": ("%|del %{pattr}\n",), "delete_subscript": ( "%|del %p[%c]\n", (0, "expr", PRECEDENCE["subscript"]), @@ -264,6 +262,8 @@ TABLE_DIRECT = { "STORE_NAME": ("%{pattr}",), "STORE_GLOBAL": ("%{pattr}",), "STORE_DEREF": ("%{pattr}",), + "UNARY_POSITIVE": ("+",), + "UNARY_NEGATIVE": ("-",), "unpack": ("%C%,", (1, maxint, ", ")), # This nonterminal we create on the fly in semantic routines "unpack_w_parens": ("(%C%,)", (1, maxint, ", ")), diff --git a/uncompyle6/semantics/customize.py b/uncompyle6/semantics/customize.py index e134a72e..f1f31f95 100644 --- a/uncompyle6/semantics/customize.py +++ b/uncompyle6/semantics/customize.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2019, 2021 by Rocky Bernstein +# Copyright (c) 2018-2019, 2021-2022 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,7 +17,7 @@ """ from uncompyle6.parsers.treenode import SyntaxTree -from uncompyle6.semantics.consts import INDENT_PER_LEVEL, PRECEDENCE, TABLE_R, TABLE_DIRECT +from uncompyle6.semantics.consts import INDENT_PER_LEVEL, NO_PARENTHESIS_EVER, PRECEDENCE, TABLE_R, TABLE_DIRECT from uncompyle6.semantics.helper 
import flatten_list from uncompyle6.scanners.tok import Token @@ -47,7 +47,7 @@ def customize_for_version(self, is_pypy, version): if version[:2] >= (3, 7): def n_call_kw_pypy37(node): - self.template_engine(("%p(", (0, 100)), node) + self.template_engine(("%p(", (0, NO_PARENTHESIS_EVER)), node) assert node[-1] == "CALL_METHOD_KW" arg_count = node[-1].attr kw_names = node[-2] @@ -193,7 +193,26 @@ def customize_for_version(self, is_pypy, version): self.prune() self.n_iftrue_stmt24 = n_iftrue_stmt24 - else: # version <= 2.3: + elif version <= (1, 4): + TABLE_DIRECT.update( + { + "call": ( + "%p(%P)", + (0, "expr", 100), (1,-1,", ") + ), + "print_expr_stmt": ( + ("%|print %c,\n", 0) + ), + } + ) + + # FIXME: figure out how to handle LOAD_FAST + # it uses code.names + # def n_LOAD_FAST(node): + # pass + # self.n_LOAD_FAST = n_LOAD_FAST + + else: # 1.0 <= version <= 2.3: TABLE_DIRECT.update({"if1_stmt": ("%|if 1\n%+%c%-", 5)}) if version <= (2, 1): TABLE_DIRECT.update( diff --git a/uncompyle6/semantics/make_function1.py b/uncompyle6/semantics/make_function1.py new file mode 100644 index 00000000..ecf10049 --- /dev/null +++ b/uncompyle6/semantics/make_function1.py @@ -0,0 +1,191 @@ +# Copyright (c) 2015-2022 by Rocky Bernstein +# Copyright (c) 2000-2002 by hartmut Goebel +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +All the crazy things we have to do to handle Python functions in Python before 3.0. 
+The saga of changes continues in 3.0 and above and in other files.
+"""
+from typing import List, Tuple
+from uncompyle6.scanner import Code
+from uncompyle6.semantics.parser_error import ParserError
+from uncompyle6.parser import ParserError as ParserError2
+from uncompyle6.semantics.helper import (
+    print_docstring,
+    find_all_globals,
+    find_globals_and_nonlocals,
+    find_none,
+)
+from xdis import iscode
+
+def make_function1(self, node, is_lambda, nested=1, code_node=None):
+    """Dump a Python 1.x function's parameter list, doc string, and body.
+    code_node.attr is the function's code object; is_lambda picks "lambda ...: " over
+    "(...):" output. A parse error is written out and, unless tolerated, set in self.ERROR.
+    """
+
+    def build_param(tree, param_names: List[str]) -> Tuple[bool, List[str]]:
+        """Remove the leading "args"/"star_args" node from tree and return
+        (has_star_arg, params): whether that node was "star_args", and
+        param_names[arg.attr] for each child of its "args_store" node.
+        """
+        # Defaults and tuple parameters are not handled here; names come
+        # only from the "args_store" children of the first node of the tree.
+        args = tree[0]
+        del tree[0]
+        params = []
+        assert args.kind in ("star_args", "args")
+        has_star_arg = args.kind == "star_args"
+        args_store = args[2]
+        assert args_store == "args_store"
+        for arg in args_store:
+            params.append(param_names[arg.attr])
+        return has_star_arg, params
+
+    # The last token of the node must be a BUILD_... opcode
+    assert node[-1].kind.startswith("BUILD_")
+
+    defparams = []
+    # args_node = node[-1]
+    # if isinstance(args_node.attr, tuple):
+    #     # positional args are after kwargs
+    #     defparams = node[1 : args_node.attr[0] + 1]
+    #     pos_args, kw_args, annotate_argc = args_node.attr
+    # else:
+    #     defparams = node[: args_node.attr]
+    #     kw_args = 0
+    #     pass
+
+    lambda_index = None  # never set, so code always comes from code_node below
+
+    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
+        assert node[lambda_index].kind == "LOAD_LAMBDA"
+        code = node[lambda_index].attr
+    else:
+        code = code_node.attr
+
+    assert iscode(code)
+    code = Code(code, self.scanner, self.currentclass)
+
+    # add default values to parameter names (none yet: defparams stays empty)
+    argc = code.co_argcount
+    paramnames = list(code.co_varnames[:argc])
+
+    # defaults are for last n parameters, thus reverse
+    paramnames.reverse()
+    defparams.reverse()
+
+    try:
+        tree = self.build_ast(
+            code._tokens,
+            code._customize,
+            code,
+            is_lambda=is_lambda,
+            noneInNames=("None" in code.co_names),
+        )
+    except (ParserError, ParserError2) as p:
+        self.write(str(p))
+        if not self.tolerate_errors:
+            self.ERROR = p
+        return
+
+    indent = self.indent
+
+    # build parameters; note that names are looked up in code.co_names
+    has_star_arg, params = build_param(tree, code.co_names)
+
+    if has_star_arg:
+        params[-1] = "*" + params[-1]
+
+    # dump parameter list
+    if is_lambda:
+        self.write("lambda ", ", ".join(params))
+        # If the last statement is None (which is the
+        # same thing as "return None" in a lambda) and the
+        # next to last statement is a "yield". Then we want to
+        # drop the (return) None since that was just put there
+        # to have something after the yield finishes.
+        # FIXME: this is a bit hokey and not general
+        if (
+            len(tree) > 1
+            and self.traverse(tree[-1]) == "None"
+            and self.traverse(tree[-2]).strip().startswith("yield")
+        ):
+            del tree[-1]
+            # Now pick out the expr part of the last statement
+            tree_expr = tree[-1]
+            while tree_expr.kind != "expr":
+                tree_expr = tree_expr[0]
+            tree[-1] = tree_expr
+            pass
+    else:
+        self.write("(", ", ".join(params))
+
+    # if kw_args > 0:
+    #     if not (4 & code.co_flags):
+    #         if argc > 0:
+    #             self.write(", *, ")
+    #         else:
+    #             self.write("*, ")
+    #         pass
+    #     else:
+    #         self.write(", ")
+
+    #     for n in node:
+    #         if n == "pos_arg":
+    #             continue
+    #         else:
+    #             self.preorder(n)
+    #         break
+    #     pass
+
+    # if code_has_star_star_arg(code):
+    #     if argc > 0:
+    #         self.write(", ")
+    #     self.write("**%s" % code.co_varnames[argc + kw_pairs])
+
+    if is_lambda:
+        self.write(": ")
+    else:
+        self.println("):")
+
+    if (
+        len(code.co_consts) > 0 and code.co_consts[0] is not None and not is_lambda
+    ):  # ugly
+        # docstring exists, dump it
+        print_docstring(self, indent, code.co_consts[0])
+
+    if not is_lambda:
+        assert tree == "stmts"
+
+    all_globals = find_all_globals(tree, set())
+
+    globals, nonlocals = find_globals_and_nonlocals(
+        tree, set(), set(), code, self.version
+    )
+
+    # Python 1 doesn't support the "nonlocal" statement
+
+    for g in sorted((all_globals & self.mod_globs) | globals):
+        self.println(self.indent, "global ", g)
+    self.mod_globs -= all_globals
+    has_none = "None" in code.co_names
+    rn = has_none and not find_none(tree)
+    tree.code = code
+    self.gen_source(
+        tree, code.co_name, code._customize, is_lambda=is_lambda, returnNone=rn
+    )
+
+    code._tokens = None  # save memory
+    code._customize = None  # save memory
diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py
index 52a7014e..4ded56c2 100644
--- a/uncompyle6/semantics/pysource.py
+++ b/uncompyle6/semantics/pysource.py
@@ -143,6 +143,7 @@ from uncompyle6.scanner import Code, get_scanner import uncompyle6.parser as 
python_parser from uncompyle6.semantics.check_ast import checker +from uncompyle6.semantics.make_function1 import make_function1 from uncompyle6.semantics.make_function2 import make_function2 from uncompyle6.semantics.make_function3 import make_function3 from uncompyle6.semantics.make_function36 import make_function36 @@ -151,9 +152,7 @@ from uncompyle6.semantics.customize import customize_for_version from uncompyle6.semantics.gencomp import ComprehensionMixin from uncompyle6.semantics.helper import ( print_docstring, - find_code_node, find_globals_and_nonlocals, - flatten_list, ) from uncompyle6.scanners.tok import Token @@ -176,7 +175,6 @@ from uncompyle6.semantics.consts import ( TAB, TABLE_R, escape, - minint, ) @@ -539,7 +537,9 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): # Python changes make function this much that we need at least 3 different routines, # and probably more in the future. def make_function(self, node, is_lambda, nested=1, code_node=None, annotate=None): - if self.version <= (2, 7): + if self.version <= (1, 2): + make_function1(self, node, is_lambda, nested, code_node) + elif self.version <= (2, 7): make_function2(self, node, is_lambda, nested, code_node) elif (3, 0) <= self.version <= (3, 5): make_function3(self, node, is_lambda, nested, code_node) @@ -994,6 +994,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): result = "(%s)" % result return result # return self.traverse(node[1]) + return f"({name}" raise Exception("Can't find tuple parameter " + name) def build_class(self, code):