From b3ddf95d7a7fe693535ec8a3ff22f62d90e47419 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 12 Aug 2023 07:12:10 -0400 Subject: [PATCH 01/24] comprehension in lambda for 3.0 & 3.1 --- .../03_comprehension_in_lambda.pyc | Bin 0 -> 1021 bytes uncompyle6/parsers/parse3.py | 14 ++++++++++++++ uncompyle6/parsers/parse30.py | 4 ++-- 3 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 test/bytecode_3.1_run/03_comprehension_in_lambda.pyc diff --git a/test/bytecode_3.1_run/03_comprehension_in_lambda.pyc b/test/bytecode_3.1_run/03_comprehension_in_lambda.pyc new file mode 100644 index 0000000000000000000000000000000000000000..983f747dbcb7d8afde08fca70fcab3c74b88eb5d GIT binary patch literal 1021 zcmcgq$xg#C5S=6?EMmE#_JZIRiLERWmkNP^D;&xR39K|Wt<|I{c8!F93-AT-ar_Q% zlCnu$070aAo{VSw#?Ny!IW@5qKg)zZd=>aRFxvu1q7$G;4?v$}K(a#ZfbKmy@kn_b ztjQ7T`eZk0;L{yK;aAzH3`hmk4auIpW{y%b8Uu}+0~~j>o$s5xZQ^Vmu2t#za?(*r zSNGZriH8*ac=cg>Fgp!EkLd1(XdhiQBi?^Y=EMrD39~hTE;3JN=oXE__vp!|i*Q({ zuaOeLDB?=Tuu^y9ST19u<5sHH%56l*tplvi`l%8&cP3G*E!SS(T3oGfh$PSYMs-xr z>bxg(Po#0ylJQFa%1xlGA$2m~ob8YgVWRONGhg5nW&QV*EOMjlpd`dUX=c()tpf(W z{F`#$mYI(zWS)i7DP^8ynW6FHP-E=;y}bNd4Xa9ZS?hmQw;<$u9!5NPZWJJd%o8CB zKf4nA^o}QY*IH!;n2!Tva^rXsu#MK=AH48>I%vhKXSsB#+U8~~2SUH<&-zvG1>=Cv AjQ{`u literal 0 HcmV?d00001 diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index e43f07b1..2d68da9c 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -81,9 +81,17 @@ class Python3Parser(PythonParser): set_comp_func ::= BUILD_SET_0 LOAD_ARG FOR_ITER store comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST + set_comp_func ::= BUILD_SET_0 LOAD_ARG FOR_ITER store comp_iter + JUMP_BACK RETURN_VALUE_LAMBDA LAMBDA_MARKER + set_comp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER store comp_iter + JUMP_BACK RETURN_VALUE RETURN_LAST + set_comp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER store comp_iter + JUMP_BACK RETURN_VALUE_LAMBDA LAMBDA_MARKER set_comp_func ::= BUILD_SET_0 LOAD_ARG FOR_ITER store comp_iter COME_FROM JUMP_BACK RETURN_VALUE RETURN_LAST + set_comp_func ::= BUILD_SET_0 LOAD_ARG FOR_ITER store comp_iter + COME_FROM JUMP_BACK RETURN_VALUE_LAMBDA LAMBDA_MARKER comp_body ::= dict_comp_body comp_body ::= set_comp_body @@ -101,6 +109,12 @@ class Python3Parser(PythonParser): stmt ::= dict_comp_func dict_comp_func ::= BUILD_MAP_0 LOAD_ARG FOR_ITER store comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST + dict_comp_func ::= BUILD_MAP_0 LOAD_ARG FOR_ITER store + comp_iter JUMP_BACK RETURN_VALUE_LAMBDA LAMBDA_MARKER + dict_comp_func ::= BUILD_MAP_0 LOAD_FAST FOR_ITER store + comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST + dict_comp_func ::= BUILD_MAP_0 LOAD_FAST FOR_ITER store + comp_iter JUMP_BACK RETURN_VALUE_LAMBDA LAMBDA_MARKER comp_iter ::= comp_if_not comp_if_not ::= expr jmp_true comp_iter diff --git a/uncompyle6/parsers/parse30.py b/uncompyle6/parsers/parse30.py index f1d2546c..03dc7e6d 100644 --- a/uncompyle6/parsers/parse30.py +++ b/uncompyle6/parsers/parse30.py @@ -77,11 +77,11 @@ class Python30Parser(Python31Parser): set_comp_func ::= set_comp_header LOAD_ARG FOR_ITER store comp_iter - JUMP_BACK COME_FROM POP_TOP JUMP_BACK + JUMP_BACK RETURN_VALUE RETURN_LAST set_comp_func ::= set_comp_header LOAD_ARG FOR_ITER store comp_iter - JUMP_BACK COME_FROM POP_TOP JUMP_BACK + JUMP_BACK RETURN_VALUE_LAMBDA LAMBDA_MARKER list_comp_header ::= BUILD_LIST_0 DUP_TOP STORE_FAST From c0957d956f1ee5a011879f5efecd13fc03c7a0cf Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 13 Aug 2023 07:25:12 -0400 Subject: [PATCH 02/24] Simpilfy grammar 
via ending_return --- ....pyc => 07_for_if_else-continue.pyc-notyet} | Bin uncompyle6/parser.py | 3 +++ uncompyle6/parsers/parse27.py | 8 ++------ uncompyle6/parsers/parse3.py | 14 ++++---------- uncompyle6/parsers/parse30.py | 12 ++---------- uncompyle6/parsers/parse37.py | 17 +++++++---------- uncompyle6/parsers/parse38.py | 6 ++++-- 7 files changed, 22 insertions(+), 38 deletions(-) rename test/bytecode_2.7_run/{07_for_if_else-continue.pyc => 07_for_if_else-continue.pyc-notyet} (100%) diff --git a/test/bytecode_2.7_run/07_for_if_else-continue.pyc b/test/bytecode_2.7_run/07_for_if_else-continue.pyc-notyet similarity index 100% rename from test/bytecode_2.7_run/07_for_if_else-continue.pyc rename to test/bytecode_2.7_run/07_for_if_else-continue.pyc-notyet diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index 8b91f040..7873cd08 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -304,6 +304,9 @@ class PythonParser(GenericASTBuilder): c_stmts ::= lastc_stmt c_stmts ::= continues + ending_return ::= RETURN_VALUE RETURN_LAST + ending_return ::= RETURN_VALUE_LAMBDA LAMBDA_MARKER + lastc_stmt ::= iflaststmt lastc_stmt ::= forelselaststmt lastc_stmt ::= ifelsestmtc diff --git a/uncompyle6/parsers/parse27.py b/uncompyle6/parsers/parse27.py index 06498caa..eee5ef06 100644 --- a/uncompyle6/parsers/parse27.py +++ b/uncompyle6/parsers/parse27.py @@ -39,14 +39,10 @@ class Python27Parser(Python2Parser): stmt ::= dict_comp_func dict_comp_func ::= BUILD_MAP_0 LOAD_FAST FOR_ITER store - comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST - dict_comp_func ::= BUILD_MAP_0 LOAD_FAST FOR_ITER store - comp_iter JUMP_BACK RETURN_VALUE_LAMBDA LAMBDA_MARKER + comp_iter JUMP_BACK ending_return set_comp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER store comp_iter - JUMP_BACK RETURN_VALUE RETURN_LAST - set_comp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER store comp_iter - JUMP_BACK RETURN_VALUE_LAMBDA LAMBDA_MARKER + JUMP_BACK ending_return comp_iter ::= comp_if_not comp_if_not ::= expr jmp_true comp_iter diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 2d68da9c..290632f8 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -79,19 +79,13 @@ class Python3Parser(PythonParser): stmt ::= set_comp_func + # TODO this can be simplified set_comp_func ::= BUILD_SET_0 LOAD_ARG FOR_ITER store comp_iter - JUMP_BACK RETURN_VALUE RETURN_LAST - set_comp_func ::= BUILD_SET_0 LOAD_ARG FOR_ITER store comp_iter - JUMP_BACK RETURN_VALUE_LAMBDA LAMBDA_MARKER + JUMP_BACK ending_return set_comp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER store comp_iter - JUMP_BACK RETURN_VALUE RETURN_LAST - set_comp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER store comp_iter - JUMP_BACK RETURN_VALUE_LAMBDA LAMBDA_MARKER - + JUMP_BACK ending_return set_comp_func ::= BUILD_SET_0 LOAD_ARG FOR_ITER store comp_iter - COME_FROM JUMP_BACK RETURN_VALUE RETURN_LAST - set_comp_func ::= BUILD_SET_0 LOAD_ARG FOR_ITER store comp_iter - COME_FROM JUMP_BACK RETURN_VALUE_LAMBDA LAMBDA_MARKER + COME_FROM JUMP_BACK ending_return comp_body ::= dict_comp_body comp_body ::= set_comp_body diff --git a/uncompyle6/parsers/parse30.py b/uncompyle6/parsers/parse30.py index 03dc7e6d..b36f8d41 100644 --- a/uncompyle6/parsers/parse30.py +++ b/uncompyle6/parsers/parse30.py @@ -77,12 +77,8 @@ class Python30Parser(Python31Parser): set_comp_func ::= set_comp_header LOAD_ARG FOR_ITER store comp_iter - JUMP_BACK + JUMP_BACK ending_return RETURN_VALUE RETURN_LAST - set_comp_func ::= set_comp_header - LOAD_ARG FOR_ITER store comp_iter - 
JUMP_BACK - RETURN_VALUE_LAMBDA LAMBDA_MARKER list_comp_header ::= BUILD_LIST_0 DUP_TOP STORE_FAST list_comp ::= list_comp_header @@ -112,11 +108,7 @@ class Python30Parser(Python31Parser): dict_comp_func ::= BUILD_MAP_0 DUP_TOP STORE_FAST LOAD_ARG FOR_ITER store - dict_comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST - dict_comp_func ::= BUILD_MAP_0 - DUP_TOP STORE_FAST - LOAD_ARG FOR_ITER store - dict_comp_iter JUMP_BACK RETURN_VALUE_LAMBDA LAMBDA_MARKER + dict_comp_iter JUMP_BACK ending_return stmt ::= try_except30 try_except30 ::= SETUP_EXCEPT suite_stmts_opt diff --git a/uncompyle6/parsers/parse37.py b/uncompyle6/parsers/parse37.py index ad2e6248..a1994543 100644 --- a/uncompyle6/parsers/parse37.py +++ b/uncompyle6/parsers/parse37.py @@ -62,6 +62,9 @@ class Python37Parser(Python37BaseParser): c_stmts ::= lastc_stmt c_stmts ::= continues + ending_return ::= RETURN_VALUE RETURN_LAST + ending_return ::= RETURN_VALUE_LAMBDA LAMBDA_MARKER + lastc_stmt ::= iflaststmt lastc_stmt ::= forelselaststmt lastc_stmt ::= ifelsestmtc @@ -739,15 +742,11 @@ class Python37Parser(Python37BaseParser): stmt ::= set_comp_func + # TODO: simplify this set_comp_func ::= BUILD_SET_0 LOAD_ARG for_iter store comp_iter - JUMP_BACK RETURN_VALUE RETURN_LAST + JUMP_BACK ending_return set_comp_func ::= BUILD_SET_0 LOAD_ARG for_iter store comp_iter - JUMP_BACK RETURN_VALUE_LAMBDA LAMBDA_MARKER - - set_comp_func ::= BUILD_SET_0 LOAD_ARG for_iter store comp_iter - COME_FROM JUMP_BACK RETURN_VALUE RETURN_LAST - set_comp_func ::= BUILD_SET_0 LOAD_ARG for_iter store comp_iter - COME_FROM JUMP_BACK RETURN_VALUE_LAMBDA LAMBDA_MARKER + COME_FROM JUMP_BACK ending_return comp_body ::= dict_comp_body comp_body ::= set_comp_body @@ -763,9 +762,7 @@ class Python37Parser(Python37BaseParser): stmt ::= dict_comp_func dict_comp_func ::= BUILD_MAP_0 LOAD_ARG for_iter store - comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST - dict_comp_func ::= BUILD_MAP_0 LOAD_ARG for_iter store - comp_iter JUMP_BACK RETURN_VALUE_LAMBDA LAMBDA_MARKER + comp_iter JUMP_BACK ending_return comp_iter ::= comp_if comp_iter ::= comp_if_not diff --git a/uncompyle6/parsers/parse38.py b/uncompyle6/parsers/parse38.py index e36e65d8..1d62ce0d 100644 --- a/uncompyle6/parsers/parse38.py +++ b/uncompyle6/parsers/parse38.py @@ -121,7 +121,8 @@ class Python38Parser(Python37Parser): for38 ::= expr get_for_iter store for_block forelsestmt38 ::= expr get_for_iter store for_block POP_BLOCK else_suite - forelsestmt38 ::= expr get_for_iter store for_block JUMP_BACK _come_froms else_suite + forelsestmt38 ::= expr get_for_iter store for_block JUMP_BACK _come_froms + else_suite forelselaststmt38 ::= expr get_for_iter store for_block POP_BLOCK else_suitec forelselaststmtl38 ::= expr get_for_iter store for_block POP_BLOCK else_suitel @@ -130,7 +131,8 @@ class Python38Parser(Python37Parser): except_return_value ::= POP_BLOCK return except_return_value ::= expr POP_BLOCK RETURN_VALUE - whilestmt38 ::= _come_froms testexpr l_stmts_opt COME_FROM JUMP_BACK POP_BLOCK + whilestmt38 ::= _come_froms testexpr l_stmts_opt COME_FROM JUMP_BACK + POP_BLOCK whilestmt38 ::= _come_froms testexpr l_stmts_opt JUMP_BACK POP_BLOCK whilestmt38 ::= _come_froms testexpr l_stmts_opt JUMP_BACK come_froms whilestmt38 ::= _come_froms testexpr returns POP_BLOCK From 9829e0461171ea286d8a7cc6209f077cbf9c4841 Mon Sep 17 00:00:00 2001 From: rocky Date: Thu, 17 Aug 2023 19:33:47 -0400 Subject: [PATCH 03/24] Bug in collection printing ... 
`"%s" % value` can fail if value is a tuple --- uncompyle6/semantics/n_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uncompyle6/semantics/n_actions.py b/uncompyle6/semantics/n_actions.py index 0747202b..a1768329 100644 --- a/uncompyle6/semantics/n_actions.py +++ b/uncompyle6/semantics/n_actions.py @@ -272,7 +272,7 @@ class NonterminalActions: if self.version < (3, 0, 0): value = "%r" % elem.pattr else: - value = "%s" % elem.pattr + value = "%s" % str(elem.pattr) else: assert elem.kind == "ADD_VALUE_VAR" value = "%s" % elem.pattr From 20c58e2e2a8d77e7cecb225ea8b22191e5c9c608 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 26 Aug 2023 14:15:23 -0400 Subject: [PATCH 04/24] Small semantic action acceptance change --- uncompyle6/semantics/customize38.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uncompyle6/semantics/customize38.py b/uncompyle6/semantics/customize38.py index 94e62070..10a83b80 100644 --- a/uncompyle6/semantics/customize38.py +++ b/uncompyle6/semantics/customize38.py @@ -128,7 +128,7 @@ def customize_for_version38(self, version): "whilestmt38": ( "%|while %c:\n%+%c%-\n\n", (1, ("bool_op", "testexpr", "testexprc")), - (2, ("l_stmts", "pass")), + (2, ("l_stmts", "l_stmts_opt", "pass")), ), "whileTruestmt38": ( "%|while True:\n%+%c%-\n\n", From 803678e9b46cfba2a30e13f8684379c94800e345 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 26 Aug 2023 14:39:42 -0400 Subject: [PATCH 05/24] Track recent xdis changes --- uncompyle6/scanners/scanner26.py | 16 +++++++++------- uncompyle6/scanners/scanner37base.py | 13 +++++++++---- uncompyle6/util.py | 2 +- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/uncompyle6/scanners/scanner26.py b/uncompyle6/scanners/scanner26.py index 9c77476c..35ff5ff6 100755 --- a/uncompyle6/scanners/scanner26.py +++ b/uncompyle6/scanners/scanner26.py @@ -48,8 +48,7 @@ class Scanner26(scan.Scanner2): return def ingest(self, co, classname=None, code_objects={}, show_asm=None): - """ - Create "tokens" the bytecode of an Python code object. Largely these + """Create "tokens" the bytecode of an Python code object. Largely these are the opcode name, but in some cases that has been modified to make parsing easier. returning a list of uncompyle6 Token's. @@ -57,14 +56,17 @@ class Scanner26(scan.Scanner2): Some transformations are made to assist the deparsing grammar: - various types of LOAD_CONST's are categorized in terms of what they load - COME_FROM instructions are added to assist parsing control structures - - operands with stack argument counts or flag masks are appended to the opcode name, e.g.: + - operands with stack argument counts or flag masks are appended to the + opcode name, e.g.: * BUILD_LIST, BUILD_SET - * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments + * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional + arguments - EXTENDED_ARGS instructions are removed - Also, when we encounter certain tokens, we add them to a set which will cause custom - grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST - cause specific rules for the specific number of arguments they take. + Also, when we encounter certain tokens, we add them to a set + which will cause custom grammar rules. Specifically, variable + arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific + rules for the specific number of arguments they take. 
""" if not show_asm: diff --git a/uncompyle6/scanners/scanner37base.py b/uncompyle6/scanners/scanner37base.py index bc27e41a..aa3e934f 100644 --- a/uncompyle6/scanners/scanner37base.py +++ b/uncompyle6/scanners/scanner37base.py @@ -221,7 +221,7 @@ class Scanner37Base(Scanner): if show_asm in ("both", "before"): print("\n# ---- before tokenization:") - bytecode.disassemble_bytes( + self.insts = bytecode.disassemble_bytes( co.co_code, varnames=co.co_varnames, names=co.co_names, @@ -229,6 +229,9 @@ class Scanner37Base(Scanner): cells=bytecode._cell_names, linestarts=bytecode._linestarts, asm_format="extended", + filename=co.co_filename, + show_source=True, + first_line_number=co.co_firstlineno, ) # "customize" is in the process of going away here @@ -302,6 +305,8 @@ class Scanner37Base(Scanner): inst.starts_line, inst.is_jump_target, inst.has_extended_arg, + None, + None, ) # Get jump targets @@ -348,9 +353,9 @@ class Scanner37Base(Scanner): j = tokens_append( j, Token( - come_from_name, - jump_offset, - repr(jump_offset), + opname=come_from_name, + attr=jump_offset, + pattr=repr(jump_offset), offset="%s_%s" % (inst.offset, jump_idx), has_arg=True, opc=self.opc, diff --git a/uncompyle6/util.py b/uncompyle6/util.py index 79b4fbe6..129e7666 100644 --- a/uncompyle6/util.py +++ b/uncompyle6/util.py @@ -3,7 +3,7 @@ # More could be done here though. from math import copysign -from xdis.codetype import UnicodeForPython3 +from xdis.cross_types import UnicodeForPython3 from xdis.version_info import PYTHON_VERSION_TRIPLE def get_code_name(code) -> str: From 34ef91312ed6d365ff9018ee0af3e3c8f3076951 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 3 Sep 2023 09:36:06 -0400 Subject: [PATCH 06/24] Revise to not zip attachment expected --- .github/ISSUE_TEMPLATE/bug-report.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md index 2cd81f4e..60c6cd6a 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.md +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -9,10 +9,10 @@ The issue may be flagged to make it easier for those looking for illegal activit If you are reporting a bug in decompilation, it will probably not be acted upon unless it is narrowed to a small example. You may have to do some work remove -extraneous code from the source example. Most bugs can be expressed in 30 lines of +extraneous code from the source example. Most bugs can be expressed in 30 lines of code. -Bugs are not for asking questions about a problem you +Issues are not for asking questions about a problem you are trying to solve that involve the use of uncompyle6 along the way, although I may be more tolerant of this if you sponsor the project. @@ -57,7 +57,7 @@ Prerequisites/Caveats disassembler and produces valid results. * Try to make the bytecode that exhibits a bug as small as possible. * Don't put bytecode and corresponding source code on any service that - requires registration to download. + requires registration to download. Instead attach it as a zip file. * When you open a bug report there is no privacy. If you need privacy, then contact me by email and explain who you are and the need for privacy. But be mindful that you may be asked to sponsor the project for the @@ -86,7 +86,7 @@ $ uncompyle6 $ ``` -Provide links to the Python bytecode. For example, you can create a +Attach a zip file to the Python bytecode or a gist with the information. If you have the correct source code, you can add that too. 
From 0c18d3504312ca0f3d9d5d1bca2d8bc0ae38ee36 Mon Sep 17 00:00:00 2001 From: rocky Date: Fri, 29 Sep 2023 22:08:28 -0400 Subject: [PATCH 07/24] Bump python master version default --- admin-tools/setup-master.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/admin-tools/setup-master.sh b/admin-tools/setup-master.sh index 181f857e..706e81fd 100755 --- a/admin-tools/setup-master.sh +++ b/admin-tools/setup-master.sh @@ -1,5 +1,5 @@ #!/bin/bash -PYTHON_VERSION=3.8.17 +PYTHON_VERSION=3.8.18 function checkout_version { local repo=$1 From 0ea75cadca37e93d8a18225581f1ca541ef50428 Mon Sep 17 00:00:00 2001 From: rocky Date: Fri, 6 Oct 2023 02:44:41 -0400 Subject: [PATCH 08/24] Small bit of linting --- uncompyle6/scanner.py | 25 +++++++++---------------- uncompyle6/scanners/scanner15.py | 25 ++++++++++++++++--------- uncompyle6/scanners/scanner37base.py | 19 +++++++++++-------- 3 files changed, 36 insertions(+), 33 deletions(-) diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 811f1917..f8fcebfb 100644 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016, 2018-2022 by Rocky Bernstein +# Copyright (c) 2016, 2018-2023 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock @@ -21,7 +21,8 @@ scanner/ingestion module. From here we call various version-specific scanners, e.g. for Python 2.7 or 3.4. """ -from typing import Optional, Tuple +from types import ModuleType +from typing import Optional, Tuple, Union from array import array from collections import namedtuple @@ -101,12 +102,15 @@ class Code(object): self._tokens, self._customize = scanner.ingest(co, classname, show_asm=show_asm) -class Scanner(object): +class Scanner: def __init__(self, version: tuple, show_asm=None, is_pypy=False): self.version = version self.show_asm = show_asm self.is_pypy = is_pypy + # Temoorary initialization. + self.opc = ModuleType("uninitialized") + if version[:2] in PYTHON_VERSIONS: v_str = f"""opcode_{version_tuple_to_str(version, start=0, end=2, delimiter="")}""" if is_pypy: @@ -319,15 +323,6 @@ class Scanner(object): def next_offset(self, op, offset: int) -> int: return xdis.next_offset(op, self.opc, offset) - def print_bytecode(self): - for i in self.op_range(0, len(self.code)): - op = self.code[i] - if op in self.JUMP_OPS: - dest = self.get_target(i, op) - print("%i\t%s\t%i" % (i, self.opname[op], dest)) - else: - print("%i\t%s\t" % (i, self.opname[op])) - def first_instr(self, start: int, end: int, instr, target=None, exact=True): """ Find the first in the block from start to end. @@ -483,7 +478,6 @@ class Scanner(object): result = [] extended_arg = 0 for offset in self.op_range(start, end): - op = code[offset] if op == self.opc.EXTENDED_ARG: @@ -542,7 +536,6 @@ class Scanner(object): offset = inst.offset continue if last_was_extarg: - # j = self.stmts.index(inst.offset) # self.lines[j] = offset @@ -595,7 +588,7 @@ class Scanner(object): target = parent["end"] return target - def setTokenClass(self, tokenClass) -> Token: + def setTokenClass(self, tokenClass: Token) -> Token: self.Token = tokenClass return self.Token @@ -621,7 +614,7 @@ def parse_fn_counts_30_35(argc: int) -> Tuple[int, int, int]: return ((argc & 0xFF), (argc >> 8) & 0xFF, annotate_count) -def get_scanner(version, is_pypy=False, show_asm=None): +def get_scanner(version: Union[str, tuple], is_pypy=False, show_asm=None) -> Scanner: # If version is a string, turn that into the corresponding float. 
if isinstance(version, str): diff --git a/uncompyle6/scanners/scanner15.py b/uncompyle6/scanners/scanner15.py index 2df854fe..179dc086 100644 --- a/uncompyle6/scanners/scanner15.py +++ b/uncompyle6/scanners/scanner15.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2018, 2021-2022 by Rocky Bernstein +# Copyright (c) 2016-2018, 2021-2023 by Rocky Bernstein """ Python 1.5 bytecode decompiler massaging. @@ -7,12 +7,15 @@ grammar parsing. """ import uncompyle6.scanners.scanner21 as scan + # from uncompyle6.scanners.scanner26 import ingest as ingest26 # bytecode verification, verify(), uses JUMP_OPs from here from xdis.opcodes import opcode_15 + JUMP_OPS = opcode_15.JUMP_OPS + # We base this off of 2.2 instead of the other way around # because we cleaned things up this way. # The history is that 2.7 support is the cleanest, @@ -23,7 +26,7 @@ class Scanner15(scan.Scanner21): self.opc = opcode_15 self.opname = opcode_15.opname self.version = (1, 5) - self.genexpr_name = '' + self.genexpr_name = "" return def ingest(self, co, classname=None, code_objects={}, show_asm=None): @@ -36,18 +39,22 @@ class Scanner15(scan.Scanner21): Some transformations are made to assist the deparsing grammar: - various types of LOAD_CONST's are categorized in terms of what they load - COME_FROM instructions are added to assist parsing control structures - - operands with stack argument counts or flag masks are appended to the opcode name, e.g.: + - operands with stack argument counts or flag masks are appended to the + opcode name, e.g.: * BUILD_LIST, BUILD_SET - * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments + * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional + arguments - EXTENDED_ARGS instructions are removed - Also, when we encounter certain tokens, we add them to a set which will cause custom - grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST - cause specific rules for the specific number of arguments they take. + Also, when we encounter certain tokens, we add them to a set which will cause + custom grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or + BUILD_LIST cause specific rules for the specific number of arguments they take. """ - tokens, customize = scan.Scanner21.ingest(self, co, classname, code_objects, show_asm) + tokens, customize = scan.Scanner21.ingest( + self, co, classname, code_objects, show_asm + ) for t in tokens: if t.op == self.opc.UNPACK_LIST: - t.kind = 'UNPACK_LIST_%d' % t.attr + t.kind = "UNPACK_LIST_%d" % t.attr pass return tokens, customize diff --git a/uncompyle6/scanners/scanner37base.py b/uncompyle6/scanners/scanner37base.py index aa3e934f..93a834fb 100644 --- a/uncompyle6/scanners/scanner37base.py +++ b/uncompyle6/scanners/scanner37base.py @@ -188,8 +188,7 @@ class Scanner37Base(Scanner): return def ingest(self, co, classname=None, code_objects={}, show_asm=None): - """ - Create "tokens" the bytecode of an Python code object. Largely these + """Create "tokens" the bytecode of an Python code object. Largely these are the opcode name, but in some cases that has been modified to make parsing easier. returning a list of uncompyle6 Token's. 
@@ -197,14 +196,18 @@ class Scanner37Base(Scanner): Some transformations are made to assist the deparsing grammar: - various types of LOAD_CONST's are categorized in terms of what they load - COME_FROM instructions are added to assist parsing control structures - - operands with stack argument counts or flag masks are appended to the opcode name, e.g.: - * BUILD_LIST, BUILD_SET - * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments + - operands with stack argument counts or flag masks are appended to the + opcode name, e.g.: + * BUILD_LIST, BUILD_SET + * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional + arguments - EXTENDED_ARGS instructions are removed - Also, when we encounter certain tokens, we add them to a set which will cause custom - grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST - cause specific rules for the specific number of arguments they take. + Also, when we encounter certain tokens, we add them to a set + which will cause custom grammar rules. Specifically, variable + arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific + rules for the specific number of arguments they take. + """ def tokens_append(j, token): From 77d727541b27da137c5512b064ee1a512b225f71 Mon Sep 17 00:00:00 2001 From: rocky Date: Tue, 10 Oct 2023 09:20:18 -0400 Subject: [PATCH 09/24] Note -F -extended in pydisasm --- HOW-TO-REPORT-A-BUG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/HOW-TO-REPORT-A-BUG.md b/HOW-TO-REPORT-A-BUG.md index ae98f187..d3c924b1 100644 --- a/HOW-TO-REPORT-A-BUG.md +++ b/HOW-TO-REPORT-A-BUG.md @@ -22,7 +22,7 @@ TL;DR (too long; didn't read) * Don't do something illegal. And don't ask me to do something illegal or help you do something illegal * We already have an infinite supply of decompilation bugs that need fixing, and an automated mechanism for finding more. Decompilation bugs get addressed by easiness to fix and by whim. If you expect yours to be fixed ahead of those, you need to justify why. * When asking for help, you may be asked for what you've tried on your own first. There are plenty of sources of information about this code. -* If you are looking for *timely* help or support, well, that is typically known paid service. I don't really have a mechanism for that since I have a full-time job. But supporting the project is an approximation. +* If you are looking for *timely* help or support, well, that is typically known as a _paid_ service. I don't really have a mechanism for that since I have a full-time job. But supporting the project is an approximation. * Submitting a bug or issue report that is likely to get acted upon may require a bit of effort on your part to make it easy for the problem solver. If you are not willing to do that, please don't waste our time. As indicated above, supporting the project will increase the likelihood of your issue getting noticed and acted upon. # Ethics @@ -74,7 +74,7 @@ obfuscation. Checking if bytecode is valid is pretty simple: disassemble the code. Python comes with a disassembly module called `dis`. A prerequisite module for this package, `xdis` has a cross-python version -disassembler called `pydisasm`. +disassembler called `pydisasm`. Using that with the `-F extended` option, generally provides a more comprehensive disassembly than is provided by other disassemblers. ## Semantic equivalence vs. exact source code From e9120eab45801782fbf645fbf2b7235aac046574 Mon Sep 17 00:00:00 2001 From: "R. 
Bernstein" Date: Sat, 14 Oct 2023 18:38:21 -0400 Subject: [PATCH 10/24] Update HOW-TO-REPORT-A-BUG.md grammar typo --- HOW-TO-REPORT-A-BUG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HOW-TO-REPORT-A-BUG.md b/HOW-TO-REPORT-A-BUG.md index d3c924b1..4fc430c2 100644 --- a/HOW-TO-REPORT-A-BUG.md +++ b/HOW-TO-REPORT-A-BUG.md @@ -37,7 +37,7 @@ confidentiality. You may be asked about the authorship or claimed ownership of t For many open-source projects bugs where the expectation is that bugs are rare, reporting bugs in a *thoughtful* way can be helpful. See also [How to Ask Questions the Smart Way](http://www.catb.org/~esr/faqs/smart-questions.html). -In this project though, most of the bug reports boil down to the something like: I have I am trying to reverse engineer some code that I am not the author/owner and that person doesn't want me to have access to. I am hitting a problem somewhere along the line which might have to do with decompilation, but it could be something else like how the bytecode was extracted, some problem in deliberately obfuscated code, or the use some kind of Python bytecode version that isn't supported by the decompiler. +In this project though, most of the bug reports boil down to the something like: I am trying to reverse engineer some code that I am not the author/owner and that person doesn't want me to have access to. I am hitting a problem somewhere along the line which might have to do with decompilation, but it could be something else like how the bytecode was extracted, some problem in deliberately obfuscated code, or the use some kind of Python bytecode version that isn't supported by the decompiler. While you are free to report these, unless you sponsor the project, I may close them with about the same amount of effort spent that I think was used to open the report for them. And if you spent a considerable amount of time to create the bug report but didn't follow instructions given here and in the issue template, I am sorry in advance. Just go back, read, and follow instructions. 
From dcc9d1a5713ecda16eee6f0244b76f1a71a653d6 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 17 Dec 2023 10:52:32 -0500 Subject: [PATCH 11/24] Fix spelling via "codespell" --- pytest/test_fjt.py | 2 +- pytest/validate.py | 2 +- setup.py | 4 ++-- test/decompyle/test_prettyprint.py | 2 +- uncompyle6/bin/pydisassemble.py | 2 +- uncompyle6/main.py | 2 +- uncompyle6/parser.py | 6 +++--- uncompyle6/parsers/parse26.py | 4 ++-- uncompyle6/parsers/parse3.py | 4 ++-- uncompyle6/parsers/parse30.py | 8 ++++---- uncompyle6/parsers/parse35.py | 4 ++-- uncompyle6/parsers/parse36.py | 6 +++--- uncompyle6/parsers/parse37.py | 8 ++++---- uncompyle6/parsers/parse37base.py | 2 +- uncompyle6/parsers/parse38.py | 2 +- uncompyle6/scanner.py | 2 +- uncompyle6/scanners/scanner2.py | 16 ++++++++-------- uncompyle6/scanners/scanner3.py | 8 ++++---- uncompyle6/scanners/scanner37.py | 4 ++-- uncompyle6/semantics/check_ast.py | 2 +- uncompyle6/semantics/consts.py | 14 +++++++------- uncompyle6/semantics/customize3.py | 4 ++-- uncompyle6/semantics/fragments.py | 12 ++++++------ uncompyle6/semantics/helper.py | 4 ++-- uncompyle6/semantics/make_function1.py | 6 +++--- uncompyle6/semantics/make_function2.py | 2 +- uncompyle6/semantics/make_function3.py | 4 ++-- uncompyle6/semantics/pysource.py | 20 ++++++++++---------- uncompyle6/semantics/transform.py | 2 +- uncompyle6/verify.py | 4 ++-- 30 files changed, 81 insertions(+), 81 deletions(-) diff --git a/pytest/test_fjt.py b/pytest/test_fjt.py index 6c91aa4d..9a59124a 100644 --- a/pytest/test_fjt.py +++ b/pytest/test_fjt.py @@ -27,7 +27,7 @@ def test_if_in_for(): fjt = scan.find_jump_targets(False) ## FIXME: the data below is wrong. - ## we get different results currenty as well. + ## we get different results currently as well. ## We need to probably fix both the code ## and the test below # assert {15: [3], 69: [66], 63: [18]} == fjt diff --git a/pytest/validate.py b/pytest/validate.py index f1dd0193..4a730fb2 100644 --- a/pytest/validate.py +++ b/pytest/validate.py @@ -67,7 +67,7 @@ def are_instructions_equal(i1, i2): Determine if two instructions are approximately equal, ignoring certain fields which we allow to differ, namely: - * code objects are ignore (should probaby be checked) due to address + * code objects are ignore (should probably be checked) due to address * line numbers :param i1: left instruction to compare diff --git a/setup.py b/setup.py index 192d106a..515ee367 100755 --- a/setup.py +++ b/setup.py @@ -5,8 +5,8 @@ import sys """Setup script for the 'uncompyle6' distribution.""" SYS_VERSION = sys.version_info[0:2] -if not ((2, 4) <= SYS_VERSION < (3, 12)): - mess = "Python Release 2.6 .. 3.11 are supported in this code branch." +if not ((2, 4) <= SYS_VERSION < (3, 13)): + mess = "Python Release 2.6 .. 3.12 are supported in this code branch." if (2, 4) <= SYS_VERSION <= (2, 7): mess += ( "\nFor your Python, version %s, use the python-2.4 code/branch." diff --git a/test/decompyle/test_prettyprint.py b/test/decompyle/test_prettyprint.py index 957d72eb..634041a6 100644 --- a/test/decompyle/test_prettyprint.py +++ b/test/decompyle/test_prettyprint.py @@ -1,6 +1,6 @@ """ test_prettyprint.py -- source test pattern for tesing the prettyprint - funcionality of decompyle + functionality of decompyle This source is part of the decompyle test suite. 
diff --git a/uncompyle6/bin/pydisassemble.py b/uncompyle6/bin/pydisassemble.py index a2ea3659..be1cb152 100755 --- a/uncompyle6/bin/pydisassemble.py +++ b/uncompyle6/bin/pydisassemble.py @@ -23,7 +23,7 @@ Disassemble/Tokenize FILE with in the way that is done to assist uncompyle6 in parsing the instruction stream. For example instructions with variable-length arguments like CALL_FUNCTION and BUILD_LIST have argument counts appended to the instruction name, and -COME_FROM psuedo instructions are inserted into the instruction stream. +COME_FROM pseudo instructions are inserted into the instruction stream. Bit flag values encoded in an operand are expanding, EXTENDED_ARG value are folded into the following instruction operand. diff --git a/uncompyle6/main.py b/uncompyle6/main.py index 669643e6..f7da7430 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -66,7 +66,7 @@ def decompile( """ ingests and deparses a given code block 'co' - if `bytecode_version` is None, use the current Python intepreter + if `bytecode_version` is None, use the current Python interpreter version. Caller is responsible for closing `out` and `mapstream` diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index 7873cd08..6fd6da2f 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -221,7 +221,7 @@ class PythonParser(GenericASTBuilder): This appears in CALL_FUNCTION or CALL_METHOD (PyPy) tokens """ - # Low byte indicates number of positional paramters, + # Low byte indicates number of positional parameters, # high byte number of keyword parameters assert token.kind.startswith("CALL_FUNCTION") or token.kind.startswith("CALL_METHOD") args_pos = token.attr & 0xFF @@ -600,12 +600,12 @@ class PythonParser(GenericASTBuilder): compare ::= compare_single compare_single ::= expr expr COMPARE_OP - # A compare_chained is two comparisions, as in: x <= y <= z + # A compare_chained is two comparisons, as in: x <= y <= z compare_chained ::= expr compared_chained_middle ROT_TWO POP_TOP _come_froms compare_chained_right ::= expr COMPARE_OP JUMP_FORWARD - # Non-null kvlist items are broken out in the indiviual grammars + # Non-null kvlist items are broken out in the individual grammars kvlist ::= # Positional arguments in make_function diff --git a/uncompyle6/parsers/parse26.py b/uncompyle6/parsers/parse26.py index 74370369..9873ca78 100644 --- a/uncompyle6/parsers/parse26.py +++ b/uncompyle6/parsers/parse26.py @@ -307,7 +307,7 @@ class Python26Parser(Python2Parser): and ::= expr JUMP_IF_FALSE POP_TOP expr JUMP_IF_FALSE POP_TOP - # A "compare_chained" is two comparisions like x <= y <= z + # A "compare_chained" is two comparisons like x <= y <= z compare_chained ::= expr compared_chained_middle ROT_TWO COME_FROM POP_TOP _come_froms compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP @@ -466,7 +466,7 @@ class Python26Parser(Python2Parser): ja_attr = ast[4].attr return tokens[last].offset != ja_attr elif lhs == "try_except": - # We need to distingush "try_except" from "tryelsestmt"; we do that + # We need to distinguish "try_except" from "tryelsestmt"; we do that # by checking the jump before the END_FINALLY # If we have: # insn diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 290632f8..813f6660 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2022 Rocky Bernstein +# Copyright (c) 2015-2023 Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John 
Aycock
@@ -634,7 +634,7 @@ class Python3Parser(PythonParser):
         self.add_unique_rule(rule, token.kind, uniq_param, customize)
 
     def add_make_function_rule(self, rule, opname, attr, customize):
-        """Python 3.3 added a an addtional LOAD_STR before MAKE_FUNCTION and
+        """Python 3.3 added an additional LOAD_STR before MAKE_FUNCTION and
         this has an effect on many rules.
         """
         if self.version >= (3, 3):
diff --git a/uncompyle6/parsers/parse30.py b/uncompyle6/parsers/parse30.py
index b36f8d41..20405db5 100644
--- a/uncompyle6/parsers/parse30.py
+++ b/uncompyle6/parsers/parse30.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2017, 2022 Rocky Bernstein
+# Copyright (c) 2016-2017, 2022-2023 Rocky Bernstein
 """
 spark grammar differences over Python 3.1 for Python 3.0.
 """
@@ -31,8 +31,8 @@ class Python30Parser(Python31Parser):
 
       # In many ways Python 3.0 code generation is more like Python 2.6 than
       # it is 2.7 or 3.1. So we have a number of 2.6ish (and before) rules below
-      # Specifically POP_TOP is more prevelant since there is no POP_JUMP_IF_...
-      # instructions
+      # Specifically POP_TOP is more prevalent since there is no POP_JUMP_IF_...
+      # instructions.
 
       _ifstmts_jump ::= c_stmts JUMP_FORWARD _come_froms POP_TOP COME_FROM
       _ifstmts_jump ::= c_stmts COME_FROM POP_TOP
@@ -208,7 +208,7 @@ class Python30Parser(Python31Parser):
                       come_froms POP_TOP POP_BLOCK COME_FROM_LOOP
 
-      # A "compare_chained" is two comparisions like x <= y <= z
+      # A "compare_chained" is two comparisons like x <= y <= z
       compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP
                                jmp_false compared_chained_middle _come_froms
       compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP
diff --git a/uncompyle6/parsers/parse35.py b/uncompyle6/parsers/parse35.py
index 86043ef2..9599e6cc 100644
--- a/uncompyle6/parsers/parse35.py
+++ b/uncompyle6/parsers/parse35.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2017, 2019, 2021 Rocky Bernstein
+# Copyright (c) 2016-2017, 2019, 2021, 2023 Rocky Bernstein
 """
 spark grammar differences over Python 3.4 for Python 3.5.
 """
@@ -258,7 +258,7 @@ class Python35Parser(Python34Parser):
                         ('pos_arg ' * args_pos) +
                         ('kwarg ' * args_kw) + kw + token.kind)
 
-                # Note: semantic actions make use of the fact of wheter "args_pos"
+                # Note: semantic actions make use of whether "args_pos" is
                 # zero or not in creating a template rule.
                 self.add_unique_rule(rule, token.kind, args_pos, customize)
             else:
diff --git a/uncompyle6/parsers/parse36.py b/uncompyle6/parsers/parse36.py
index 280b23ba..54b5a078 100644
--- a/uncompyle6/parsers/parse36.py
+++ b/uncompyle6/parsers/parse36.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2020, 2022 Rocky Bernstein
+# Copyright (c) 2016-2020, 2022-2023 Rocky Bernstein
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -58,7 +58,7 @@ class Python36Parser(Python35Parser):
                      come_froms JUMP_BACK come_froms POP_BLOCK COME_FROM_LOOP
 
         # 3.6 due to jump optimization, we sometimes add RETURN_END_IF where
-        # RETURN_VALUE is meant. Specifcally this can happen in
+        # RETURN_VALUE is meant. Specifically, this can happen in
         # ifelsestmt -> ...else_suite _. suite_stmts...
(last) stmt return ::= return_expr RETURN_END_IF return ::= return_expr RETURN_VALUE COME_FROM @@ -404,7 +404,7 @@ class Python36Parser(Python35Parser): JUMP_LOOP COME_FROM POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP - # FIXME this is a workaround for probalby some bug in the Earley parser + # FIXME this is a workaround for probably some bug in the Earley parser # if we use get_aiter, then list_comp_async doesn't match, and I don't # understand why. expr_get_aiter ::= expr GET_AITER diff --git a/uncompyle6/parsers/parse37.py b/uncompyle6/parsers/parse37.py index a1994543..5eff88a2 100644 --- a/uncompyle6/parsers/parse37.py +++ b/uncompyle6/parsers/parse37.py @@ -224,11 +224,11 @@ class Python37Parser(Python37BaseParser): compare ::= compare_single compare_single ::= expr expr COMPARE_OP - # A compare_chained is two comparisions like x <= y <= z + # A compare_chained is two comparisons like x <= y <= z compare_chained ::= expr compared_chained_middle ROT_TWO POP_TOP _come_froms compare_chained_right ::= expr COMPARE_OP JUMP_FORWARD - # Non-null kvlist items are broken out in the indiviual grammars + # Non-null kvlist items are broken out in the individual grammars kvlist ::= # Positional arguments in make_function @@ -1144,7 +1144,7 @@ class Python37Parser(Python37BaseParser): come_froms JUMP_BACK come_froms POP_BLOCK COME_FROM_LOOP # 3.6 due to jump optimization, we sometimes add RETURN_END_IF where - # RETURN_VALUE is meant. Specifcally this can happen in + # RETURN_VALUE is meant. Specifically this can happen in # ifelsestmt -> ...else_suite _. suite_stmts... (last) stmt return ::= return_expr RETURN_END_IF return ::= return_expr RETURN_VALUE COME_FROM @@ -1377,7 +1377,7 @@ class Python37Parser(Python37BaseParser): JUMP_BACK COME_FROM POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP - # FIXME this is a workaround for probalby some bug in the Earley parser + # FIXME this is a workaround for probably some bug in the Earley parser # if we use get_aiter, then list_comp_async doesn't match, and I don't # understand why. expr_get_aiter ::= expr GET_AITER diff --git a/uncompyle6/parsers/parse37base.py b/uncompyle6/parsers/parse37base.py index 39940afe..f5ef7065 100644 --- a/uncompyle6/parsers/parse37base.py +++ b/uncompyle6/parsers/parse37base.py @@ -38,7 +38,7 @@ class Python37BaseParser(PythonParser): return "%s_0" % (token.kind) def add_make_function_rule(self, rule, opname, attr, customize): - """Python 3.3 added a an addtional LOAD_STR before MAKE_FUNCTION and + """Python 3.3 added a an additional LOAD_STR before MAKE_FUNCTION and this has an effect on many rules. 
""" new_rule = rule % "LOAD_STR " diff --git a/uncompyle6/parsers/parse38.py b/uncompyle6/parsers/parse38.py index 1d62ce0d..1a638c01 100644 --- a/uncompyle6/parsers/parse38.py +++ b/uncompyle6/parsers/parse38.py @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2020, 2022 Rocky Bernstein +# Copyright (c) 2017-2020, 2022-2023 Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index f8fcebfb..912c4a0f 100644 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -599,7 +599,7 @@ def parse_fn_counts_30_35(argc: int) -> Tuple[int, int, int]: In Python 3.0 to 3.5 MAKE_CLOSURE and MAKE_FUNCTION encode arguments counts of positional, default + named, and annotation arguments a particular kind of encoding where each of - the entry a a packe byted value of the lower 24 bits + the entry a a packed byted value of the lower 24 bits of ``argc``. The high bits of argc may have come from an EXTENDED_ARG instruction. Here, we unpack the values from the ``argc`` int and return a triple of the diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 77d7fcae..bd1f2769 100644 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2022 by Rocky Bernstein +# Copyright (c) 2015-2023 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # @@ -55,7 +55,7 @@ class Scanner2(Scanner): self.load_asserts = set([]) # Create opcode classification sets - # Note: super initilization above initializes self.opc + # Note: super initialization above initializes self.opc # Ops that start SETUP_ ... We will COME_FROM with these names # Some blocks and END_ statements. And they can start @@ -430,7 +430,7 @@ class Scanner2(Scanner): # EXTENDED_ARG doesn't appear in instructions, # but is instead the next opcode folded into it, and has the offset - # of the EXTENDED_ARG. Therefor in self.offset2nist_index we'll find + # of the EXTENDED_ARG. Therefore in self.offset2nist_index we'll find # the instruction at the previous EXTENDED_ARG offset which is 3 # bytes back. if j is None and offset > self.opc.ARG_MAX_VALUE: @@ -925,7 +925,7 @@ class Scanner2(Scanner): # Is it an "and" inside an "if" or "while" block if op == self.opc.PJIF: - # Search for other POP_JUMP_IF_...'s targetting the + # Search for other POP_JUMP_IF_...'s targeting the # same target, of the current POP_JUMP_... instruction, # starting from current offset, and filter everything inside inner 'or' # jumps and mid-line ifs @@ -1024,7 +1024,7 @@ class Scanner2(Scanner): ): self.fixed_jumps[offset] = rtarget else: - # note test for < 2.7 might be superflous although informative + # note test for < 2.7 might be superfluous although informative # for 2.7 a different branch is taken and the below code is handled # under: elif op in self.pop_jump_if_or_pop # below @@ -1114,7 +1114,7 @@ class Scanner2(Scanner): if code_pre_rtarget in self.jump_forward: if_end = self.get_target(pre_rtarget) - # Is this a loop and not an "if" statment? + # Is this a loop and not an "if" statement? 
if (if_end < pre_rtarget) and (pre[if_end] in self.setup_loop_targets): if if_end > start: @@ -1337,9 +1337,9 @@ class Scanner2(Scanner): # FIXME FIXME FIXME # All the conditions are horrible, and I am not sure I - # undestand fully what's going l + # understand fully what's going l # We REALLY REALLY need a better way to handle control flow - # Expecially for < 2.7 + # Especially for < 2.7 if label is not None and label != -1: if self.version[:2] == (2, 7): # FIXME: rocky: I think we need something like this... diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 62c47d74..a56d47cc 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -62,7 +62,7 @@ class Scanner3(Scanner): super(Scanner3, self).__init__(version, show_asm, is_pypy) # Create opcode classification sets - # Note: super initilization above initializes self.opc + # Note: super initialization above initializes self.opc # For ops that start SETUP_ ... we will add COME_FROM with these names # at the their targets. @@ -228,7 +228,7 @@ class Scanner3(Scanner): assert count <= i if collection_type == "CONST_DICT": - # constant dictonaries work via BUILD_CONST_KEY_MAP and + # constant dictionaries work via BUILD_CONST_KEY_MAP and # handle the values() like sets and lists. # However the keys() are an LOAD_CONST of the keys. # adjust offset to account for this @@ -1130,7 +1130,7 @@ class Scanner3(Scanner): # Is it an "and" inside an "if" or "while" block if op == self.opc.POP_JUMP_IF_FALSE: - # Search for another POP_JUMP_IF_FALSE targetting the same op, + # Search for another POP_JUMP_IF_FALSE targeting the same op, # in current statement, starting from current offset, and filter # everything inside inner 'or' jumps and midline ifs match = self.rem_or( @@ -1337,7 +1337,7 @@ class Scanner3(Scanner): self.not_continue.add(pre_rtarget) elif code[pre_rtarget] in rtarget_break: self.structs.append({"type": "if-then", "start": start, "end": rtarget}) - # It is important to distingish if this return is inside some sort + # It is important to distinguish if this return is inside some sort # except block return jump_prev = prev_op[offset] if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP: diff --git a/uncompyle6/scanners/scanner37.py b/uncompyle6/scanners/scanner37.py index 99517ee9..1a505e43 100644 --- a/uncompyle6/scanners/scanner37.py +++ b/uncompyle6/scanners/scanner37.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2019, 2021-2022 by Rocky Bernstein +# Copyright (c) 2016-2019, 2021-2023 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -51,7 +51,7 @@ class Scanner37(Scanner37Base): assert count <= i if collection_type == "CONST_DICT": - # constant dictonaries work via BUILD_CONST_KEY_MAP and + # constant dictionaries work via BUILD_CONST_KEY_MAP and # handle the values() like sets and lists. # However the keys() are an LOAD_CONST of the keys. # adjust offset to account for this diff --git a/uncompyle6/semantics/check_ast.py b/uncompyle6/semantics/check_ast.py index 203ff0a9..1f0f0c7b 100644 --- a/uncompyle6/semantics/check_ast.py +++ b/uncompyle6/semantics/check_ast.py @@ -21,7 +21,7 @@ def checker(ast, in_loop, errors): if ast.kind in ("aug_assign1", "aug_assign2") and ast[0][0] == "and": text = str(ast) error_text = ( - "\n# improper augmented assigment (e.g. +=, *=, ...):\n#\t" + "\n# improper augmented assignment (e.g. 
+=, *=, ...):\n#\t" + "\n# ".join(text.split("\n")) + "\n" ) diff --git a/uncompyle6/semantics/consts.py b/uncompyle6/semantics/consts.py index 18d5b9e2..0b20a86e 100644 --- a/uncompyle6/semantics/consts.py +++ b/uncompyle6/semantics/consts.py @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2022 by Rocky Bernstein +# Copyright (c) 2017-2023 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ minint = -sys.maxsize - 1 maxint = sys.maxsize -# Operator precidence See +# Operator precedence See # https://docs.python.org/2/reference/expressions.html#operator-precedence # or # https://docs.python.org/3/reference/expressions.html#operator-precedence @@ -37,11 +37,11 @@ maxint = sys.maxsize # various templates we use odd values. Avoiding equal-precedent comparisons # avoids ambiguity what to do when the precedence is equal. -# The precidence of a key below applies the key, a node, and the its -# *parent*. A node however sometimes sets the precidence for its -# children. For example, "call" has precidence 2 so we don't get +# The precedence of a key below applies the key, a node, and the its +# *parent*. A node however sometimes sets the precedence for its +# children. For example, "call" has precedence 2 so we don't get # additional the additional parenthesis of: ".. op (call())". However -# for call's children, it parameters, we set the the precidence high, +# for call's children, it parameters, we set the the precedence high, # say to 100, to make sure we avoid additional prenthesis in # call((.. op ..)). @@ -428,7 +428,7 @@ TABLE_DIRECT = { "expr_stmt": ( "%|%p\n", - # When a statment contains only a named_expr (:=) + # When a statement contains only a named_expr (:=) # the named_expr should have parenthesis around it. (0, "expr", PRECEDENCE["named_expr"] - 1) ), diff --git a/uncompyle6/semantics/customize3.py b/uncompyle6/semantics/customize3.py index 798ba2f3..e0a9f026 100644 --- a/uncompyle6/semantics/customize3.py +++ b/uncompyle6/semantics/customize3.py @@ -226,7 +226,7 @@ def customize_for_version3(self, version): assert node[0] == "expr" if node[0][0] == "get_iter": # Skip over yield_from.expr.get_iter which adds an - # extra iter(). Maybe we can do in tranformation phase instead? + # extra iter(). Maybe we can do in transformation phase instead? template = ("yield from %c", (0, "expr")) self.template_engine(template, node[0][0]) else: @@ -318,7 +318,7 @@ def customize_for_version3(self, version): # FIXME: the real situation is that when derived from # function_def_annotate we the name has been filled in. # But when derived from funcdefdeco it hasn't Would like a better - # way to distinquish. + # way to distinguish. if self.f.getvalue()[-4:] == "def ": self.write(get_code_name(code_node.attr)) diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index 9233d8c7..edd009e7 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -1347,7 +1347,7 @@ class FragmentsWalker(pysource.SourceWalker, object): selectedText = text[start:finish] # Compute offsets relative to the beginning of the - # line rather than the beinning of the text + # line rather than the beginning of the text. 
try: lineStart = text[:start].rindex("\n") + 1 except ValueError: @@ -1355,7 +1355,7 @@ class FragmentsWalker(pysource.SourceWalker, object): adjustedStart = start - lineStart # If selected text is greater than a single line - # just show the first line plus elipses. + # just show the first line plus ellipsis. lines = selectedText.split("\n") if len(lines) > 1: adjustedEnd = len(lines[0]) - adjustedStart @@ -1428,7 +1428,7 @@ class FragmentsWalker(pysource.SourceWalker, object): p = node.parent orig_parent = p # If we can get different text, use that as the parent, - # otherwise we'll use the immeditate parent + # otherwise we'll use the immediatate parent. while p and ( hasattr(p, "parent") and p.start == node.start and p.finish == node.finish ): @@ -1778,7 +1778,7 @@ class FragmentsWalker(pysource.SourceWalker, object): n_set = n_tuple = n_build_set = n_list def template_engine(self, entry, startnode): - """The format template interpetation engine. See the comment at the + """The format template interpretation engine. See the comment at the beginning of this module for the how we interpret format specifications such as %c, %C, and so on. """ @@ -1961,7 +1961,7 @@ class FragmentsWalker(pysource.SourceWalker, object): # FIXME figure out how to get these cases to be table driven. # 2. subroutine calls. It the last op is the call and for purposes of printing - # we don't need to print anything special there. However it encompases the + # we don't need to print anything special there. However it encompasses the # entire string of the node fn(...) if startnode.kind == "call": last_node = startnode[-1] @@ -2166,7 +2166,7 @@ def code_deparse_around_offset( return deparsed -# Deprecated. Here still for compatability +# Deprecated. Here still for compatibility def deparse_code_around_offset( name, offset, diff --git a/uncompyle6/semantics/helper.py b/uncompyle6/semantics/helper.py index afaf7b73..27c21a0d 100644 --- a/uncompyle6/semantics/helper.py +++ b/uncompyle6/semantics/helper.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 Rocky Bernstein +# Copyright (c) 2022-2023 Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -43,7 +43,7 @@ def escape_string(s, quotes=('"', "'", '"""', "'''")): s = s.replace(orig, replace) return "%s%s%s" % (quote, s, quote) -# FIXME: this and find_globals could be paramaterized with one of the +# FIXME: this and find_globals could be parameterized with one of the # above global ops def find_all_globals(node, globs): """Search Syntax Tree node to find variable names that are global.""" diff --git a/uncompyle6/semantics/make_function1.py b/uncompyle6/semantics/make_function1.py index e9c8f3b0..09f95b4f 100644 --- a/uncompyle6/semantics/make_function1.py +++ b/uncompyle6/semantics/make_function1.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2022 by Rocky Bernstein +# Copyright (c) 2015-2023 by Rocky Bernstein # Copyright (c) 2000-2002 by hartmut Goebel # # This program is free software: you can redistribute it and/or modify @@ -31,7 +31,7 @@ from xdis import iscode def make_function1(self, node, is_lambda, nested=1, code_node=None): """ - Dump function defintion, doc string, and function body. + Dump function definition, doc string, and function body. This code is specialied for Python 2. 
""" @@ -40,7 +40,7 @@ def make_function1(self, node, is_lambda, nested=1, code_node=None): - handle defaults - handle format tuple parameters """ - # if formal parameter is a tuple, the paramater name + # if formal parameter is a tuple, the parameter name # starts with a dot (eg. '.1', '.2') args = tree[0] del tree[0] diff --git a/uncompyle6/semantics/make_function2.py b/uncompyle6/semantics/make_function2.py index be93c796..fb186f4f 100644 --- a/uncompyle6/semantics/make_function2.py +++ b/uncompyle6/semantics/make_function2.py @@ -34,7 +34,7 @@ from uncompyle6.show import maybe_show_tree_param_default def make_function2(self, node, is_lambda, nested=1, code_node=None): """ - Dump function defintion, doc string, and function body. + Dump function definition, doc string, and function body. This code is specialied for Python 2. """ diff --git a/uncompyle6/semantics/make_function3.py b/uncompyle6/semantics/make_function3.py index ad4d80e4..36c98426 100644 --- a/uncompyle6/semantics/make_function3.py +++ b/uncompyle6/semantics/make_function3.py @@ -37,7 +37,7 @@ def make_function3_annotate( self, node, is_lambda, nested=1, code_node=None, annotate_last=-1 ): """ - Dump function defintion, doc string, and function + Dump function definition, doc string, and function body. This code is specialized for Python 3""" def build_param(ast, name, default): @@ -310,7 +310,7 @@ def make_function3(self, node, is_lambda, nested=1, code_node=None): # the object on the stack, for keyword-only parameters # * parameter annotation objects # * a tuple listing the parameter names for the annotations - # (only if there are ony annotation objects) + # (only if there are only annotation objects) # * the code associated with the function (at TOS1) # * the qualified name of the function (at TOS) diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 98fb1e6f..b52f90f9 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -91,7 +91,7 @@ Python. # the second item is the nonterminal name and the precedence is given last. # # %C evaluate/travers children recursively, with sibling children separated by the -# given string. It needs a 3-tuple: a starting node, the maximimum +# given string. It needs a 3-tuple: a starting node, the maximum # value of an end node, and a string to be inserted between sibling children # # %, Append ',' if last %C only printed one item. This is mostly for tuples @@ -99,12 +99,12 @@ Python. # other tuples. The specifier takes no arguments # # %P same as %C but sets operator precedence. Its argument is a 4-tuple: -# the node low and high indices, the separator, a string the precidence +# the node low and high indices, the separator, a string the precedence # value, an integer. # # %D Same as `%C` this is for left-recursive lists like kwargs where goes # to epsilon at the beginning. It needs a 3-tuple: a starting node, the -# maximimum value of an end node, and a string to be inserted between +# maximum value of an end node, and a string to be inserted between # sibling children. If we were to use `%C` an extra separator with an # epsilon would appear at the beginning. # @@ -119,7 +119,7 @@ Python. # %[N]{EXPR} Python eval(EXPR) in context of node[N]. Takes no arguments # # %[N]{%X} evaluate/recurse on child node[N], using specifier %X. -# %X can be one of the above, e.g. %c, %p, etc. Takes the arguemnts +# %X can be one of the above, e.g. %c, %p, etc. Takes the arguments # that the specifier uses. # # %% literal '%'. 
Takes no arguments.
@@ -214,22 +214,22 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
         of both the syntax tree and language we should produce.
 
         `out' is IO-like file pointer to where the output should go. It
-        whould have a getvalue() method.
+        should have a getvalue() method.
 
         `scanner' is a method to call when we need to scan tokens. Sometimes
         in producing output we will run across further tokens that need
-        to be scaned.
+        to be scanned.
 
         If `showast' is True, we print the syntax tree.
 
         `compile_mode' is either 'exec' or 'single'. It is the compile
         mode that was used to create the Syntax Tree and specifies a
-        gramar variant within a Python version to use.
+        grammar variant within a Python version to use.
 
         `is_pypy` should be True if the Syntax Tree was generated for PyPy.
 
         `linestarts` is a dictionary of line number to bytecode offset. This
-        can sometimes assist in determinte which kind of source-code construct
+        can sometimes assist in determining which kind of source-code construct
         to use when there is ambiguity.
 
         """
@@ -680,7 +680,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
         pass
 
     def template_engine(self, entry, startnode):
-        """The format template interpetation engine.  See the comment at the
+        """The format template interpretation engine.  See the comment at the
         beginning of this module for the how we interpret format
         specifications such as %c, %C, and so on.
         """
@@ -970,7 +970,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
             # within the function definition
             assert node[1] == "store"
             # if lhs is not a UNPACK_TUPLE (or equiv.),
-            # add parenteses to make this a tuple
+            # add parentheses to make this a tuple
             # if node[1][0] not in ('unpack', 'unpack_list'):
             result = self.traverse(node[1])
             if not (result.startswith("(") and result.endswith(")")):
diff --git a/uncompyle6/semantics/transform.py b/uncompyle6/semantics/transform.py
index 464ca911..e3f96c83 100644
--- a/uncompyle6/semantics/transform.py
+++ b/uncompyle6/semantics/transform.py
@@ -263,7 +263,7 @@ class TreeTransform(GenericASTTraversal, object):
     # if elif elif
     def n_ifelsestmt(self, node, preprocess=False):
         """
-        Transformation involving if..else statments.
+        Transformation involving if..else statements.
For example
diff --git a/uncompyle6/verify.py b/uncompyle6/verify.py
index 431b6801..33a525e9 100755
--- a/uncompyle6/verify.py
+++ b/uncompyle6/verify.py
@@ -185,7 +185,7 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, name=""):
     # print dir(code_obj1)
     if isinstance(code_obj1, object):
         # new style classes (Python 2.2)
-        # assume _both_ code objects to be new stle classes
+        # assume _both_ code objects to be new style classes
         assert dir(code_obj1) == dir(code_obj2)
     else:
         # old style classes
@@ -205,7 +205,7 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, name=""):
         # should be faster and more sophisticated
         # if this compare fails, we use the old routine to
        # find out, what exactly is not equal
-        # if this compare succeds, simply return
+        # if this compare succeeds, simply return
         # return
         pass
 
From 7a2348e4ccdb31cb1704aebe6fcc9cfbd3fb07d7 Mon Sep 17 00:00:00 2001
From: Jakub Wilk 
Date: Fri, 19 Jan 2024 23:20:13 +0100
Subject: [PATCH 12/24] Fix typos

---
 HISTORY.md                                  | 14 ++++----
 NEWS.md                                     | 38 ++++++++++-----------
 NEW_FEATURES.rst                            |  4 +--
 README.rst                                  |  8 ++---
 test/simple_source/bug27+/04_try_tryelse.py |  2 +-
 test/simple_source/bug36/02_kwargs.py       |  2 +-
 test/simple_source/stmts/11_return_val.py   |  2 +-
 uncompyle6/parsers/parse35.py               |  2 +-
 uncompyle6/parsers/parse37.py               |  2 +-
 uncompyle6/semantics/fragments.py           |  2 +-
 10 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index d2b6893f..1377ee67 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -7,7 +7,7 @@ In the interest of shortening what is written here, I am going to start where we
 For the earlier history up to 2006 and the code up until Python 2.4, which I find interesting, look at that link.

 Sometime around 2014 was the dawn of ["uncompyle" and PyPI](https://pypi.python.org/pypi/uncompyle/1.1) — the era of
-public version control. Dan Pascu's code although not public used [darcs](http://darcs.net/) for version control. I converted the darcs to to git and put this at [decompyle-2.4](https://github.com/rocky/decompile-2.4).
+public version control. Dan Pascu's code although not public used [darcs](http://darcs.net/) for version control. I converted the darcs repository to git and put this at [decompyle-2.4](https://github.com/rocky/decompile-2.4).

 # uncompyle, unpyc

@@ -17,7 +17,7 @@ The project exists not only on [github](https://github.com/gstarnberger/uncompyl
 [bitbucket](https://bitbucket.org/gstarnberger/uncompyle) and later the defunct [google
 code](https://code.google.com/archive/p/unpyc/) under the name _unpyc_. The git/svn history goes back to 2009. Somewhere in there the name was changed from "decompyle" to "unpyc" by Keknehv, and then to "uncompyle" by Guenther Starnberger.

-The name Thomas Grainger isn't found in (m)any of the commits in the several years of active development. First Keknehv worked on this up to Python 2.5 or so while acceping Python bytecode back to 2.0 or so. Then "hamled" made a few commits earler on, while Eike Siewertsen made a few commits later on.
+The name Thomas Grainger isn't found in (m)any of the commits in the several years of active development. First Keknehv worked on this up to Python 2.5 or so while accepting Python bytecode back to 2.0 or so. Then "hamled" made a few commits earlier on, while Eike Siewertsen made a few commits later on.
But mostly "wibiti", and Guenther Starnberger got the code to where uncompyle2 was around 2012.

 While John Aycock and Hartmut Goebel were well versed in compiler technology, those that have come afterwards don't seem to have been as facile in it. Furthermore, documentation or guidance on how the decompiler code worked, comparison to a conventional compiler pipeline, how to add new constructs, or debug grammars was weak. Some of the grammar tracing and error reporting was a bit weak as well.
@@ -38,7 +38,7 @@ I started working on this late 2015, mostly to add fragment support. In that, I

 * this project - grammar and semantic actions for decompiling
   ([uncompyle6](https://pypi.python.org/pypi/uncompyle6)).

-  `uncompyle6`, abandons the idea found in some 2.7 version of `uncompyle` that support Python 2.6 and 2.5 by trying to rewite opcodes at the bytecode level.
+  `uncompyle6` abandons the idea found in some 2.7 versions of `uncompyle` of supporting Python 2.6 and 2.5 by trying to rewrite opcodes at the bytecode level.

 Having a grammar per Python version is simpler to maintain, cleaner, and it scales indefinitely.

@@ -68,13 +68,13 @@ project is largely by Michael Hansen and Darryl Pogue. If they supported getting

 # So you want to write a decompiler for Python?

-If you think, as I am sure will happen in the future, "hey, I can just write a decompiler from scratch and not have to deal with all all of the complexity in uncompyle6", think again. What is likely to happen is that you'll get at best a 90% solution working for a single Python release that will be obsolete in about a year, and more obsolete each subsequent year.
+If you think, as I am sure will happen in the future, "hey, I can just write a decompiler from scratch and not have to deal with all of the complexity in uncompyle6", think again. What is likely to happen is that you'll get at best a 90% solution working for a single Python release that will be obsolete in about a year, and more obsolete each subsequent year.

 Writing a decompiler for Python gets harder as Python progresses. Writing a decompiler for Python 3.7 isn't as easy as it was for Python 2.2. For one thing, now that Python has a well-established AST, that opens another interface by which code can be improved.

 In Python 3.10 I am seeing (for the first time?) bytecode getting moved around so that it is no longer the case that line numbers have to be strictly increasing as bytecode offsets increase. And I am seeing dead code appear as well.

-That said, if you still feel you want to write a single version decompiler, look at the test cases in this project and talk to me. I may have some ideas that I haven't made public yet. See also what I've wrtten about the on how this code works and on [decompilation in dynamic runtime languages](http://rocky.github.io/Deparsing-Paper.pdf) in general.
+That said, if you still feel you want to write a single version decompiler, look at the test cases in this project and talk to me. I may have some ideas that I haven't made public yet. See also what I've written on how this code works and on [decompilation in dynamic runtime languages](http://rocky.github.io/Deparsing-Paper.pdf) in general.



@@ -82,8 +82,8 @@ That said, if you still feel you want to write a single version decompiler, look

 This project deparses using an Earley-algorithm parser. But in order to do this accurately, the process of tokenization is a bit more involved in the scanner. We don't just disassemble bytecode and use the opcode name.
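To give a concrete flavor of the token massaging meant here, below is a minimal sketch built only on the standard `dis` module. The function name and the exact opcode choices are hypothetical illustrations, not the real per-version scanner classes under `uncompyle6/scanners/`: jump targets get a `COME_FROM` pseudo-instruction, and opcodes that carry a count are specialized on their operand so grammar rules can match on them.

```python
import dis

def pseudo_tokens(code):
    # Illustrative only: the real scanners also handle version-specific
    # opcodes, EXTENDED_ARG folding, and structure detection.
    instructions = list(dis.get_instructions(code))
    jump_opcodes = set(dis.hasjrel) | set(dis.hasjabs)
    jump_targets = {i.argval for i in instructions if i.opcode in jump_opcodes}
    tokens = []
    for inst in instructions:
        if inst.offset in jump_targets:
            # Pseudo-instruction recording that some jump lands here.
            tokens.append(("COME_FROM", inst.offset))
        opname = inst.opname
        if opname in ("BUILD_TUPLE", "BUILD_LIST", "CALL_FUNCTION"):
            # Specialize on the operand, e.g. BUILD_TUPLE 2 -> BUILD_TUPLE_2,
            # so a grammar rule can key off the arity.
            opname = "%s_%d" % (opname, inst.arg)
        tokens.append((opname, inst.argval))
    return tokens
```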
That aspect hasn't changed from the very first decompilers. However understanding _what_ information needs to be made explicit and what pseudo instructions to add that accomplish this has taken some time to understand.

-Earley-algorithm parsers have gotten negative press, most notably by the dragon book. Having used this a bit, I am convinced having a system that handles ambiguous grammars is the right thing to do and matches the problem well. Iin practice the speed of the parser isn't a problem when one understand what's up. And this has taken a little while to understand.
-Earley-algorim parsers for context free languages or languages that are to a large extent context free and tend to be linear and the grammar stears towards left recursive rules. There is a technique for improving LL right recursion, but our parser doesn't have that yet.
+Earley-algorithm parsers have gotten negative press, most notably by the dragon book. Having used this a bit, I am convinced having a system that handles ambiguous grammars is the right thing to do and matches the problem well. In practice the speed of the parser isn't a problem when one understands what's up. And this has taken a little while to understand.
+Earley-algorithm parsers for context-free languages, or languages that are to a large extent context free, tend to be linear when the grammar steers towards left-recursive rules. There is a technique for improving LL right recursion, but our parser doesn't have that yet.

 The [decompiling paper](http://rocky.github.io/Deparsing-Paper.pdf) discusses these aspects in more detail.
diff --git a/NEWS.md b/NEWS.md
index 9f33e295..887a597c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -8,7 +8,7 @@
 * Correct 2.5-7 relative import formatting
 * Miscellaneous bug fixing
 * remove \n in lambda
-* Python 2.6 gramar cleanup
+* Python 2.6 grammar cleanup
 * Correct some Python 2.6 chain compare decompilation
 * Ensure no parenthesis subscript slices
 * Correct 2.x formatting "slice2" nonterminal
@@ -35,7 +35,7 @@
 ================

 * Fragment parsing was borked. This means deparsing in trepan2/trepan3k was broken
-* 3.7+: narrow precedence for call tatement
+* 3.7+: narrow precedence for call statement
 * del_stmt -> delete to better match Python AST
 * 3.8+ Add another `forelsestmt` (found only in a loop)
 * 3.8+ Add precedence on walrus operator
@@ -66,7 +66,7 @@ Mostly small miscellaneous bug fixes

 3.7.1: 2020-6-12 Fleetwood66
 ====================================================
-Released to pick up new xdis version which has fixes to read bytestings better on 3.x
+Released to pick up new xdis version which has fixes to read bytestrings better on 3.x

 * Handle 3.7+ "else" branch removal as seen in `_cmp()` of `python3.8/distutils/version.py` with optimization `-O2`
 * 3.6+ "with" and "with .. as" grammar improvements
@@ -89,10 +89,10 @@ More upheaval in xdis which we need to track here.
 3.6.6: 2020-4-20 Love in the time of Cholera
 ============================================

-The main reason for this release is an incompatablity bump in xdis which handles
+The main reason for this release is an incompatibility bump in xdis which handles
 3.7 SipHash better.

-* Go over "yield" as an expression precidence
+* Go over "yield" as an expression precedence
 * Some small alignment with code in decompyle3 for "or" and "and" was done

@@ -118,7 +118,7 @@ The main focus in this release was fix some of the more glaring problems creapt
In doing this, until everything refactored and replaced, decomplation may get worse. Therefore, this release largely serves as a checkpoint before more major upheaval. -The upheaval, in started last release, I believe the pinnicle was around c90ff51 which wasn't a release. I suppose I should tag that. +The upheaval, in started last release, I believe the pinnacle was around c90ff51 which wasn't a release. I suppose I should tag that. After c90ff5, I started down the road of redoing control flow in a more comprehensible, debuggable, and scalable way. See [The Control Flow Mess](https://github.com/rocky/python-uncompyle6/wiki/The-Control-Flow-Mess) @@ -132,7 +132,7 @@ In the decompyle3 code, I've gone down the road making the grammar goal symbol b I cringe in thinking about how the code has lived for so long without noticing such a simple stupidity, and lapse of sufficient thought. -Some stats from testing. The below give numbers of decompiled tests from Python's test suite which succesfully ran +Some stats from testing. The below give numbers of decompiled tests from Python's test suite which successfully ran ``` Version test-suites passing @@ -175,14 +175,14 @@ On the most recent Python versions I regularly decompile thousands of Python pro Does this mean the decompiler works perfectly? No. There are still a dozen or so failing programs, although the actual number of bugs is probably smaller though. -However, in perparation of a more major refactoring of the parser grammar, this release was born. +However, in preparation of a more major refactoring of the parser grammar, this release was born. In many cases, decompilation is better. But there are some cases where decompilation has gotten worse. For lack of time (and interest) 3.0 bytecode suffered a hit. Possibly some code in the 3.x range did too. In time and with cleaner refactored code, this will come back. -Commit c90ff51 was a local maxiumum before, I started reworking the grammar to separate productions that were specific to loops versus those that are not in loops. -In the middle of that I added another grammar simplication to remove singleton productions of the form `sstmts-> stmts`. These were always was a bit ugly, and complicated output. +Commit c90ff51 was a local maximum before, I started reworking the grammar to separate productions that were specific to loops versus those that are not in loops. +In the middle of that I added another grammar simplification to remove singleton productions of the form `sstmts-> stmts`. These were always was a bit ugly, and complicated output. -At any rate if decompilation fails, you can try c90ff51. Or another decompiler. `unpyc37` is pretty good for 3.7. wibiti `uncompyle2` is great for 2.7. `pycdc` is mediocre for Python before 3.5 or so, and not that good for the most recent Python. Geerally these programs will give some sort of answer even if it isn't correct. +At any rate if decompilation fails, you can try c90ff51. Or another decompiler. `unpyc37` is pretty good for 3.7. wibiti `uncompyle2` is great for 2.7. `pycdc` is mediocre for Python before 3.5 or so, and not that good for the most recent Python. Generally these programs will give some sort of answer even if it isn't correct. decompyle3 isn't that good for 3.7 and worse for 3.8, but right now it does things no other Python decompiler like `unpyc37` or `pycdc` does. For example, `decompyle3` handles variable annotations. As always, the issue trackers for the various programs will give you a sense for what needs to be done. 
For now, I've given up on reporting issues in the other decompilers because there are already enough issues reported, and they are just not getting fixed anyway.
@@ -213,7 +213,7 @@ indicate when an import contains a dotted import.

 Similarly, code for 3.7 `import .. as ` is basically the same as `from
 .. import`; the only difference is that the target of the name changes to an
 "alias" in the former. As a result, the disambiguation is now done on the semantic
-action side, rathero than in parsing grammar rules.
+action side, rather than in parsing grammar rules.

 Some small specific fixes:

@@ -246,13 +246,13 @@ versions better. This however comes with a big decompilation speed penalty.
 When we redo control flow this should go back to normal, but for now,
 accuracy is more important than speed.

-Another `assert` transform rule was added. Parser rules to distingish
+Another `assert` transform rule was added. Parser rules to distinguish
 `try/finally` in 3.8 were added and we are more stringent about what
 can be turned into an `assert`. There was some grammar cleanup here
 too.

 A number of small bugs were fixed, and some administrative changes to
-make `make check-short` really be short, but check more throughly what
+make `make check-short` really be short, but check more thoroughly what
 it checks. The minimum xdis version needed was bumped to include the
 newer 3.6-3.9 releases. See the `ChangeLog` for details.

@@ -261,7 +261,7 @@ newer 3.6-3.9 releases. See the `ChangeLog` for details.
 =============================

 The main focus in this release was more accurate decompilation especially
-for 3.7 and 3.8. However there are some improvments to Python 2.x as well,
+for 3.7 and 3.8. However there are some improvements to Python 2.x as well,
 including one of the long-standing problems of detecting the difference
 between `try ... ` and `try else ...`.

@@ -269,11 +269,11 @@ With this release we now rebase Python 3.7 on off of a 3.7 base; This is also
 as it is (now) in decompyle3. This facilitates removing some of the cruft
 in control-flow detection in the 2.7 uncompyle2 base.

-Alas, decompilation speed for 3.7 on is greatly increased. Hopefull
+Alas, decompilation speed for 3.7 on is greatly increased. Hopefully
 this is temporary (cough, cough) until we can do a static control
 flow pass.

-Finally, runing in 3.9-dev is tolerated. We can disassemble, but no parse tables yet.
+Finally, running in 3.9-dev is tolerated. We can disassemble, but no parse tables yet.


 3.5.1 2019-11-17 JNC
@@ -566,7 +566,7 @@ function calls and definitions.
 - Misc pydisasm fixes
 - Weird comprehension bug seen via new loctraceback
 - Fix Python 3.5+ CALL_FUNCTION_VAR and BUILD_LIST_UNPACK in call; with this
-  we can can handle 3.5+ f(a, b, *c, *d, *e) now
+  we can handle 3.5+ f(a, b, *c, *d, *e) now

 2.15.1 2018-02-05
 =====================
@@ -661,7 +661,7 @@ Overall: better 3.6 decompiling and some much needed code refactoring and cleanu
 - Handle `EXTENDED_ARGS` better. While relevant to all Python versions it is most
   noticeable in version 3.6+ where in switching to wordcodes the size of operands
   has been reduced from 2^16 to 2^8. `JUMP` instructions then often need EXTENDED_ARGS.
-- Refactor find_jump_targets() with via working of of instructions rather the bytecode array.
+- Refactor find_jump_targets() via working off of instructions rather than the bytecode array.
 - use `--weak-verify` more and additional fuzzing on verify()
 - fragment parser now ignores errors in nested function definitions; a parameter was added to assist here.
Ignoring errors may be okay because the fragment parser often just needs,
diff --git a/test/simple_source/bug27+/04_try_tryelse.py b/test/simple_source/bug27+/04_try_tryelse.py
index 366f3c6f..9ab39042 100644
--- a/test/simple_source/bug27+/04_try_tryelse.py
+++ b/test/simple_source/bug27+/04_try_tryelse.py
@@ -1,5 +1,5 @@
 # From 2.7 test_normalize.py
-# Bug has to to with finding the end of the tryelse block. I think thrown
+# Bug has to do with finding the end of the tryelse block. I think it is thrown
 # off by the "continue". In instructions the COME_FROM for END_FINALLY
 # was at the wrong offset because some sort of "rtarget" was adjusted.
diff --git a/NEW_FEATURES.rst b/NEW_FEATURES.rst
index 725c0cdd..e1873c2e 100644
--- a/NEW_FEATURES.rst
+++ b/NEW_FEATURES.rst
@@ -171,7 +171,7 @@ Expanding decompiler availability to multiple Python Versions
 --------------------------------------------------------------

 Above we mention decompiling multiple versions of bytecode from a
-single Python interpreter. We we talk about having the decompiler
+single Python interpreter. We talk about having the decompiler
 runnable from multiple versions of Python, independent of the set of
 bytecode that the decompiler supports.

@@ -185,7 +185,7 @@ implemented correctly.
 These also make excellent programs to check whether a program has
 decompiled correctly.

 Aside from this, debugging can be easier as well. To assist
-understanding bytcode and single stepping it see `x-python
+understanding bytecode and single stepping it, see `x-python
 `_ and the debugger for it `trepan-xpy
 `_.
diff --git a/README.rst b/README.rst
index a2ba61e2..cb6b40cd 100644
--- a/README.rst
+++ b/README.rst
@@ -41,7 +41,7 @@ although compatible with the original intention, is yet a little bit
 different. See this_ for more information.

 Python fragment deparsing given an instruction offset is useful in
-showing stack traces and can be encorporated into any program that
+showing stack traces and can be incorporated into any program that
 wants to show a location in more detail than just a line number at
 runtime. This code can be also used when source-code information does
 not exist and there is just bytecode. Again, my debuggers make use of
@@ -161,8 +161,8 @@ Python syntax changes, you should use this option if the bytecode is
 the right bytecode for the Python interpreter that will be checking
 the syntax.

-You can also cross compare the results with either another version of
-`uncompyle6` since there are are sometimes regressions in decompiling
+You can also cross compare the results with another version of
+`uncompyle6` since there are sometimes regressions in decompiling
 specific bytecode as the overall quality improves.

 For Python 3.7 and 3.8, the code in decompyle3_ is generally
@@ -199,7 +199,7 @@ On the lower end of Python versions, decompilation seems pretty good although
 we don't have any automated testing in place for Python's
 distributed tests. Also, we don't have a Python interpreter for versions 1.6 and 2.0.

-In the Python 3 series, Python support is is strongest around 3.4 or
+In the Python 3 series, Python support is strongest around 3.4 or
 3.3 and drops off as you move further away from those versions. Python
 3.0 is weird in that it in some ways resembles 2.6 more than it does
 3.1 or 2.7. Python 3.6 changes things drastically by using word codes
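To make the shape of that tryelse bug concrete, here is a minimal hypothetical sketch, not one of the actual test files, of the construct involved: a `continue` inside the `try` body of a `try`/`except`/`else` that sits in a loop.

```python
def tryelse_continue(values):
    # Hypothetical reconstruction of the kind of control flow the
    # 04_try_tryelse.py test exercises.
    results = []
    for value in values:
        try:
            inverse = 1.0 / value
        except ZeroDivisionError:
            # Per the comment above, this "continue" is what threw off
            # finding the end of the try/else block in the bytecode.
            continue
        else:
            results.append(inverse)
    return results
```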
diff --git a/test/simple_source/bug36/02_kwargs.py b/test/simple_source/bug36/02_kwargs.py index 5b5af9e9..bb3b6ca3 100644 --- a/test/simple_source/bug36/02_kwargs.py +++ b/test/simple_source/bug36/02_kwargs.py @@ -5,7 +5,7 @@ def bug(self, j, a, b): self.parse_comment(a, b, report=3) # From 3.6 fnmatch.py -# Bug was precidence parenthesis around decorator +# Bug was precedence parenthesis around decorator import functools @functools.lru_cache(maxsize=256, typed=True) diff --git a/test/simple_source/stmts/11_return_val.py b/test/simple_source/stmts/11_return_val.py index c5076423..fc13d53b 100644 --- a/test/simple_source/stmts/11_return_val.py +++ b/test/simple_source/stmts/11_return_val.py @@ -1,6 +1,6 @@ # 2.5.6 decimal.py # Bug on 2.5 and 2.6 by incorrectly changing opcode to -# RETURN_VALUE to psuedo op: RETURN_END_IF +# RETURN_VALUE to pseudo op: RETURN_END_IF def _formatparam(param, value=None, quote=True): if value is not None and len(value) > 0: if isinstance(value, tuple): diff --git a/uncompyle6/parsers/parse35.py b/uncompyle6/parsers/parse35.py index 9599e6cc..1220916e 100644 --- a/uncompyle6/parsers/parse35.py +++ b/uncompyle6/parsers/parse35.py @@ -114,7 +114,7 @@ class Python35Parser(Python34Parser): ifelsestmtl ::= testexpr c_stmts_opt jb_else else_suitel # 3.5 Has jump optimization which can route the end of an - # "if/then" back to to a loop just before an else. + # "if/then" back to a loop just before an else. jump_absolute_else ::= jb_else jump_absolute_else ::= CONTINUE ELSE diff --git a/uncompyle6/parsers/parse37.py b/uncompyle6/parsers/parse37.py index 5eff88a2..0affa33b 100644 --- a/uncompyle6/parsers/parse37.py +++ b/uncompyle6/parsers/parse37.py @@ -558,7 +558,7 @@ class Python37Parser(Python37BaseParser): ifelsestmtl ::= testexpr_cf c_stmts_opt jb_else else_suitel # 3.5 Has jump optimization which can route the end of an - # "if/then" back to to a loop just before an else. + # "if/then" back to a loop just before an else. jump_absolute_else ::= jb_else jump_absolute_else ::= CONTINUE ELSE diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index edd009e7..a2783911 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -2093,7 +2093,7 @@ def code_deparse( ) # Just when you think we've forgotten about what we - # were supposed to to: Generate source from the Syntax ree! + # were supposed to do: Generate source from the Syntax tree! 
deparsed.gen_source(deparsed.ast, co.co_name, customize) deparsed.set_pos_info(deparsed.ast, 0, len(deparsed.text)) From d249c522a702f622cfd6678fda00ce3dc9fb615e Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 3 Feb 2024 12:37:48 -0500 Subject: [PATCH 13/24] Fix up linemap option --- uncompyle6/main.py | 22 +++++----- uncompyle6/semantics/fragments.py | 19 +++++---- uncompyle6/semantics/linemap.py | 69 ++++++++++++++++--------------- uncompyle6/semantics/pysource.py | 25 ++++++----- 4 files changed, 73 insertions(+), 62 deletions(-) diff --git a/uncompyle6/main.py b/uncompyle6/main.py index f7da7430..4285736c 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2023 Rocky Bernstein +# Copyright (C) 2018-2024 Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -35,12 +35,15 @@ from uncompyle6.version import __version__ def _get_outstream(outfile: str) -> Any: - dir = os.path.dirname(outfile) + """ + Return an opened output file descriptor for ``outfile``. + """ + dir_name = os.path.dirname(outfile) failed_file = outfile + "_failed" if os.path.exists(failed_file): os.remove(failed_file) try: - os.makedirs(dir) + os.makedirs(dir_name) except OSError: pass return open(outfile, mode="w", encoding="utf-8") @@ -50,7 +53,7 @@ def decompile( co, bytecode_version: Tuple[int] = PYTHON_VERSION_TRIPLE, out=sys.stdout, - showasm: Optional[str]=None, + showasm: Optional[str] = None, showast={}, timestamp=None, showgrammar=False, @@ -118,13 +121,12 @@ def decompile( if isinstance(mapstream, str): mapstream = _get_outstream(mapstream) + debug_opts = {"asm": showasm, "tree": showast, "grammar": showgrammar} + deparsed = deparse_code_with_map( - bytecode_version, - co, - out, - showasm, - showast, - showgrammar, + co=co, + out=out, + version=bytecode_version, code_objects=code_objects, is_pypy=is_pypy, ) diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index a2783911..40dfbcc0 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2019, 2021-2023 by Rocky Bernstein +# Copyright (c) 2015-2019, 2021-2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -75,7 +75,6 @@ from xdis import iscode from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE import uncompyle6.parser as python_parser -from uncompyle6 import parser from uncompyle6.parsers.treenode import SyntaxTree from uncompyle6.scanner import Code, Token, get_scanner from uncompyle6.semantics import pysource @@ -89,7 +88,7 @@ from uncompyle6.semantics.consts import ( TABLE_DIRECT, escape, ) -from uncompyle6.semantics.pysource import ParserError, StringIO +from uncompyle6.semantics.pysource import DEFAULT_DEBUG_OPTS, ParserError, StringIO from uncompyle6.show import maybe_show_asm, maybe_show_tree NodeInfo = namedtuple("NodeInfo", "node start finish") @@ -1118,7 +1117,13 @@ class FragmentsWalker(pysource.SourceWalker, object): n_classdefdeco2 = n_classdef def gen_source( - self, ast, name, customize, is_lambda=False, returnNone=False, debug_opts=None + self, + ast, + name, + customize, + is_lambda=False, + returnNone=False, + debug_opts=DEFAULT_DEBUG_OPTS, ): """convert parse tree to Python source code""" @@ -1204,7 +1209,7 @@ class FragmentsWalker(pysource.SourceWalker, object): 
self.p.insts = self.scanner.insts self.p.offset2inst_index = self.scanner.offset2inst_index self.p.opc = self.scanner.opc - ast = parser.parse(self.p, tokens, customize, code) + ast = python_parser.parse(self.p, tokens, customize, code) self.p.insts = p_insts except (python_parser.ParserError, AssertionError) as e: raise ParserError(e, tokens, {}) @@ -2065,11 +2070,11 @@ def code_deparse( # Build Syntax Tree from tokenized and massaged disassembly. # deparsed = pysource.FragmentsWalker(out, scanner, showast=showast) - show_ast = debug_opts.get("ast", None) + show_tree = debug_opts.get("tree", False) deparsed = walker( version, scanner, - showast=show_ast, + showast=show_tree, debug_parser=debug_parser, compile_mode=compile_mode, is_pypy=is_pypy, diff --git a/uncompyle6/semantics/linemap.py b/uncompyle6/semantics/linemap.py index 23eafdae..3a447afb 100644 --- a/uncompyle6/semantics/linemap.py +++ b/uncompyle6/semantics/linemap.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 by Rocky Bernstein +# Copyright (c) 2018, 2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -12,96 +12,97 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . + +from uncompyle6.semantics.fragments import FragmentsWalker, code_deparse as fragments_code_deparse from uncompyle6.semantics.pysource import SourceWalker, code_deparse -import uncompyle6.semantics.fragments as fragments + # FIXME: does this handle nested code, and lambda properly class LineMapWalker(SourceWalker): def __init__(self, *args, **kwargs): - super(LineMapWalker, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.source_linemap = {} self.current_line_number = 1 def write(self, *data): """Augment write routine to keep track of current line""" - for l in data: + for line in data: ## print("XXX write: '%s'" % l) - for i in str(l): - if i == '\n': + for i in str(line): + if i == "\n": self.current_line_number += 1 pass pass pass - return super(LineMapWalker, self).write(*data) + return super().write(*data) # Note n_expr needs treatment too def default(self, node): """Augment write default routine to record line number changes""" - if hasattr(node, 'linestart'): + if hasattr(node, "linestart"): if node.linestart: self.source_linemap[self.current_line_number] = node.linestart - return super(LineMapWalker, self).default(node) + return super().default(node) def n_LOAD_CONST(self, node): - if hasattr(node, 'linestart'): + if hasattr(node, "linestart"): if node.linestart: self.source_linemap[self.current_line_number] = node.linestart - return super(LineMapWalker, self).n_LOAD_CONST(node) + return super().n_LOAD_CONST(node) -class LineMapFragmentWalker(fragments.FragmentsWalker, LineMapWalker): +class LineMapFragmentWalker(LineMapWalker, FragmentsWalker): def __init__(self, *args, **kwargs): - super(LineMapFragmentWalker, self).__init__(*args, **kwargs) - self.source_linemap = {} - self.current_line_number = 0 + super().__init__(*args, **kwargs) + def deparse_code_with_map(*args, **kwargs): """ Like deparse_code but saves line number correspondences. Deprecated. Use code_deparse_with_map """ - kwargs['walker'] = LineMapWalker + kwargs["walker"] = LineMapWalker return code_deparse(*args, **kwargs) + def code_deparse_with_map(*args, **kwargs): """ Like code_deparse but saves line number correspondences. 
""" - kwargs['walker'] = LineMapWalker + kwargs["walker"] = LineMapWalker return code_deparse(*args, **kwargs) -def deparse_code_with_fragments_and_map(*args, **kwargs): - """ - Like deparse_code_with_map but saves fragments. - Deprecated. Use code_deparse_with_fragments_and_map - """ - kwargs['walker'] = LineMapFragmentWalker - return fragments.deparse_code(*args, **kwargs) def code_deparse_with_fragments_and_map(*args, **kwargs): """ Like code_deparse_with_map but saves fragments. """ - kwargs['walker'] = LineMapFragmentWalker - return fragments.code_deparse(*args, **kwargs) + kwargs["walker"] = LineMapFragmentWalker + return fragments_code_deparse(*args, **kwargs) + + +if __name__ == "__main__": -if __name__ == '__main__': def deparse_test(co): "This is a docstring" deparsed = code_deparse_with_map(co) - a = 1; b = 2 + a = 1 + b = 2 print("\n") - linemap = [(line_no, deparsed.source_linemap[line_no]) - for line_no in - sorted(deparsed.source_linemap.keys())] + linemap = [ + (line_no, deparsed.source_linemap[line_no]) + for line_no in sorted(deparsed.source_linemap.keys()) + ] print(linemap) deparsed = code_deparse_with_fragments_and_map(co) print("\n") - linemap2 = [(line_no, deparsed.source_linemap[line_no]) - for line_no in - sorted(deparsed.source_linemap.keys())] + linemap2 = [ + (line_no, deparsed.source_linemap[line_no]) + for line_no in sorted(deparsed.source_linemap.keys()) + ] print(linemap2) # assert linemap == linemap2 return + deparse_test(deparse_test.__code__) diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index b52f90f9..a43075c9 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2023 by Rocky Bernstein +# Copyright (c) 2015-2024 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock @@ -196,6 +196,10 @@ class SourceWalkerError(Exception): class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): + """ + Class to traverses a Parse Tree of the bytecode instruction built from parsing to produce some sort of source text. + The Parse tree may be turned an Abstract Syntax tree as an intermediate step. 
+ """ stacked_params = ("f", "indent", "is_lambda", "_globals") def __init__( @@ -245,24 +249,24 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): is_pypy=is_pypy, ) - # Initialize p_lambda on demand - self.p_lambda = None - - self.treeTransform = TreeTransform(version=self.version, show_ast=showast) + self.ast_errors = [] + self.currentclass = None + self.classes = [] self.debug_parser = dict(debug_parser) - self.showast = showast + # Initialize p_lambda on demand + self.line_number = 1 + self.linemap = {} + self.p_lambda = None self.params = params self.param_stack = [] self.ERROR = None self.prec = 100 self.return_none = False self.mod_globs = set() - self.currentclass = None - self.classes = [] + self.showast = showast self.pending_newlines = 0 self.linestarts = linestarts - self.line_number = 1 - self.ast_errors = [] + self.treeTransform = TreeTransform(version=self.version, show_ast=showast) # FIXME: have p.insts update in a better way # modularity is broken here self.insts = scanner.insts @@ -1257,7 +1261,6 @@ def code_deparse( assert iscode(co) - if version is None: version = PYTHON_VERSION_TRIPLE From 9839cfe93b7f31367507d165f532bce582818c7b Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 3 Feb 2024 12:44:30 -0500 Subject: [PATCH 14/24] Add pre-commit hook --- .pre-commit-config.yaml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..b51fd593 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,22 @@ +default_language_version: + python: python +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-merge-conflict + - id: debug-statements + stages: [commit] + - id: end-of-file-fixer + stages: [commit] +- repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + stages: [commit] +- repo: https://github.com/psf/black + rev: 23.12.1 + hooks: + - id: black + language_version: python3 + stages: [commit] From db6c7159f83b7ded526e7fd5c32f7e3917200108 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 3 Feb 2024 14:47:08 -0500 Subject: [PATCH 15/24] lint --- .gitignore | 3 +- test/test_pythonlib.py | 2 +- uncompyle6/main.py | 44 +++++---- uncompyle6/semantics/linemap.py | 14 +-- uncompyle6/semantics/pysource.py | 148 ++++++++++++++++++------------- 5 files changed, 119 insertions(+), 92 deletions(-) diff --git a/.gitignore b/.gitignore index 34a0cc62..195e4cfa 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ *.pyo *_dis *~ +.mypy_cache /.cache /.eggs /.hypothesis @@ -10,7 +11,6 @@ /.pytest_cache /.python-version /.tox -.mypy_cache /.venv* /README /__pkginfo__.pyc @@ -20,6 +20,7 @@ /tmp /uncompyle6.egg-info /unpyc +/venv ChangeLog __pycache__ build diff --git a/test/test_pythonlib.py b/test/test_pythonlib.py index 6d06e1d2..99d2f90b 100755 --- a/test/test_pythonlib.py +++ b/test/test_pythonlib.py @@ -216,7 +216,7 @@ def do_tests(src_dir, obj_patterns, target_dir, opts): print("Output directory: ", target_dir) try: _, _, failed_files, failed_verify = main( - src_dir, target_dir, files, [], do_verify=opts["do_verify"] + src_dir, target_dir, files, [] ) if failed_files != 0: sys.exit(2) diff --git a/uncompyle6/main.py b/uncompyle6/main.py index 4285736c..44f23380 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -90,7 +90,7 @@ def decompile( run_pypy_str = "PyPy " if IS_PYPY else "" sys_version_lines = sys.version.split("\n") if 
source_encoding: - write("# -*- coding: %s -*-" % source_encoding) + write(f"# -*- coding: {source_encoding} -*-") write( "# uncompyle6 version %s\n" "# %sPython bytecode version base %s%s\n# Decompiled from: %sPython %s" @@ -104,9 +104,9 @@ def decompile( ) ) if co.co_filename: - write("# Embedded file name: %s" % co.co_filename) + write(f"# Embedded file name: {co.co_filename}") if timestamp: - write("# Compiled at: %s" % datetime.datetime.fromtimestamp(timestamp)) + write(f"# Compiled at: {datetime.datetime.fromtimestamp(timestamp)}") if source_size: write("# Size of source mod 2**32: %d bytes" % source_size) @@ -129,13 +129,14 @@ def decompile( version=bytecode_version, code_objects=code_objects, is_pypy=is_pypy, + debug_opts=debug_opts, ) header_count = 3 + len(sys_version_lines) linemap = [ (line_no, deparsed.source_linemap[line_no] + header_count) for line_no in sorted(deparsed.source_linemap.keys()) ] - mapstream.write("\n\n# %s\n" % linemap) + mapstream.write(f"\n\n# {linemap}\n") else: if do_fragments: deparse_fn = code_deparse_fragments @@ -163,11 +164,11 @@ def compile_file(source_path: str) -> str: basename = source_path if hasattr(sys, "pypy_version_info"): - bytecode_path = "%s-pypy%s.pyc" % (basename, version_tuple_to_str()) + bytecode_path = f"{basename}-pypy{version_tuple_to_str()}.pyc" else: - bytecode_path = "%s-%s.pyc" % (basename, version_tuple_to_str()) + bytecode_path = f"{basename}-{version_tuple_to_str()}.pyc" - print("compiling %s to %s" % (source_path, bytecode_path)) + print(f"compiling {source_path} to {bytecode_path}") py_compile.compile(source_path, bytecode_path, "exec") return bytecode_path @@ -232,7 +233,6 @@ def decompile_file( compile_mode="exec", ) ] - co = None return deparsed @@ -245,7 +245,6 @@ def main( outfile=None, showasm: Optional[str] = None, showast={}, - do_verify=False, showgrammar=False, source_encoding=None, raise_on_error=False, @@ -274,7 +273,7 @@ def main( infile = os.path.join(in_base, filename) # print("XXX", infile) if not os.path.exists(infile): - sys.stderr.write("File '%s' doesn't exist. Skipped\n" % infile) + sys.stderr.write(f"File '{infile}' doesn't exist. 
Skipped\n")
                 continue

         if do_linemaps:
@@ -322,13 +321,13 @@ def main(
                 ):
                     if e[0] != last_mod:
                         line = "=" * len(e[0])
-                        outstream.write("%s\n%s\n%s\n" % (line, e[0], line))
+                        outstream.write(f"{line}\n{e[0]}\n{line}\n")
                         last_mod = e[0]
                     info = offsets[e]
-                    extractInfo = d.extract_node_info(info)
-                    outstream.write("%s" % info.node.format().strip() + "\n")
-                    outstream.write(extractInfo.selectedLine + "\n")
-                    outstream.write(extractInfo.markerLine + "\n\n")
+                    extract_info = d.extract_node_info(info)
+                    outstream.write(f"{info.node.format().strip()}" + "\n")
+                    outstream.write(extract_info.selectedLine + "\n")
+                    outstream.write(extract_info.markerLine + "\n\n")
                     pass
                 pass
             tot_files += 1
@@ -349,14 +348,14 @@ def main(
             if str(e).startswith("Unsupported Python"):
                 sys.stdout.write("\n")
                 sys.stderr.write(
-                    "\n# Unsupported bytecode in file %s\n# %s\n" % (infile, e)
+                    f"\n# Unsupported bytecode in file {infile}\n# {e}\n"
                 )
             else:
                 if outfile:
                     outstream.close()
                     os.remove(outfile)
                 sys.stdout.write("\n")
-                sys.stderr.write("\nLast file: %s " % (infile))
+                sys.stderr.write(f"\nLast file: {infile} ")
                 raise

         # except:
@@ -376,7 +375,7 @@ def main(
             okay_files += 1
             if not current_outfile:
                 mess = "\n# okay decompiling"
-                # mem_usage = __memUsage()
+                # mem_usage = __mem_usage()
                 print(mess, infile)
         if current_outfile:
             sys.stdout.write(
@@ -384,7 +383,6 @@ def main(
                 % (
                     infile,
                     status_msg(
-                        do_verify,
                         tot_files,
                         okay_files,
                         failed_files,
@@ -405,14 +403,14 @@ def main(
         except Exception:
             pass
         pass
-    return (tot_files, okay_files, failed_files, verify_failed_files)
+    return tot_files, okay_files, failed_files, verify_failed_files


 # ---- main ----

 if sys.platform.startswith("linux") and os.uname()[2][:2] in ["2.", "3.", "4."]:

-    def __memUsage():
+    def __mem_usage():
         mi = open("/proc/self/stat", "r")
         mu = mi.readline().split()[22]
         mi.close()
@@ -420,11 +418,11 @@ if sys.platform.startswith("linux") and os.uname()[2][:2] in ["2.", "3.", "4."]:

 else:

-    def __memUsage():
+    def __mem_usage():
         return ""


-def status_msg(do_verify, tot_files, okay_files, failed_files, verify_failed_files):
+def status_msg(tot_files, okay_files, failed_files, verify_failed_files):
     if tot_files == 1:
         if failed_files:
             return "\n# decompile failed"
diff --git a/uncompyle6/semantics/linemap.py b/uncompyle6/semantics/linemap.py
index 3a447afb..1c760b43 100644
--- a/uncompyle6/semantics/linemap.py
+++ b/uncompyle6/semantics/linemap.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2018, 2024 by Rocky Bernstein
 #
+
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -13,7 +14,10 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see .
-from uncompyle6.semantics.fragments import FragmentsWalker, code_deparse as fragments_code_deparse
+from uncompyle6.semantics.fragments import (
+    FragmentsWalker,
+    code_deparse as fragments_code_deparse,
+)
 from uncompyle6.semantics.pysource import SourceWalker, code_deparse


@@ -25,9 +29,9 @@ class LineMapWalker(SourceWalker):
         self.current_line_number = 1

     def write(self, *data):
-        """Augment write routine to keep track of current line"""
+        """Augment write routine to keep track of current line."""
         for line in data:
-            ## print("XXX write: '%s'" % l)
+            # print(f"XXX write: '{line}'")
             for i in str(line):
                 if i == "\n":
                     self.current_line_number += 1
@@ -39,7 +43,7 @@ class LineMapWalker(SourceWalker):

     # Note n_expr needs treatment too
     def default(self, node):
-        """Augment write default routine to record line number changes"""
+        """Augment default-write routine to record line number changes."""
         if hasattr(node, "linestart"):
             if node.linestart:
                 self.source_linemap[self.current_line_number] = node.linestart
@@ -85,7 +89,7 @@ def code_deparse_with_fragments_and_map(*args, **kwargs):

 if __name__ == "__main__":

     def deparse_test(co):
-        "This is a docstring"
+        """This is a docstring"""
         deparsed = code_deparse_with_map(co)
         a = 1
         b = 2
diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py
index b52f90f9..5a6566fe 100644
--- a/uncompyle6/semantics/pysource.py
+++ b/uncompyle6/semantics/pysource.py
@@ -141,17 +141,25 @@ from uncompyle6.parsers.treenode import SyntaxTree
 from uncompyle6.scanner import Code, get_scanner
 from uncompyle6.scanners.tok import Token
 from uncompyle6.semantics.check_ast import checker
-from uncompyle6.semantics.consts import (ASSIGN_TUPLE_PARAM,
-                                         INDENT_PER_LEVEL, LINE_LENGTH, MAP,
-                                         MAP_DIRECT, NAME_MODULE, NONE, PASS,
-                                         PRECEDENCE, RETURN_LOCALS,
-                                         RETURN_NONE, TAB, TABLE_R, escape)
+from uncompyle6.semantics.consts import (
+    ASSIGN_TUPLE_PARAM,
+    INDENT_PER_LEVEL,
+    LINE_LENGTH,
+    MAP,
+    MAP_DIRECT,
+    NAME_MODULE,
+    NONE,
+    PASS,
+    PRECEDENCE,
+    RETURN_LOCALS,
+    RETURN_NONE,
+    TAB,
+    TABLE_R,
+    escape,
+)
 from uncompyle6.semantics.customize import customize_for_version
 from uncompyle6.semantics.gencomp import ComprehensionMixin
-from uncompyle6.semantics.helper import (
-    find_globals_and_nonlocals,
-    print_docstring
-)
+from uncompyle6.semantics.helper import find_globals_and_nonlocals, print_docstring
 from uncompyle6.semantics.make_function1 import make_function1
 from uncompyle6.semantics.make_function2 import make_function2
 from uncompyle6.semantics.make_function3 import make_function3
@@ -162,9 +170,11 @@ from uncompyle6.semantics.transform import TreeTransform, is_docstring
 from uncompyle6.show import maybe_show_tree
 from uncompyle6.util import better_repr

 DEFAULT_DEBUG_OPTS = {"asm": False, "tree": False, "grammar": False}

-def unicode(x): return x
+
+def unicode(x):
+    return x
+

 from io import StringIO

 PARSER_DEFAULT_DEBUG = {
@@ -196,6 +210,7 @@ class SourceWalkerError(Exception):

 class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
     """
     Class that traverses a Parse Tree of the bytecode instructions built from parsing to produce some sort of source text.
     The Parse tree may be turned into an Abstract Syntax tree as an intermediate step.
""" + stacked_params = ("f", "indent", "is_lambda", "_globals") def __init__( @@ -288,7 +299,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): self.in_format_string = None # hide_internal suppresses displaying the additional instructions that sometimes - # exist in code but but were not written in the source code. + # exist in code but were not written in the source code. # An example is: # __module__ = __name__ self.hide_internal = True @@ -355,7 +366,6 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): indent += " " i = 0 for node in ast: - if hasattr(node, "__repr1__"): if enumerate_children: child = self.str_with_template1(node, indent, i) @@ -375,9 +385,9 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): i += 1 return rv - def indent_if_source_nl(self, line_number, indent): + def indent_if_source_nl(self, line_number: int, indent: int): if line_number != self.line_number: - self.write("\n" + self.indent + INDENT_PER_LEVEL[:-1]) + self.write("\n" + indent + INDENT_PER_LEVEL[:-1]) return self.line_number f = property( @@ -685,7 +695,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): def template_engine(self, entry, startnode): """The format template interpretation engine. See the comment at the - beginning of this module for the how we interpret format + beginning of this module for how we interpret format specifications such as %c, %C, and so on. """ @@ -729,20 +739,31 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): if isinstance(index[1], str): # if node[index[0]] != index[1]: # from trepan.api import debug; debug() - assert node[index[0]] == index[1], ( - "at %s[%d], expected '%s' node; got '%s'" - % (node.kind, arg, index[1], node[index[0]].kind,) + assert ( + node[index[0]] == index[1] + ), "at %s[%d], expected '%s' node; got '%s'" % ( + node.kind, + arg, + index[1], + node[index[0]].kind, ) else: - assert node[index[0]] in index[1], ( - "at %s[%d], expected to be in '%s' node; got '%s'" - % (node.kind, arg, index[1], node[index[0]].kind,) + assert ( + node[index[0]] in index[1] + ), "at %s[%d], expected to be in '%s' node; got '%s'" % ( + node.kind, + arg, + index[1], + node[index[0]].kind, ) index = index[0] - assert isinstance(index, int), ( - "at %s[%d], %s should be int or tuple" - % (node.kind, arg, type(index),) + assert isinstance( + index, int + ), "at %s[%d], %s should be int or tuple" % ( + node.kind, + arg, + type(index), ) try: @@ -765,14 +786,22 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): if len(tup) == 3: (index, nonterm_name, self.prec) = tup if isinstance(tup[1], str): - assert node[index] == nonterm_name, ( - "at %s[%d], expected '%s' node; got '%s'" - % (node.kind, arg, nonterm_name, node[index].kind,) + assert ( + node[index] == nonterm_name + ), "at %s[%d], expected '%s' node; got '%s'" % ( + node.kind, + arg, + nonterm_name, + node[index].kind, ) else: - assert node[tup[0]] in tup[1], ( - "at %s[%d], expected to be in '%s' node; got '%s'" - % (node.kind, arg, index[1], node[index[0]].kind,) + assert ( + node[tup[0]] in tup[1] + ), "at %s[%d], expected to be in '%s' node; got '%s'" % ( + node.kind, + arg, + index[1], + node[index[0]].kind, ) else: @@ -885,52 +914,51 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): "CALL_FUNCTION_VAR_KW", "CALL_FUNCTION_KW", ): - # FIXME: handle everything in customize. 
# Right now, some of this is here, and some in that. if v == 0: - str = "%c(%C" # '%C' is a dummy here ... - p2 = (0, 0, None) # .. because of the None in this + template_str = "%c(%C" # '%C' is a dummy here ... + p2 = (0, 0, None) # because of the None in this else: - str = "%c(%C, " + template_str = "%c(%C, " p2 = (1, -2, ", ") if op == "CALL_FUNCTION_VAR": # Python 3.5 only puts optional args (the VAR part) # the lowest down the stack if self.version == (3, 5): - if str == "%c(%C, ": + if template_str == "%c(%C, ": entry = ("%c(*%C, %c)", 0, p2, -2) - elif str == "%c(%C": + elif template_str == "%c(%C": entry = ("%c(*%C)", 0, (1, 100, "")) elif self.version == (3, 4): # CALL_FUNCTION_VAR's top element of the stack contains # the variable argument list if v == 0: - str = "%c(*%c)" - entry = (str, 0, -2) + template_str = "%c(*%c)" + entry = (template_str, 0, -2) else: - str = "%c(%C, *%c)" - entry = (str, 0, p2, -2) + template_str = "%c(%C, *%c)" + entry = (template_str, 0, p2, -2) else: - str += "*%c)" - entry = (str, 0, p2, -2) + template_str += "*%c)" + entry = (template_str, 0, p2, -2) elif op == "CALL_FUNCTION_KW": - str += "**%c)" - entry = (str, 0, p2, -2) + template_str += "**%c)" + entry = (template_str, 0, p2, -2) elif op == "CALL_FUNCTION_VAR_KW": - str += "*%c, **%c)" + template_str += "*%c, **%c)" # Python 3.5 only puts optional args (the VAR part) # the lowest down the stack na = v & 0xFF # positional parameters if self.version == (3, 5) and na == 0: if p2[2]: p2 = (2, -2, ", ") - entry = (str, 0, p2, 1, -2) + entry = (template_str, 0, p2, 1, -2) else: if p2[2]: p2 = (1, -3, ", ") - entry = (str, 0, p2, -3, -2) + entry = (template_str, 0, p2, -3, -2) pass else: assert False, "Unhandled CALL_FUNCTION %s" % op @@ -1014,7 +1042,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): if ast[0] == "sstmt": ast[0] = ast[0][0] first_stmt = ast[0] - except: + except Exception: pass try: @@ -1023,7 +1051,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): del ast[0] first_stmt = ast[0] pass - except: + except Exception: pass have_qualname = False @@ -1035,17 +1063,15 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): if self.version < (3, 0): # Should we ditch this in favor of the "else" case? qualname = ".".join(self.classes) - QUAL_NAME = SyntaxTree( + qual_name_tree = SyntaxTree( "assign", [ SyntaxTree("expr", [Token("LOAD_CONST", pattr=qualname)]), - SyntaxTree( - "store", [Token("STORE_NAME", pattr="__qualname__")] - ), + SyntaxTree("store", [Token("STORE_NAME", pattr="__qualname__")]), ], ) # FIXME: is this right now that we've redone the grammar? - have_qualname = ast[0] == QUAL_NAME + have_qualname = ast[0] == qual_name_tree else: # Python 3.4+ has constants like 'cmp_to_key..K' # which are not simple classes like the < 3 case. @@ -1057,7 +1083,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): and first_stmt[1][0] == Token("STORE_NAME", pattr="__qualname__") ): have_qualname = True - except: + except Exception: pass if have_qualname: @@ -1078,7 +1104,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): try: # FIXME: Is there an extra [0]? 
docstring = ast[i][0][0][0][0].pattr - except: + except Exception: docstring = code.co_consts[0] if print_docstring(self, indent, docstring): self.println() @@ -1104,7 +1130,6 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): # else: # print stmt[-1] - globals, nonlocals = find_globals_and_nonlocals( ast, set(), set(), code, self.version ) @@ -1148,7 +1173,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): else: self.customize(customize) self.text = self.traverse(ast, is_lambda=is_lambda) - # In a formatted string using "lambda', we should not add "\n". + # In a formatted string using "lambda", we should not add "\n". # For example in: # f'{(lambda x:x)("8")!r}' # Adding a "\n" after "lambda x: x" will give an error message: @@ -1167,7 +1192,6 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): noneInNames=False, is_top_level_module=False, ): - # FIXME: DRY with fragments.py # assert isinstance(tokens[0], Token) @@ -1298,7 +1322,7 @@ def code_deparse( is_top_level_module=is_top_level_module, ) - #### XXX workaround for profiling + # XXX workaround for profiling if deparsed.ast is None: return None @@ -1406,7 +1430,7 @@ def deparse_code2str( if __name__ == "__main__": def deparse_test(co): - "This is a docstring" + """This is a docstring""" s = deparse_code2str(co) # s = deparse_code2str(co, debug_opts={"asm": "after", "tree": {'before': False, 'after': False}}) print(s) From 1e95ebd5f6adf95192bc14043531047e02e05236 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 3 Feb 2024 14:49:56 -0500 Subject: [PATCH 16/24] Bump 3.8 version to latest --- admin-tools/setup-master.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/admin-tools/setup-master.sh b/admin-tools/setup-master.sh index 181f857e..706e81fd 100755 --- a/admin-tools/setup-master.sh +++ b/admin-tools/setup-master.sh @@ -1,5 +1,5 @@ #!/bin/bash -PYTHON_VERSION=3.8.17 +PYTHON_VERSION=3.8.18 function checkout_version { local repo=$1 From 5f29d14608c2aceb160ba51a4deef647dd65393f Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 3 Feb 2024 15:08:58 -0500 Subject: [PATCH 17/24] Fix --linemap option, yet again. 
---
 uncompyle6/main.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/uncompyle6/main.py b/uncompyle6/main.py
index 44f23380..374980b9 100644
--- a/uncompyle6/main.py
+++ b/uncompyle6/main.py
@@ -121,7 +121,7 @@ def decompile(
     if isinstance(mapstream, str):
         mapstream = _get_outstream(mapstream)

-    debug_opts = {"asm": showasm, "tree": showast, "grammar": showgrammar}
+    debug_opts = {"asm": showasm, "tree": showast, "grammar": grammar}

     deparsed = deparse_code_with_map(
         co=co,
         out=out,
         version=bytecode_version,
@@ -347,9 +347,7 @@ def main(
             sys.stdout.write(f"\n{str(e)}\n")
             if str(e).startswith("Unsupported Python"):
                 sys.stdout.write("\n")
-                sys.stderr.write(
-                    f"\n# Unsupported bytecode in file {infile}\n# {e}\n"
-                )
+                sys.stderr.write(f"\n# Unsupported bytecode in file {infile}\n# {e}\n")
             else:
                 if outfile:
                     outstream.close()
                     os.remove(outfile)
                 sys.stdout.write("\n")
                 sys.stderr.write(f"\nLast file: {infile} ")
                 raise

From f7caf9b675a8a0fc7dfb89c9fcc8dfbe027d2cbd Mon Sep 17 00:00:00 2001
From: rocky 
Date: Sat, 3 Feb 2024 15:15:55 -0500
Subject: [PATCH 18/24] Remove stray blank line

---
 uncompyle6/semantics/linemap.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/uncompyle6/semantics/linemap.py b/uncompyle6/semantics/linemap.py
index 1c760b43..fa311b2e 100644
--- a/uncompyle6/semantics/linemap.py
+++ b/uncompyle6/semantics/linemap.py
@@ -1,6 +1,5 @@
 # Copyright (c) 2018, 2024 by Rocky Bernstein
 #
-
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by

From 9772454a3b9716aee37f91b12e2f137d1a08bc04 Mon Sep 17 00:00:00 2001
From: rocky 
Date: Sat, 3 Feb 2024 15:28:48 -0500
Subject: [PATCH 19/24] Sync fragments with decompyle3

---
 uncompyle6/semantics/fragments.py | 73 ++++++++++++++++++-------------
 1 file changed, 42 insertions(+), 31 deletions(-)

diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py
index 40dfbcc0..90a2ee57 100644
--- a/uncompyle6/semantics/fragments.py
+++ b/uncompyle6/semantics/fragments.py
@@ -64,7 +64,6 @@ The node position 0 will be associated with "import".
 # FIXME: DRY code with pysource

 import re
-import sys
 from bisect import bisect_right
 from collections import namedtuple
 from typing import Optional
@@ -88,7 +87,12 @@ from uncompyle6.semantics.consts import (
     TABLE_DIRECT,
     escape,
 )
-from uncompyle6.semantics.pysource import DEFAULT_DEBUG_OPTS, ParserError, StringIO
+from uncompyle6.semantics.pysource import (
+    DEFAULT_DEBUG_OPTS,
+    TREE_DEFAULT_DEBUG,
+    ParserError,
+    StringIO,
+)
 from uncompyle6.show import maybe_show_asm, maybe_show_tree

 NodeInfo = namedtuple("NodeInfo", "node start finish")
@@ -150,12 +154,13 @@ class FragmentsWalker(pysource.SourceWalker, object):

     def __init__(
         self,
-        version,
+        version: tuple,
         scanner,
-        showast=False,
+        showast=TREE_DEFAULT_DEBUG,
         debug_parser=PARSER_DEFAULT_DEBUG,
         compile_mode="exec",
-        is_pypy=False,
+        is_pypy=IS_PYPY,
+        linestarts={},
         tolerate_errors=True,
     ):
         pysource.SourceWalker.__init__(
@@ -167,6 +172,7 @@ class FragmentsWalker(pysource.SourceWalker, object):
             debug_parser=debug_parser,
             compile_mode=compile_mode,
             is_pypy=is_pypy,
+            linestarts=linestarts,
             tolerate_errors=tolerate_errors,
         )
@@ -657,7 +663,7 @@ class FragmentsWalker(pysource.SourceWalker, object):

         n = ast[iter_index]
-        assert n == "comp_iter"
+        assert n == "comp_iter", n.kind

         # Find the comprehension body. It is the inner-most
         # node that is not list_.. .
while n == "comp_iter":  # list_iter
@@ -716,7 +722,7 @@ class FragmentsWalker(pysource.SourceWalker, object):
         assert iscode(code), node[code_index]
 
         code_name = code.co_name
-        code = Code(code, self.scanner, self.currentclass)
+        code = Code(code, self.scanner, self.currentclass, self.debug_opts["asm"])
 
         ast = self.build_ast(code._tokens, code._customize, code)
 
@@ -1063,13 +1069,17 @@ class FragmentsWalker(pysource.SourceWalker, object):
                     # Python 3.2 works like this
                     subclass = load_closure[-2].attr
                 else:
-                    raise "Internal Error n_classdef: cannot find class body"
+                    raise RuntimeError(
+                        "Internal Error n_classdef: cannot find class body"
+                    )
                 if hasattr(buildclass[3], "__len__"):
                     subclass_info = buildclass[3]
                 elif hasattr(buildclass[2], "__len__"):
                     subclass_info = buildclass[2]
                 else:
-                    raise "Internal Error n_classdef: cannot superclass name"
+                    raise RuntimeError(
+                        "Internal Error n_classdef: cannot find superclass name"
+                    )
             else:
                 subclass = buildclass[1][0].attr
                 subclass_info = node[0]
@@ -1570,19 +1580,19 @@ class FragmentsWalker(pysource.SourceWalker, object):
         if node[0].kind.startswith("kvlist"):
             # Python 3.5+ style key/value list in dict
             kv_node = node[0]
-            l = list(kv_node)
-            length = len(l)
+            ll = list(kv_node)
+            length = len(ll)
             if kv_node[-1].kind.startswith("BUILD_MAP"):
                 length -= 1
             i = 0
             while i < length:
                 self.write(sep)
-                name = self.traverse(l[i], indent="")
-                l[i].parent = kv_node
-                l[i + 1].parent = kv_node
+                name = self.traverse(ll[i], indent="")
+                ll[i].parent = kv_node
+                ll[i + 1].parent = kv_node
                 self.write(name, ": ")
                 value = self.traverse(
-                    l[i + 1], indent=self.indent + (len(name) + 2) * " "
+                    ll[i + 1], indent=self.indent + (len(name) + 2) * " "
                 )
                 self.write(sep, name, ": ", value)
                 sep = line_seperator
@@ -1592,25 +1602,25 @@ class FragmentsWalker(pysource.SourceWalker, object):
         elif len(node) > 1 and node[1].kind.startswith("kvlist"):
             # Python 3.0..3.4 style key/value list in dict
             kv_node = node[1]
-            l = list(kv_node)
-            if len(l) > 0 and l[0].kind == "kv3":
+            ll = list(kv_node)
+            if len(ll) > 0 and ll[0].kind == "kv3":
                 # Python 3.2 does this
                 kv_node = node[1][0]
-                l = list(kv_node)
+                ll = list(kv_node)
             i = 0
-            while i < len(l):
-                l[i].parent = kv_node
-                l[i + 1].parent = kv_node
+            while i < len(ll):
+                ll[i].parent = kv_node
+                ll[i + 1].parent = kv_node
                 key_start = len(self.f.getvalue()) + len(sep)
-                name = self.traverse(l[i + 1], indent="")
+                name = self.traverse(ll[i + 1], indent="")
                 key_finish = key_start + len(name)
                 val_start = key_finish + 2
                 value = self.traverse(
-                    l[i], indent=self.indent + (len(name) + 2) * " "
+                    ll[i], indent=self.indent + (len(name) + 2) * " "
                 )
                 self.write(sep, name, ": ", value)
-                self.set_pos_info_recurse(l[i + 1], key_start, key_finish)
-                self.set_pos_info_recurse(l[i], val_start, val_start + len(value))
+                self.set_pos_info_recurse(ll[i + 1], key_start, key_finish)
+                self.set_pos_info_recurse(ll[i], val_start, val_start + len(value))
                 sep = line_seperator
                 i += 3
             pass
@@ -1814,7 +1824,7 @@ class FragmentsWalker(pysource.SourceWalker, object):
                     if m.group("child"):
                         node = node[int(m.group("child"))]
                         node.parent = startnode
-            except:
+            except Exception:
                 print(node.__dict__)
                 raise
@@ -1951,7 +1961,7 @@ class FragmentsWalker(pysource.SourceWalker, object):
                 start = len(self.f.getvalue())
                 self.write(eval(expr, d, d))
                 self.set_pos_info(node, start, len(self.f.getvalue()))
-            except:
+            except Exception:
                 print(node)
                 raise
             m = escape.search(fmt, i)
@@ -2001,7 +2011,7 @@ def deparse_code(
     showgrammar=False,
     code_objects={},
     compile_mode="exec",
-
is_pypy=None, + is_pypy=IS_PYPY, walker=FragmentsWalker, ): debug_opts = {"asm": showasm, "ast": showast, "grammar": showgrammar} @@ -2054,7 +2064,7 @@ def code_deparse( is_pypy = IS_PYPY # store final output stream for case of error - scanner = get_scanner(version, is_pypy=is_pypy) + scanner = get_scanner(version, is_pypy=is_pypy, show_asm=debug_opts["asm"]) show_asm = debug_opts.get("asm", None) tokens, customize = scanner.ingest(co, code_objects=code_objects, show_asm=show_asm) @@ -2078,6 +2088,7 @@ def code_deparse( debug_parser=debug_parser, compile_mode=compile_mode, is_pypy=is_pypy, + linestarts=linestarts, ) is_top_level_module = co.co_name == "" @@ -2153,7 +2164,7 @@ def code_deparse_around_offset( assert iscode(co) if version is None: - version = sys.version_info[:3] + version = PYTHON_VERSION_TRIPLE if is_pypy is None: is_pypy = IS_PYPY @@ -2180,7 +2191,7 @@ def deparse_code_around_offset( out=StringIO(), showasm=False, showast=False, - showgrammar=False, + showgrammar=PARSER_DEFAULT_DEBUG, is_pypy=False, ): debug_opts = {"asm": showasm, "ast": showast, "grammar": showgrammar} From 9f9074c28520745f6ddce83f005fd067a66fe4dc Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 3 Feb 2024 15:43:07 -0500 Subject: [PATCH 20/24] Add a type annotation --- uncompyle6/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uncompyle6/main.py b/uncompyle6/main.py index 374980b9..ca5ff13f 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -239,7 +239,7 @@ def decompile_file( # FIXME: combine into an options parameter def main( in_base: str, - out_base: str, + out_base: Optional[str], compiled_files: list, source_files: list, outfile=None, From e65a2db971b529c833711db77a979422b88fa9da Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 3 Feb 2024 18:40:38 -0500 Subject: [PATCH 21/24] Small tweak --- uncompyle6/main.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/uncompyle6/main.py b/uncompyle6/main.py index ca5ff13f..671f0b2b 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -146,9 +146,9 @@ def decompile( co, out, bytecode_version, + is_pypy=is_pypy, debug_opts=debug_opts, compile_mode=compile_mode, - is_pypy=is_pypy, ) pass return deparsed @@ -247,7 +247,6 @@ def main( showast={}, showgrammar=False, source_encoding=None, - raise_on_error=False, do_linemaps=False, do_fragments=False, ) -> Tuple[int, int, int, int]: From b0e139e6cc25f1b85cfb48836a48d913ab9f3436 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 4 Feb 2024 12:16:17 -0500 Subject: [PATCH 22/24] Partial merge --- uncompyle6/parser.py | 4 +- uncompyle6/semantics/fragments.py | 116 +++++++++++++++++------------- 2 files changed, 67 insertions(+), 53 deletions(-) diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index 8b91f040..5ca3dc49 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -597,12 +597,12 @@ class PythonParser(GenericASTBuilder): compare ::= compare_single compare_single ::= expr expr COMPARE_OP - # A compare_chained is two comparisions, as in: x <= y <= z + # A compare_chained is two comparisons, as in: x <= y <= z compare_chained ::= expr compared_chained_middle ROT_TWO POP_TOP _come_froms compare_chained_right ::= expr COMPARE_OP JUMP_FORWARD - # Non-null kvlist items are broken out in the indiviual grammars + # Non-null kvlist items are broken out in the individual grammars kvlist ::= # Positional arguments in make_function diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index 63dc9cf0..9ffe1c90 100644 --- 
a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2019, 2021-2023 by Rocky Bernstein +# Copyright (c) 2015-2019, 2021-2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -74,7 +74,6 @@ from xdis import iscode from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE import uncompyle6.parser as python_parser -from uncompyle6 import parser from uncompyle6.parsers.treenode import SyntaxTree from uncompyle6.scanner import Code, Token, get_scanner from uncompyle6.semantics import pysource @@ -88,7 +87,12 @@ from uncompyle6.semantics.consts import ( TABLE_DIRECT, escape, ) -from uncompyle6.semantics.pysource import ParserError, StringIO +from uncompyle6.semantics.pysource import ( + DEFAULT_DEBUG_OPTS, + TREE_DEFAULT_DEBUG, + ParserError, + StringIO, +) from uncompyle6.show import maybe_show_asm, maybe_show_tree NodeInfo = namedtuple("NodeInfo", "node start finish") @@ -152,10 +156,11 @@ class FragmentsWalker(pysource.SourceWalker, object): self, version, scanner, - showast=False, + showast=TREE_DEFAULT_DEBUG, debug_parser=PARSER_DEFAULT_DEBUG, compile_mode="exec", - is_pypy=False, + is_pypy=IS_PYPY, + linestarts={}, tolerate_errors=True, ): pysource.SourceWalker.__init__( @@ -167,18 +172,17 @@ class FragmentsWalker(pysource.SourceWalker, object): debug_parser=debug_parser, compile_mode=compile_mode, is_pypy=is_pypy, + linestarts=linestarts, tolerate_errors=tolerate_errors, ) - # Hide_internal suppresses displaying the additional instructions that sometimes + # hide_internal suppresses displaying the additional instructions that sometimes # exist in code but but were not written in the source code. # An example is: - # __module__ = __name__ - # - # If showing source code we generally don't want to show this. However - # in fragment deparsing we generally do need to see these instructions - # since we may be stopped at one. So here we do not want to suppress - # showing such instructions. + # __module__ = __name__ + # If showing source code we generally don't want to show this. However in fragment + # deparsing we generally do need to see these instructions since we may be stopped + # at one. So here we do not want to suppress showing such instructions. self.hide_internal = False self.offsets = {} self.last_finish = -1 @@ -659,7 +663,7 @@ class FragmentsWalker(pysource.SourceWalker, object): n = ast[iter_index] - assert n == "comp_iter" + assert n == "comp_iter", n.kind # Find the comprehension body. It is the inner-most # node that is not list_.. . 
while n == "comp_iter":  # list_iter
@@ -718,7 +722,7 @@ class FragmentsWalker(pysource.SourceWalker, object):
         assert iscode(code), node[code_index]
 
         code_name = code.co_name
-        code = Code(code, self.scanner, self.currentclass)
+        code = Code(code, self.scanner, self.currentclass, self.debug_opts["asm"])
 
         ast = self.build_ast(code._tokens, code._customize, code)
 
@@ -1065,13 +1069,17 @@ class FragmentsWalker(pysource.SourceWalker, object):
                     # Python 3.2 works like this
                     subclass = load_closure[-2].attr
                 else:
-                    raise "Internal Error n_classdef: cannot find class body"
+                    raise RuntimeError(
+                        "Internal Error n_classdef: cannot find class body"
+                    )
                 if hasattr(buildclass[3], "__len__"):
                     subclass_info = buildclass[3]
                 elif hasattr(buildclass[2], "__len__"):
                     subclass_info = buildclass[2]
                 else:
-                    raise "Internal Error n_classdef: cannot superclass name"
+                    raise RuntimeError(
+                        "Internal Error n_classdef: cannot find superclass name"
+                    )
             else:
                 subclass = buildclass[1][0].attr
                 subclass_info = node[0]
@@ -1119,7 +1127,13 @@ class FragmentsWalker(pysource.SourceWalker, object):
     n_classdefdeco2 = n_classdef
 
     def gen_source(
-        self, ast, name, customize, is_lambda=False, returnNone=False, debug_opts=None
+        self,
+        ast,
+        name,
+        customize,
+        is_lambda=False,
+        returnNone=False,
+        debug_opts=DEFAULT_DEBUG_OPTS,
     ):
         """convert parse tree to Python source code"""
 
@@ -1205,7 +1219,7 @@ class FragmentsWalker(pysource.SourceWalker, object):
             self.p.insts = self.scanner.insts
             self.p.offset2inst_index = self.scanner.offset2inst_index
             self.p.opc = self.scanner.opc
-            ast = parser.parse(self.p, tokens, customize, code)
+            ast = python_parser.parse(self.p, tokens, customize, code)
             self.p.insts = p_insts
         except (python_parser.ParserError, AssertionError) as e:
             raise ParserError(e, tokens, {})
@@ -1348,7 +1362,7 @@ class FragmentsWalker(pysource.SourceWalker, object):
         selectedText = text[start:finish]
 
         # Compute offsets relative to the beginning of the
-        # line rather than the beinning of the text
+        # line rather than the beginning of the text.
         try:
             lineStart = text[:start].rindex("\n") + 1
         except ValueError:
@@ -1356,7 +1370,7 @@ class FragmentsWalker(pysource.SourceWalker, object):
         adjustedStart = start - lineStart
 
         # If selected text is greater than a single line
-        # just show the first line plus elipses.
+        # just show the first line plus ellipsis.
         lines = selectedText.split("\n")
         if len(lines) > 1:
             adjustedEnd = len(lines[0]) - adjustedStart
@@ -1429,7 +1443,7 @@ class FragmentsWalker(pysource.SourceWalker, object):
         p = node.parent
         orig_parent = p
         # If we can get different text, use that as the parent,
-        # otherwise we'll use the immeditate parent
+        # otherwise we'll use the immediate parent.
while p and ( hasattr(p, "parent") and p.start == node.start and p.finish == node.finish ): @@ -1566,19 +1580,19 @@ class FragmentsWalker(pysource.SourceWalker, object): if node[0].kind.startswith("kvlist"): # Python 3.5+ style key/value list in dict kv_node = node[0] - l = list(kv_node) - length = len(l) + ll = list(kv_node) + length = len(ll) if kv_node[-1].kind.startswith("BUILD_MAP"): length -= 1 i = 0 while i < length: self.write(sep) - name = self.traverse(l[i], indent="") - l[i].parent = kv_node - l[i + 1].parent = kv_node + name = self.traverse(ll[i], indent="") + ll[i].parent = kv_node + ll[i + 1].parent = kv_node self.write(name, ": ") value = self.traverse( - l[i + 1], indent=self.indent + (len(name) + 2) * " " + ll[i + 1], indent=self.indent + (len(name) + 2) * " " ) self.write(sep, name, ": ", value) sep = line_seperator @@ -1588,25 +1602,25 @@ class FragmentsWalker(pysource.SourceWalker, object): elif len(node) > 1 and node[1].kind.startswith("kvlist"): # Python 3.0..3.4 style key/value list in dict kv_node = node[1] - l = list(kv_node) - if len(l) > 0 and l[0].kind == "kv3": + ll = list(kv_node) + if len(ll) > 0 and ll[0].kind == "kv3": # Python 3.2 does this kv_node = node[1][0] - l = list(kv_node) + ll = list(kv_node) i = 0 - while i < len(l): - l[i].parent = kv_node - l[i + 1].parent = kv_node + while i < len(ll): + ll[i].parent = kv_node + ll[i + 1].parent = kv_node key_start = len(self.f.getvalue()) + len(sep) - name = self.traverse(l[i + 1], indent="") + name = self.traverse(ll[i + 1], indent="") key_finish = key_start + len(name) val_start = key_finish + 2 value = self.traverse( - l[i], indent=self.indent + (len(name) + 2) * " " + ll[i], indent=self.indent + (len(name) + 2) * " " ) self.write(sep, name, ": ", value) - self.set_pos_info_recurse(l[i + 1], key_start, key_finish) - self.set_pos_info_recurse(l[i], val_start, val_start + len(value)) + self.set_pos_info_recurse(ll[i + 1], key_start, key_finish) + self.set_pos_info_recurse(ll[i], val_start, val_start + len(value)) sep = line_seperator i += 3 pass @@ -1779,7 +1793,7 @@ class FragmentsWalker(pysource.SourceWalker, object): n_set = n_tuple = n_build_set = n_list def template_engine(self, entry, startnode): - """The format template interpetation engine. See the comment at the + """The format template interpretation engine. See the comment at the beginning of this module for the how we interpret format specifications such as %c, %C, and so on. """ @@ -1810,7 +1824,7 @@ class FragmentsWalker(pysource.SourceWalker, object): if m.group("child"): node = node[int(m.group("child"))] node.parent = startnode - except: + except Exception: print(node.__dict__) raise @@ -1947,7 +1961,7 @@ class FragmentsWalker(pysource.SourceWalker, object): start = len(self.f.getvalue()) self.write(eval(expr, d, d)) self.set_pos_info(node, start, len(self.f.getvalue())) - except: + except Exception: print(node) raise m = escape.search(fmt, i) @@ -1962,7 +1976,7 @@ class FragmentsWalker(pysource.SourceWalker, object): # FIXME figure out how to get these cases to be table driven. # 2. subroutine calls. It the last op is the call and for purposes of printing - # we don't need to print anything special there. However it encompases the + # we don't need to print anything special there. However it encompasses the # entire string of the node fn(...) 
if startnode.kind == "call": last_node = startnode[-1] @@ -1997,7 +2011,7 @@ def deparse_code( showgrammar=False, code_objects={}, compile_mode="exec", - is_pypy=None, + is_pypy=IS_PYPY, walker=FragmentsWalker, ): debug_opts = {"asm": showasm, "ast": showast, "grammar": showgrammar} @@ -2050,7 +2064,7 @@ def code_deparse( is_pypy = IS_PYPY # store final output stream for case of error - scanner = get_scanner(version, is_pypy=is_pypy) + scanner = get_scanner(version, is_pypy=is_pypy, show_asm=debug_opts["asm"]) show_asm = debug_opts.get("asm", None) tokens, customize = scanner.ingest(co, code_objects=code_objects, show_asm=show_asm) @@ -2066,14 +2080,15 @@ def code_deparse( # Build Syntax Tree from tokenized and massaged disassembly. # deparsed = pysource.FragmentsWalker(out, scanner, showast=showast) - show_ast = debug_opts.get("ast", None) + show_tree = debug_opts.get("tree", False) deparsed = walker( version, scanner, - showast=show_ast, + showast=show_tree, debug_parser=debug_parser, compile_mode=compile_mode, is_pypy=is_pypy, + linestarts=linestarts, ) is_top_level_module = co.co_name == "" @@ -2094,7 +2109,7 @@ def code_deparse( ) # Just when you think we've forgotten about what we - # were supposed to to: Generate source from the Syntax ree! + # were supposed to do: Generate source from the Syntax tree! deparsed.gen_source(deparsed.ast, co.co_name, customize) deparsed.set_pos_info(deparsed.ast, 0, len(deparsed.text)) @@ -2149,7 +2164,7 @@ def code_deparse_around_offset( assert iscode(co) if version is None: - version = sys.version_info[:3] + version = PYTHON_VERSION_TRIPLE if is_pypy is None: is_pypy = IS_PYPY @@ -2167,7 +2182,7 @@ def code_deparse_around_offset( return deparsed -# Deprecated. Here still for compatability +# Deprecated. Here still for compatibility def deparse_code_around_offset( name, offset, @@ -2176,7 +2191,7 @@ def deparse_code_around_offset( out=StringIO(), showasm=False, showast=False, - showgrammar=False, + showgrammar=PARSER_DEFAULT_DEBUG, is_pypy=False, ): debug_opts = {"asm": showasm, "ast": showast, "grammar": showgrammar} @@ -2313,6 +2328,5 @@ def deparsed_find(tup, deparsed, code): # # deparse_test(get_code_for_fn(FragmentsWalker.fixup_offsets)) # # deparse_test(get_code_for_fn(FragmentsWalker.n_list)) # print("=" * 30) -# # deparse_test_around(408, 'n_list', -# get_code_for_fn(FragmentsWalker.n_build_list)) +# # deparse_test_around(408, 'n_list', get_code_for_fn(FragmentsWalker.n_build_list)) # # deparse_test(inspect.currentframe().f_code) From ef92f08f56ac5c735b5b81b5fafaf6f0aacb4e86 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 4 Feb 2024 12:29:30 -0500 Subject: [PATCH 23/24] Black files --- uncompyle6/main.py | 30 +++++++++++++----------- uncompyle6/parsers/parse37base.py | 32 ++++++++++++++------------ uncompyle6/scanner.py | 21 ++++++++--------- uncompyle6/scanners/scanner2.py | 32 +++++++++++++++----------- uncompyle6/scanners/scanner3.py | 31 ++++++++++++------------- uncompyle6/scanners/scanner37.py | 13 ++++++----- uncompyle6/scanners/scanner37base.py | 21 +++++++---------- uncompyle6/semantics/customize.py | 6 ++--- uncompyle6/semantics/customize38.py | 31 +++++++++++++++++-------- uncompyle6/semantics/make_function1.py | 16 +++++++------ uncompyle6/semantics/n_actions.py | 20 +++++----------- uncompyle6/semantics/pysource.py | 11 +++++---- 12 files changed, 137 insertions(+), 127 deletions(-) diff --git a/uncompyle6/main.py b/uncompyle6/main.py index 7f5902c3..a745df15 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -50,7 +50,7 
@@ def _get_outstream(outfile): def decompile( co, - bytecode_version = PYTHON_VERSION_TRIPLE, + bytecode_version=PYTHON_VERSION_TRIPLE, out=sys.stdout, showasm=None, showast={}, @@ -83,13 +83,13 @@ def decompile( s += "\n" real_out.write(s) - assert iscode(co), ("%s does not smell like code" % co) + assert iscode(co), "%s does not smell like code" % co co_pypy_str = "PyPy " if is_pypy else "" run_pypy_str = "PyPy " if IS_PYPY else "" sys_version_lines = sys.version.split("\n") if source_encoding: - write(f"# -*- coding: {source_encoding} -*-") + write("# -*- coding: %s -*-" % source_encoding) write( "# uncompyle6 version %s\n" "# %sPython bytecode version base %s%s\n# Decompiled from: %sPython %s" @@ -103,9 +103,9 @@ def decompile( ) ) if co.co_filename: - write(f"# Embedded file name: {co.co_filename}") + write("# Embedded file name: %s" % co.co_filename) if timestamp: - write(f"# Compiled at: {datetime.datetime.fromtimestamp(timestamp)}") + write("# Compiled at: %s" % datetime.datetime.fromtimestamp(timestamp)) if source_size: write("# Size of source mod 2**32: %d bytes" % source_size) @@ -135,7 +135,7 @@ def decompile( (line_no, deparsed.source_linemap[line_no] + header_count) for line_no in sorted(deparsed.source_linemap.keys()) ] - mapstream.write(f"\n\n# {linemap}\n") + mapstream.write("\n\n# %s\n" % linemap) else: if do_fragments: deparse_fn = code_deparse_fragments @@ -163,11 +163,11 @@ def compile_file(source_path): basename = source_path if hasattr(sys, "pypy_version_info"): - bytecode_path = f"{basename}-pypy{version_tuple_to_str()}.pyc" + bytecode_path = "%s-pypy%s.pyc" % (basename, version_tuple_to_str()) else: - bytecode_path = f"{basename}-{version_tuple_to_str()}.pyc" + bytecode_path = "%s-%s.pyc" % (basename, version_tuple_to_str()) - print(f"compiling {source_path} to {bytecode_path}") + print("compiling %s to %s" % (source_path, bytecode_path)) py_compile.compile(source_path, bytecode_path, "exec") return bytecode_path @@ -271,7 +271,7 @@ def main( infile = os.path.join(in_base, filename) # print("XXX", infile) if not os.path.exists(infile): - sys.stderr.write(f"File '{infile}' doesn't exist. Skipped\n") + sys.stderr.write("File '%s' doesn't exist. Skipped\n" % infile) continue if do_linemaps: @@ -319,11 +319,11 @@ def main( ): if e[0] != last_mod: line = "=" * len(e[0]) - outstream.write(f"{line}\n{e[0]}\n{line}\n") + outstream.write("%s\n%s\n%s\n" % (line, e[0], line)) last_mod = e[0] info = offsets[e] extract_info = d.extract_node_info(info) - outstream.write(f"{info.node.format().strip()}" + "\n") + outstream.write("%s" % info.node.format().strip() + "\n") outstream.write(extract_info.selectedLine + "\n") outstream.write(extract_info.markerLine + "\n\n") pass @@ -345,13 +345,15 @@ def main( sys.stdout.write("\n%s\n" % str(e)) if str(e).startswith("Unsupported Python"): sys.stdout.write("\n") - sys.stderr.write(f"\n# Unsupported bytecode in file {infile}\n# {e}\n") + sys.stderr.write( + "\n# Unsupported bytecode in file %s\n# %s\n" % (infile, e) + ) else: if outfile: outstream.close() os.remove(outfile) sys.stdout.write("\n") - sys.stderr.write(f"\nLast file: {infile} ") + sys.stderr.write("\nLast file: %s " % (infile)) raise # except: diff --git a/uncompyle6/parsers/parse37base.py b/uncompyle6/parsers/parse37base.py index b9639e97..a3942f5f 100644 --- a/uncompyle6/parsers/parse37base.py +++ b/uncompyle6/parsers/parse37base.py @@ -2,11 +2,10 @@ """ Python 3.7 base code. We keep non-custom-generated grammar rules out of this file. 
""" -from uncompyle6.parser import ParserError, PythonParser, nop_func -from uncompyle6.parsers.treenode import SyntaxTree from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG from spark_parser.spark import rule2str +from uncompyle6.parser import ParserError, PythonParser, nop_func from uncompyle6.parsers.reducecheck import ( and_invalid, ifelsestmt, @@ -16,9 +15,10 @@ from uncompyle6.parsers.reducecheck import ( or_check, testtrue, tryelsestmtl3, - while1stmt, while1elsestmt, + while1stmt, ) +from uncompyle6.parsers.treenode import SyntaxTree class Python37BaseParser(PythonParser): @@ -54,7 +54,7 @@ class Python37BaseParser(PythonParser): expr call CALL_FUNCTION_3 - """ + """ # FIXME: I bet this can be simplified # look for next MAKE_FUNCTION for i in range(i + 1, len(tokens)): @@ -104,7 +104,6 @@ class Python37BaseParser(PythonParser): # organization for this. For example, arrange organize by opcode base? def customize_grammar_rules(self, tokens, customize): - is_pypy = False # For a rough break out on the first word. This may @@ -321,18 +320,24 @@ class Python37BaseParser(PythonParser): elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"): if opname == "BUILD_CONST_DICT": - rule = """ + rule = ( + """ add_consts ::= ADD_VALUE* const_list ::= COLLECTION_START add_consts %s dict ::= const_list expr ::= dict - """ % opname + """ + % opname + ) else: - rule = """ + rule = ( + """ add_consts ::= ADD_VALUE* const_list ::= COLLECTION_START add_consts %s expr ::= const_list - """ % opname + """ + % opname + ) self.addRule(rule, nop_func) elif opname_base == "BUILD_CONST_KEY_MAP": @@ -348,7 +353,6 @@ class Python37BaseParser(PythonParser): self.addRule(rule, nop_func) elif opname_base in ("BUILD_MAP", "BUILD_MAP_UNPACK"): - if opname == "BUILD_MAP_UNPACK": self.addRule( """ @@ -525,7 +529,6 @@ class Python37BaseParser(PythonParser): "CALL_FUNCTION_VAR_KW", ) ) or opname.startswith("CALL_FUNCTION_KW"): - if opname == "CALL_FUNCTION" and token.attr == 1: rule = """ expr ::= dict_comp @@ -1259,12 +1262,11 @@ class Python37BaseParser(PythonParser): if fn: return fn(self, lhs, n, rule, ast, tokens, first, last) except Exception: - import sys, traceback + import sys + import traceback print( - ("Exception in %s %s\n" - + "rule: %s\n" - + "offsets %s .. %s") + ("Exception in %s %s\n" + "rule: %s\n" + "offsets %s .. %s") % ( fn.__name__, sys.exc_info()[1], diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 0af8304c..e7cfa608 100644 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -21,12 +21,10 @@ scanner/ingestion module. From here we call various version-specific scanners, e.g. for Python 2.7 or 3.4. """ +import sys from array import array from collections import namedtuple -import sys -from uncompyle6.scanners.tok import Token -from xdis.version_info import IS_PYPY, version_tuple_to_str import xdis from xdis import ( Bytecode, @@ -36,6 +34,9 @@ from xdis import ( instruction_size, next_offset, ) +from xdis.version_info import IS_PYPY, version_tuple_to_str + +from uncompyle6.scanners.tok import Token # The byte code versions we support. # Note: these all have to be tuples of 2 ints @@ -80,6 +81,7 @@ CANONIC2VERSION["3.5.2"] = 3.5 intern = sys.intern L65536 = 65536 + def long(num): return num @@ -108,9 +110,6 @@ class Scanner: self.show_asm = show_asm self.is_pypy = is_pypy - # Temoorary initialization. 
- self.opc = ModuleType("uninitialized") - if version[:2] in PYTHON_VERSIONS: v_str = "opcode_%s" % version_tuple_to_str( version, start=0, end=2, delimiter="" @@ -130,9 +129,7 @@ class Scanner: # FIXME: This weird Python2 behavior is not Python3 self.resetTokenClass() - def bound_collection_from_tokens( - self, tokens, t, i, collection_type - ): + def bound_collection_from_tokens(self, tokens, t, i, collection_type): count = t.attr assert isinstance(count, int) @@ -334,7 +331,7 @@ class Scanner: else: print("%i\t%s\t" % (i, self.opname[op])) - def first_instr(self, start: int, end: int, instr, target=None, exact=True): + def first_instr(self, start, end, instr, target=None, exact=True): """ Find the first in the block from start to end. is any python bytecode instruction or a list of opcodes @@ -622,8 +619,7 @@ def parse_fn_counts_30_35(argc): return ((argc & 0xFF), (argc >> 8) & 0xFF, annotate_count) -def get_scanner(version: Union[str, tuple], is_pypy=False, show_asm=None) -> Scanner: - +def get_scanner(version, is_pypy=False, show_asm=None): # If version is a string, turn that into the corresponding float. if isinstance(version, str): if version not in canonic_python_version: @@ -684,5 +680,6 @@ if __name__ == "__main__": # scanner = get_scanner('2.7.13', True) # scanner = get_scanner(sys.version[:5], False) from xdis.version_info import PYTHON_VERSION_TRIPLE + scanner = get_scanner(PYTHON_VERSION_TRIPLE, IS_PYPY, True) tokens, customize = scanner.ingest(co, {}, show_asm="after") diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 695f5fa5..fbd242f7 100644 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -36,13 +36,13 @@ Finally we save token information. from __future__ import print_function from copy import copy - -from xdis import code2num, iscode, op_has_argument, instruction_size -from xdis.bytecode import _get_const_info -from uncompyle6.scanner import Scanner, Token - from sys import intern +from xdis import code2num, instruction_size, iscode, op_has_argument +from xdis.bytecode import _get_const_info + +from uncompyle6.scanner import Scanner, Token + class Scanner2(Scanner): def __init__(self, version, show_asm=None, is_pypy=False): @@ -236,7 +236,6 @@ class Scanner2(Scanner): # 'LOAD_ASSERT' is used in assert statements. 
self.load_asserts = set() for i in self.op_range(0, codelen): - # We need to detect the difference between: # raise AssertionError # and @@ -328,9 +327,14 @@ class Scanner2(Scanner): "BUILD_SET", ): t = Token( - op_name, oparg, pattr, offset, + op_name, + oparg, + pattr, + offset, self.linestarts.get(offset, None), - op, has_arg, self.opc + op, + has_arg, + self.opc, ) collection_type = op_name.split("_")[1] next_tokens = self.bound_collection_from_tokens( @@ -541,14 +545,17 @@ class Scanner2(Scanner): for s in stmt_list: if code[s] == self.opc.JUMP_ABSOLUTE and s not in pass_stmts: target = self.get_target(s) - if target > s or (self.lines and self.lines[last_stmt].l_no == self.lines[s].l_no): + if target > s or ( + self.lines and self.lines[last_stmt].l_no == self.lines[s].l_no + ): stmts.remove(s) continue j = self.prev[s] while code[j] == self.opc.JUMP_ABSOLUTE: j = self.prev[j] if ( - self.version >= (2, 3) and self.opname_for_offset(j) == "LIST_APPEND" + self.version >= (2, 3) + and self.opname_for_offset(j) == "LIST_APPEND" ): # list comprehension stmts.remove(s) continue @@ -925,7 +932,6 @@ class Scanner2(Scanner): # Is it an "and" inside an "if" or "while" block if op == self.opc.PJIF: - # Search for other POP_JUMP_IF_...'s targeting the # same target, of the current POP_JUMP_... instruction, # starting from current offset, and filter everything inside inner 'or' @@ -1117,7 +1123,6 @@ class Scanner2(Scanner): # Is this a loop and not an "if" statement? if (if_end < pre_rtarget) and (pre[if_end] in self.setup_loop_targets): - if if_end > start: return else: @@ -1467,11 +1472,12 @@ class Scanner2(Scanner): if __name__ == "__main__": import inspect + from xdis.version_info import PYTHON_VERSION_TRIPLE co = inspect.currentframe().f_code tokens, customize = Scanner2(PYTHON_VERSION_TRIPLE).ingest(co) for t in tokens: - print(t) + print(t) pass diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 46295765..3491a25c 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -35,20 +35,18 @@ Finally we save token information. 
from __future__ import print_function -from xdis import iscode, instruction_size, Instruction -from xdis.bytecode import _get_const_info +import sys -from uncompyle6.scanners.tok import Token -from uncompyle6.scanner import parse_fn_counts_30_35 -from uncompyle6.util import get_code_name import xdis # Get all the opcodes into globals import xdis.opcodes.opcode_33 as op3 +from xdis import Instruction, instruction_size, iscode +from xdis.bytecode import _get_const_info -from uncompyle6.scanner import Scanner, CONST_COLLECTIONS - -import sys +from uncompyle6.scanner import CONST_COLLECTIONS, Scanner, parse_fn_counts_30_35 +from uncompyle6.scanners.tok import Token +from uncompyle6.util import get_code_name intern = sys.intern @@ -261,7 +259,7 @@ class Scanner3(Scanner): opname="COLLECTION_START", attr=collection_enum, pattr=collection_type, - offset= "%s_0" % start_offset, + offset="%s_0" % start_offset, linestart=False, has_arg=True, has_extended_arg=False, @@ -296,7 +294,8 @@ class Scanner3(Scanner): return new_tokens def bound_map_from_inst( - self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int): + self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int + ): """ Try to a sequence of instruction that ends with a BUILD_MAP into a sequence that can be parsed much faster, but inserting the @@ -379,9 +378,7 @@ class Scanner3(Scanner): ) return new_tokens - def ingest( - self, co, classname=None, code_objects={}, show_asm=None - ): + def ingest(self, co, classname=None, code_objects={}, show_asm=None): """ Create "tokens" the bytecode of an Python code object. Largely these are the opcode name, but in some cases that has been modified to make parsing @@ -647,7 +644,9 @@ class Scanner3(Scanner): ) pattr = "%s positional, %s keyword only, %s annotated" % ( - pos_args, name_pair_args, annotate_args + pos_args, + name_pair_args, + annotate_args, ) if name_pair_args > 0 and annotate_args > 0: @@ -1542,10 +1541,10 @@ class Scanner3(Scanner): if __name__ == "__main__": - from xdis.version_info import PYTHON_VERSION_TRIPLE - import inspect + from xdis.version_info import PYTHON_VERSION_TRIPLE + co = inspect.currentframe().f_code tokens, customize = Scanner3(PYTHON_VERSION_TRIPLE).ingest(co) diff --git a/uncompyle6/scanners/scanner37.py b/uncompyle6/scanners/scanner37.py index 3206ba09..4a4f74b7 100644 --- a/uncompyle6/scanners/scanner37.py +++ b/uncompyle6/scanners/scanner37.py @@ -22,14 +22,13 @@ This sets up opcodes Python's 3.7 and calls a generalized scanner routine for Python 3. """ -from uncompyle6.scanner import CONST_COLLECTIONS -from uncompyle6.scanners.tok import Token - -from uncompyle6.scanners.scanner37base import Scanner37Base - # bytecode verification, verify(), uses JUMP_OPs from here from xdis.opcodes import opcode_37 as opc +from uncompyle6.scanner import CONST_COLLECTIONS +from uncompyle6.scanners.scanner37base import Scanner37Base +from uncompyle6.scanners.tok import Token + # bytecode verification, verify(), uses JUMP_OPS from here JUMP_OPs = opc.JUMP_OPS @@ -193,4 +192,6 @@ if __name__ == "__main__": print(t.format()) pass else: - print("Need to be Python 3.7 to demo; I am version %s." % version_tuple_to_str()) + print( + "Need to be Python 3.7 to demo; I am version %s." 
% version_tuple_to_str() + ) diff --git a/uncompyle6/scanners/scanner37base.py b/uncompyle6/scanners/scanner37base.py index ccbcec93..499538bd 100644 --- a/uncompyle6/scanners/scanner37base.py +++ b/uncompyle6/scanners/scanner37base.py @@ -29,18 +29,16 @@ For example: Finally we save token information. """ -from xdis import iscode, instruction_size, Instruction -from xdis.bytecode import _get_const_info +import sys -from uncompyle6.scanner import Token import xdis # Get all the opcodes into globals import xdis.opcodes.opcode_37 as op3 +from xdis import Instruction, instruction_size, iscode +from xdis.bytecode import _get_const_info -from uncompyle6.scanner import Scanner - -import sys +from uncompyle6.scanner import Scanner, Token globals().update(op3.opmap) @@ -252,7 +250,6 @@ class Scanner37Base(Scanner): n = len(self.insts) for i, inst in enumerate(self.insts): - # We need to detect the difference between: # raise AssertionError # and @@ -282,7 +279,6 @@ class Scanner37Base(Scanner): # To simplify things we want to untangle this. We also # do this loop before we compute jump targets. for i, inst in enumerate(self.insts): - # One artifact of the "too-small" operand problem, is that # some backward jumps, are turned into forward jumps to another # "extended arg" backward jump to the same location. @@ -319,7 +315,6 @@ class Scanner37Base(Scanner): j = 0 for i, inst in enumerate(self.insts): - argval = inst.argval op = inst.opcode @@ -707,9 +702,7 @@ class Scanner37Base(Scanner): # Finish filling the list for last statement slist += [codelen] * (codelen - len(slist)) - def detect_control_flow( - self, offset, targets, inst_index - ): + def detect_control_flow(self, offset, targets, inst_index): """ Detect type of block structures and their boundaries to fix optimized jumps in python2.3+ @@ -956,5 +949,7 @@ if __name__ == "__main__": for t in tokens: print(t) else: - print("Need to be Python 3.7 to demo; I am version %s." % version_tuple_to_str()) + print( + "Need to be Python 3.7 to demo; I am version %s." % version_tuple_to_str() + ) pass diff --git a/uncompyle6/semantics/customize.py b/uncompyle6/semantics/customize.py index c708ca37..e2bb3cda 100644 --- a/uncompyle6/semantics/customize.py +++ b/uncompyle6/semantics/customize.py @@ -17,15 +17,15 @@ """ from uncompyle6.parsers.treenode import SyntaxTree +from uncompyle6.scanners.tok import Token from uncompyle6.semantics.consts import ( INDENT_PER_LEVEL, NO_PARENTHESIS_EVER, PRECEDENCE, - TABLE_R, TABLE_DIRECT, + TABLE_R, ) from uncompyle6.semantics.helper import flatten_list -from uncompyle6.scanners.tok import Token def customize_for_version(self, is_pypy, version): @@ -87,7 +87,7 @@ def customize_for_version(self, is_pypy, version): if line_number != self.line_number: sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1] pass - self.write("%s%s" (sep, value)) + self.write("%s%s" % (sep, value)) sep = ", " assert n >= len(kwargs_names) diff --git a/uncompyle6/semantics/customize38.py b/uncompyle6/semantics/customize38.py index dc3b1d31..36f89a20 100644 --- a/uncompyle6/semantics/customize38.py +++ b/uncompyle6/semantics/customize38.py @@ -23,8 +23,8 @@ from uncompyle6.semantics.consts import PRECEDENCE, TABLE_DIRECT from uncompyle6.semantics.customize37 import FSTRING_CONVERSION_MAP from uncompyle6.semantics.helper import escape_string, strip_quotes -def customize_for_version38(self, version): +def customize_for_version38(self, version): # FIXME: pytest doesn't add proper keys in testing. Reinstate after we have fixed pytest. 
# for lhs in 'for forelsestmt forelselaststmt ' # 'forelselaststmtc tryfinally38'.split(): @@ -40,10 +40,10 @@ def customize_for_version38(self, version): ), "async_forelse_stmt38": ( "%|async for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n", - (7, 'store'), - (0, 'expr'), - (8, 'for_block'), - (-1, 'else_suite') + (7, "store"), + (0, "expr"), + (8, "for_block"), + (-1, "else_suite"), ), "async_with_stmt38": ( "%|async with %c:\n%+%c%-\n", @@ -70,8 +70,15 @@ def customize_for_version38(self, version): ), # Python 3.8 reverses the order of keys and items # from all prior versions of Python. - "dict_comp_body": ("%c: %c", (0, "expr"), (1, "expr"),), - "except_cond1a": ("%|except %c:\n", (1, "expr"),), + "dict_comp_body": ( + "%c: %c", + (0, "expr"), + (1, "expr"), + ), + "except_cond1a": ( + "%|except %c:\n", + (1, "expr"), + ), "except_cond_as": ( "%|except %c as %c:\n", (1, "expr"), @@ -124,7 +131,11 @@ def customize_for_version38(self, version): "pop_return": ("%|return %c\n", (1, "return_expr")), "popb_return": ("%|return %c\n", (0, "return_expr")), "pop_ex_return": ("%|return %c\n", (0, "return_expr")), - "set_for": (" for %c in %c", (2, "store"), (0, "expr_or_arg"),), + "set_for": ( + " for %c in %c", + (2, "store"), + (0, "expr_or_arg"), + ), "whilestmt38": ( "%|while %c:\n%+%c%-\n\n", (1, ("bool_op", "testexpr", "testexprc")), @@ -322,7 +333,9 @@ def customize_for_version38(self, version): f_conversion = self.traverse(formatted_value, indent="") # Remove leaving "f" and quotes conversion = strip_quotes(f_conversion[1:]) - f_str = "f%s" % escape_string(("%s%s" % (value_equal, conversion)) + post_str) + f_str = "f%s" % escape_string( + ("%s%s" % (value_equal, conversion)) + post_str + ) self.write(f_str) self.in_format_string = old_in_format_string diff --git a/uncompyle6/semantics/make_function1.py b/uncompyle6/semantics/make_function1.py index 4b47e1eb..7abb6368 100644 --- a/uncompyle6/semantics/make_function1.py +++ b/uncompyle6/semantics/make_function1.py @@ -17,16 +17,18 @@ All the crazy things we have to do to handle Python functions in Python before 3.0. The saga of changes continues in 3.0 and above and in other files. """ -from uncompyle6.scanner import Code -from uncompyle6.semantics.parser_error import ParserError +from xdis import iscode + from uncompyle6.parser import ParserError as ParserError2 +from uncompyle6.scanner import Code from uncompyle6.semantics.helper import ( - print_docstring, find_all_globals, find_globals_and_nonlocals, find_none, + print_docstring, ) -from xdis import iscode +from uncompyle6.semantics.parser_error import ParserError + def make_function1(self, node, is_lambda, nested=1, code_node=None): """ @@ -36,8 +38,8 @@ def make_function1(self, node, is_lambda, nested=1, code_node=None): def build_param(tree, param_names: list) -> tuple: """build parameters: - - handle defaults - - handle format tuple parameters + - handle defaults + - handle format tuple parameters """ # if formal parameter is a tuple, the parameter name # starts with a dot (eg. 
'.1', '.2') @@ -186,5 +188,5 @@ def make_function1(self, node, is_lambda, nested=1, code_node=None): tree, code.co_name, code._customize, is_lambda=is_lambda, returnNone=rn ) - code._tokens = None # save memory + code._tokens = None # save memory code._customize = None # save memory diff --git a/uncompyle6/semantics/n_actions.py b/uncompyle6/semantics/n_actions.py index bf364a46..27df0557 100644 --- a/uncompyle6/semantics/n_actions.py +++ b/uncompyle6/semantics/n_actions.py @@ -16,22 +16,12 @@ Custom Nonterminal action functions. See NonterminalActions docstring. """ -from uncompyle6.semantics.consts import ( - INDENT_PER_LEVEL, - NONE, - PRECEDENCE, - minint, -) - from uncompyle6.parsers.treenode import SyntaxTree from uncompyle6.scanners.tok import Token +from uncompyle6.semantics.consts import INDENT_PER_LEVEL, NONE, PRECEDENCE, minint +from uncompyle6.semantics.helper import find_code_node, flatten_list from uncompyle6.util import better_repr, get_code_name -from uncompyle6.semantics.helper import ( - find_code_node, - flatten_list, -) - class NonterminalActions: """ @@ -227,8 +217,10 @@ class NonterminalActions: else: # from trepan.api import debug; debug() raise TypeError( - ("Internal Error: n_const_list expects dict, list set, or set; got %s" - % lastnodetype) + ( + "Internal Error: n_const_list expects dict, list set, or set; got %s" + % lastnodetype + ) ) self.indent_more(INDENT_PER_LEVEL) diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 605db2d7..cd57ca59 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -773,7 +773,8 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): """ Expanding '%s' in template '%s[%s]': %s is invalid; has only %d entries - """ % (node.kind, entry, arg, index, len(node)) + """ + % (node.kind, entry, arg, index, len(node)) ) self.preorder(node[index]) @@ -1343,10 +1344,10 @@ def code_deparse( if expected_start: assert ( deparsed.ast == expected_start - ), ( - "Should have parsed grammar start to '%s'; got: %s" % - (expected_start, deparsed.ast.kind) - ) + ), "Should have parsed grammar start to '%s'; got: %s" % ( + expected_start, + deparsed.ast.kind, + ) # save memory del tokens From 42ed183dbbc3f01c5db3b6711ff87b671b79c115 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 4 Feb 2024 12:36:08 -0500 Subject: [PATCH 24/24] Fix imports --- uncompyle6/semantics/pysource.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 5a6566fe..eef9591c 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -135,8 +135,7 @@ from spark_parser import GenericASTTraversal from xdis import COMPILER_FLAG_BIT, iscode from xdis.version_info import PYTHON_VERSION_TRIPLE -import uncompyle6.parser as python_parser -from uncompyle6.parser import get_python_parser +from uncompyle6.parser import get_python_parser, parse from uncompyle6.parsers.treenode import SyntaxTree from uncompyle6.scanner import Code, get_scanner from uncompyle6.scanners.tok import Token @@ -1209,11 +1208,11 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): p_insts = self.p.insts self.p.insts = self.scanner.insts self.p.offset2inst_index = self.scanner.offset2inst_index - ast = python_parser.parse(self.p, tokens, customize, code) + ast = parse(self.p, tokens, customize, code) self.customize(customize) self.p.insts = p_insts - except 
(python_parser.ParserError, AssertionError) as e: + except (ParserError, AssertionError) as e: raise ParserError(e, tokens, self.p.debug["reduce"]) transform_tree = self.treeTransform.transform(ast, code) self.maybe_show_tree(ast, phase="after") @@ -1248,9 +1247,9 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): self.p.insts = self.scanner.insts self.p.offset2inst_index = self.scanner.offset2inst_index self.p.opc = self.scanner.opc - ast = python_parser.parse(self.p, tokens, customize, code) + ast = parse(self.p, tokens, customize, code) self.p.insts = p_insts - except (python_parser.ParserError, AssertionError) as e: + except (ParserError, AssertionError) as e: raise ParserError(e, tokens, self.p.debug["reduce"]) checker(ast, False, self.ast_errors)