diff --git a/__pkginfo__.py b/__pkginfo__.py index 723c15a3..98fa63d5 100644 --- a/__pkginfo__.py +++ b/__pkginfo__.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018, 2020-2021 Rocky Bernstein +# Copyright (C) 2018, 2020-2021 2024 Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -32,9 +32,11 @@ # 3.3 | pip | 10.0.1 | # 3.4 | pip | 19.1.1 | +import os.path as osp + # Things that change more often go here. copyright = """ -Copyright (C) 2015-2021 Rocky Bernstein . +Copyright (C) 2015-2021, 2024 Rocky Bernstein . """ classifiers = [ @@ -75,7 +77,7 @@ entry_points = { ] } ftp_url = None -install_requires = ["spark-parser >= 1.8.9, < 1.9.0", "xdis >= 6.0.8, < 6.2.0"] +install_requires = ["click", "spark-parser >= 1.8.9, < 1.9.0", "xdis >= 6.0.8, < 6.2.0"] license = "GPL3" mailing_list = "python-debugger@googlegroups.com" @@ -88,21 +90,18 @@ web = "https://github.com/rocky/python-uncompyle6/" zip_safe = True -import os.path - - def get_srcdir(): - filename = os.path.normcase(os.path.dirname(os.path.abspath(__file__))) - return os.path.realpath(filename) + filename = osp.normcase(osp.dirname(osp.abspath(__file__))) + return osp.realpath(filename) srcdir = get_srcdir() def read(*rnames): - return open(os.path.join(srcdir, *rnames)).read() + return open(osp.join(srcdir, *rnames)).read() -# Get info from files; set: long_description and __version__ +# Get info from files; set: long_description and VERSION long_description = read("README.rst") + "\n" exec(read("uncompyle6/version.py")) diff --git a/admin-tools/merge-for-2.4.sh b/admin-tools/merge-for-2.4.sh new file mode 100755 index 00000000..f8c55395 --- /dev/null +++ b/admin-tools/merge-for-2.4.sh @@ -0,0 +1,5 @@ +#/bin/bash +cd $(dirname ${BASH_SOURCE[0]}) +if . ./setup-python-2.4.sh; then + git merge python-3.0-to-3.2 +fi diff --git a/admin-tools/merge-for-3.0.sh b/admin-tools/merge-for-3.0.sh new file mode 100755 index 00000000..7fc1a596 --- /dev/null +++ b/admin-tools/merge-for-3.0.sh @@ -0,0 +1,5 @@ +#/bin/bash +cd $(dirname ${BASH_SOURCE[0]}) +if . ./setup-python-3.0.sh; then + git merge python-3.3-to-3.5 +fi diff --git a/admin-tools/merge-for-3.3.sh b/admin-tools/merge-for-3.3.sh new file mode 100755 index 00000000..aade2e77 --- /dev/null +++ b/admin-tools/merge-for-3.3.sh @@ -0,0 +1,5 @@ +#/bin/bash +cd $(dirname ${BASH_SOURCE[0]}) +if . ./setup-python-3.3.sh; then + git merge master +fi diff --git a/uncompyle6/bin/pydisassemble.py b/uncompyle6/bin/pydisassemble.py index 15e9ecfa..7e53c380 100755 --- a/uncompyle6/bin/pydisassemble.py +++ b/uncompyle6/bin/pydisassemble.py @@ -1,9 +1,22 @@ #!/usr/bin/env python -# Mode: -*- python -*- # -# Copyright (c) 2015-2016, 2018, 2020, 2022-2023 by Rocky Bernstein -# +# Copyright (c) 2015-2016, 2018, 2020, 2022-2024 +# by Rocky Bernstein # +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# + import getopt import os import sys @@ -48,10 +61,9 @@ PATTERNS = ("*.pyc", "*.pyo") def main(): - Usage_short = ( - """usage: %s FILE... + usage_short = ( + f"""usage: {program} FILE... Type -h for for full help.""" - % program ) if len(sys.argv) == 1: @@ -72,7 +84,7 @@ Type -h for for full help.""" print(__doc__) sys.exit(1) elif opt in ("-V", "--version"): - print("%s %s" % (program, __version__)) + print(f"{program} {__version__}") sys.exit(0) else: print(opt) diff --git a/uncompyle6/bin/uncompile.py b/uncompyle6/bin/uncompile.py index 0d41d073..ae112e6e 100755 --- a/uncompyle6/bin/uncompile.py +++ b/uncompyle6/bin/uncompile.py @@ -1,11 +1,11 @@ #!/usr/bin/env python # Mode: -*- python -*- # -# Copyright (c) 2015-2017, 2019-2020, 2023 by Rocky Bernstein +# Copyright (c) 2015-2017, 2019-2020, 2023-2024 +# by Rocky Bernstein # Copyright (c) 2000-2002 by hartmut Goebel # -import getopt import os import sys import time @@ -16,6 +16,12 @@ from uncompyle6.version import __version__ program = "uncompyle6" + +def usage(): + print(__doc__) + sys.exit(1) + + __doc__ = """ Usage: %s [OPTIONS]... [ FILE | DIR]... @@ -68,44 +74,18 @@ Extensions of generated files: (program,) * 5 ) -program = "uncompyle6" - -def usage(): - print(__doc__) - sys.exit(1) - - -def main_bin(): - if not ( - sys.version_info[0:2] - in ( - (2, 4), - (2, 5), - (2, 6), - (2, 7), - (3, 0), - (3, 1), - (3, 2), - (3, 3), - (3, 4), - (3, 5), - (3, 6), - (3, 7), - (3, 8), - (3, 9), - (3, 10), - (3, 11), - ) ): print('Error: %s requires Python 2.4-3.10' % program) sys.exit(-1) - recurse_dirs = False + numproc = 0 - outfile = "-" + out_base = None + out_base = None source_paths = [] timestamp = False timestampfmt = "# %Y.%m.%d %H:%M:%S %Z" + pyc_paths = files try: opts, pyc_paths = getopt.getopt( @@ -182,7 +162,7 @@ def main_bin(): sys.stderr.write(opt) usage() - # expand directory if specified + # Expand directory if "recurse" was specified. if recurse_dirs: expanded_files = [] for f in pyc_paths: @@ -216,15 +196,32 @@ def main_bin(): out_base = outfile outfile = None + # A second -a turns show_asm="after" into show_asm="before" + if asm_plus or asm: + asm_opt = "both" if asm_plus else "after" + else: + asm_opt = None + if timestamp: print(time.strftime(timestampfmt)) if numproc <= 1: + show_ast = {"before": tree or tree_plus, "after": tree_plus} try: result = main( - src_base, out_base, pyc_paths, source_paths, outfile, **options + src_base, + out_base, + pyc_paths, + source_paths, + outfile, + showasm=asm_opt, + showgrammar=show_grammar, + showast=show_ast, + do_verify=verify, + do_linemaps=linemaps, + start_offset=start_offset, + stop_offset=stop_offset, ) - result = [options.get("do_verify", None)] + list(result) if len(pyc_paths) > 1: mess = status_msg(*result) print("# " + mess) diff --git a/uncompyle6/main.py b/uncompyle6/main.py index 020d43fa..f89fc70c 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -13,10 +13,14 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +import ast import datetime import os +import os.path as osp import py_compile +import subprocess import sys +import tempfile from xdis import iscode from xdis.load import load_module @@ -38,9 +42,9 @@ def _get_outstream(outfile): """ Return an opened output file descriptor for ``outfile``. 
""" - dir_name = os.path.dirname(outfile) + dir_name = osp.dirname(outfile) failed_file = outfile + "_failed" - if os.path.exists(failed_file): + if osp.exists(failed_file): os.remove(failed_file) try: os.makedirs(dir_name) @@ -48,6 +52,17 @@ def _get_outstream(outfile): pass return open(outfile, 'wb') +def syntax_check(filename: str) -> bool: + with open(filename) as f: + source = f.read() + valid = True + try: + ast.parse(source) + except SyntaxError: + valid = False + return valid + + def decompile( co, bytecode_version=PYTHON_VERSION_TRIPLE, @@ -59,11 +74,13 @@ def decompile( source_encoding=None, code_objects={}, source_size=None, - is_pypy=False, + is_pypy: bool = False, magic_int=None, mapstream=None, do_fragments=False, compile_mode="exec", + start_offset: int = 0, + stop_offset: int = -1, ): """ ingests and deparses a given code block 'co' @@ -152,11 +169,12 @@ def decompile( compile_mode=compile_mode, ) header_count = 3 + len(sys_version_lines) - linemap = [ - (line_no, deparsed.source_linemap[line_no] + header_count) - for line_no in sorted(deparsed.source_linemap.keys()) - ] - mapstream.write("\n\n# %s\n" % linemap) + if deparsed is not None: + linemap = [ + (line_no, deparsed.source_linemap[line_no] + header_count) + for line_no in sorted(deparsed.source_linemap.keys()) + ] + mapstream.write("\n\n# %s\n" % linemap) else: if do_fragments: deparse_fn = code_deparse_fragments @@ -169,8 +187,11 @@ def decompile( is_pypy=is_pypy, debug_opts=debug_opts, compile_mode=compile_mode, + start_offset=start_offset, + stop_offset=stop_offset, ) pass + real_out.write("\n") return deparsed except pysource.SourceWalkerError, e: # deparsing failed @@ -194,7 +215,7 @@ def compile_file(source_path): def decompile_file( - filename, + filename: str, outstream=None, showasm=None, showast={}, @@ -202,6 +223,8 @@ def decompile_file( source_encoding=None, mapstream=None, do_fragments=False, + start_offset=0, + stop_offset=-1, ): """ decompile Python byte-code file (.pyc). Return objects to @@ -231,6 +254,8 @@ def decompile_file( is_pypy=is_pypy, magic_int=magic_int, mapstream=mapstream, + start_offset=start_offset, + stop_offset=stop_offset, ), ) else: @@ -251,6 +276,8 @@ def decompile_file( mapstream=mapstream, do_fragments=do_fragments, compile_mode="exec", + start_offset=start_offset, + stop_offset=stop_offset, ) ] return deparsed @@ -265,10 +292,13 @@ def main( outfile=None, showasm=None, showast={}, - showgrammar=False, + do_verify=None, + showgrammar: bool = False, source_encoding=None, do_linemaps=False, do_fragments=False, + start_offset: int = 0, + stop_offset: int = -1, ): """ in_base base directory for input files @@ -281,7 +311,8 @@ def main( - files below out_base out_base=... - stdout out_base=None, outfile=None """ - tot_files = okay_files = failed_files = verify_failed_files = 0 + tot_files = okay_files = failed_files = 0 + verify_failed_files = 0 if do_verify else 0 current_outfile = outfile linemap_stream = None @@ -289,9 +320,9 @@ def main( compiled_files.append(compile_file(source_path)) for filename in compiled_files: - infile = os.path.join(in_base, filename) + infile = osp.join(in_base, filename) # print("XXX", infile) - if not os.path.exists(infile): + if not osp.exists(infile): sys.stderr.write("File '%s' doesn't exist. 
Skipped\n" % infile) continue @@ -304,14 +335,19 @@ def main( if outfile: # outfile was given as parameter outstream = _get_outstream(outfile) elif out_base is None: - outstream = sys.stdout + out_base = tempfile.mkdtemp(prefix="py-dis-") + if do_verify and filename.endswith(".pyc"): + current_outfile = osp.join(out_base, filename[0:-1]) + outstream = open(current_outfile, "w") + else: + outstream = sys.stdout if do_linemaps: linemap_stream = sys.stdout else: if filename.endswith(".pyc"): - current_outfile = os.path.join(out_base, filename[0:-1]) + current_outfile = osp.join(out_base, filename[0:-1]) else: - current_outfile = os.path.join(out_base, filename) + "_dis" + current_outfile = osp.join(out_base, filename) + "_dis" pass pass @@ -319,9 +355,9 @@ def main( # print(current_outfile, file=sys.stderr) - # Try to uncompile the input file + # Try to decompile the input file. try: - deparsed = decompile_file( + deparsed_objects = decompile_file( infile, outstream, showasm, @@ -330,11 +366,13 @@ def main( source_encoding, linemap_stream, do_fragments, + start_offset, + stop_offset, ) if do_fragments: - for d in deparsed: + for deparsed_object in deparsed_objects: last_mod = None - offsets = d.offsets + offsets = deparsed_object.offsets for e in sorted( [k for k in offsets.keys() if isinstance(k[1], int)] ): @@ -343,11 +381,58 @@ def main( outstream.write("%s\n%s\n%s\n" % (line, e[0], line)) last_mod = e[0] info = offsets[e] - extract_info = d.extract_node_info(info) + extract_info = deparse_object.extract_node_info(info) outstream.write("%s" % info.node.format().strip() + "\n") outstream.write(extract_info.selectedLine + "\n") outstream.write(extract_info.markerLine + "\n\n") pass + + if do_verify: + for deparsed_object in deparsed_objects: + deparsed_object.f.close() + if PYTHON_VERSION_TRIPLE[:2] != deparsed_object.version[:2]: + sys.stdout.write( + "\n# skipping running %s; it is %s and we are %s" + % ( + deparsed_object.f.name, + version_tuple_to_str(deparsed_object.version, end=2), + version_tuple_to_str(PYTHON_VERSION_TRIPLE, end=2), + ) + ) + else: + check_type = "syntax check" + if do_verify == "run": + check_type = "run" + if PYTHON_VERSION_TRIPLE >= (3, 7): + result = subprocess.run( + [sys.executable, deparsed_object.f.name], + capture_output=True, + ) + valid = result.returncode == 0 + output = result.stdout.decode() + if output: + print(output) + pass + else: + result = subprocess.run( + [sys.executable, deparsed_object.f.name], + ) + valid = result.returncode == 0 + pass + if not valid: + print(result.stderr.decode()) + + else: + valid = syntax_check(deparsed_object.f.name) + + if not valid: + verify_failed_files += 1 + sys.stderr.write( + "\n# %s failed on file %s\n" + % (check_type, deparsed_object.f.name) + ) + + # sys.stderr.write("Ran %\n" % deparsed_object.f.name) pass tot_files += 1 except (ValueError, SyntaxError, ParserError, pysource.SourceWalkerError): diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 667010f4..a331a3f3 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -869,6 +869,29 @@ class Python3Parser(PythonParser): rule = "starred ::= %s %s" % ("expr " * v, opname) self.addRule(rule, nop_func) + elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"): + if opname == "BUILD_CONST_DICT": + rule = ( + """ + add_consts ::= ADD_VALUE* + const_list ::= COLLECTION_START add_consts %s + dict ::= const_list + expr ::= dict + """ + % opname + ) + else: + rule = ( + """ + add_consts ::= 
ADD_VALUE* + const_list ::= COLLECTION_START add_consts %s + expr ::= const_list + """ + % opname + ) + self.addRule(rule, nop_func) + +>>>>>>> python-3.0-to-3.2 elif opname_base in ( "BUILD_LIST", "BUILD_SET", @@ -1191,6 +1214,8 @@ class Python3Parser(PythonParser): self.add_unique_rule(rule, opname, token.attr, customize) elif (3, 3) <= self.version < (3, 6): + # FIXME move this into version-specific custom rules. + # In fact, some of this has been done for 3.3. if annotate_args > 0: rule = ( "mkfunc_annotate ::= %s%s%sannotate_tuple load_closure LOAD_CODE LOAD_STR %s" @@ -1454,9 +1479,6 @@ class Python3Parser(PythonParser): ) ) if self.version >= (3, 3): - # Normally we remove EXTENDED_ARG from the opcodes, but in the case of - # annotated functions can use the EXTENDED_ARG tuple to signal we have an annotated function. - # Yes this is a little hacky if self.version == (3, 3): # 3.3 puts kwargs before pos_arg pos_kw_tuple = ( @@ -1470,17 +1492,17 @@ class Python3Parser(PythonParser): ("kwargs " * kw_args_count), ) rule = ( - "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE LOAD_STR EXTENDED_ARG %s" + "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE LOAD_STR %s" % ( pos_kw_tuple[0], pos_kw_tuple[1], - ("call " * annotate_args), + ("annotate_arg " * annotate_args), opname, ) ) self.add_unique_rule(rule, opname, token.attr, customize) rule = ( - "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE LOAD_STR EXTENDED_ARG %s" + "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE LOAD_STR %s" % ( pos_kw_tuple[0], pos_kw_tuple[1], @@ -1489,9 +1511,8 @@ class Python3Parser(PythonParser): ) ) else: - # See above comment about use of EXTENDED_ARG rule = ( - "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE EXTENDED_ARG %s" + "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE %s" % ( ("kwargs " * kw_args_count), ("pos_arg " * (pos_args_count)), @@ -1501,7 +1522,7 @@ class Python3Parser(PythonParser): ) self.add_unique_rule(rule, opname, token.attr, customize) rule = ( - "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE EXTENDED_ARG %s" + "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE %s" % ( ("kwargs " * kw_args_count), ("pos_arg " * pos_args_count), diff --git a/uncompyle6/parsers/parse33.py b/uncompyle6/parsers/parse33.py index c0c22189..ce1fc672 100644 --- a/uncompyle6/parsers/parse33.py +++ b/uncompyle6/parsers/parse33.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016 Rocky Bernstein +# Copyright (c) 2016, 2024 Rocky Bernstein """ spark grammar differences over Python 3.2 for Python 3.3. """ @@ -8,7 +8,6 @@ from uncompyle6.parsers.parse32 import Python32Parser class Python33Parser(Python32Parser): - def p_33on(self, args): """ # Python 3.3+ adds yield from. @@ -18,13 +17,22 @@ class Python33Parser(Python32Parser): """ def customize_grammar_rules(self, tokens, customize): - self.remove_rules(""" + self.remove_rules( + """ # 3.3+ adds POP_BLOCKS whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK NOP COME_FROM_LOOP whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK NOP COME_FROM_LOOP - """) + """ + ) super(Python33Parser, self).customize_grammar_rules(tokens, customize) + + # FIXME: move 3.3 stuff out of parse3.py and put it here. 
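
The BUILD_CONST_LIST / BUILD_CONST_DICT / BUILD_CONST_SET branch above assembles its grammar text from a %-formatted template before handing it to addRule(). Below is a minimal standalone sketch of that expansion; build_const_rule() is a hypothetical helper written only to show the resulting productions, not something the parser itself defines.

def build_const_rule(opname):
    """Expand the rule template used for the BUILD_CONST_* pseudo-ops (illustration only)."""
    if opname == "BUILD_CONST_DICT":
        template = """
            add_consts ::= ADD_VALUE*
            const_list ::= COLLECTION_START add_consts %s
            dict       ::= const_list
            expr       ::= dict
        """
    else:
        template = """
            add_consts ::= ADD_VALUE*
            const_list ::= COLLECTION_START add_consts %s
            expr       ::= const_list
        """
    return template % opname


print(build_const_rule("BUILD_CONST_LIST"))

As the const_list production implies, a constant collection is scanned into a COLLECTION_START token, one ADD_VALUE token per element, and a closing BUILD_CONST_* token, which the rule then reduces to a single expr node.
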
+ # for i, token in enumerate(tokens): + # opname = token.kind + # opname_base = opname[: opname.rfind("_")] + return + class Python33ParserSingle(Python33Parser, PythonParserSingle): pass diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index c04039d8..88cff1d1 100644 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -97,6 +97,10 @@ class Code(object): """ def __init__(self, co, scanner, classname=None, show_asm=None): + # Full initialization is given below, but for linters + # well set up some initial values. + self.co_code = None # Really either bytes for >= 3.0 and string in < 3.0 + for i in dir(co): if i.startswith("co_"): setattr(self, i, getattr(co, i)) @@ -429,7 +433,7 @@ class Scanner: """ try: None in instr - except: + except Exception: instr = [instr] first = self.offset2inst_index[start] diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index b3d520da..45cec8dd 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -468,6 +468,7 @@ class Scanner3(Scanner): last_op_was_break = False new_tokens = [] + operand_value = 0 for i, inst in enumerate(self.insts): opname = inst.opname @@ -518,10 +519,11 @@ class Scanner3(Scanner): op = inst.opcode if opname == "EXTENDED_ARG": - # FIXME: The EXTENDED_ARG is used to signal annotation - # parameters - if i + 1 < n and self.insts[i + 1].opcode != self.opc.MAKE_FUNCTION: + if i + 1 < n: + operand_value = argval << 16 continue + else: + operand_value = 0 if inst.offset in jump_targets: jump_idx = 0 @@ -628,7 +630,7 @@ class Scanner3(Scanner): attr = attr[:4] # remove last value: attr[5] == False else: pos_args, name_pair_args, annotate_args = parse_fn_counts_30_35( - inst.argval + inst.argval + operand_value ) pattr = "%s positional, %s keyword only, %s annotated" % ( diff --git a/uncompyle6/scanners/scanner33.py b/uncompyle6/scanners/scanner33.py index c3ef4a2a..73de8a71 100644 --- a/uncompyle6/scanners/scanner33.py +++ b/uncompyle6/scanners/scanner33.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2019, 2021-2022 by Rocky Bernstein +# Copyright (c) 2015-2019, 2021-2022, 2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,16 +21,20 @@ scanner routine for Python 3. # bytecode verification, verify(), uses JUMP_OPs from here from xdis.opcodes import opcode_33 as opc -JUMP_OPS = opc.JUMP_OPS from uncompyle6.scanners.scanner3 import Scanner3 -class Scanner33(Scanner3): +JUMP_OPS = opc.JUMP_OPS + + +class Scanner33(Scanner3): def __init__(self, show_asm=False, is_pypy=False): Scanner3.__init__(self, (3, 3), show_asm) return + pass + if __name__ == "__main__": from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index a4f908a1..4017282b 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -2054,6 +2054,8 @@ def code_deparse( code_objects={}, compile_mode="exec", walker=FragmentsWalker, + start_offset: int = 0, + stop_offset: int = -1, ): """ Convert the code object co into a python source fragment. @@ -2088,6 +2090,22 @@ def code_deparse( tokens, customize = scanner.ingest(co, code_objects=code_objects, show_asm=show_asm) tokens, customize = scanner.ingest(co) + + if start_offset > 0: + for i, t in enumerate(tokens): + # If t.offset is a string, we want to skip this. 
+ if isinstance(t.offset, int) and t.offset >= start_offset: + tokens = tokens[i:] + break + + if stop_offset > -1: + for i, t in enumerate(tokens): + # In contrast to the test for start_offset If t.offset is + # a string, we want to extract the integer offset value. + if t.off2int() >= stop_offset: + tokens = tokens[:i] + break + maybe_show_asm(show_asm, tokens) debug_parser = dict(PARSER_DEFAULT_DEBUG) diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 244daa53..4ebeee81 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -130,6 +130,7 @@ Python. # evaluating the escape code. import sys +from io import StringIO from spark_parser import GenericASTTraversal from xdis import COMPILER_FLAG_BIT, iscode @@ -158,7 +159,11 @@ from uncompyle6.semantics.consts import ( ) from uncompyle6.semantics.customize import customize_for_version from uncompyle6.semantics.gencomp import ComprehensionMixin -from uncompyle6.semantics.helper import find_globals_and_nonlocals, print_docstring +from uncompyle6.semantics.helper import ( + find_globals_and_nonlocals, + is_lambda_mode, + print_docstring, +) from uncompyle6.semantics.make_function1 import make_function1 from uncompyle6.semantics.make_function2 import make_function2 from uncompyle6.semantics.make_function3 import make_function3 @@ -186,15 +191,6 @@ PARSER_DEFAULT_DEBUG = { "dups": False, } -PARSER_DEFAULT_DEBUG = { - "rules": False, - "transition": False, - "reduce": False, - "errorstack": "full", - "context": True, - "dups": False, -} - IS_PYPY = "__pypy__" in sys.builtin_module_names TREE_DEFAULT_DEBUG = {"before": False, "after": False} @@ -216,7 +212,8 @@ class SourceWalkerError(Exception): class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): """ - Class to traverses a Parse Tree of the bytecode instruction built from parsing to produce some sort of source text. + Class to traverses a Parse Tree of the bytecode instruction built from parsing to + produce some sort of source text. The Parse tree may be turned an Abstract Syntax tree as an intermediate step. """ @@ -224,7 +221,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): def __init__( self, - version, + version: tuple, out, scanner, showast=TREE_DEFAULT_DEBUG, @@ -234,7 +231,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): linestarts={}, tolerate_errors=False, ): - """`version' is the Python version (a float) of the Python dialect + """`version' is the Python version of the Python dialect of both the syntax tree and language we should produce. `out' is IO-like file pointer to where the output should go. It @@ -246,9 +243,12 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): If `showast' is True, we print the syntax tree. - `compile_mode' is is either 'exec' or 'single'. It is the compile - mode that was used to create the Syntax Tree and specifies a - grammar variant within a Python version to use. + `compile_mode` is is either `exec`, `single` or `lambda`. + + For `lambda`, the grammar that can be used in lambda + expressions is used. Otherwise, it is the compile mode that + was used to create the Syntax Tree and specifies a grammar + variant within a Python version to use. `is_pypy` should be True if the Syntax Tree was generated for PyPy. 
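
The start_offset / stop_offset windowing added to code_deparse() in fragments.py above (and mirrored in pysource.py further down) trims the token list before parsing begins. Here is a standalone sketch of that trimming, assuming a token's offset may be either an int or a string such as "12_0"; Tok and its off2int() are stand-ins for the real scanner token class.

from collections import namedtuple


class Tok(namedtuple("Tok", "offset")):
    def off2int(self):
        # Real tokens can carry string offsets like "12_0"; keep the integer part.
        return int(str(self.offset).split("_")[0])


def window_tokens(tokens, start_offset=0, stop_offset=-1):
    if start_offset > 0:
        for i, t in enumerate(tokens):
            # String offsets are skipped when looking for the start.
            if isinstance(t.offset, int) and t.offset >= start_offset:
                tokens = tokens[i:]
                break
    if stop_offset > -1:
        for i, t in enumerate(tokens):
            # For the stop test, string offsets are converted to ints instead.
            if t.off2int() >= stop_offset:
                tokens = tokens[:i]
                break
    return tokens


toks = [Tok(0), Tok(2), Tok("4_0"), Tok(6), Tok(8)]
print([t.offset for t in window_tokens(toks, start_offset=2, stop_offset=8)])
# [2, '4_0', 6]
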
@@ -273,10 +273,8 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): self.currentclass = None self.classes = [] self.debug_parser = dict(debug_parser) - # Initialize p_lambda on demand self.line_number = 1 self.linemap = {} - self.p_lambda = None self.params = params self.param_stack = [] self.ERROR = None @@ -287,11 +285,15 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): self.pending_newlines = 0 self.linestarts = linestarts self.treeTransform = TreeTransform(version=self.version, show_ast=showast) + # FIXME: have p.insts update in a better way # modularity is broken here self.insts = scanner.insts self.offset2inst_index = scanner.offset2inst_index + # Initialize p_lambda on demand + self.p_lambda = None + # This is in Python 2.6 on. It changes the way # strings get interpreted. See n_LOAD_CONST self.FUTURE_UNICODE_LITERALS = False @@ -319,12 +321,13 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): customize_for_version(self, is_pypy, version) return - def maybe_show_tree(self, ast, phase): + def maybe_show_tree(self, tree, phase): if self.showast.get("before", False): self.println( """ ---- end before transform """ + + " " ) if self.showast.get("after", False): self.println( @@ -334,7 +337,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): + " " ) if self.showast.get(phase, False): - maybe_show_tree(self, ast) + maybe_show_tree(self, tree) def str_with_template(self, ast): stream = sys.stdout @@ -396,7 +399,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): def indent_if_source_nl(self, line_number, indent): if line_number != self.line_number: - self.write("\n" + indent + INDENT_PER_LEVEL[:-1]) + self.write("\n" + indent_spaces + INDENT_PER_LEVEL[:-1]) return self.line_number f = property( @@ -518,19 +521,19 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): def pp_tuple(self, tup): """Pretty print a tuple""" last_line = self.f.getvalue().split("\n")[-1] - l = len(last_line) + 1 - indent = " " * l + ll = len(last_line) + 1 + indent = " " * ll self.write("(") sep = "" for item in tup: self.write(sep) - l += len(sep) + ll += len(sep) s = better_repr(item, self.version) - l += len(s) + ll += len(s) self.write(s) sep = "," - if l > LINE_LENGTH: - l = 0 + if ll > LINE_LENGTH: + ll = 0 sep += "\n" + indent else: sep += " " @@ -574,6 +577,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): def print_super_classes3(self, node): n = len(node) - 1 + j = 0 if node.kind != "expr": if node == "kwarg": self.template_engine(("(%[0]{attr}=%c)", 1), node) @@ -611,9 +615,9 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): self.write("(") if kwargs: # Last arg is tuple of keyword values: omit - l = n - 1 + m = n - 1 else: - l = n + m = n if kwargs: # 3.6+ does this @@ -625,7 +629,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): j += 1 j = 0 - while i < l: + while i < m: self.write(sep) value = self.traverse(node[i]) self.write("%s=%s" % (kwargs[j], value)) @@ -633,7 +637,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): j += 1 i += 1 else: - while i < l: + while i < m: value = self.traverse(node[i]) i += 1 self.write(sep, value) @@ -709,9 +713,10 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): """ # print("-----") - # 
print(startnode) + # print(startnode.kind) # print(entry[0]) # print('======') + fmt = entry[0] arg = 1 i = 0 @@ -880,7 +885,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): d = node.__dict__ try: self.write(eval(expr, d, d)) - except: + except Exception: raise m = escape.search(fmt, i) self.write(fmt[i:]) @@ -1104,8 +1109,8 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): # if docstring exists, dump it if code.co_consts and code.co_consts[0] is not None and len(ast) > 0: do_doc = False + i = 0 if is_docstring(ast[0], self.version, code.co_consts): - i = 0 do_doc = True elif len(ast) > 1 and is_docstring(ast[1], self.version, code.co_consts): i = 1 @@ -1201,7 +1206,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): is_lambda=False, noneInNames=False, is_top_level_module=False, - ): + ) -> GenericASTTraversal: # FIXME: DRY with fragments.py # assert isinstance(tokens[0], Token) @@ -1254,7 +1259,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): # Build a parse tree from a tokenized and massaged disassembly. try: # FIXME: have p.insts update in a better way - # modularity is broken here + # Modularity is broken here. p_insts = self.p.insts self.p.insts = self.scanner.insts self.p.offset2inst_index = self.scanner.offset2inst_index @@ -1267,6 +1272,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): checker(ast, False, self.ast_errors) self.customize(customize) + transform_tree = self.treeTransform.transform(ast, code) self.maybe_show_tree(ast, phase="before") @@ -1288,6 +1294,8 @@ def code_deparse( compile_mode="exec", is_pypy=IS_PYPY, walker=SourceWalker, + start_offset: int = 0, + stop_offset: int = -1, ): """ ingests and deparses a given code block 'co'. If version is None, @@ -1296,6 +1304,9 @@ def code_deparse( assert iscode(co) + if out is None: + out = sys.stdout + if version is None: version = PYTHON_VERSION_TRIPLE @@ -1306,6 +1317,21 @@ def code_deparse( co, code_objects=code_objects, show_asm=debug_opts["asm"] ) + if start_offset > 0: + for i, t in enumerate(tokens): + # If t.offset is a string, we want to skip this. + if isinstance(t.offset, int) and t.offset >= start_offset: + tokens = tokens[i:] + break + + if stop_offset > -1: + for i, t in enumerate(tokens): + # In contrast to the test for start_offset If t.offset is + # a string, we want to extract the integer offset value. + if t.off2int() >= stop_offset: + tokens = tokens[:i] + break + debug_parser = debug_opts.get("grammar", dict(PARSER_DEFAULT_DEBUG)) # Build Syntax Tree from disassembly. @@ -1329,7 +1355,7 @@ def code_deparse( tokens, customize, co, - is_lambda=(compile_mode == "lambda"), + is_lambda=is_lambda_mode(compile_mode), is_top_level_module=is_top_level_module, ) @@ -1338,7 +1364,7 @@ def code_deparse( return None # FIXME use a lookup table here. 
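
The FIXME above asks for a lookup table in place of the compile_mode chain that follows. One way it could look, sketched under the assumption that only the "lambda" and "eval" mappings visible in this hunk are certain; expected_start_for() and its default is_lambda_mode stand-in are illustrative, not the module's actual code.

_EXPECTED_START = {
    "eval": "expr_start",
    # Other modes ("exec", "single", ...) would be listed here; their start
    # symbols are not shown in this hunk, so they are omitted rather than guessed.
}


def expected_start_for(compile_mode, is_lambda_mode=lambda mode: mode == "lambda"):
    """Return the grammar start symbol to assert on, or None to skip the check."""
    if is_lambda_mode(compile_mode):
        return "lambda_start"
    return _EXPECTED_START.get(compile_mode)


print(expected_start_for("lambda"))  # lambda_start
print(expected_start_for("expr"))    # None, so the deparsed.ast assertion is skipped
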
- if compile_mode == "lambda": + if is_lambda_mode(compile_mode): expected_start = "lambda_start" elif compile_mode == "eval": expected_start = "expr_start" @@ -1351,6 +1377,7 @@ def code_deparse( expected_start = None else: expected_start = None + if expected_start: assert ( deparsed.ast == expected_start @@ -1397,7 +1424,7 @@ def code_deparse( deparsed.ast, name=co.co_name, customize=customize, - is_lambda=compile_mode == "lambda", + is_lambda=is_lambda_mode(compile_mode), debug_opts=debug_opts, ) @@ -1425,9 +1452,12 @@ def deparse_code2str( compile_mode="exec", is_pypy=IS_PYPY, walker=SourceWalker, -): - """Return the deparsed text for a Python code object. `out` is where any intermediate - output for assembly or tree output will be sent. + start_offset: int = 0, + stop_offset: int = -1, +) -> str: + """ + Return the deparsed text for a Python code object. `out` is where + any intermediate output for assembly or tree output will be sent. """ return code_deparse( code, diff --git a/uncompyle6/semantics/transform.py b/uncompyle6/semantics/transform.py index e3f96c83..b646071b 100644 --- a/uncompyle6/semantics/transform.py +++ b/uncompyle6/semantics/transform.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023 by Rocky Bernstein +# Copyright (c) 2019-2024 by Rocky Bernstein # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -13,14 +13,15 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from uncompyle6.show import maybe_show_tree from copy import copy + from spark_parser import GenericASTTraversal, GenericASTTraversalPruningException -from uncompyle6.semantics.helper import find_code_node from uncompyle6.parsers.treenode import SyntaxTree from uncompyle6.scanners.tok import NoneToken, Token -from uncompyle6.semantics.consts import RETURN_NONE, ASSIGN_DOC_STRING +from uncompyle6.semantics.consts import ASSIGN_DOC_STRING, RETURN_NONE +from uncompyle6.semantics.helper import find_code_node +from uncompyle6.show import maybe_show_tree def is_docstring(node, version, co_consts): @@ -55,27 +56,34 @@ def is_docstring(node, version, co_consts): return node == ASSIGN_DOC_STRING(co_consts[0], doc_load) -def is_not_docstring(call_stmt_node): +def is_not_docstring(call_stmt_node) -> bool: try: return ( call_stmt_node == "call_stmt" and call_stmt_node[0][0] == "LOAD_STR" and call_stmt_node[1] == "POP_TOP" ) - except: + except Exception: return False class TreeTransform(GenericASTTraversal, object): - def __init__(self, version, show_ast=None, is_pypy=False): + def __init__( + self, + version: tuple, + is_pypy=False, + show_ast=None, + ): self.version = version self.showast = show_ast self.is_pypy = is_pypy return - def maybe_show_tree(self, ast): - if isinstance(self.showast, dict) and self.showast: - maybe_show_tree(self, ast) + def maybe_show_tree(self, tree): + if isinstance(self.showast, dict) and ( + self.showast.get("before") or self.showast.get("after") + ): + maybe_show_tree(self, tree) def preorder(self, node=None): """Walk the tree in roughly 'preorder' (a bit of a lie explained below). 
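
TreeTransform.maybe_show_tree() above now displays a tree only when showast is a dict with a truthy "before" or "after" entry, so a bare boolean no longer triggers output. A small standalone restatement of that gate (should_show_tree is written here purely for illustration):

def should_show_tree(showast):
    # Mirrors the condition in TreeTransform.maybe_show_tree() above.
    return isinstance(showast, dict) and bool(
        showast.get("before") or showast.get("after")
    )


assert should_show_tree({"before": True, "after": False})
assert not should_show_tree({"before": False, "after": False})
assert not should_show_tree(True)  # plain booleans no longer count
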
@@ -119,12 +127,10 @@ class TreeTransform(GenericASTTraversal, object): mkfunc_pattr = node[-1].pattr if isinstance(mkfunc_pattr, tuple): + assert isinstance(mkfunc_pattr, tuple) assert len(mkfunc_pattr) == 4 and isinstance(mkfunc_pattr, int) - if ( - len(code.co_consts) > 0 - and isinstance(code.co_consts[0], str) - ): + if len(code.co_consts) > 0 and isinstance(code.co_consts[0], str): docstring_node = SyntaxTree( "docstring", [Token("LOAD_STR", has_arg=True, pattr=code.co_consts[0])] ) @@ -136,7 +142,7 @@ class TreeTransform(GenericASTTraversal, object): def n_ifstmt(self, node): """Here we check if we can turn an `ifstmt` or 'iflaststmtl` into - some kind of `assert` statement""" + some kind of `assert` statement""" testexpr = node[0] @@ -148,7 +154,11 @@ class TreeTransform(GenericASTTraversal, object): if ifstmts_jump == "_ifstmts_jumpl" and ifstmts_jump[0] == "_ifstmts_jump": ifstmts_jump = ifstmts_jump[0] - elif ifstmts_jump not in ("_ifstmts_jump", "_ifstmts_jumpl", "ifstmts_jumpl"): + elif ifstmts_jump not in ( + "_ifstmts_jump", + "_ifstmts_jumpl", + "ifstmts_jumpl", + ): return node stmts = ifstmts_jump[0] else: @@ -208,10 +218,11 @@ class TreeTransform(GenericASTTraversal, object): kind = "assert2not" LOAD_ASSERT = call[0].first_child() - if LOAD_ASSERT not in ( "LOAD_ASSERT", "LOAD_GLOBAL"): + if LOAD_ASSERT not in ("LOAD_ASSERT", "LOAD_GLOBAL"): return node if isinstance(call[1], SyntaxTree): expr = call[1][0] + assert_expr.transformed_by = "n_ifstmt" node = SyntaxTree( kind, [ @@ -221,8 +232,8 @@ class TreeTransform(GenericASTTraversal, object): expr, RAISE_VARARGS_1, ], + transformed_by="n_ifstmt", ) - node.transformed_by = "n_ifstmt" pass pass else: @@ -250,9 +261,10 @@ class TreeTransform(GenericASTTraversal, object): LOAD_ASSERT = expr[0] node = SyntaxTree( - kind, [assert_expr, jump_cond, LOAD_ASSERT, RAISE_VARARGS_1] + kind, + [assert_expr, jump_cond, LOAD_ASSERT, RAISE_VARARGS_1], + transformed_by="n_ifstmt", ) - node.transformed_by = ("n_ifstmt",) pass pass return node @@ -289,7 +301,12 @@ class TreeTransform(GenericASTTraversal, object): len_n = len(n) # Sometimes stmt is reduced away and n[0] can be a single reduction like continue -> CONTINUE. 
- if len_n == 1 and isinstance(n[0], SyntaxTree) and len(n[0]) == 1 and n[0] == "stmt": + if ( + len_n == 1 + and isinstance(n[0], SyntaxTree) + and len(n[0]) == 1 + and n[0] == "stmt" + ): n = n[0][0] elif len_n == 0: return node @@ -407,23 +424,27 @@ class TreeTransform(GenericASTTraversal, object): list_for_node.transformed_by = ("n_list_for",) return list_for_node + def n_negated_testtrue(self, node): + assert node[0] == "testtrue" + test_node = node[0][0] + test_node.transformed_by = "n_negated_testtrue" + return test_node + def n_stmts(self, node): if node.first_child() == "SETUP_ANNOTATIONS": prev = node[0][0] new_stmts = [node[0]] for i, sstmt in enumerate(node[1:]): ann_assign = sstmt[0] - if ( - ann_assign == "ann_assign" - and prev == "assign" - ): + if ann_assign == "ann_assign" and prev == "assign": annotate_var = ann_assign[-2] if annotate_var.attr == prev[-1][0].attr: node[i].kind = "deleted " + node[i].kind del new_stmts[-1] ann_assign_init = SyntaxTree( - "ann_assign_init", [ann_assign[0], copy(prev[0]), annotate_var] - ) + "ann_assign_init", + [ann_assign[0], copy(prev[0]), annotate_var], + ) if sstmt[0] == "ann_assign": sstmt[0] = ann_assign_init else: @@ -441,26 +462,28 @@ class TreeTransform(GenericASTTraversal, object): node = self.preorder(node) return node - def transform(self, ast, code): - self.maybe_show_tree(ast) - self.ast = copy(ast) + def transform(self, parse_tree: GenericASTTraversal, code) -> GenericASTTraversal: + self.maybe_show_tree(parse_tree) + self.ast = copy(parse_tree) + del parse_tree self.ast = self.traverse(self.ast, is_lambda=False) + n = len(self.ast) try: # Disambiguate a string (expression) which appears as a "call_stmt" at # the beginning of a function versus a docstring. Seems pretty academic, # but this is Python. - call_stmt = ast[0][0] + call_stmt = self.ast[0][0] if is_not_docstring(call_stmt): call_stmt.kind = "string_at_beginning" call_stmt.transformed_by = "transform" pass - except: + except Exception: pass try: - for i in range(len(self.ast)): - sstmt = ast[i] + for i in range(n): + sstmt = self.ast[i] if len(sstmt) == 1 and sstmt == "sstmt": self.ast[i] = self.ast[i][0] @@ -486,7 +509,7 @@ class TreeTransform(GenericASTTraversal, object): if self.ast[-1] == RETURN_NONE: self.ast.pop() # remove last node # todo: if empty, add 'pass' - except: + except Exception: pass return self.ast
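
Taken together, the new keyword arguments thread from the command line down through main() and decompile_file() to code_deparse(). A hedged usage sketch follows; the .pyc path is hypothetical, and only keywords whose names appear in the signatures added above are used.

import sys

from uncompyle6.main import decompile_file

# Decompile a bytecode file to stdout, restricting deparsing to a window of
# bytecode offsets; stop_offset=-1 means "run to the end".
decompile_file(
    "example.cpython-33.pyc",  # hypothetical input path
    outstream=sys.stdout,
    start_offset=0,
    stop_offset=-1,
)

With do_verify set in main(), each written file is then either re-parsed with ast.parse() (the syntax check) or executed in a subprocess (the "run" mode), and failures are tallied in verify_failed_files.
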