diff --git a/__pkginfo__.py b/__pkginfo__.py index 723c15a3..98fa63d5 100644 --- a/__pkginfo__.py +++ b/__pkginfo__.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018, 2020-2021 Rocky Bernstein +# Copyright (C) 2018, 2020-2021, 2024 Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -32,9 +32,11 @@ # 3.3 | pip | 10.0.1 | # 3.4 | pip | 19.1.1 | +import os.path as osp + # Things that change more often go here. copyright = """ -Copyright (C) 2015-2021 Rocky Bernstein . +Copyright (C) 2015-2021, 2024 Rocky Bernstein . """ classifiers = [ @@ -75,7 +77,7 @@ entry_points = { ] } ftp_url = None -install_requires = ["spark-parser >= 1.8.9, < 1.9.0", "xdis >= 6.0.8, < 6.2.0"] +install_requires = ["click", "spark-parser >= 1.8.9, < 1.9.0", "xdis >= 6.0.8, < 6.2.0"] license = "GPL3" mailing_list = "python-debugger@googlegroups.com" @@ -88,21 +90,18 @@ web = "https://github.com/rocky/python-uncompyle6/" zip_safe = True -import os.path - - def get_srcdir(): - filename = os.path.normcase(os.path.dirname(os.path.abspath(__file__))) - return os.path.realpath(filename) + filename = osp.normcase(osp.dirname(osp.abspath(__file__))) + return osp.realpath(filename) srcdir = get_srcdir() def read(*rnames): - return open(os.path.join(srcdir, *rnames)).read() + return open(osp.join(srcdir, *rnames)).read() -# Get info from files; set: long_description and __version__ +# Get info from files; set: long_description and VERSION long_description = read("README.rst") + "\n" exec(read("uncompyle6/version.py")) diff --git a/admin-tools/merge-for-2.4.sh b/admin-tools/merge-for-2.4.sh new file mode 100755 index 00000000..f8c55395 --- /dev/null +++ b/admin-tools/merge-for-2.4.sh @@ -0,0 +1,5 @@ +#!/bin/bash +cd $(dirname ${BASH_SOURCE[0]}) +if . 
./setup-python-2.4.sh; then + git merge python-3.0-to-3.2 +fi diff --git a/admin-tools/merge-for-3.0.sh b/admin-tools/merge-for-3.0.sh new file mode 100755 index 00000000..7fc1a596 --- /dev/null +++ b/admin-tools/merge-for-3.0.sh @@ -0,0 +1,5 @@ +#!/bin/bash +cd $(dirname ${BASH_SOURCE[0]}) +if . ./setup-python-3.0.sh; then + git merge python-3.3-to-3.5 +fi diff --git a/admin-tools/merge-for-3.3.sh b/admin-tools/merge-for-3.3.sh new file mode 100755 index 00000000..aade2e77 --- /dev/null +++ b/admin-tools/merge-for-3.3.sh @@ -0,0 +1,5 @@ +#!/bin/bash +cd $(dirname ${BASH_SOURCE[0]}) +if . ./setup-python-3.3.sh; then + git merge master +fi diff --git a/uncompyle6/bin/pydisassemble.py b/uncompyle6/bin/pydisassemble.py index be1cb152..e40e0dfc 100755 --- a/uncompyle6/bin/pydisassemble.py +++ b/uncompyle6/bin/pydisassemble.py @@ -1,9 +1,21 @@ #!/usr/bin/env python -# Mode: -*- python -*- # -# Copyright (c) 2015-2016, 2018, 2020, 2022-2023 by Rocky Bernstein +# Copyright (c) 2015-2016, 2018, 2020, 2022-2024 +# by Rocky Bernstein +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . # -from __future__ import print_function import getopt import os @@ -51,15 +63,14 @@ PATTERNS = ("*.pyc", "*.pyo") def main(): - Usage_short = ( - """usage: %s FILE... + usage_short = ( + f"""usage: {program} FILE... 
Type -h for for full help.""" - % program ) if len(sys.argv) == 1: print("No file(s) given", file=sys.stderr) - print(Usage_short, file=sys.stderr) + print(usage_short, file=sys.stderr) sys.exit(1) try: @@ -67,7 +78,7 @@ Type -h for for full help.""" sys.argv[1:], "hVU", ["help", "version", "uncompyle6"] ) except getopt.GetoptError as e: - print("%s: %s" % (os.path.basename(sys.argv[0]), e), file=sys.stderr) + print(f"{os.path.basename(sys.argv[0])}: {e}", file=sys.stderr) sys.exit(-1) for opt, val in opts: @@ -75,18 +86,18 @@ Type -h for for full help.""" print(__doc__) sys.exit(1) elif opt in ("-V", "--version"): - print("%s %s" % (program, __version__)) + print(f"{program} {__version__}") sys.exit(0) else: print(opt) - print(Usage_short, file=sys.stderr) + print(usage_short, file=sys.stderr) sys.exit(1) for file in files: if os.path.exists(files[0]): disassemble_file(file, sys.stdout) else: - print("Can't read %s - skipping" % files[0], file=sys.stderr) + print(f"Can't read {files[0]} - skipping", file=sys.stderr) pass pass return diff --git a/uncompyle6/bin/uncompile.py b/uncompyle6/bin/uncompile.py index e1ae53ff..db395b02 100755 --- a/uncompyle6/bin/uncompile.py +++ b/uncompyle6/bin/uncompile.py @@ -1,188 +1,179 @@ #!/usr/bin/env python # Mode: -*- python -*- # -# Copyright (c) 2015-2017, 2019-2020, 2023 by Rocky Bernstein +# Copyright (c) 2015-2017, 2019-2020, 2023-2024 +# by Rocky Bernstein # Copyright (c) 2000-2002 by hartmut Goebel # -from __future__ import print_function -import getopt import os import sys import time -from uncompyle6 import verify +import click +from xdis.version_info import version_tuple_to_str + from uncompyle6.main import main, status_msg from uncompyle6.version import __version__ program = "uncompyle6" -__doc__ = """ -Usage: - %s [OPTIONS]... [ FILE | DIR]... - %s [--help | -h | --V | --version] - -Examples: - %s foo.pyc bar.pyc # decompile foo.pyc, bar.pyc to stdout - %s -o . 
foo.pyc bar.pyc # decompile to ./foo.pyc_dis and ./bar.pyc_dis - %s -o /tmp /usr/lib/python1.5 # decompile whole library - -Options: - -o output decompiled files to this path: - if multiple input files are decompiled, the common prefix - is stripped from these names and the remainder appended to - - uncompyle6 -o /tmp bla/fasel.pyc bla/foo.pyc - -> /tmp/fasel.pyc_dis, /tmp/foo.pyc_dis - uncompyle6 -o /tmp bla/fasel.pyc bar/foo.pyc - -> /tmp/bla/fasel.pyc_dis, /tmp/bar/foo.pyc_dis - uncompyle6 -o /tmp /usr/lib/python1.5 - -> /tmp/smtplib.pyc_dis ... /tmp/lib-tk/FixTk.pyc_dis - --compile | -c - attempts a decompilation after compiling - -d print timestamps - -p use number of processes - -r recurse directories looking for .pyc and .pyo files - --fragments use fragments deparser - --verify compare generated source with input byte-code - --verify-run compile generated source, run it and check exit code - --syntax-verify compile generated source - --linemaps generated line number correspondencies between byte-code - and generated source output - --encoding - use in generated source according to pep-0263 - --help show this message - -Debugging Options: - --asm | -a include byte-code (disables --verify) - --grammar | -g show matching grammar - --tree={before|after} - -t {before|after} include syntax before (or after) tree transformation - (disables --verify) - --tree++ | -T add template rules to --tree=before when possible - -Extensions of generated files: - '.pyc_dis' '.pyo_dis' successfully decompiled (and verified if --verify) - + '_unverified' successfully decompile but --verify failed - + '_failed' decompile failed (contact author for enhancement) -""" % ( - (program,) * 5 -) - -program = "uncompyle6" def usage(): print(__doc__) sys.exit(1) -def main_bin(): - if not ( - sys.version_info[0:2] - in ( - (2, 4), - (2, 5), - (2, 6), - (2, 7), - (3, 0), - (3, 1), - (3, 2), - (3, 3), - (3, 4), - (3, 5), - (3, 6), - (3, 7), - (3, 8), - (3, 9), - (3, 10), - (3, 11), +# __doc__ 
= """ +# Usage: +# %s [OPTIONS]... [ FILE | DIR]... +# %s [--help | -h | --V | --version] + +# Examples: +# %s foo.pyc bar.pyc # decompile foo.pyc, bar.pyc to stdout +# %s -o . foo.pyc bar.pyc # decompile to ./foo.pyc_dis and ./bar.pyc_dis +# %s -o /tmp /usr/lib/python1.5 # decompile whole library + +# Options: +# -o output decompiled files to this path: +# if multiple input files are decompiled, the common prefix +# is stripped from these names and the remainder appended to +# +# uncompyle6 -o /tmp bla/fasel.pyc bla/foo.pyc +# -> /tmp/fasel.pyc_dis, /tmp/foo.pyc_dis +# uncompyle6 -o /tmp bla/fasel.pyc bar/foo.pyc +# -> /tmp/bla/fasel.pyc_dis, /tmp/bar/foo.pyc_dis +# uncompyle6 -o /tmp /usr/lib/python1.5 +# -> /tmp/smtplib.pyc_dis ... /tmp/lib-tk/FixTk.pyc_dis +# --compile | -c +# attempts a decompilation after compiling +# -d print timestamps +# -p use number of processes +# -r recurse directories looking for .pyc and .pyo files +# --fragments use fragments deparser +# --verify compare generated source with input byte-code +# --verify-run compile generated source, run it and check exit code +# --syntax-verify compile generated source +# --linemaps generated line number correspondencies between byte-code +# and generated source output +# --encoding +# use in generated source according to pep-0263 +# --help show this message + +# Debugging Options: +# --asm | -a include byte-code (disables --verify) +# --grammar | -g show matching grammar +# --tree={before|after} +# -t {before|after} include syntax before (or after) tree transformation +# (disables --verify) +# --tree++ | -T add template rules to --tree=before when possible + +# Extensions of generated files: +# '.pyc_dis' '.pyo_dis' successfully decompiled (and verified if --verify) +# + '_unverified' successfully decompile but --verify failed +# + '_failed' decompile failed (contact author for enhancement) +# """ % ( +# (program,) * 5 +# ) + + +@click.command() +@click.option( + "--asm++/--no-asm++", + "-A", + 
"asm_plus", + default=False, + help="show xdis assembler and tokenized assembler", +) +@click.option("--asm/--no-asm", "-a", default=False) +@click.option("--grammar/--no-grammar", "-g", "show_grammar", default=False) +@click.option("--tree/--no-tree", "-t", default=False) +@click.option( + "--tree++/--no-tree++", + "-T", + "tree_plus", + default=False, + help="show parse tree and Abstract Syntax Tree", +) +@click.option( + "--linemaps/--no-linemaps", + default=False, + help="show line number correspondences between byte-code " + "and generated source output", +) +@click.option( + "--verify", + type=click.Choice(["run", "syntax"]), + default=None, +) +@click.option( + "--recurse/--no-recurse", + "-r", + "recurse_dirs", + default=False, +) +@click.option( + "--output", + "-o", + "outfile", + type=click.Path( + exists=True, file_okay=True, dir_okay=True, writable=True, resolve_path=True + ), + required=False, +) +@click.version_option(version=__version__) +@click.option( + "--start-offset", + "start_offset", + default=0, + help="start decompilation at offset; default is 0 or the starting offset.", +) +@click.version_option(version=__version__) +@click.option( + "--stop-offset", + "stop_offset", + default=-1, + help="stop decompilation when seeing an offset greater or equal to this; default is " + "-1 which indicates no stopping point.", +) +@click.argument("files", nargs=-1, type=click.Path(readable=True), required=True) +def main_bin( + asm: bool, + asm_plus: bool, + show_grammar, + tree: bool, + tree_plus: bool, + linemaps: bool, + verify, + recurse_dirs: bool, + outfile, + start_offset: int, + stop_offset: int, + files, +): + """ + Cross Python bytecode decompiler for Python bytecode up to Python 3.8. + """ + + version_tuple = sys.version_info[0:2] + if not ((3, 3) <= version_tuple < (3, 6)): + print( + f"Error: This version of the {program} runs from Python 3.3 to 3.5. " + "You need another branch of this code for other Python versions." 
+ " \n\tYou have version: %s." % version_tuple_to_str() ) - ): - print("Error: %s requires Python 2.4-3.11" % program, file=sys.stderr) sys.exit(-1) - recurse_dirs = False + numproc = 0 - outfile = "-" + out_base = None + out_base = None source_paths = [] timestamp = False timestampfmt = "# %Y.%m.%d %H:%M:%S %Z" + pyc_paths = files - try: - opts, pyc_paths = getopt.getopt( - sys.argv[1:], - "hac:gtTdrVo:p:", - "help asm compile= grammar linemaps recurse " - "timestamp tree= tree+ " - "fragments verify verify-run version " - "syntax-verify " - "showgrammar encoding=".split(" "), - ) - except getopt.GetoptError as e: - print("%s: %s" % (os.path.basename(sys.argv[0]), e), file=sys.stderr) - sys.exit(-1) - - options = { - "showasm": None - } - for opt, val in opts: - if opt in ("-h", "--help"): - print(__doc__) - sys.exit(0) - elif opt in ("-V", "--version"): - print("%s %s" % (program, __version__)) - sys.exit(0) - elif opt == "--verify": - options["do_verify"] = "strong" - elif opt == "--syntax-verify": - options["do_verify"] = "weak" - elif opt == "--fragments": - options["do_fragments"] = True - elif opt == "--verify-run": - options["do_verify"] = "verify-run" - elif opt == "--linemaps": - options["do_linemaps"] = True - elif opt in ("--asm", "-a"): - if options["showasm"] == None: - options["showasm"] = "after" - else: - options["showasm"] = "both" - options["do_verify"] = None - elif opt in ("--tree", "-t"): - if "showast" not in options: - options["showast"] = {} - if val == "before": - options["showast"][val] = True - elif val == "after": - options["showast"][val] = True - else: - options["showast"]["before"] = True - options["do_verify"] = None - elif opt in ("--tree+", "-T"): - if "showast" not in options: - options["showast"] = {} - options["showast"]["after"] = True - options["showast"]["before"] = True - options["do_verify"] = None - elif opt in ("--grammar", "-g"): - options["showgrammar"] = True - elif opt == "-o": - outfile = val - elif opt in 
("--timestamp", "-d"): - timestamp = True - elif opt in ("--compile", "-c"): - source_paths.append(val) - elif opt == "-p": - numproc = int(val) - elif opt in ("--recurse", "-r"): - recurse_dirs = True - elif opt == "--encoding": - options["source_encoding"] = val - else: - print(opt, file=sys.stderr) - usage() - - # expand directory if specified + # Expand directory if "recurse" was specified. if recurse_dirs: expanded_files = [] for f in pyc_paths: @@ -216,15 +207,32 @@ def main_bin(): out_base = outfile outfile = None + # A second -a turns show_asm="after" into show_asm="before" + if asm_plus or asm: + asm_opt = "both" if asm_plus else "after" + else: + asm_opt = None + if timestamp: print(time.strftime(timestampfmt)) if numproc <= 1: + show_ast = {"before": tree or tree_plus, "after": tree_plus} try: result = main( - src_base, out_base, pyc_paths, source_paths, outfile, **options + src_base, + out_base, + pyc_paths, + source_paths, + outfile, + showasm=asm_opt, + showgrammar=show_grammar, + showast=show_ast, + do_verify=verify, + do_linemaps=linemaps, + start_offset=start_offset, + stop_offset=stop_offset, ) - result = [options.get("do_verify", None)] + list(result) if len(pyc_paths) > 1: mess = status_msg(*result) print("# " + mess) diff --git a/uncompyle6/main.py b/uncompyle6/main.py index a745df15..0465a1ec 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -13,10 +13,14 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +import ast import datetime import os +import os.path as osp import py_compile +import subprocess import sys +import tempfile from xdis import iscode from xdis.load import load_module @@ -37,9 +41,9 @@ def _get_outstream(outfile): """ Return an opened output file descriptor for ``outfile``. 
""" - dir_name = os.path.dirname(outfile) + dir_name = osp.dirname(outfile) failed_file = outfile + "_failed" - if os.path.exists(failed_file): + if osp.exists(failed_file): os.remove(failed_file) try: os.makedirs(dir_name) @@ -48,6 +52,17 @@ def _get_outstream(outfile): return open(outfile, mode="w", encoding="utf-8") +def syntax_check(filename: str) -> bool: + with open(filename) as f: + source = f.read() + valid = True + try: + ast.parse(source) + except SyntaxError: + valid = False + return valid + + def decompile( co, bytecode_version=PYTHON_VERSION_TRIPLE, @@ -59,11 +74,13 @@ def decompile( source_encoding=None, code_objects={}, source_size=None, - is_pypy=False, + is_pypy: bool = False, magic_int=None, mapstream=None, do_fragments=False, compile_mode="exec", + start_offset: int = 0, + stop_offset: int = -1, ): """ ingests and deparses a given code block 'co' @@ -131,11 +148,12 @@ def decompile( debug_opts=debug_opts, ) header_count = 3 + len(sys_version_lines) - linemap = [ - (line_no, deparsed.source_linemap[line_no] + header_count) - for line_no in sorted(deparsed.source_linemap.keys()) - ] - mapstream.write("\n\n# %s\n" % linemap) + if deparsed is not None: + linemap = [ + (line_no, deparsed.source_linemap[line_no] + header_count) + for line_no in sorted(deparsed.source_linemap.keys()) + ] + mapstream.write("\n\n# %s\n" % linemap) else: if do_fragments: deparse_fn = code_deparse_fragments @@ -148,8 +166,11 @@ def decompile( is_pypy=is_pypy, debug_opts=debug_opts, compile_mode=compile_mode, + start_offset=start_offset, + stop_offset=stop_offset, ) pass + real_out.write("\n") return deparsed except pysource.SourceWalkerError as e: # deparsing failed @@ -173,7 +194,7 @@ def compile_file(source_path): def decompile_file( - filename, + filename: str, outstream=None, showasm=None, showast={}, @@ -181,6 +202,8 @@ def decompile_file( source_encoding=None, mapstream=None, do_fragments=False, + start_offset=0, + stop_offset=-1, ): """ decompile Python byte-code 
file (.pyc). Return objects to @@ -210,6 +233,8 @@ def decompile_file( is_pypy=is_pypy, magic_int=magic_int, mapstream=mapstream, + start_offset=start_offset, + stop_offset=stop_offset, ), ) else: @@ -230,6 +255,8 @@ def decompile_file( mapstream=mapstream, do_fragments=do_fragments, compile_mode="exec", + start_offset=start_offset, + stop_offset=stop_offset, ) ] return deparsed @@ -244,10 +271,13 @@ def main( outfile=None, showasm=None, showast={}, - showgrammar=False, + do_verify=None, + showgrammar: bool = False, source_encoding=None, do_linemaps=False, do_fragments=False, + start_offset: int = 0, + stop_offset: int = -1, ): """ in_base base directory for input files @@ -260,7 +290,8 @@ def main( - files below out_base out_base=... - stdout out_base=None, outfile=None """ - tot_files = okay_files = failed_files = verify_failed_files = 0 + tot_files = okay_files = failed_files = 0 + verify_failed_files = 0 if do_verify else 0 current_outfile = outfile linemap_stream = None @@ -268,9 +299,9 @@ def main( compiled_files.append(compile_file(source_path)) for filename in compiled_files: - infile = os.path.join(in_base, filename) + infile = osp.join(in_base, filename) # print("XXX", infile) - if not os.path.exists(infile): + if not osp.exists(infile): sys.stderr.write("File '%s' doesn't exist. 
Skipped\n" % infile) continue @@ -283,14 +314,19 @@ def main( if outfile: # outfile was given as parameter outstream = _get_outstream(outfile) elif out_base is None: - outstream = sys.stdout + out_base = tempfile.mkdtemp(prefix="py-dis-") + if do_verify and filename.endswith(".pyc"): + current_outfile = osp.join(out_base, filename[0:-1]) + outstream = open(current_outfile, "w") + else: + outstream = sys.stdout if do_linemaps: linemap_stream = sys.stdout else: if filename.endswith(".pyc"): - current_outfile = os.path.join(out_base, filename[0:-1]) + current_outfile = osp.join(out_base, filename[0:-1]) else: - current_outfile = os.path.join(out_base, filename) + "_dis" + current_outfile = osp.join(out_base, filename) + "_dis" pass pass @@ -298,9 +334,9 @@ def main( # print(current_outfile, file=sys.stderr) - # Try to uncompile the input file + # Try to decompile the input file. try: - deparsed = decompile_file( + deparsed_objects = decompile_file( infile, outstream, showasm, @@ -309,11 +345,13 @@ def main( source_encoding, linemap_stream, do_fragments, + start_offset, + stop_offset, ) if do_fragments: - for d in deparsed: + for deparsed_object in deparsed_objects: last_mod = None - offsets = d.offsets + offsets = deparsed_object.offsets for e in sorted( [k for k in offsets.keys() if isinstance(k[1], int)] ): @@ -322,11 +360,58 @@ def main( outstream.write("%s\n%s\n%s\n" % (line, e[0], line)) last_mod = e[0] info = offsets[e] - extract_info = d.extract_node_info(info) + extract_info = deparsed_object.extract_node_info(info) outstream.write("%s" % info.node.format().strip() + "\n") outstream.write(extract_info.selectedLine + "\n") outstream.write(extract_info.markerLine + "\n\n") pass + + if do_verify: + for deparsed_object in deparsed_objects: + deparsed_object.f.close() + if PYTHON_VERSION_TRIPLE[:2] != deparsed_object.version[:2]: + sys.stdout.write( + "\n# skipping running %s; it is %s and we are %s" + % ( + deparsed_object.f.name, + 
version_tuple_to_str(deparsed_object.version, end=2), + version_tuple_to_str(PYTHON_VERSION_TRIPLE, end=2), + ) + ) + else: + check_type = "syntax check" + if do_verify == "run": + check_type = "run" + if PYTHON_VERSION_TRIPLE >= (3, 7): + result = subprocess.run( + [sys.executable, deparsed_object.f.name], + capture_output=True, + ) + valid = result.returncode == 0 + output = result.stdout.decode() + if output: + print(output) + pass + else: + result = subprocess.run( + [sys.executable, deparsed_object.f.name], + ) + valid = result.returncode == 0 + pass + if not valid: + print(result.stderr.decode()) + + else: + valid = syntax_check(deparsed_object.f.name) + + if not valid: + verify_failed_files += 1 + sys.stderr.write( + "\n# %s failed on file %s\n" + % (check_type, deparsed_object.f.name) + ) + + # sys.stderr.write("Ran %\n" % deparsed_object.f.name) pass tot_files += 1 except (ValueError, SyntaxError, ParserError, pysource.SourceWalkerError) as e: diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 813f6660..f25ae5d7 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -27,22 +27,24 @@ that a later phase can turn into a sequence of ASCII text. 
""" import re -from uncompyle6.scanners.tok import Token + +from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG + from uncompyle6.parser import PythonParser, PythonParserSingle, nop_func from uncompyle6.parsers.reducecheck import ( and_invalid, except_handler_else, ifelsestmt, - ifstmt, iflaststmt, + ifstmt, or_check, testtrue, tryelsestmtl3, tryexcept, - while1stmt + while1stmt, ) from uncompyle6.parsers.treenode import SyntaxTree -from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG +from uncompyle6.scanners.tok import Token class Python3Parser(PythonParser): @@ -98,7 +100,7 @@ class Python3Parser(PythonParser): """ def p_dict_comp3(self, args): - """" + """ " expr ::= dict_comp stmt ::= dict_comp_func dict_comp_func ::= BUILD_MAP_0 LOAD_ARG FOR_ITER store @@ -519,7 +521,7 @@ class Python3Parser(PythonParser): expr call CALL_FUNCTION_3 - """ + """ # FIXME: I bet this can be simplified # look for next MAKE_FUNCTION for i in range(i + 1, len(tokens)): @@ -625,7 +627,11 @@ class Python3Parser(PythonParser): self.add_unique_rule(rule, token.kind, uniq_param, customize) if "LOAD_BUILD_CLASS" in self.seen_ops: - if next_token == "CALL_FUNCTION" and next_token.attr == 1 and pos_args_count > 1: + if ( + next_token == "CALL_FUNCTION" + and next_token.attr == 1 + and pos_args_count > 1 + ): rule = "classdefdeco2 ::= LOAD_BUILD_CLASS mkfunc %s%s_%d" % ( ("expr " * (pos_args_count - 1)), opname, @@ -764,18 +770,24 @@ class Python3Parser(PythonParser): elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"): if opname == "BUILD_CONST_DICT": - rule = """ + rule = ( + """ add_consts ::= ADD_VALUE* const_list ::= COLLECTION_START add_consts %s dict ::= const_list expr ::= dict - """ % opname + """ + % opname + ) else: - rule = """ + rule = ( + """ add_consts ::= ADD_VALUE* const_list ::= COLLECTION_START add_consts %s expr ::= const_list - """ % opname + """ + % opname + ) self.addRule(rule, nop_func) elif 
opname.startswith("BUILD_DICT_OLDER"): @@ -854,18 +866,24 @@ class Python3Parser(PythonParser): elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"): if opname == "BUILD_CONST_DICT": - rule = """ + rule = ( + """ add_consts ::= ADD_VALUE* const_list ::= COLLECTION_START add_consts %s dict ::= const_list expr ::= dict - """ % opname + """ + % opname + ) else: - rule = """ + rule = ( + """ add_consts ::= ADD_VALUE* const_list ::= COLLECTION_START add_consts %s expr ::= const_list - """ % opname + """ + % opname + ) self.addRule(rule, nop_func) elif opname_base in ( @@ -946,7 +964,6 @@ class Python3Parser(PythonParser): "CALL_FUNCTION_VAR_KW", ) ) or opname.startswith("CALL_FUNCTION_KW"): - if opname == "CALL_FUNCTION" and token.attr == 1: rule = """ dict_comp ::= LOAD_DICTCOMP LOAD_STR MAKE_FUNCTION_0 expr @@ -1122,7 +1139,8 @@ class Python3Parser(PythonParser): if has_get_iter_call_function1: rule_pat = ( "generator_exp ::= %sload_closure load_genexpr %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname) + "GET_ITER CALL_FUNCTION_1" + % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule(rule_pat, opname, token.attr, customize) @@ -1190,6 +1208,8 @@ class Python3Parser(PythonParser): self.add_unique_rule(rule, opname, token.attr, customize) elif (3, 3) <= self.version < (3, 6): + # FIXME move this into version-specific custom rules. + # In fact, some of this has been done for 3.3. 
if annotate_args > 0: rule = ( "mkfunc_annotate ::= %s%s%sannotate_tuple load_closure LOAD_CODE LOAD_STR %s" @@ -1208,7 +1228,6 @@ class Python3Parser(PythonParser): ) self.add_unique_rule(rule, opname, token.attr, customize) - if self.version >= (3, 4): if not self.is_pypy: load_op = "LOAD_STR" @@ -1292,14 +1311,16 @@ class Python3Parser(PythonParser): if has_get_iter_call_function1: rule_pat = ( "generator_exp ::= %sload_genexpr %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname) + "GET_ITER CALL_FUNCTION_1" + % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize ) rule_pat = ( "generator_exp ::= %sload_closure load_genexpr %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname) + "GET_ITER CALL_FUNCTION_1" + % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize @@ -1351,7 +1372,8 @@ class Python3Parser(PythonParser): if has_get_iter_call_function1: rule_pat = ( "generator_exp ::= %sload_genexpr %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname) + "GET_ITER CALL_FUNCTION_1" + % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule(rule_pat, opname, token.attr, customize) @@ -1363,7 +1385,8 @@ class Python3Parser(PythonParser): # Todo: For Pypy we need to modify this slightly rule_pat = ( "listcomp ::= %sLOAD_LISTCOMP %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("expr " * pos_args_count, opname) + "GET_ITER CALL_FUNCTION_1" + % ("expr " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize @@ -1450,9 +1473,6 @@ class Python3Parser(PythonParser): ) ) if self.version >= (3, 3): - # Normally we remove EXTENDED_ARG from the opcodes, but in the case of - # annotated functions can use the EXTENDED_ARG tuple to signal we have an annotated function. 
- # Yes this is a little hacky if self.version == (3, 3): # 3.3 puts kwargs before pos_arg pos_kw_tuple = ( @@ -1466,17 +1486,17 @@ class Python3Parser(PythonParser): ("kwargs " * kw_args_count), ) rule = ( - "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE LOAD_STR EXTENDED_ARG %s" + "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE LOAD_STR %s" % ( pos_kw_tuple[0], pos_kw_tuple[1], - ("call " * annotate_args), + ("annotate_arg " * annotate_args), opname, ) ) self.add_unique_rule(rule, opname, token.attr, customize) rule = ( - "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE LOAD_STR EXTENDED_ARG %s" + "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE LOAD_STR %s" % ( pos_kw_tuple[0], pos_kw_tuple[1], @@ -1485,9 +1505,8 @@ class Python3Parser(PythonParser): ) ) else: - # See above comment about use of EXTENDED_ARG rule = ( - "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE EXTENDED_ARG %s" + "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE %s" % ( ("kwargs " * kw_args_count), ("pos_arg " * (pos_args_count)), @@ -1497,7 +1516,7 @@ class Python3Parser(PythonParser): ) self.add_unique_rule(rule, opname, token.attr, customize) rule = ( - "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE EXTENDED_ARG %s" + "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE %s" % ( ("kwargs " * kw_args_count), ("pos_arg " * pos_args_count), @@ -1594,7 +1613,7 @@ class Python3Parser(PythonParser): } if self.version == (3, 6): - self.reduce_check_table["and"] = and_invalid + self.reduce_check_table["and"] = and_invalid self.check_reduce["and"] = "AST" self.check_reduce["annotate_tuple"] = "noAST" @@ -1624,7 +1643,7 @@ class Python3Parser(PythonParser): def reduce_is_invalid(self, rule, ast, tokens, first, last): lhs = rule[0] n = len(tokens) - last = min(last, n-1) + last = min(last, n - 1) fn = self.reduce_check_table.get(lhs, None) if fn: if fn(self, lhs, n, rule, ast, tokens, first, last): @@ -1650,13 +1669,18 @@ class Python3Parser(PythonParser): condition_jump2 = 
tokens[min(last - 1, len(tokens) - 1)] # If there are two *distinct* condition jumps, they should not jump to the # same place. Otherwise we have some sort of "and"/"or". - if condition_jump2.kind.startswith("POP_JUMP_IF") and condition_jump != condition_jump2: + if ( + condition_jump2.kind.startswith("POP_JUMP_IF") + and condition_jump != condition_jump2 + ): return condition_jump.attr == condition_jump2.attr - if tokens[last] == "COME_FROM" and tokens[last].off2int() != condition_jump.attr: + if ( + tokens[last] == "COME_FROM" + and tokens[last].off2int() != condition_jump.attr + ): return False - # if condition_jump.attr < condition_jump2.off2int(): # print("XXX", first, last) # for t in range(first, last): print(tokens[t]) @@ -1678,7 +1702,6 @@ class Python3Parser(PythonParser): < tokens[last].off2int() ) elif lhs == "while1stmt": - if while1stmt(self, lhs, n, rule, ast, tokens, first, last): return True @@ -1700,7 +1723,6 @@ class Python3Parser(PythonParser): return True return False elif lhs == "while1elsestmt": - n = len(tokens) if last == n: # Adjust for fuzziness in parsing diff --git a/uncompyle6/parsers/parse33.py b/uncompyle6/parsers/parse33.py index 55432e72..ce1fc672 100644 --- a/uncompyle6/parsers/parse33.py +++ b/uncompyle6/parsers/parse33.py @@ -1,15 +1,13 @@ -# Copyright (c) 2016 Rocky Bernstein +# Copyright (c) 2016, 2024 Rocky Bernstein """ spark grammar differences over Python 3.2 for Python 3.3. """ -from __future__ import print_function from uncompyle6.parser import PythonParserSingle from uncompyle6.parsers.parse32 import Python32Parser class Python33Parser(Python32Parser): - def p_33on(self, args): """ # Python 3.3+ adds yield from. 
@@ -19,13 +17,22 @@ class Python33Parser(Python32Parser): """ def customize_grammar_rules(self, tokens, customize): - self.remove_rules(""" + self.remove_rules( + """ # 3.3+ adds POP_BLOCKS whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK NOP COME_FROM_LOOP whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK NOP COME_FROM_LOOP - """) + """ + ) super(Python33Parser, self).customize_grammar_rules(tokens, customize) + + # FIXME: move 3.3 stuff out of parse3.py and put it here. + # for i, token in enumerate(tokens): + # opname = token.kind + # opname_base = opname[: opname.rfind("_")] + return + class Python33ParserSingle(Python33Parser, PythonParserSingle): pass diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index e7cfa608..0993006f 100644 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -98,6 +98,10 @@ class Code(object): """ def __init__(self, co, scanner, classname=None, show_asm=None): + # Full initialization is given below, but for linters + # well set up some initial values. 
+ self.co_code = None # Really either bytes for >= 3.0 and string in < 3.0 + for i in dir(co): if i.startswith("co_"): setattr(self, i, getattr(co, i)) @@ -430,7 +434,7 @@ class Scanner: """ try: None in instr - except: + except Exception: instr = [instr] first = self.offset2inst_index[start] diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 3491a25c..d29dea90 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -479,6 +479,7 @@ class Scanner3(Scanner): last_op_was_break = False new_tokens = [] + operand_value = 0 for i, inst in enumerate(self.insts): opname = inst.opname @@ -530,10 +531,11 @@ class Scanner3(Scanner): op = inst.opcode if opname == "EXTENDED_ARG": - # FIXME: The EXTENDED_ARG is used to signal annotation - # parameters - if i + 1 < n and self.insts[i + 1].opcode != self.opc.MAKE_FUNCTION: + if i + 1 < n: + operand_value = argval << 16 continue + else: + operand_value = 0 if inst.offset in jump_targets: jump_idx = 0 @@ -640,7 +642,7 @@ class Scanner3(Scanner): attr = attr[:4] # remove last value: attr[5] == False else: pos_args, name_pair_args, annotate_args = parse_fn_counts_30_35( - inst.argval + inst.argval + operand_value ) pattr = "%s positional, %s keyword only, %s annotated" % ( diff --git a/uncompyle6/scanners/scanner33.py b/uncompyle6/scanners/scanner33.py index 1d5d7503..1c4a5aa9 100644 --- a/uncompyle6/scanners/scanner33.py +++ b/uncompyle6/scanners/scanner33.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2019, 2021-2022 by Rocky Bernstein +# Copyright (c) 2015-2019, 2021-2022, 2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,20 +19,22 @@ This sets up opcodes Python's 3.3 and calls a generalized scanner routine for Python 3. 
""" -from __future__ import print_function - # bytecode verification, verify(), uses JUMP_OPs from here from xdis.opcodes import opcode_33 as opc -JUMP_OPS = opc.JUMP_OPS from uncompyle6.scanners.scanner3 import Scanner3 -class Scanner33(Scanner3): +JUMP_OPS = opc.JUMP_OPS + + +class Scanner33(Scanner3): def __init__(self, show_asm=False, is_pypy=False): Scanner3.__init__(self, (3, 3), show_asm) return + pass + if __name__ == "__main__": from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index f69e0f1d..5f8d18c5 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -2035,6 +2035,8 @@ def code_deparse( code_objects={}, compile_mode="exec", walker=FragmentsWalker, + start_offset: int = 0, + stop_offset: int = -1, ): """ Convert the code object co into a python source fragment. @@ -2069,6 +2071,22 @@ def code_deparse( tokens, customize = scanner.ingest(co, code_objects=code_objects, show_asm=show_asm) tokens, customize = scanner.ingest(co) + + if start_offset > 0: + for i, t in enumerate(tokens): + # If t.offset is a string, we want to skip this. + if isinstance(t.offset, int) and t.offset >= start_offset: + tokens = tokens[i:] + break + + if stop_offset > -1: + for i, t in enumerate(tokens): + # In contrast to the test for start_offset If t.offset is + # a string, we want to extract the integer offset value. + if t.off2int() >= stop_offset: + tokens = tokens[:i] + break + maybe_show_asm(show_asm, tokens) debug_parser = dict(PARSER_DEFAULT_DEBUG) diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 0d967fd9..fc8614d4 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -130,6 +130,7 @@ Python. # evaluating the escape code. 
import sys +from io import StringIO from spark_parser import GenericASTTraversal from xdis import COMPILER_FLAG_BIT, iscode @@ -158,7 +159,11 @@ from uncompyle6.semantics.consts import ( ) from uncompyle6.semantics.customize import customize_for_version from uncompyle6.semantics.gencomp import ComprehensionMixin -from uncompyle6.semantics.helper import find_globals_and_nonlocals, print_docstring +from uncompyle6.semantics.helper import ( + find_globals_and_nonlocals, + is_lambda_mode, + print_docstring, +) from uncompyle6.semantics.make_function1 import make_function1 from uncompyle6.semantics.make_function2 import make_function2 from uncompyle6.semantics.make_function3 import make_function3 @@ -174,8 +179,6 @@ def unicode(x): return x -from io import StringIO - PARSER_DEFAULT_DEBUG = { "rules": False, "transition": False, @@ -206,7 +209,8 @@ class SourceWalkerError(Exception): class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): """ - Class to traverses a Parse Tree of the bytecode instruction built from parsing to produce some sort of source text. + Class to traverses a Parse Tree of the bytecode instruction built from parsing to + produce some sort of source text. The Parse tree may be turned an Abstract Syntax tree as an intermediate step. """ @@ -214,7 +218,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): def __init__( self, - version, + version: tuple, out, scanner, showast=TREE_DEFAULT_DEBUG, @@ -224,7 +228,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): linestarts={}, tolerate_errors=False, ): - """`version' is the Python version (a float) of the Python dialect + """`version' is the Python version of the Python dialect of both the syntax tree and language we should produce. `out' is IO-like file pointer to where the output should go. 
It @@ -236,9 +240,12 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): If `showast' is True, we print the syntax tree. - `compile_mode' is is either 'exec' or 'single'. It is the compile - mode that was used to create the Syntax Tree and specifies a - grammar variant within a Python version to use. + `compile_mode` is is either `exec`, `single` or `lambda`. + + For `lambda`, the grammar that can be used in lambda + expressions is used. Otherwise, it is the compile mode that + was used to create the Syntax Tree and specifies a grammar + variant within a Python version to use. `is_pypy` should be True if the Syntax Tree was generated for PyPy. @@ -263,10 +270,8 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): self.currentclass = None self.classes = [] self.debug_parser = dict(debug_parser) - # Initialize p_lambda on demand self.line_number = 1 self.linemap = {} - self.p_lambda = None self.params = params self.param_stack = [] self.ERROR = None @@ -277,11 +282,15 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): self.pending_newlines = 0 self.linestarts = linestarts self.treeTransform = TreeTransform(version=self.version, show_ast=showast) + # FIXME: have p.insts update in a better way # modularity is broken here self.insts = scanner.insts self.offset2inst_index = scanner.offset2inst_index + # Initialize p_lambda on demand + self.p_lambda = None + # This is in Python 2.6 on. It changes the way # strings get interpreted. 
See n_LOAD_CONST self.FUTURE_UNICODE_LITERALS = False @@ -309,12 +318,13 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): customize_for_version(self, is_pypy, version) return - def maybe_show_tree(self, ast, phase): + def maybe_show_tree(self, tree, phase): if self.showast.get("before", False): self.println( """ ---- end before transform """ + + " " ) if self.showast.get("after", False): self.println( @@ -324,7 +334,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): + " " ) if self.showast.get(phase, False): - maybe_show_tree(self, ast) + maybe_show_tree(self, tree) def str_with_template(self, ast): stream = sys.stdout @@ -384,9 +394,9 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): i += 1 return rv - def indent_if_source_nl(self, line_number: int, indent: int): + def indent_if_source_nl(self, line_number: int, indent_spaces: str): if line_number != self.line_number: - self.write("\n" + indent + INDENT_PER_LEVEL[:-1]) + self.write("\n" + indent_spaces + INDENT_PER_LEVEL[:-1]) return self.line_number f = property( @@ -508,19 +518,19 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): def pp_tuple(self, tup): """Pretty print a tuple""" last_line = self.f.getvalue().split("\n")[-1] - l = len(last_line) + 1 - indent = " " * l + ll = len(last_line) + 1 + indent = " " * ll self.write("(") sep = "" for item in tup: self.write(sep) - l += len(sep) + ll += len(sep) s = better_repr(item, self.version) - l += len(s) + ll += len(s) self.write(s) sep = "," - if l > LINE_LENGTH: - l = 0 + if ll > LINE_LENGTH: + ll = 0 sep += "\n" + indent else: sep += " " @@ -564,6 +574,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): def print_super_classes3(self, node): n = len(node) - 1 + j = 0 if node.kind != "expr": if node == "kwarg": self.template_engine(("(%[0]{attr}=%c)", 1), node) @@ -601,9 +612,9 @@ class 
SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): self.write("(") if kwargs: # Last arg is tuple of keyword values: omit - l = n - 1 + m = n - 1 else: - l = n + m = n if kwargs: # 3.6+ does this @@ -615,7 +626,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): j += 1 j = 0 - while i < l: + while i < m: self.write(sep) value = self.traverse(node[i]) self.write("%s=%s" % (kwargs[j], value)) @@ -623,7 +634,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): j += 1 i += 1 else: - while i < l: + while i < m: value = self.traverse(node[i]) i += 1 self.write(sep, value) @@ -699,9 +710,10 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): """ # print("-----") - # print(startnode) + # print(startnode.kind) # print(entry[0]) # print('======') + fmt = entry[0] arg = 1 i = 0 @@ -870,7 +882,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): d = node.__dict__ try: self.write(eval(expr, d, d)) - except: + except Exception: raise m = escape.search(fmt, i) self.write(fmt[i:]) @@ -1094,8 +1106,8 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): # if docstring exists, dump it if code.co_consts and code.co_consts[0] is not None and len(ast) > 0: do_doc = False + i = 0 if is_docstring(ast[0], self.version, code.co_consts): - i = 0 do_doc = True elif len(ast) > 1 and is_docstring(ast[1], self.version, code.co_consts): i = 1 @@ -1191,7 +1203,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): is_lambda=False, noneInNames=False, is_top_level_module=False, - ): + ) -> GenericASTTraversal: # FIXME: DRY with fragments.py # assert isinstance(tokens[0], Token) @@ -1243,7 +1255,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): # Build a parse tree from a tokenized and massaged disassembly. 
try: # FIXME: have p.insts update in a better way - # modularity is broken here + # Modularity is broken here. p_insts = self.p.insts self.p.insts = self.scanner.insts self.p.offset2inst_index = self.scanner.offset2inst_index @@ -1256,6 +1268,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): checker(ast, False, self.ast_errors) self.customize(customize) + transform_tree = self.treeTransform.transform(ast, code) self.maybe_show_tree(ast, phase="before") @@ -1277,6 +1290,8 @@ def code_deparse( compile_mode="exec", is_pypy=IS_PYPY, walker=SourceWalker, + start_offset: int = 0, + stop_offset: int = -1, ): """ ingests and deparses a given code block 'co'. If version is None, @@ -1285,6 +1300,9 @@ def code_deparse( assert iscode(co) + if out is None: + out = sys.stdout + if version is None: version = PYTHON_VERSION_TRIPLE @@ -1295,6 +1313,21 @@ def code_deparse( co, code_objects=code_objects, show_asm=debug_opts["asm"] ) + if start_offset > 0: + for i, t in enumerate(tokens): + # If t.offset is a string, we want to skip this. + if isinstance(t.offset, int) and t.offset >= start_offset: + tokens = tokens[i:] + break + + if stop_offset > -1: + for i, t in enumerate(tokens): + # In contrast to the test for start_offset If t.offset is + # a string, we want to extract the integer offset value. + if t.off2int() >= stop_offset: + tokens = tokens[:i] + break + debug_parser = debug_opts.get("grammar", dict(PARSER_DEFAULT_DEBUG)) # Build Syntax Tree from disassembly. @@ -1318,7 +1351,7 @@ def code_deparse( tokens, customize, co, - is_lambda=(compile_mode == "lambda"), + is_lambda=is_lambda_mode(compile_mode), is_top_level_module=is_top_level_module, ) @@ -1327,7 +1360,7 @@ def code_deparse( return None # FIXME use a lookup table here. 
- if compile_mode == "lambda": + if is_lambda_mode(compile_mode): expected_start = "lambda_start" elif compile_mode == "eval": expected_start = "expr_start" @@ -1340,6 +1373,7 @@ def code_deparse( expected_start = None else: expected_start = None + if expected_start: assert ( deparsed.ast == expected_start @@ -1386,7 +1420,7 @@ def code_deparse( deparsed.ast, name=co.co_name, customize=customize, - is_lambda=compile_mode == "lambda", + is_lambda=is_lambda_mode(compile_mode), debug_opts=debug_opts, ) @@ -1414,9 +1448,12 @@ def deparse_code2str( compile_mode="exec", is_pypy=IS_PYPY, walker=SourceWalker, -): - """Return the deparsed text for a Python code object. `out` is where any intermediate - output for assembly or tree output will be sent. + start_offset: int = 0, + stop_offset: int = -1, +) -> str: + """ + Return the deparsed text for a Python code object. `out` is where + any intermediate output for assembly or tree output will be sent. """ return code_deparse( code, diff --git a/uncompyle6/semantics/transform.py b/uncompyle6/semantics/transform.py index e3f96c83..b646071b 100644 --- a/uncompyle6/semantics/transform.py +++ b/uncompyle6/semantics/transform.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023 by Rocky Bernstein +# Copyright (c) 2019-2024 by Rocky Bernstein # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -13,14 +13,15 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-from uncompyle6.show import maybe_show_tree from copy import copy + from spark_parser import GenericASTTraversal, GenericASTTraversalPruningException -from uncompyle6.semantics.helper import find_code_node from uncompyle6.parsers.treenode import SyntaxTree from uncompyle6.scanners.tok import NoneToken, Token -from uncompyle6.semantics.consts import RETURN_NONE, ASSIGN_DOC_STRING +from uncompyle6.semantics.consts import ASSIGN_DOC_STRING, RETURN_NONE +from uncompyle6.semantics.helper import find_code_node +from uncompyle6.show import maybe_show_tree def is_docstring(node, version, co_consts): @@ -55,27 +56,34 @@ def is_docstring(node, version, co_consts): return node == ASSIGN_DOC_STRING(co_consts[0], doc_load) -def is_not_docstring(call_stmt_node): +def is_not_docstring(call_stmt_node) -> bool: try: return ( call_stmt_node == "call_stmt" and call_stmt_node[0][0] == "LOAD_STR" and call_stmt_node[1] == "POP_TOP" ) - except: + except Exception: return False class TreeTransform(GenericASTTraversal, object): - def __init__(self, version, show_ast=None, is_pypy=False): + def __init__( + self, + version: tuple, + is_pypy=False, + show_ast=None, + ): self.version = version self.showast = show_ast self.is_pypy = is_pypy return - def maybe_show_tree(self, ast): - if isinstance(self.showast, dict) and self.showast: - maybe_show_tree(self, ast) + def maybe_show_tree(self, tree): + if isinstance(self.showast, dict) and ( + self.showast.get("before") or self.showast.get("after") + ): + maybe_show_tree(self, tree) def preorder(self, node=None): """Walk the tree in roughly 'preorder' (a bit of a lie explained below). 
@@ -119,12 +127,10 @@ class TreeTransform(GenericASTTraversal, object): mkfunc_pattr = node[-1].pattr if isinstance(mkfunc_pattr, tuple): + assert isinstance(mkfunc_pattr, tuple) assert len(mkfunc_pattr) == 4 and isinstance(mkfunc_pattr, int) - if ( - len(code.co_consts) > 0 - and isinstance(code.co_consts[0], str) - ): + if len(code.co_consts) > 0 and isinstance(code.co_consts[0], str): docstring_node = SyntaxTree( "docstring", [Token("LOAD_STR", has_arg=True, pattr=code.co_consts[0])] ) @@ -136,7 +142,7 @@ class TreeTransform(GenericASTTraversal, object): def n_ifstmt(self, node): """Here we check if we can turn an `ifstmt` or 'iflaststmtl` into - some kind of `assert` statement""" + some kind of `assert` statement""" testexpr = node[0] @@ -148,7 +154,11 @@ class TreeTransform(GenericASTTraversal, object): if ifstmts_jump == "_ifstmts_jumpl" and ifstmts_jump[0] == "_ifstmts_jump": ifstmts_jump = ifstmts_jump[0] - elif ifstmts_jump not in ("_ifstmts_jump", "_ifstmts_jumpl", "ifstmts_jumpl"): + elif ifstmts_jump not in ( + "_ifstmts_jump", + "_ifstmts_jumpl", + "ifstmts_jumpl", + ): return node stmts = ifstmts_jump[0] else: @@ -208,10 +218,11 @@ class TreeTransform(GenericASTTraversal, object): kind = "assert2not" LOAD_ASSERT = call[0].first_child() - if LOAD_ASSERT not in ( "LOAD_ASSERT", "LOAD_GLOBAL"): + if LOAD_ASSERT not in ("LOAD_ASSERT", "LOAD_GLOBAL"): return node if isinstance(call[1], SyntaxTree): expr = call[1][0] + assert_expr.transformed_by = "n_ifstmt" node = SyntaxTree( kind, [ @@ -221,8 +232,8 @@ class TreeTransform(GenericASTTraversal, object): expr, RAISE_VARARGS_1, ], + transformed_by="n_ifstmt", ) - node.transformed_by = "n_ifstmt" pass pass else: @@ -250,9 +261,10 @@ class TreeTransform(GenericASTTraversal, object): LOAD_ASSERT = expr[0] node = SyntaxTree( - kind, [assert_expr, jump_cond, LOAD_ASSERT, RAISE_VARARGS_1] + kind, + [assert_expr, jump_cond, LOAD_ASSERT, RAISE_VARARGS_1], + transformed_by="n_ifstmt", ) - node.transformed_by = 
("n_ifstmt",) pass pass return node @@ -289,7 +301,12 @@ class TreeTransform(GenericASTTraversal, object): len_n = len(n) # Sometimes stmt is reduced away and n[0] can be a single reduction like continue -> CONTINUE. - if len_n == 1 and isinstance(n[0], SyntaxTree) and len(n[0]) == 1 and n[0] == "stmt": + if ( + len_n == 1 + and isinstance(n[0], SyntaxTree) + and len(n[0]) == 1 + and n[0] == "stmt" + ): n = n[0][0] elif len_n == 0: return node @@ -407,23 +424,27 @@ class TreeTransform(GenericASTTraversal, object): list_for_node.transformed_by = ("n_list_for",) return list_for_node + def n_negated_testtrue(self, node): + assert node[0] == "testtrue" + test_node = node[0][0] + test_node.transformed_by = "n_negated_testtrue" + return test_node + def n_stmts(self, node): if node.first_child() == "SETUP_ANNOTATIONS": prev = node[0][0] new_stmts = [node[0]] for i, sstmt in enumerate(node[1:]): ann_assign = sstmt[0] - if ( - ann_assign == "ann_assign" - and prev == "assign" - ): + if ann_assign == "ann_assign" and prev == "assign": annotate_var = ann_assign[-2] if annotate_var.attr == prev[-1][0].attr: node[i].kind = "deleted " + node[i].kind del new_stmts[-1] ann_assign_init = SyntaxTree( - "ann_assign_init", [ann_assign[0], copy(prev[0]), annotate_var] - ) + "ann_assign_init", + [ann_assign[0], copy(prev[0]), annotate_var], + ) if sstmt[0] == "ann_assign": sstmt[0] = ann_assign_init else: @@ -441,26 +462,28 @@ class TreeTransform(GenericASTTraversal, object): node = self.preorder(node) return node - def transform(self, ast, code): - self.maybe_show_tree(ast) - self.ast = copy(ast) + def transform(self, parse_tree: GenericASTTraversal, code) -> GenericASTTraversal: + self.maybe_show_tree(parse_tree) + self.ast = copy(parse_tree) + del parse_tree self.ast = self.traverse(self.ast, is_lambda=False) + n = len(self.ast) try: # Disambiguate a string (expression) which appears as a "call_stmt" at # the beginning of a function versus a docstring. 
Seems pretty academic, # but this is Python. - call_stmt = ast[0][0] + call_stmt = self.ast[0][0] if is_not_docstring(call_stmt): call_stmt.kind = "string_at_beginning" call_stmt.transformed_by = "transform" pass - except: + except Exception: pass try: - for i in range(len(self.ast)): - sstmt = ast[i] + for i in range(n): + sstmt = self.ast[i] if len(sstmt) == 1 and sstmt == "sstmt": self.ast[i] = self.ast[i][0] @@ -486,7 +509,7 @@ class TreeTransform(GenericASTTraversal, object): if self.ast[-1] == RETURN_NONE: self.ast.pop() # remove last node # todo: if empty, add 'pass' - except: + except Exception: pass return self.ast