diff --git a/__pkginfo__.py b/__pkginfo__.py index 723c15a3..98fa63d5 100644 --- a/__pkginfo__.py +++ b/__pkginfo__.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018, 2020-2021 Rocky Bernstein +# Copyright (C) 2018, 2020-2021, 2024 Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -32,9 +32,11 @@ # 3.3 | pip | 10.0.1 | # 3.4 | pip | 19.1.1 | +import os.path as osp + # Things that change more often go here. copyright = """ -Copyright (C) 2015-2021 Rocky Bernstein . +Copyright (C) 2015-2021, 2024 Rocky Bernstein . """ classifiers = [ @@ -75,7 +77,7 @@ entry_points = { ] } ftp_url = None -install_requires = ["spark-parser >= 1.8.9, < 1.9.0", "xdis >= 6.0.8, < 6.2.0"] +install_requires = ["click", "spark-parser >= 1.8.9, < 1.9.0", "xdis >= 6.0.8, < 6.2.0"] license = "GPL3" mailing_list = "python-debugger@googlegroups.com" @@ -88,21 +90,18 @@ web = "https://github.com/rocky/python-uncompyle6/" zip_safe = True -import os.path - - def get_srcdir(): - filename = os.path.normcase(os.path.dirname(os.path.abspath(__file__))) - return os.path.realpath(filename) + filename = osp.normcase(osp.dirname(osp.abspath(__file__))) + return osp.realpath(filename) srcdir = get_srcdir() def read(*rnames): - return open(os.path.join(srcdir, *rnames)).read() + return open(osp.join(srcdir, *rnames)).read() -# Get info from files; set: long_description and __version__ +# Get info from files; set: long_description and VERSION long_description = read("README.rst") + "\n" exec(read("uncompyle6/version.py")) diff --git a/uncompyle6/bin/uncompile.py b/uncompyle6/bin/uncompile.py index fc7e7556..6d3a347a 100755 --- a/uncompyle6/bin/uncompile.py +++ b/uncompyle6/bin/uncompile.py @@ -1,15 +1,19 @@ #!/usr/bin/env python # Mode: -*- python -*- # -# Copyright (c) 2015-2017, 2019-2020, 2023 by Rocky Bernstein +# Copyright (c) 2015-2017, 2019-2020, 2023-2024 +# by Rocky Bernstein # Copyright (c) 
2000-2002 by hartmut Goebel # from __future__ import print_function -import getopt import os import sys import time +from typing import List + +import click +from xdis.version_info import version_tuple_to_str from uncompyle6 import verify from uncompyle6.main import main, status_msg @@ -17,150 +21,162 @@ from uncompyle6.version import __version__ program = "uncompyle6" -__doc__ = """ -Usage: - %s [OPTIONS]... [ FILE | DIR]... - %s [--help | -h | --V | --version] - -Examples: - %s foo.pyc bar.pyc # decompile foo.pyc, bar.pyc to stdout - %s -o . foo.pyc bar.pyc # decompile to ./foo.pyc_dis and ./bar.pyc_dis - %s -o /tmp /usr/lib/python1.5 # decompile whole library - -Options: - -o output decompiled files to this path: - if multiple input files are decompiled, the common prefix - is stripped from these names and the remainder appended to - - uncompyle6 -o /tmp bla/fasel.pyc bla/foo.pyc - -> /tmp/fasel.pyc_dis, /tmp/foo.pyc_dis - uncompyle6 -o /tmp bla/fasel.pyc bar/foo.pyc - -> /tmp/bla/fasel.pyc_dis, /tmp/bar/foo.pyc_dis - uncompyle6 -o /tmp /usr/lib/python1.5 - -> /tmp/smtplib.pyc_dis ... 
/tmp/lib-tk/FixTk.pyc_dis - --compile | -c - attempts a decompilation after compiling - -d print timestamps - -p use number of processes - -r recurse directories looking for .pyc and .pyo files - --fragments use fragments deparser - --verify compare generated source with input byte-code - --verify-run compile generated source, run it and check exit code - --syntax-verify compile generated source - --linemaps generated line number correspondencies between byte-code - and generated source output - --encoding - use in generated source according to pep-0263 - --help show this message - -Debugging Options: - --asm | -a include byte-code (disables --verify) - --grammar | -g show matching grammar - --tree={before|after} - -t {before|after} include syntax before (or after) tree transformation - (disables --verify) - --tree++ | -T add template rules to --tree=before when possible - -Extensions of generated files: - '.pyc_dis' '.pyo_dis' successfully decompiled (and verified if --verify) - + '_unverified' successfully decompile but --verify failed - + '_failed' decompile failed (contact author for enhancement) -""" % ( - (program,) * 5 -) - -program = "uncompyle6" - def usage(): print(__doc__) sys.exit(1) -def main_bin(): - recurse_dirs = False - numproc = 0 - outfile = "-" - out_base = None - source_paths = [] - timestamp = False - timestampfmt = "# %Y.%m.%d %H:%M:%S %Z" +# __doc__ = """ +# Usage: +# %s [OPTIONS]... [ FILE | DIR]... +# %s [--help | -h | --V | --version] - try: - opts, pyc_paths = getopt.getopt( - sys.argv[1:], - "hac:gtTdrVo:p:", - "help asm compile= grammar linemaps recurse " - "timestamp tree= tree+ " - "fragments verify verify-run version " - "syntax-verify " - "showgrammar encoding=".split(" "), +# Examples: +# %s foo.pyc bar.pyc # decompile foo.pyc, bar.pyc to stdout +# %s -o . 
foo.pyc bar.pyc # decompile to ./foo.pyc_dis and ./bar.pyc_dis +# %s -o /tmp /usr/lib/python1.5 # decompile whole library + +# Options: +# -o output decompiled files to this path: +# if multiple input files are decompiled, the common prefix +# is stripped from these names and the remainder appended to +# +# uncompyle6 -o /tmp bla/fasel.pyc bla/foo.pyc +# -> /tmp/fasel.pyc_dis, /tmp/foo.pyc_dis +# uncompyle6 -o /tmp bla/fasel.pyc bar/foo.pyc +# -> /tmp/bla/fasel.pyc_dis, /tmp/bar/foo.pyc_dis +# uncompyle6 -o /tmp /usr/lib/python1.5 +# -> /tmp/smtplib.pyc_dis ... /tmp/lib-tk/FixTk.pyc_dis +# --compile | -c +# attempts a decompilation after compiling +# -d print timestamps +# -p use number of processes +# -r recurse directories looking for .pyc and .pyo files +# --fragments use fragments deparser +# --verify compare generated source with input byte-code +# --verify-run compile generated source, run it and check exit code +# --syntax-verify compile generated source +# --linemaps generated line number correspondencies between byte-code +# and generated source output +# --encoding +# use in generated source according to pep-0263 +# --help show this message + +# Debugging Options: +# --asm | -a include byte-code (disables --verify) +# --grammar | -g show matching grammar +# --tree={before|after} +# -t {before|after} include syntax before (or after) tree transformation +# (disables --verify) +# --tree++ | -T add template rules to --tree=before when possible + +# Extensions of generated files: +# '.pyc_dis' '.pyo_dis' successfully decompiled (and verified if --verify) +# + '_unverified' successfully decompile but --verify failed +# + '_failed' decompile failed (contact author for enhancement) +# """ % ( +# (program,) * 5 +# ) + + +@click.command() +@click.option( + "--asm++/--no-asm++", + "-A", + "asm_plus", + default=False, + help="show xdis assembler and tokenized assembler", +) +@click.option("--asm/--no-asm", "-a", default=False) +@click.option("--grammar/--no-grammar", 
"-g", "show_grammar", default=False) +@click.option("--tree/--no-tree", "-t", default=False) +@click.option( + "--tree++/--no-tree++", + "-T", + "tree_plus", + default=False, + help="show parse tree and Abstract Syntax Tree", +) +@click.option( + "--linemaps/--no-linemaps", + default=False, + help="show line number correspondencies between byte-code " + "and generated source output", +) +@click.option( + "--verify", + type=click.Choice(["run", "syntax"]), + default=None, +) +@click.option( + "--recurse/--no-recurse", + "-r", + "recurse_dirs", + default=False, +) +@click.option( + "--output", + "-o", + "outfile", + type=click.Path( + exists=True, file_okay=True, dir_okay=True, writable=True, resolve_path=True + ), + required=False, +) +@click.version_option(version=__version__) +@click.option( + "--start-offset", + "start_offset", + default=0, + help="start decomplation at offset; default is 0 or the starting offset.", +) +@click.version_option(version=__version__) +@click.option( + "--stop-offset", + "stop_offset", + default=-1, + help="stop decomplation when seeing an offset greater or equal to this; default is " + "-1 which indicates no stopping point.", +) +@click.argument("files", nargs=-1, type=click.Path(readable=True), required=True) +def main_bin( + asm: bool, + asm_plus: bool, + show_grammar, + tree: bool, + tree_plus: bool, + linemaps: bool, + verify, + recurse_dirs: bool, + outfile, + start_offset: int, + stop_offset: int, + files, +): + """ + Cross Python bytecode decompiler for Python bytecode up to Python 3.8. + """ + + version_tuple = sys.version_info[0:2] + if version_tuple < (3, 7): + print( + f"Error: This version of the {program} runs from Python 3.7 or greater." + f"You need another branch of this code for Python before 3.7." 
+ f""" \n\tYou have version: {version_tuple_to_str()}.""" ) - except getopt.GetoptError as e: - print("%s: %s" % (os.path.basename(sys.argv[0]), e), file=sys.stderr) sys.exit(-1) - options = { - "showasm": None - } - for opt, val in opts: - if opt in ("-h", "--help"): - print(__doc__) - sys.exit(0) - elif opt in ("-V", "--version"): - print("%s %s" % (program, __version__)) - sys.exit(0) - elif opt == "--verify": - options["do_verify"] = "strong" - elif opt == "--syntax-verify": - options["do_verify"] = "weak" - elif opt == "--fragments": - options["do_fragments"] = True - elif opt == "--verify-run": - options["do_verify"] = "verify-run" - elif opt == "--linemaps": - options["do_linemaps"] = True - elif opt in ("--asm", "-a"): - if options["showasm"] == None: - options["showasm"] = "after" - else: - options["showasm"] = "both" - options["do_verify"] = None - elif opt in ("--tree", "-t"): - if "showast" not in options: - options["showast"] = {} - if val == "before": - options["showast"][val] = True - elif val == "after": - options["showast"][val] = True - else: - options["showast"]["before"] = True - options["do_verify"] = None - elif opt in ("--tree+", "-T"): - if "showast" not in options: - options["showast"] = {} - options["showast"]["after"] = True - options["showast"]["before"] = True - options["do_verify"] = None - elif opt in ("--grammar", "-g"): - options["showgrammar"] = True - elif opt == "-o": - outfile = val - elif opt in ("--timestamp", "-d"): - timestamp = True - elif opt in ("--compile", "-c"): - source_paths.append(val) - elif opt == "-p": - numproc = int(val) - elif opt in ("--recurse", "-r"): - recurse_dirs = True - elif opt == "--encoding": - options["source_encoding"] = val - else: - print(opt, file=sys.stderr) - usage() + numproc = 0 + out_base = None - # expand directory if specified + out_base = None + source_paths: List[str] = [] + timestamp = False + timestampfmt = "# %Y.%m.%d %H:%M:%S %Z" + pyc_paths = files + + # Expand directory if 
"recurse" was specified. if recurse_dirs: expanded_files = [] for f in pyc_paths: @@ -194,15 +210,32 @@ def main_bin(): out_base = outfile outfile = None + # A second -a turns show_asm="after" into show_asm="before" + if asm_plus or asm: + asm_opt = "both" if asm_plus else "after" + else: + asm_opt = None + if timestamp: print(time.strftime(timestampfmt)) if numproc <= 1: + show_ast = {"before": tree or tree_plus, "after": tree_plus} try: result = main( - src_base, out_base, pyc_paths, source_paths, outfile, **options + src_base, + out_base, + pyc_paths, + source_paths, + outfile, + showasm=asm_opt, + showgrammar=show_grammar, + showast=show_ast, + do_verify=verify, + do_linemaps=linemaps, + start_offset=start_offset, + stop_offset=stop_offset, ) - result = [options.get("do_verify", None)] + list(result) if len(pyc_paths) > 1: mess = status_msg(*result) print("# " + mess) diff --git a/uncompyle6/main.py b/uncompyle6/main.py index 671f0b2b..5e759190 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -15,9 +15,11 @@ import datetime import os +import os.path as osp import py_compile import sys -from typing import Any, Optional, Tuple +import tempfile +from typing import Any, Optional, TextIO, Tuple from xdis import iscode from xdis.load import load_module @@ -38,9 +40,9 @@ def _get_outstream(outfile: str) -> Any: """ Return an opened output file descriptor for ``outfile``. 
""" - dir_name = os.path.dirname(outfile) + dir_name = osp.dirname(outfile) failed_file = outfile + "_failed" - if os.path.exists(failed_file): + if osp.exists(failed_file): os.remove(failed_file) try: os.makedirs(dir_name) @@ -52,7 +54,7 @@ def _get_outstream(outfile: str) -> Any: def decompile( co, bytecode_version: Tuple[int] = PYTHON_VERSION_TRIPLE, - out=sys.stdout, + out: Optional[TextIO] = sys.stdout, showasm: Optional[str] = None, showast={}, timestamp=None, @@ -60,11 +62,13 @@ def decompile( source_encoding=None, code_objects={}, source_size=None, - is_pypy=False, + is_pypy: bool = False, magic_int=None, mapstream=None, do_fragments=False, compile_mode="exec", + start_offset: int = 0, + stop_offset: int = -1, ) -> Any: """ ingests and deparses a given code block 'co' @@ -132,11 +136,12 @@ def decompile( debug_opts=debug_opts, ) header_count = 3 + len(sys_version_lines) - linemap = [ - (line_no, deparsed.source_linemap[line_no] + header_count) - for line_no in sorted(deparsed.source_linemap.keys()) - ] - mapstream.write(f"\n\n# {linemap}\n") + if deparsed is not None: + linemap = [ + (line_no, deparsed.source_linemap[line_no] + header_count) + for line_no in sorted(deparsed.source_linemap.keys()) + ] + mapstream.write(f"\n\n# {linemap}\n") else: if do_fragments: deparse_fn = code_deparse_fragments @@ -149,8 +154,11 @@ def decompile( is_pypy=is_pypy, debug_opts=debug_opts, compile_mode=compile_mode, + start_offset=start_offset, + stop_offset=stop_offset, ) pass + real_out.write("\n") return deparsed except pysource.SourceWalkerError as e: # deparsing failed @@ -175,13 +183,15 @@ def compile_file(source_path: str) -> str: def decompile_file( filename: str, - outstream=None, - showasm=None, + outstream: Optional[TextIO] = None, + showasm: Optional[str] = None, showast={}, showgrammar=False, source_encoding=None, mapstream=None, do_fragments=False, + start_offset=0, + stop_offset=-1, ) -> Any: """ decompile Python byte-code file (.pyc). 
Return objects to @@ -211,6 +221,8 @@ def decompile_file( is_pypy=is_pypy, magic_int=magic_int, mapstream=mapstream, + start_offset=start_offset, + stop_offset=stop_offset, ), ) else: @@ -231,6 +243,8 @@ def decompile_file( mapstream=mapstream, do_fragments=do_fragments, compile_mode="exec", + start_offset=start_offset, + stop_offset=stop_offset, ) ] return deparsed @@ -242,13 +256,16 @@ def main( out_base: Optional[str], compiled_files: list, source_files: list, - outfile=None, + outfile: Optional[str] = None, showasm: Optional[str] = None, showast={}, - showgrammar=False, + do_verify: Optional[str] = None, + showgrammar: bool = False, source_encoding=None, do_linemaps=False, do_fragments=False, + start_offset: int = 0, + stop_offset: int = -1, ) -> Tuple[int, int, int, int]: """ in_base base directory for input files @@ -261,7 +278,8 @@ def main( - files below out_base out_base=... - stdout out_base=None, outfile=None """ - tot_files = okay_files = failed_files = verify_failed_files = 0 + tot_files = okay_files = failed_files = 0 + verify_failed_files = 0 current_outfile = outfile linemap_stream = None @@ -269,9 +287,9 @@ def main( compiled_files.append(compile_file(source_path)) for filename in compiled_files: - infile = os.path.join(in_base, filename) + infile = osp.join(in_base, filename) # print("XXX", infile) - if not os.path.exists(infile): + if not osp.exists(infile): sys.stderr.write(f"File '{infile}' doesn't exist. 
Skipped\n") continue @@ -284,14 +302,19 @@ def main( if outfile: # outfile was given as parameter outstream = _get_outstream(outfile) elif out_base is None: - outstream = sys.stdout + if do_verify and filename.endswith(".pyc"): + out_base = tempfile.mkdtemp(prefix="py-dis-") + current_outfile = osp.join(out_base, filename[0:-1]) + outstream = open(current_outfile, "w") + else: + outstream = sys.stdout if do_linemaps: linemap_stream = sys.stdout else: if filename.endswith(".pyc"): - current_outfile = os.path.join(out_base, filename[0:-1]) + current_outfile = osp.join(out_base, filename[0:-1]) else: - current_outfile = os.path.join(out_base, filename) + "_dis" + current_outfile = osp.join(out_base, filename) + "_dis" pass pass @@ -299,9 +322,9 @@ def main( # print(current_outfile, file=sys.stderr) - # Try to uncompile the input file + # Try to decompile the input file. try: - deparsed = decompile_file( + deparsed_objects = decompile_file( infile, outstream, showasm, @@ -310,11 +333,13 @@ def main( source_encoding, linemap_stream, do_fragments, + start_offset, + stop_offset, ) if do_fragments: - for d in deparsed: + for deparsed_object in deparsed_objects: last_mod = None - offsets = d.offsets + offsets = deparsed_object.offsets for e in sorted( [k for k in offsets.keys() if isinstance(k[1], int)] ): @@ -323,11 +348,48 @@ def main( outstream.write(f"{line}\n{e[0]}\n{line}\n") last_mod = e[0] info = offsets[e] - extract_info = d.extract_node_info(info) + extract_info = deparsed_object.extract_node_info(info) outstream.write(f"{info.node.format().strip()}" + "\n") outstream.write(extract_info.selectedLine + "\n") outstream.write(extract_info.markerLine + "\n\n") pass + + if do_verify: + for deparsed_object in deparsed_objects: + deparsed_object.f.close() + if PYTHON_VERSION_TRIPLE[:2] != deparsed_object.version[:2]: + sys.stdout.write( + f"\n# skipping running {deparsed_object.f.name}; it is " + f"{version_tuple_to_str(deparsed_object.version, end=2)}, " + "and we are " + 
f"{version_tuple_to_str(PYTHON_VERSION_TRIPLE, end=2)}\n" + ) + else: + check_type = "syntax check" + if do_verify == "run": + check_type = "run" + result = subprocess.run( + [sys.executable, deparsed_object.f.name], + capture_output=True, + ) + valid = result.returncode == 0 + output = result.stdout.decode() + if output: + print(output) + pass + if not valid: + print(result.stderr.decode()) + + else: + valid = syntax_check(deparsed_object.f.name) + + if not valid: + verify_failed_files += 1 + sys.stderr.write( + f"\n# {check_type} failed on file {deparsed_object.f.name}\n" + ) + + # sys.stderr.write(f"Ran {deparsed_object.f.name}\n") pass tot_files += 1 except (ValueError, SyntaxError, ParserError, pysource.SourceWalkerError) as e: diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index 90a2ee57..23dc83fd 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -2036,6 +2036,8 @@ def code_deparse( code_objects={}, compile_mode="exec", walker=FragmentsWalker, + start_offset: int = 0, + stop_offset: int = -1, ): """ Convert the code object co into a python source fragment. @@ -2070,6 +2072,22 @@ def code_deparse( tokens, customize = scanner.ingest(co, code_objects=code_objects, show_asm=show_asm) tokens, customize = scanner.ingest(co) + + if start_offset > 0: + for i, t in enumerate(tokens): + # If t.offset is a string, we want to skip this. + if isinstance(t.offset, int) and t.offset >= start_offset: + tokens = tokens[i:] + break + + if stop_offset > -1: + for i, t in enumerate(tokens): + # In contrast to the test for start_offset If t.offset is + # a string, we want to extract the integer offset value. + if t.off2int() >= stop_offset: + tokens = tokens[:i] + break + maybe_show_asm(show_asm, tokens) debug_parser = dict(PARSER_DEFAULT_DEBUG)