Redo uncompyle6 options ...

Use click for command-line option parsing now, and make the options more like decompyle3's.
This commit is contained in:
rocky
2024-02-10 13:19:32 -05:00
parent d7a1d5bbad
commit dd8ee1466d
4 changed files with 284 additions and 172 deletions

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2018, 2020-2021 Rocky Bernstein <rocky@gnu.org>
# Copyright (C) 2018, 2020-2021, 2024 Rocky Bernstein <rocky@gnu.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -32,9 +32,11 @@
# 3.3 | pip | 10.0.1 |
# 3.4 | pip | 19.1.1 |
import os.path as osp
# Things that change more often go here.
copyright = """
Copyright (C) 2015-2021 Rocky Bernstein <rb@dustyfeet.com>.
Copyright (C) 2015-2021, 2024 Rocky Bernstein <rb@dustyfeet.com>.
"""
classifiers = [
@@ -75,7 +77,7 @@ entry_points = {
]
}
ftp_url = None
install_requires = ["spark-parser >= 1.8.9, < 1.9.0", "xdis >= 6.0.8, < 6.2.0"]
install_requires = ["click", "spark-parser >= 1.8.9, < 1.9.0", "xdis >= 6.0.8, < 6.2.0"]
license = "GPL3"
mailing_list = "python-debugger@googlegroups.com"
@@ -88,21 +90,18 @@ web = "https://github.com/rocky/python-uncompyle6/"
zip_safe = True
import os.path
def get_srcdir():
filename = os.path.normcase(os.path.dirname(os.path.abspath(__file__)))
return os.path.realpath(filename)
filename = osp.normcase(osp.dirname(osp.abspath(__file__)))
return osp.realpath(filename)
srcdir = get_srcdir()
def read(*rnames):
return open(os.path.join(srcdir, *rnames)).read()
return open(osp.join(srcdir, *rnames)).read()
# Get info from files; set: long_description and __version__
# Get info from files; set: long_description and VERSION
long_description = read("README.rst") + "\n"
exec(read("uncompyle6/version.py"))

View File

@@ -1,15 +1,19 @@
#!/usr/bin/env python
# Mode: -*- python -*-
#
# Copyright (c) 2015-2017, 2019-2020, 2023 by Rocky Bernstein
# Copyright (c) 2015-2017, 2019-2020, 2023-2024
# by Rocky Bernstein
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
#
from __future__ import print_function
import getopt
import os
import sys
import time
from typing import List
import click
from xdis.version_info import version_tuple_to_str
from uncompyle6 import verify
from uncompyle6.main import main, status_msg
@@ -17,150 +21,162 @@ from uncompyle6.version import __version__
program = "uncompyle6"
__doc__ = """
Usage:
%s [OPTIONS]... [ FILE | DIR]...
%s [--help | -h | --V | --version]
Examples:
%s foo.pyc bar.pyc # decompile foo.pyc, bar.pyc to stdout
%s -o . foo.pyc bar.pyc # decompile to ./foo.pyc_dis and ./bar.pyc_dis
%s -o /tmp /usr/lib/python1.5 # decompile whole library
Options:
-o <path> output decompiled files to this path:
if multiple input files are decompiled, the common prefix
is stripped from these names and the remainder appended to
<path>
uncompyle6 -o /tmp bla/fasel.pyc bla/foo.pyc
-> /tmp/fasel.pyc_dis, /tmp/foo.pyc_dis
uncompyle6 -o /tmp bla/fasel.pyc bar/foo.pyc
-> /tmp/bla/fasel.pyc_dis, /tmp/bar/foo.pyc_dis
uncompyle6 -o /tmp /usr/lib/python1.5
-> /tmp/smtplib.pyc_dis ... /tmp/lib-tk/FixTk.pyc_dis
--compile | -c <python-file>
attempts a decompilation after compiling <python-file>
-d print timestamps
-p <integer> use <integer> number of processes
-r recurse directories looking for .pyc and .pyo files
--fragments use fragments deparser
--verify compare generated source with input byte-code
--verify-run compile generated source, run it and check exit code
--syntax-verify compile generated source
--linemaps generated line number correspondencies between byte-code
and generated source output
--encoding <encoding>
use <encoding> in generated source according to pep-0263
--help show this message
Debugging Options:
--asm | -a include byte-code (disables --verify)
--grammar | -g show matching grammar
--tree={before|after}
-t {before|after} include syntax before (or after) tree transformation
(disables --verify)
--tree++ | -T add template rules to --tree=before when possible
Extensions of generated files:
'.pyc_dis' '.pyo_dis' successfully decompiled (and verified if --verify)
+ '_unverified' successfully decompile but --verify failed
+ '_failed' decompile failed (contact author for enhancement)
""" % (
(program,) * 5
)
program = "uncompyle6"
def usage():
print(__doc__)
sys.exit(1)
def main_bin():
recurse_dirs = False
numproc = 0
outfile = "-"
out_base = None
source_paths = []
timestamp = False
timestampfmt = "# %Y.%m.%d %H:%M:%S %Z"
# __doc__ = """
# Usage:
# %s [OPTIONS]... [ FILE | DIR]...
# %s [--help | -h | --V | --version]
try:
opts, pyc_paths = getopt.getopt(
sys.argv[1:],
"hac:gtTdrVo:p:",
"help asm compile= grammar linemaps recurse "
"timestamp tree= tree+ "
"fragments verify verify-run version "
"syntax-verify "
"showgrammar encoding=".split(" "),
# Examples:
# %s foo.pyc bar.pyc # decompile foo.pyc, bar.pyc to stdout
# %s -o . foo.pyc bar.pyc # decompile to ./foo.pyc_dis and ./bar.pyc_dis
# %s -o /tmp /usr/lib/python1.5 # decompile whole library
# Options:
# -o <path> output decompiled files to this path:
# if multiple input files are decompiled, the common prefix
# is stripped from these names and the remainder appended to
# <path>
# uncompyle6 -o /tmp bla/fasel.pyc bla/foo.pyc
# -> /tmp/fasel.pyc_dis, /tmp/foo.pyc_dis
# uncompyle6 -o /tmp bla/fasel.pyc bar/foo.pyc
# -> /tmp/bla/fasel.pyc_dis, /tmp/bar/foo.pyc_dis
# uncompyle6 -o /tmp /usr/lib/python1.5
# -> /tmp/smtplib.pyc_dis ... /tmp/lib-tk/FixTk.pyc_dis
# --compile | -c <python-file>
# attempts a decompilation after compiling <python-file>
# -d print timestamps
# -p <integer> use <integer> number of processes
# -r recurse directories looking for .pyc and .pyo files
# --fragments use fragments deparser
# --verify compare generated source with input byte-code
# --verify-run compile generated source, run it and check exit code
# --syntax-verify compile generated source
# --linemaps generated line number correspondencies between byte-code
# and generated source output
# --encoding <encoding>
# use <encoding> in generated source according to pep-0263
# --help show this message
# Debugging Options:
# --asm | -a include byte-code (disables --verify)
# --grammar | -g show matching grammar
# --tree={before|after}
# -t {before|after} include syntax before (or after) tree transformation
# (disables --verify)
# --tree++ | -T add template rules to --tree=before when possible
# Extensions of generated files:
# '.pyc_dis' '.pyo_dis' successfully decompiled (and verified if --verify)
# + '_unverified' successfully decompile but --verify failed
# + '_failed' decompile failed (contact author for enhancement)
# """ % (
# (program,) * 5
# )
@click.command()
@click.option(
"--asm++/--no-asm++",
"-A",
"asm_plus",
default=False,
help="show xdis assembler and tokenized assembler",
)
@click.option("--asm/--no-asm", "-a", default=False)
@click.option("--grammar/--no-grammar", "-g", "show_grammar", default=False)
@click.option("--tree/--no-tree", "-t", default=False)
@click.option(
"--tree++/--no-tree++",
"-T",
"tree_plus",
default=False,
help="show parse tree and Abstract Syntax Tree",
)
@click.option(
"--linemaps/--no-linemaps",
default=False,
help="show line number correspondencies between byte-code "
"and generated source output",
)
@click.option(
"--verify",
type=click.Choice(["run", "syntax"]),
default=None,
)
@click.option(
"--recurse/--no-recurse",
"-r",
"recurse_dirs",
default=False,
)
@click.option(
"--output",
"-o",
"outfile",
type=click.Path(
exists=True, file_okay=True, dir_okay=True, writable=True, resolve_path=True
),
required=False,
)
@click.version_option(version=__version__)
@click.option(
"--start-offset",
"start_offset",
default=0,
help="start decomplation at offset; default is 0 or the starting offset.",
)
@click.version_option(version=__version__)
@click.option(
"--stop-offset",
"stop_offset",
default=-1,
help="stop decomplation when seeing an offset greater or equal to this; default is "
"-1 which indicates no stopping point.",
)
@click.argument("files", nargs=-1, type=click.Path(readable=True), required=True)
def main_bin(
asm: bool,
asm_plus: bool,
show_grammar,
tree: bool,
tree_plus: bool,
linemaps: bool,
verify,
recurse_dirs: bool,
outfile,
start_offset: int,
stop_offset: int,
files,
):
"""
Cross Python bytecode decompiler for Python bytecode up to Python 3.8.
"""
version_tuple = sys.version_info[0:2]
if version_tuple < (3, 7):
print(
f"Error: This version of the {program} runs from Python 3.7 or greater."
f"You need another branch of this code for Python before 3.7."
f""" \n\tYou have version: {version_tuple_to_str()}."""
)
except getopt.GetoptError as e:
print("%s: %s" % (os.path.basename(sys.argv[0]), e), file=sys.stderr)
sys.exit(-1)
options = {
"showasm": None
}
for opt, val in opts:
if opt in ("-h", "--help"):
print(__doc__)
sys.exit(0)
elif opt in ("-V", "--version"):
print("%s %s" % (program, __version__))
sys.exit(0)
elif opt == "--verify":
options["do_verify"] = "strong"
elif opt == "--syntax-verify":
options["do_verify"] = "weak"
elif opt == "--fragments":
options["do_fragments"] = True
elif opt == "--verify-run":
options["do_verify"] = "verify-run"
elif opt == "--linemaps":
options["do_linemaps"] = True
elif opt in ("--asm", "-a"):
if options["showasm"] == None:
options["showasm"] = "after"
else:
options["showasm"] = "both"
options["do_verify"] = None
elif opt in ("--tree", "-t"):
if "showast" not in options:
options["showast"] = {}
if val == "before":
options["showast"][val] = True
elif val == "after":
options["showast"][val] = True
else:
options["showast"]["before"] = True
options["do_verify"] = None
elif opt in ("--tree+", "-T"):
if "showast" not in options:
options["showast"] = {}
options["showast"]["after"] = True
options["showast"]["before"] = True
options["do_verify"] = None
elif opt in ("--grammar", "-g"):
options["showgrammar"] = True
elif opt == "-o":
outfile = val
elif opt in ("--timestamp", "-d"):
timestamp = True
elif opt in ("--compile", "-c"):
source_paths.append(val)
elif opt == "-p":
numproc = int(val)
elif opt in ("--recurse", "-r"):
recurse_dirs = True
elif opt == "--encoding":
options["source_encoding"] = val
else:
print(opt, file=sys.stderr)
usage()
numproc = 0
out_base = None
# expand directory if specified
out_base = None
source_paths: List[str] = []
timestamp = False
timestampfmt = "# %Y.%m.%d %H:%M:%S %Z"
pyc_paths = files
# Expand directory if "recurse" was specified.
if recurse_dirs:
expanded_files = []
for f in pyc_paths:
@@ -194,15 +210,32 @@ def main_bin():
out_base = outfile
outfile = None
# --asm (-a) selects showasm="after"; --asm++ (-A) selects showasm="both".
if asm_plus or asm:
asm_opt = "both" if asm_plus else "after"
else:
asm_opt = None
if timestamp:
print(time.strftime(timestampfmt))
if numproc <= 1:
show_ast = {"before": tree or tree_plus, "after": tree_plus}
try:
result = main(
src_base, out_base, pyc_paths, source_paths, outfile, **options
src_base,
out_base,
pyc_paths,
source_paths,
outfile,
showasm=asm_opt,
showgrammar=show_grammar,
showast=show_ast,
do_verify=verify,
do_linemaps=linemaps,
start_offset=start_offset,
stop_offset=stop_offset,
)
result = [options.get("do_verify", None)] + list(result)
if len(pyc_paths) > 1:
mess = status_msg(*result)
print("# " + mess)

View File

@@ -15,9 +15,11 @@
import datetime
import os
import os.path as osp
import py_compile
import sys
from typing import Any, Optional, Tuple
import tempfile
from typing import Any, Optional, TextIO, Tuple
from xdis import iscode
from xdis.load import load_module
@@ -38,9 +40,9 @@ def _get_outstream(outfile: str) -> Any:
"""
Return an opened output file descriptor for ``outfile``.
"""
dir_name = os.path.dirname(outfile)
dir_name = osp.dirname(outfile)
failed_file = outfile + "_failed"
if os.path.exists(failed_file):
if osp.exists(failed_file):
os.remove(failed_file)
try:
os.makedirs(dir_name)
@@ -52,7 +54,7 @@ def _get_outstream(outfile: str) -> Any:
def decompile(
co,
bytecode_version: Tuple[int] = PYTHON_VERSION_TRIPLE,
out=sys.stdout,
out: Optional[TextIO] = sys.stdout,
showasm: Optional[str] = None,
showast={},
timestamp=None,
@@ -60,11 +62,13 @@ def decompile(
source_encoding=None,
code_objects={},
source_size=None,
is_pypy=False,
is_pypy: bool = False,
magic_int=None,
mapstream=None,
do_fragments=False,
compile_mode="exec",
start_offset: int = 0,
stop_offset: int = -1,
) -> Any:
"""
ingests and deparses a given code block 'co'
@@ -132,6 +136,7 @@ def decompile(
debug_opts=debug_opts,
)
header_count = 3 + len(sys_version_lines)
if deparsed is not None:
linemap = [
(line_no, deparsed.source_linemap[line_no] + header_count)
for line_no in sorted(deparsed.source_linemap.keys())
@@ -149,8 +154,11 @@ def decompile(
is_pypy=is_pypy,
debug_opts=debug_opts,
compile_mode=compile_mode,
start_offset=start_offset,
stop_offset=stop_offset,
)
pass
real_out.write("\n")
return deparsed
except pysource.SourceWalkerError as e:
# deparsing failed
@@ -175,13 +183,15 @@ def compile_file(source_path: str) -> str:
def decompile_file(
filename: str,
outstream=None,
showasm=None,
outstream: Optional[TextIO] = None,
showasm: Optional[str] = None,
showast={},
showgrammar=False,
source_encoding=None,
mapstream=None,
do_fragments=False,
start_offset=0,
stop_offset=-1,
) -> Any:
"""
decompile Python byte-code file (.pyc). Return objects to
@@ -211,6 +221,8 @@ def decompile_file(
is_pypy=is_pypy,
magic_int=magic_int,
mapstream=mapstream,
start_offset=start_offset,
stop_offset=stop_offset,
),
)
else:
@@ -231,6 +243,8 @@ def decompile_file(
mapstream=mapstream,
do_fragments=do_fragments,
compile_mode="exec",
start_offset=start_offset,
stop_offset=stop_offset,
)
]
return deparsed
@@ -242,13 +256,16 @@ def main(
out_base: Optional[str],
compiled_files: list,
source_files: list,
outfile=None,
outfile: Optional[str] = None,
showasm: Optional[str] = None,
showast={},
showgrammar=False,
do_verify: Optional[str] = None,
showgrammar: bool = False,
source_encoding=None,
do_linemaps=False,
do_fragments=False,
start_offset: int = 0,
stop_offset: int = -1,
) -> Tuple[int, int, int, int]:
"""
in_base base directory for input files
@@ -261,7 +278,8 @@ def main(
- files below out_base out_base=...
- stdout out_base=None, outfile=None
"""
tot_files = okay_files = failed_files = verify_failed_files = 0
tot_files = okay_files = failed_files = 0
verify_failed_files = 0 if do_verify else 0
current_outfile = outfile
linemap_stream = None
@@ -269,9 +287,9 @@ def main(
compiled_files.append(compile_file(source_path))
for filename in compiled_files:
infile = os.path.join(in_base, filename)
infile = osp.join(in_base, filename)
# print("XXX", infile)
if not os.path.exists(infile):
if not osp.exists(infile):
sys.stderr.write(f"File '{infile}' doesn't exist. Skipped\n")
continue
@@ -284,14 +302,19 @@ def main(
if outfile: # outfile was given as parameter
outstream = _get_outstream(outfile)
elif out_base is None:
out_base = tempfile.mkdtemp(prefix="py-dis-")
if do_verify and filename.endswith(".pyc"):
current_outfile = osp.join(out_base, filename[0:-1])
outstream = open(current_outfile, "w")
else:
outstream = sys.stdout
if do_linemaps:
linemap_stream = sys.stdout
else:
if filename.endswith(".pyc"):
current_outfile = os.path.join(out_base, filename[0:-1])
current_outfile = osp.join(out_base, filename[0:-1])
else:
current_outfile = os.path.join(out_base, filename) + "_dis"
current_outfile = osp.join(out_base, filename) + "_dis"
pass
pass
@@ -299,9 +322,9 @@ def main(
# print(current_outfile, file=sys.stderr)
# Try to uncompile the input file
# Try to decompile the input file.
try:
deparsed = decompile_file(
deparsed_objects = decompile_file(
infile,
outstream,
showasm,
@@ -310,11 +333,13 @@ def main(
source_encoding,
linemap_stream,
do_fragments,
start_offset,
stop_offset,
)
if do_fragments:
for d in deparsed:
for deparsed_object in deparsed_objects:
last_mod = None
offsets = d.offsets
offsets = deparsed_object.offsets
for e in sorted(
[k for k in offsets.keys() if isinstance(k[1], int)]
):
@@ -323,11 +348,48 @@ def main(
outstream.write(f"{line}\n{e[0]}\n{line}\n")
last_mod = e[0]
info = offsets[e]
extract_info = d.extract_node_info(info)
extract_info = deparse_object.extract_node_info(info)
outstream.write(f"{info.node.format().strip()}" + "\n")
outstream.write(extract_info.selectedLine + "\n")
outstream.write(extract_info.markerLine + "\n\n")
pass
if do_verify:
for deparsed_object in deparsed_objects:
deparsed_object.f.close()
if PYTHON_VERSION_TRIPLE[:2] != deparsed_object.version[:2]:
sys.stdout.write(
f"\n# skipping running {deparsed_object.f.name}; it is"
f"{version_tuple_to_str(deparsed_object.version, end=2)}, "
"and we are "
f"{version_tuple_to_str(PYTHON_VERSION_TRIPLE, end=2)}\n"
)
else:
check_type = "syntax check"
if do_verify == "run":
check_type = "run"
result = subprocess.run(
[sys.executable, deparsed_object.f.name],
capture_output=True,
)
valid = result.returncode == 0
output = result.stdout.decode()
if output:
print(output)
pass
if not valid:
print(result.stderr.decode())
else:
valid = syntax_check(deparsed_object.f.name)
if not valid:
verify_failed_files += 1
sys.stderr.write(
f"\n# {check_type} failed on file {deparsed_object.f.name}\n"
)
# sys.stderr.write(f"Ran {deparsed_object.f.name}\n")
pass
tot_files += 1
except (ValueError, SyntaxError, ParserError, pysource.SourceWalkerError) as e:

View File

@@ -2036,6 +2036,8 @@ def code_deparse(
code_objects={},
compile_mode="exec",
walker=FragmentsWalker,
start_offset: int = 0,
stop_offset: int = -1,
):
"""
Convert the code object co into a python source fragment.
@@ -2070,6 +2072,22 @@ def code_deparse(
tokens, customize = scanner.ingest(co, code_objects=code_objects, show_asm=show_asm)
tokens, customize = scanner.ingest(co)
if start_offset > 0:
for i, t in enumerate(tokens):
# If t.offset is a string, we want to skip this.
if isinstance(t.offset, int) and t.offset >= start_offset:
tokens = tokens[i:]
break
if stop_offset > -1:
for i, t in enumerate(tokens):
# In contrast to the test for start_offset, if t.offset is
# a string, we want to extract the integer offset value.
if t.off2int() >= stop_offset:
tokens = tokens[:i]
break
maybe_show_asm(show_asm, tokens)
debug_parser = dict(PARSER_DEFAULT_DEBUG)