Merge pull request #482 from rocky/sync-with-decompile3

Sync with decompile3
This commit is contained in:
R. Bernstein
2024-02-05 17:10:13 -05:00
committed by GitHub
2 changed files with 82 additions and 51 deletions

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2016, 2018-2023 by Rocky Bernstein
# Copyright (c) 2016, 2018-2024 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 1999 John Aycock
@@ -21,13 +21,11 @@ scanner/ingestion module. From here we call various version-specific
scanners, e.g. for Python 2.7 or 3.4.
"""
from types import ModuleType
from typing import Optional, Tuple, Union
from array import array
from collections import namedtuple
from types import ModuleType
from typing import Optional, Tuple, Union
from uncompyle6.scanners.tok import Token
from xdis.version_info import IS_PYPY, version_tuple_to_str
import xdis
from xdis import (
Bytecode,
@@ -37,6 +35,9 @@ from xdis import (
instruction_size,
next_offset,
)
from xdis.version_info import IS_PYPY, version_tuple_to_str
from uncompyle6.scanners.tok import Token
# The byte code versions we support.
# Note: these all have to be tuples of 2 ints
@@ -80,6 +81,7 @@ CANONIC2VERSION["3.5.2"] = 3.5
# FIXME: DRY
L65536 = 65536
def long(num):
return num
@@ -96,7 +98,6 @@ class Code(object):
"""
def __init__(self, co, scanner, classname=None, show_asm=None):
# Full initialization is given below, but for linters
# we'll set up some initial values.
self.co_code = None # Really either bytes for >= 3.0 and string in < 3.0
@@ -133,9 +134,7 @@ class Scanner:
# FIXME: This weird Python2 behavior is not Python3
self.resetTokenClass()
def bound_collection_from_tokens(
self, tokens, t, i, collection_type
):
def bound_collection_from_tokens(self, tokens, t, i, collection_type):
count = t.attr
assert isinstance(count, int)
@@ -429,7 +428,7 @@ class Scanner:
"""
try:
None in instr
except:
except Exception:
instr = [instr]
first = self.offset2inst_index[start]
@@ -620,16 +619,14 @@ def parse_fn_counts_30_35(argc: int) -> Tuple[int, int, int]:
def get_scanner(version: Union[str, tuple], is_pypy=False, show_asm=None) -> Scanner:
# If version is a string, turn that into the corresponding float.
if isinstance(version, str):
if version not in canonic_python_version:
raise RuntimeError("Unknown Python version in xdis %s" % version)
raise RuntimeError(f"Unknown Python version in xdis {version}")
canonic_version = canonic_python_version[version]
if canonic_version not in CANONIC2VERSION:
raise RuntimeError(
"Unsupported Python version %s (canonic %s)"
% (version, canonic_version)
f"Unsupported Python version {version} (canonic {canonic_version})"
)
version = CANONIC2VERSION[canonic_version]
@@ -680,5 +677,6 @@ if __name__ == "__main__":
# scanner = get_scanner('2.7.13', True)
# scanner = get_scanner(sys.version[:5], False)
from xdis.version_info import PYTHON_VERSION_TRIPLE
scanner = get_scanner(PYTHON_VERSION_TRIPLE, IS_PYPY, True)
tokens, customize = scanner.ingest(co, {}, show_asm="after")

View File

@@ -131,6 +131,7 @@ Python.
import sys
from io import StringIO
from typing import Optional
from spark_parser import GenericASTTraversal
from xdis import COMPILER_FLAG_BIT, iscode
@@ -159,7 +160,11 @@ from uncompyle6.semantics.consts import (
)
from uncompyle6.semantics.customize import customize_for_version
from uncompyle6.semantics.gencomp import ComprehensionMixin
from uncompyle6.semantics.helper import find_globals_and_nonlocals, print_docstring
from uncompyle6.semantics.helper import (
find_globals_and_nonlocals,
is_lambda_mode,
print_docstring,
)
from uncompyle6.semantics.make_function1 import make_function1
from uncompyle6.semantics.make_function2 import make_function2
from uncompyle6.semantics.make_function3 import make_function3
@@ -213,7 +218,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
def __init__(
self,
version,
version: tuple,
out,
scanner,
showast=TREE_DEFAULT_DEBUG,
@@ -223,7 +228,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
linestarts={},
tolerate_errors=False,
):
"""`version' is the Python version (a float) of the Python dialect
"""`version' is the Python version of the Python dialect
of both the syntax tree and language we should produce.
`out' is IO-like file pointer to where the output should go. It
@@ -235,9 +240,12 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
If `showast' is True, we print the syntax tree.
`compile_mode' is is either 'exec' or 'single'. It is the compile
mode that was used to create the Syntax Tree and specifies a
grammar variant within a Python version to use.
`compile_mode` is either `exec`, `single` or `lambda`.
For `lambda`, the grammar that can be used in lambda
expressions is used. Otherwise, it is the compile mode that
was used to create the Syntax Tree and specifies a grammar
variant within a Python version to use.
`is_pypy` should be True if the Syntax Tree was generated for PyPy.
@@ -262,10 +270,8 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
self.currentclass = None
self.classes = []
self.debug_parser = dict(debug_parser)
# Initialize p_lambda on demand
self.line_number = 1
self.linemap = {}
self.p_lambda = None
self.params = params
self.param_stack = []
self.ERROR = None
@@ -276,11 +282,15 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
self.pending_newlines = 0
self.linestarts = linestarts
self.treeTransform = TreeTransform(version=self.version, show_ast=showast)
# FIXME: have p.insts update in a better way
# modularity is broken here
self.insts = scanner.insts
self.offset2inst_index = scanner.offset2inst_index
# Initialize p_lambda on demand
self.p_lambda = None
# This is in Python 2.6 on. It changes the way
# strings get interpreted. See n_LOAD_CONST
self.FUTURE_UNICODE_LITERALS = False
@@ -507,19 +517,19 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
def pp_tuple(self, tup):
"""Pretty print a tuple"""
last_line = self.f.getvalue().split("\n")[-1]
l = len(last_line) + 1
indent = " " * l
ll = len(last_line) + 1
indent = " " * ll
self.write("(")
sep = ""
for item in tup:
self.write(sep)
l += len(sep)
ll += len(sep)
s = better_repr(item, self.version)
l += len(s)
ll += len(s)
self.write(s)
sep = ","
if l > LINE_LENGTH:
l = 0
if ll > LINE_LENGTH:
ll = 0
sep += "\n" + indent
else:
sep += " "
@@ -699,9 +709,10 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
"""
# print("-----")
# print(startnode)
# print(startnode.kind)
# print(entry[0])
# print('======')
fmt = entry[0]
arg = 1
i = 0
@@ -794,13 +805,9 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
node[index].kind,
)
else:
assert (
node[tup[0]] in tup[1]
), "at %s[%d], expected to be in '%s' node; got '%s'" % (
node.kind,
arg,
index[1],
node[index[0]].kind,
assert node[tup[0]] in tup[1], (
f"at {node.kind}[{tup[0]}], expected to be in '{tup[1]}' "
f"node; got '{node[tup[0]].kind}'"
)
else:
@@ -869,7 +876,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
d = node.__dict__
try:
self.write(eval(expr, d, d))
except:
except Exception:
raise
m = escape.search(fmt, i)
self.write(fmt[i:])
@@ -1190,7 +1197,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
is_lambda=False,
noneInNames=False,
is_top_level_module=False,
):
) -> GenericASTTraversal:
# FIXME: DRY with fragments.py
# assert isinstance(tokens[0], Token)
@@ -1242,7 +1249,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
# Build a parse tree from a tokenized and massaged disassembly.
try:
# FIXME: have p.insts update in a better way
# modularity is broken here
# Modularity is broken here.
p_insts = self.p.insts
self.p.insts = self.scanner.insts
self.p.offset2inst_index = self.scanner.offset2inst_index
@@ -1255,6 +1262,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
checker(ast, False, self.ast_errors)
self.customize(customize)
transform_tree = self.treeTransform.transform(ast, code)
self.maybe_show_tree(ast, phase="before")
@@ -1270,13 +1278,15 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
def code_deparse(
co,
out=sys.stdout,
version=None,
version: Optional[tuple] = None,
debug_opts=DEFAULT_DEBUG_OPTS,
code_objects={},
compile_mode="exec",
is_pypy=IS_PYPY,
walker=SourceWalker,
):
start_offset: int = 0,
stop_offset: int = -1,
) -> Optional[SourceWalker]:
"""
ingests and deparses a given code block 'co'. If version is None,
we will use the current Python interpreter version.
@@ -1284,6 +1294,9 @@ def code_deparse(
assert iscode(co)
if out is None:
out = sys.stdout
if version is None:
version = PYTHON_VERSION_TRIPLE
@@ -1294,6 +1307,21 @@ def code_deparse(
co, code_objects=code_objects, show_asm=debug_opts["asm"]
)
if start_offset > 0:
for i, t in enumerate(tokens):
# If t.offset is a string, we want to skip this.
if isinstance(t.offset, int) and t.offset >= start_offset:
tokens = tokens[i:]
break
if stop_offset > -1:
for i, t in enumerate(tokens):
# In contrast to the test for start_offset, if t.offset is
# a string, we want to extract the integer offset value.
if t.off2int() >= stop_offset:
tokens = tokens[:i]
break
debug_parser = debug_opts.get("grammar", dict(PARSER_DEFAULT_DEBUG))
# Build Syntax Tree from disassembly.
@@ -1317,7 +1345,7 @@ def code_deparse(
tokens,
customize,
co,
is_lambda=(compile_mode == "lambda"),
is_lambda=is_lambda_mode(compile_mode),
is_top_level_module=is_top_level_module,
)
@@ -1326,7 +1354,7 @@ def code_deparse(
return None
# FIXME use a lookup table here.
if compile_mode == "lambda":
if is_lambda_mode(compile_mode):
expected_start = "lambda_start"
elif compile_mode == "eval":
expected_start = "expr_start"
@@ -1339,10 +1367,12 @@ def code_deparse(
expected_start = None
else:
expected_start = None
if expected_start:
assert (
deparsed.ast == expected_start
), f"Should have parsed grammar start to '{expected_start}'; got: {deparsed.ast.kind}"
assert deparsed.ast == expected_start, (
f"Should have parsed grammar start to '{expected_start}'; "
f"got: {deparsed.ast.kind}"
)
# save memory
del tokens
@@ -1382,7 +1412,7 @@ def code_deparse(
deparsed.ast,
name=co.co_name,
customize=customize,
is_lambda=compile_mode == "lambda",
is_lambda=is_lambda_mode(compile_mode),
debug_opts=debug_opts,
)
@@ -1410,9 +1440,12 @@ def deparse_code2str(
compile_mode="exec",
is_pypy=IS_PYPY,
walker=SourceWalker,
):
"""Return the deparsed text for a Python code object. `out` is where any intermediate
output for assembly or tree output will be sent.
start_offset: int = 0,
stop_offset: int = -1,
) -> str:
"""
Return the deparsed text for a Python code object. `out` is where
any intermediate output for assembly or tree output will be sent.
"""
return code_deparse(
code,
@@ -1427,6 +1460,7 @@ def deparse_code2str(
if __name__ == "__main__":
def deparse_test(co):
"""This is a docstring"""
s = deparse_code2str(co)
@@ -1434,5 +1468,4 @@ if __name__ == "__main__":
print(s)
return
deparse_test(deparse_test.__code__)