From f605f859aee4e76573c4ba309cb0752a74f308d5 Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 5 Feb 2024 16:57:59 -0500 Subject: [PATCH] Partial sync with decompyle3 --- uncompyle6/semantics/pysource.py | 107 ++++++++++++++++++++----------- 1 file changed, 70 insertions(+), 37 deletions(-) diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 3b62481c..71459685 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -131,6 +131,7 @@ Python. import sys from io import StringIO +from typing import Optional from spark_parser import GenericASTTraversal from xdis import COMPILER_FLAG_BIT, iscode @@ -159,7 +160,11 @@ from uncompyle6.semantics.consts import ( ) from uncompyle6.semantics.customize import customize_for_version from uncompyle6.semantics.gencomp import ComprehensionMixin -from uncompyle6.semantics.helper import find_globals_and_nonlocals, print_docstring +from uncompyle6.semantics.helper import ( + find_globals_and_nonlocals, + is_lambda_mode, + print_docstring, +) from uncompyle6.semantics.make_function1 import make_function1 from uncompyle6.semantics.make_function2 import make_function2 from uncompyle6.semantics.make_function3 import make_function3 @@ -213,7 +218,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): def __init__( self, - version, + version: tuple, out, scanner, showast=TREE_DEFAULT_DEBUG, @@ -223,7 +228,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): linestarts={}, tolerate_errors=False, ): - """`version' is the Python version (a float) of the Python dialect + """`version' is the Python version of the Python dialect of both the syntax tree and language we should produce. `out' is IO-like file pointer to where the output should go. It @@ -235,9 +240,12 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): If `showast' is True, we print the syntax tree. - `compile_mode' is is either 'exec' or 'single'. It is the compile - mode that was used to create the Syntax Tree and specifies a - grammar variant within a Python version to use. + `compile_mode` is is either `exec`, `single` or `lambda`. + + For `lambda`, the grammar that can be used in lambda + expressions is used. Otherwise, it is the compile mode that + was used to create the Syntax Tree and specifies a grammar + variant within a Python version to use. `is_pypy` should be True if the Syntax Tree was generated for PyPy. @@ -262,10 +270,8 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): self.currentclass = None self.classes = [] self.debug_parser = dict(debug_parser) - # Initialize p_lambda on demand self.line_number = 1 self.linemap = {} - self.p_lambda = None self.params = params self.param_stack = [] self.ERROR = None @@ -276,11 +282,15 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): self.pending_newlines = 0 self.linestarts = linestarts self.treeTransform = TreeTransform(version=self.version, show_ast=showast) + # FIXME: have p.insts update in a better way # modularity is broken here self.insts = scanner.insts self.offset2inst_index = scanner.offset2inst_index + # Initialize p_lambda on demand + self.p_lambda = None + # This is in Python 2.6 on. It changes the way # strings get interpreted. See n_LOAD_CONST self.FUTURE_UNICODE_LITERALS = False @@ -507,19 +517,19 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): def pp_tuple(self, tup): """Pretty print a tuple""" last_line = self.f.getvalue().split("\n")[-1] - l = len(last_line) + 1 - indent = " " * l + ll = len(last_line) + 1 + indent = " " * ll self.write("(") sep = "" for item in tup: self.write(sep) - l += len(sep) + ll += len(sep) s = better_repr(item, self.version) - l += len(s) + ll += len(s) self.write(s) sep = "," - if l > LINE_LENGTH: - l = 0 + if ll > LINE_LENGTH: + ll = 0 sep += "\n" + indent else: sep += " " @@ -699,9 +709,10 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): """ # print("-----") - # print(startnode) + # print(startnode.kind) # print(entry[0]) # print('======') + fmt = entry[0] arg = 1 i = 0 @@ -794,13 +805,9 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): node[index].kind, ) else: - assert ( - node[tup[0]] in tup[1] - ), "at %s[%d], expected to be in '%s' node; got '%s'" % ( - node.kind, - arg, - index[1], - node[index[0]].kind, + assert node[tup[0]] in tup[1], ( + f"at {node.kind}[{tup[0]}], expected to be in '{tup[1]}' " + f"node; got '{node[tup[0]].kind}'" ) else: @@ -869,7 +876,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): d = node.__dict__ try: self.write(eval(expr, d, d)) - except: + except Exception: raise m = escape.search(fmt, i) self.write(fmt[i:]) @@ -1190,7 +1197,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): is_lambda=False, noneInNames=False, is_top_level_module=False, - ): + ) -> GenericASTTraversal: # FIXME: DRY with fragments.py # assert isinstance(tokens[0], Token) @@ -1242,7 +1249,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): # Build a parse tree from a tokenized and massaged disassembly. try: # FIXME: have p.insts update in a better way - # modularity is broken here + # Modularity is broken here. p_insts = self.p.insts self.p.insts = self.scanner.insts self.p.offset2inst_index = self.scanner.offset2inst_index @@ -1255,6 +1262,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): checker(ast, False, self.ast_errors) self.customize(customize) + transform_tree = self.treeTransform.transform(ast, code) self.maybe_show_tree(ast, phase="before") @@ -1270,13 +1278,15 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): def code_deparse( co, out=sys.stdout, - version=None, + version: Optional[tuple] = None, debug_opts=DEFAULT_DEBUG_OPTS, code_objects={}, compile_mode="exec", is_pypy=IS_PYPY, walker=SourceWalker, -): + start_offset: int = 0, + stop_offset: int = -1, +) -> Optional[SourceWalker]: """ ingests and deparses a given code block 'co'. If version is None, we will use the current Python interpreter version. @@ -1284,6 +1294,9 @@ def code_deparse( assert iscode(co) + if out is None: + out = sys.stdout + if version is None: version = PYTHON_VERSION_TRIPLE @@ -1294,6 +1307,21 @@ def code_deparse( co, code_objects=code_objects, show_asm=debug_opts["asm"] ) + if start_offset > 0: + for i, t in enumerate(tokens): + # If t.offset is a string, we want to skip this. + if isinstance(t.offset, int) and t.offset >= start_offset: + tokens = tokens[i:] + break + + if stop_offset > -1: + for i, t in enumerate(tokens): + # In contrast to the test for start_offset If t.offset is + # a string, we want to extract the integer offset value. + if t.off2int() >= stop_offset: + tokens = tokens[:i] + break + debug_parser = debug_opts.get("grammar", dict(PARSER_DEFAULT_DEBUG)) # Build Syntax Tree from disassembly. @@ -1317,7 +1345,7 @@ def code_deparse( tokens, customize, co, - is_lambda=(compile_mode == "lambda"), + is_lambda=is_lambda_mode(compile_mode), is_top_level_module=is_top_level_module, ) @@ -1326,7 +1354,7 @@ def code_deparse( return None # FIXME use a lookup table here. - if compile_mode == "lambda": + if is_lambda_mode(compile_mode): expected_start = "lambda_start" elif compile_mode == "eval": expected_start = "expr_start" @@ -1339,10 +1367,12 @@ def code_deparse( expected_start = None else: expected_start = None + if expected_start: - assert ( - deparsed.ast == expected_start - ), f"Should have parsed grammar start to '{expected_start}'; got: {deparsed.ast.kind}" + assert deparsed.ast == expected_start, ( + f"Should have parsed grammar start to '{expected_start}'; " + f"got: {deparsed.ast.kind}" + ) # save memory del tokens @@ -1382,7 +1412,7 @@ def code_deparse( deparsed.ast, name=co.co_name, customize=customize, - is_lambda=compile_mode == "lambda", + is_lambda=is_lambda_mode(compile_mode), debug_opts=debug_opts, ) @@ -1410,9 +1440,12 @@ def deparse_code2str( compile_mode="exec", is_pypy=IS_PYPY, walker=SourceWalker, -): - """Return the deparsed text for a Python code object. `out` is where any intermediate - output for assembly or tree output will be sent. + start_offset: int = 0, + stop_offset: int = -1, +) -> str: + """ + Return the deparsed text for a Python code object. `out` is where + any intermediate output for assembly or tree output will be sent. """ return code_deparse( code, @@ -1427,6 +1460,7 @@ def deparse_code2str( if __name__ == "__main__": + def deparse_test(co): """This is a docstring""" s = deparse_code2str(co) @@ -1434,5 +1468,4 @@ if __name__ == "__main__": print(s) return - deparse_test(deparse_test.__code__)