diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 30539a7d..69ab2749 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -39,10 +39,10 @@ from typing import Optional, Tuple from xdis import iscode, instruction_size, Instruction from xdis.bytecode import _get_const_info -from xdis.codetype import UnicodeForPython3 from uncompyle6.scanners.tok import Token from uncompyle6.scanner import parse_fn_counts_30_35 +from uncompyle6.util import get_code_name import xdis # Get all the opcodes into globals @@ -209,11 +209,18 @@ class Scanner3(Scanner): return def bound_collection_from_inst( - self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int, collection_type: str + self, + insts: list, + next_tokens: list, + inst: Instruction, + t: Token, + i: int, + collection_type: str, ) -> Optional[list]: """ - Try to a replace sequence of instruction that ends with a BUILD_xxx with a sequence that can - be parsed much faster, but inserting the token boundary at the beginning of the sequence. + Try to a replace sequence of instruction that ends with a + BUILD_xxx with a sequence that can be parsed much faster, but + inserting the token boundary at the beginning of the sequence. """ count = t.attr assert isinstance(count, int) @@ -291,10 +298,12 @@ class Scanner3(Scanner): return new_tokens def bound_map_from_inst( - self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int) -> Optional[list]: + self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int + ) -> Optional[list]: """ - Try to a sequence of instruction that ends with a BUILD_MAP into a sequence that can - be parsed much faster, but inserting the token boundary at the beginning of the sequence. + Try to a sequence of instruction that ends with a BUILD_MAP into + a sequence that can be parsed much faster, but inserting the + token boundary at the beginning of the sequence. """ count = t.attr assert isinstance(count, int) @@ -309,21 +318,18 @@ class Scanner3(Scanner): assert (count * 2) <= i for j in range(collection_start, i, 2): - if insts[j].opname not in ( - "LOAD_CONST", - ): + if insts[j].opname not in ("LOAD_CONST",): return None - if insts[j+1].opname not in ( - "LOAD_CONST", - ): + if insts[j + 1].opname not in ("LOAD_CONST",): return None collection_start = i - (2 * count) collection_enum = CONST_COLLECTIONS.index("CONST_MAP") - # If we get here, all instructions before tokens[i] are LOAD_CONST and we can replace - # add a boundary marker and change LOAD_CONST to something else - new_tokens = next_tokens[:-(2*count)] + # If we get here, all instructions before tokens[i] are LOAD_CONST and + # we can replace add a boundary marker and change LOAD_CONST to + # something else. + new_tokens = next_tokens[: -(2 * count)] start_offset = insts[collection_start].offset new_tokens.append( Token( @@ -353,10 +359,10 @@ class Scanner3(Scanner): new_tokens.append( Token( opname="ADD_VALUE", - attr=insts[j+1].argval, - pattr=insts[j+1].argrepr, - offset=insts[j+1].offset, - linestart=insts[j+1].starts_line, + attr=insts[j + 1].argval, + pattr=insts[j + 1].argrepr, + offset=insts[j + 1].offset, + linestart=insts[j + 1].starts_line, has_arg=True, has_extended_arg=False, opc=self.opc, @@ -376,8 +382,9 @@ class Scanner3(Scanner): ) return new_tokens - def ingest(self, co, classname=None, code_objects={}, show_asm=None - ) -> Tuple[list, dict]: + def ingest( + self, co, classname=None, code_objects={}, show_asm=None + ) -> Tuple[list, dict]: """ Create "tokens" the bytecode of an Python code object. Largely these are the opcode name, but in some cases that has been modified to make parsing @@ -387,14 +394,17 @@ class Scanner3(Scanner): Some transformations are made to assist the deparsing grammar: - various types of LOAD_CONST's are categorized in terms of what they load - COME_FROM instructions are added to assist parsing control structures - - operands with stack argument counts or flag masks are appended to the opcode name, e.g.: + - operands with stack argument counts or flag masks are appended to the + opcode name, e.g.: * BUILD_LIST, BUILD_SET - * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments + * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional + arguments - EXTENDED_ARGS instructions are removed - Also, when we encounter certain tokens, we add them to a set which will cause custom - grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST - cause specific rules for the specific number of arguments they take. + Also, when we encounter certain tokens, we add them to a set + which will cause custom grammar rules. Specifically, variable + arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific rules + for the specific number of arguments they take. """ if not show_asm: @@ -420,7 +430,6 @@ class Scanner3(Scanner): n = len(self.insts) for i, inst in enumerate(self.insts): - opname = inst.opname # We need to detect the difference between: # raise AssertionError @@ -437,12 +446,12 @@ class Scanner3(Scanner): prev_inst = self.insts[i - 1] assert_can_follow = ( prev_inst.opname in ("JUMP_IF_TRUE", "JUMP_IF_FALSE") - and i + 1 < n ) + and i + 1 < n + ) jump_if_inst = prev_inst else: assert_can_follow = ( - opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE") - and i + 1 < n + opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE") and i + 1 < n ) jump_if_inst = inst if assert_can_follow: @@ -452,7 +461,9 @@ class Scanner3(Scanner): and next_inst.argval == "AssertionError" and jump_if_inst.argval ): - raise_idx = self.offset2inst_index[self.prev_op[jump_if_inst.argval]] + raise_idx = self.offset2inst_index[ + self.prev_op[jump_if_inst.argval] + ] raise_inst = self.insts[raise_idx] if raise_inst.opname.startswith("RAISE_VARARGS"): self.load_asserts.add(next_inst.offset) @@ -468,22 +479,21 @@ class Scanner3(Scanner): new_tokens = [] for i, inst in enumerate(self.insts): - opname = inst.opname argval = inst.argval pattr = inst.argrepr t = Token( - opname=opname, - attr=argval, - pattr=pattr, - offset=inst.offset, - linestart=inst.starts_line, - op=inst.opcode, - has_arg=inst.has_arg, - has_extended_arg=inst.has_extended_arg, - opc=self.opc, - ) + opname=opname, + attr=argval, + pattr=pattr, + offset=inst.offset, + linestart=inst.starts_line, + op=inst.opcode, + has_arg=inst.has_arg, + has_extended_arg=inst.has_extended_arg, + opc=self.opc, + ) # things that smash new_tokens like BUILD_LIST have to come first. if opname in ( @@ -502,11 +512,13 @@ class Scanner3(Scanner): if try_tokens is not None: new_tokens = try_tokens continue - elif opname in ( - "BUILD_MAP", - ): + elif opname in ("BUILD_MAP",): try_tokens = self.bound_map_from_inst( - self.insts, new_tokens, inst, t, i, + self.insts, + new_tokens, + inst, + t, + i, ) if try_tokens is not None: new_tokens = try_tokens @@ -573,9 +585,7 @@ class Scanner3(Scanner): if op in self.opc.CONST_OPS: const = argval if iscode(const): - co_name = const.co_name - if isinstance(const.co_name, UnicodeForPython3): - co_name = const.co_name.value.decode("utf-8") + co_name = get_code_name(const) if co_name == "": assert opname == "LOAD_CONST" opname = "LOAD_LAMBDA" @@ -629,7 +639,7 @@ class Scanner3(Scanner): else: pos_args, name_pair_args, annotate_args = parse_fn_counts_30_35( inst.argval - ) + ) pattr = f"{pos_args} positional, {name_pair_args} keyword only, {annotate_args} annotated" @@ -715,11 +725,13 @@ class Scanner3(Scanner): and self.insts[i + 1].opname == "JUMP_FORWARD" ) - if (self.version[:2] == (3, 0) and self.insts[i + 1].opname == "JUMP_FORWARD" - and not is_continue): + if ( + self.version[:2] == (3, 0) + and self.insts[i + 1].opname == "JUMP_FORWARD" + and not is_continue + ): target_prev = self.offset2inst_index[self.prev_op[target]] - is_continue = ( - self.insts[target_prev].opname == "SETUP_LOOP") + is_continue = self.insts[target_prev].opname == "SETUP_LOOP" if is_continue or ( inst.offset in self.stmts @@ -736,7 +748,10 @@ class Scanner3(Scanner): # the "continue" is not on a new line. # There are other situations where we don't catch # CONTINUE as well. - if new_tokens[-1].kind == "JUMP_BACK" and new_tokens[-1].attr <= argval: + if ( + new_tokens[-1].kind == "JUMP_BACK" + and new_tokens[-1].attr <= argval + ): if new_tokens[-2].kind == "BREAK_LOOP": del new_tokens[-1] else: @@ -809,7 +824,10 @@ class Scanner3(Scanner): if inst.has_arg: label = self.fixed_jumps.get(offset) oparg = inst.arg - if self.version >= (3, 6) and self.code[offset] == self.opc.EXTENDED_ARG: + if ( + self.version >= (3, 6) + and self.code[offset] == self.opc.EXTENDED_ARG + ): j = xdis.next_offset(op, self.opc, offset) next_offset = xdis.next_offset(op, self.opc, j) else: @@ -1082,7 +1100,6 @@ class Scanner3(Scanner): and (target > offset) and pretarget.offset != offset ): - # FIXME: hack upon hack... # In some cases the pretarget can be a jump to the next instruction # and these aren't and/or's either. We limit to 3.5+ since we experienced there @@ -1104,7 +1121,6 @@ class Scanner3(Scanner): # Is it an "and" inside an "if" or "while" block if op == self.opc.POP_JUMP_IF_FALSE: - # Search for another POP_JUMP_IF_FALSE targetting the same op, # in current statement, starting from current offset, and filter # everything inside inner 'or' jumps and midline ifs @@ -1357,7 +1373,6 @@ class Scanner3(Scanner): self.fixed_jumps[offset] = rtarget self.not_continue.add(pre_rtarget) else: - # FIXME: this is very convoluted and based on rather hacky # empirical evidence. It should go a way when # we have better control-flow analysis diff --git a/uncompyle6/semantics/aligner.py b/uncompyle6/semantics/aligner.py index 1cc68a56..2db50bc2 100644 --- a/uncompyle6/semantics/aligner.py +++ b/uncompyle6/semantics/aligner.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018, 2022 by Rocky Bernstein +# Copyright (c) 2018, 2022-2023 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,41 +14,63 @@ # along with this program. If not, see . import sys -from uncompyle6.semantics.pysource import ( - SourceWalker, SourceWalkerError, find_globals, ASSIGN_DOC_STRING, RETURN_NONE) from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG -from uncompyle6 import IS_PYPY +from xdis import iscode + +from xdis.version_info import IS_PYPY +from uncompyle6.scanner import get_scanner +from uncompyle6.semantics.pysource import ( + ASSIGN_DOC_STRING, + RETURN_NONE, + SourceWalker, + SourceWalkerError, + find_globals_and_nonlocals +) +from uncompyle6.show import maybe_show_asm + +# + class AligningWalker(SourceWalker, object): - def __init__(self, version, out, scanner, showast=False, - debug_parser=PARSER_DEFAULT_DEBUG, - compile_mode='exec', is_pypy=False): - SourceWalker.__init__(self, version, out, scanner, showast, debug_parser, - compile_mode, is_pypy) + def __init__( + self, + version, + out, + scanner, + showast=False, + debug_parser=PARSER_DEFAULT_DEBUG, + compile_mode="exec", + is_pypy=False, + ): + SourceWalker.__init__( + self, version, out, scanner, showast, debug_parser, compile_mode, is_pypy + ) self.desired_line_number = 0 self.current_line_number = 0 def println(self, *data): - if data and not(len(data) == 1 and data[0] == ''): + if data and not (len(data) == 1 and data[0] == ""): self.write(*data) self.pending_newlines = max(self.pending_newlines, 1) def write(self, *data): if (len(data) == 1) and data[0] == self.indent: - diff = max(self.pending_newlines, - self.desired_line_number - self.current_line_number) - self.f.write('\n'*diff) + diff = max( + self.pending_newlines, + self.desired_line_number - self.current_line_number, + ) + self.f.write("\n" * diff) self.current_line_number += diff self.pending_newlines = 0 - if (len(data) == 0) or (len(data) == 1 and data[0] == ''): + if (len(data) == 0) or (len(data) == 1 and data[0] == ""): return - out = ''.join((str(j) for j in data)) + out = "".join((str(j) for j in data)) n = 0 for i in out: - if i == '\n': + if i == "\n": n += 1 if n == len(out): self.pending_newlines = max(self.pending_newlines, n) @@ -61,25 +83,27 @@ class AligningWalker(SourceWalker, object): break if self.pending_newlines > 0: - diff = max(self.pending_newlines, - self.desired_line_number - self.current_line_number) - self.f.write('\n'*diff) + diff = max( + self.pending_newlines, + self.desired_line_number - self.current_line_number, + ) + self.f.write("\n" * diff) self.current_line_number += diff self.pending_newlines = 0 for i in out[::-1]: - if i == '\n': + if i == "\n": self.pending_newlines += 1 else: break if self.pending_newlines: - out = out[:-self.pending_newlines] + out = out[: -self.pending_newlines] self.f.write(out) def default(self, node): mapping = self._get_mapping(node) - if hasattr(node, 'linestart'): + if hasattr(node, "linestart"): if node.linestart: self.desired_line_number = node.linestart table = mapping[0] @@ -90,25 +114,22 @@ class AligningWalker(SourceWalker, object): pass if key.type in table: - self.engine(table[key.type], node) + self.template_engine(table[key.type], node) self.prune() -from xdis import iscode -from uncompyle6.scanner import get_scanner -from uncompyle6.show import ( - maybe_show_asm, -) -# -DEFAULT_DEBUG_OPTS = { - 'asm': False, - 'tree': False, - 'grammar': False -} +DEFAULT_DEBUG_OPTS = {"asm": False, "tree": False, "grammar": False} -def code_deparse_align(co, out=sys.stderr, version=None, is_pypy=None, - debug_opts=DEFAULT_DEBUG_OPTS, - code_objects={}, compile_mode='exec'): + +def code_deparse_align( + co, + out=sys.stderr, + version=None, + is_pypy=None, + debug_opts=DEFAULT_DEBUG_OPTS, + code_objects={}, + compile_mode="exec", +): """ ingests and deparses a given code block 'co' """ @@ -120,61 +141,73 @@ def code_deparse_align(co, out=sys.stderr, version=None, is_pypy=None, if is_pypy is None: is_pypy = IS_PYPY - # store final output stream for case of error scanner = get_scanner(version, is_pypy=is_pypy) tokens, customize = scanner.ingest(co, code_objects=code_objects) - show_asm = debug_opts.get('asm', None) + show_asm = debug_opts.get("asm", None) maybe_show_asm(show_asm, tokens) debug_parser = dict(PARSER_DEFAULT_DEBUG) - show_grammar = debug_opts.get('grammar', None) - show_grammar = debug_opts.get('grammar', None) + show_grammar = debug_opts.get("grammar", None) + show_grammar = debug_opts.get("grammar", None) if show_grammar: - debug_parser['reduce'] = show_grammar - debug_parser['errorstack'] = True + debug_parser["reduce"] = show_grammar + debug_parser["errorstack"] = True # Build a parse tree from tokenized and massaged disassembly. - show_ast = debug_opts.get('ast', None) - deparsed = AligningWalker(version, scanner, out, showast=show_ast, - debug_parser=debug_parser, compile_mode=compile_mode, - is_pypy = is_pypy) + show_ast = debug_opts.get("ast", None) + deparsed = AligningWalker( + version, + scanner, + out, + showast=show_ast, + debug_parser=debug_parser, + compile_mode=compile_mode, + is_pypy=is_pypy, + ) - is_top_level_module = co.co_name == '' - deparsed.ast = deparsed.build_ast(tokens, customize, co, is_top_level_module=is_top_level_module) + is_top_level_module = co.co_name == "" + deparsed.ast = deparsed.build_ast( + tokens, customize, co, is_top_level_module=is_top_level_module + ) - assert deparsed.ast == 'stmts', 'Should have parsed grammar start' + assert deparsed.ast == "stmts", "Should have parsed grammar start" - del tokens # save memory + del tokens # save memory - deparsed.mod_globs = find_globals(deparsed.ast, set()) + (deparsed.mod_globs, _) = find_globals_and_nonlocals( + deparsed.ast, set(), set(), co, version + ) # convert leading '__doc__ = "..." into doc string try: if deparsed.ast[0][0] == ASSIGN_DOC_STRING(co.co_consts[0]): - deparsed.print_docstring('', co.co_consts[0]) + deparsed.print_docstring("", co.co_consts[0]) del deparsed.ast[0] if deparsed.ast[-1] == RETURN_NONE: - deparsed.ast.pop() # remove last node + deparsed.ast.pop() # remove last node # todo: if empty, add 'pass' - except: + except Exception: pass # What we've been waiting for: Generate Python source from the parse tree! deparsed.gen_source(deparsed.ast, co.co_name, customize) for g in sorted(deparsed.mod_globs): - deparsed.write('# global %s ## Warning: Unused global\n' % g) + deparsed.write("# global %s ## Warning: Unused global\n" % g) if deparsed.ERROR: raise SourceWalkerError("Deparsing stopped due to parse error") return deparsed -if __name__ == '__main__': + +if __name__ == "__main__": + def deparse_test(co): "This is a docstring" deparsed = code_deparse_align(co) print(deparsed.text) return + deparse_test(deparse_test.__code__) diff --git a/uncompyle6/semantics/customize3.py b/uncompyle6/semantics/customize3.py index 2f388893..798ba2f3 100644 --- a/uncompyle6/semantics/customize3.py +++ b/uncompyle6/semantics/customize3.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2021 by Rocky Bernstein +# Copyright (c) 2018-2021, 2023 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -13,23 +13,20 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Isolate Python 3 version-specific semantic actions here. +""" +Isolate Python 3 version-specific semantic actions here. """ +from xdis import iscode + from uncompyle6.semantics.consts import TABLE_DIRECT - -from xdis import co_flags_is_async, iscode -from uncompyle6.scanner import Code -from uncompyle6.semantics.helper import ( - find_code_node, - gen_function_parens_adjust, -) - -from uncompyle6.semantics.make_function3 import make_function3_annotate from uncompyle6.semantics.customize35 import customize_for_version35 from uncompyle6.semantics.customize36 import customize_for_version36 from uncompyle6.semantics.customize37 import customize_for_version37 from uncompyle6.semantics.customize38 import customize_for_version38 +from uncompyle6.semantics.helper import find_code_node, gen_function_parens_adjust +from uncompyle6.semantics.make_function3 import make_function3_annotate +from uncompyle6.util import get_code_name def customize_for_version3(self, version): @@ -51,7 +48,7 @@ def customize_for_version3(self, version): "import_cont": (", %c", 2), "kwarg": ("%[0]{attr}=%c", 1), "raise_stmt2": ("%|raise %c from %c\n", 0, 1), - "tf_tryelsestmtl3": ( '%c%-%c%|else:\n%+%c', 1, 3, 5 ), + "tf_tryelsestmtl3": ("%c%-%c%|else:\n%+%c", 1, 3, 5), "store_locals": ("%|# inspect.currentframe().f_locals = __locals__\n",), "with": ("%|with %c:\n%+%c%-", 0, 3), "withasstmt": ("%|with %c as (%c):\n%+%c%-", 0, 2, 3), @@ -67,22 +64,22 @@ def customize_for_version3(self, version): # are different. See test_fileio.py for an example that shows this. def tryfinallystmt(node): suite_stmts = node[1][0] - if len(suite_stmts) == 1 and suite_stmts[0] == 'stmt': + if len(suite_stmts) == 1 and suite_stmts[0] == "stmt": stmt = suite_stmts[0] try_something = stmt[0] if try_something == "try_except": try_something.kind = "tf_try_except" if try_something.kind.startswith("tryelsestmt"): if try_something == "tryelsestmtl3": - try_something.kind = 'tf_tryelsestmtl3' + try_something.kind = "tf_tryelsestmtl3" else: - try_something.kind = 'tf_tryelsestmt' + try_something.kind = "tf_tryelsestmt" self.default(node) + self.n_tryfinallystmt = tryfinallystmt def n_classdef3(node): - """Handle "classdef" nonterminal for 3.0 >= version 3.0 < 3.6 - """ + """Handle "classdef" nonterminal for 3.0 >= version 3.0 < 3.6""" assert (3, 0) <= self.version < (3, 6) @@ -191,18 +188,25 @@ def customize_for_version3(self, version): # the iteration variable. These rules we can ignore # since we pick up the iteration variable some other way and # we definitely don't include in the source _[dd]. - TABLE_DIRECT.update({ - "ifstmt30": ( "%|if %c:\n%+%c%-", - (0, "testfalse_then"), - (1, "_ifstmts_jump30") ), - "ifnotstmt30": ( "%|if not %c:\n%+%c%-", - (0, "testtrue_then"), - (1, "_ifstmts_jump30") ), - "try_except30": ( "%|try:\n%+%c%-%c\n\n", - (1, "suite_stmts_opt"), - (4, "except_handler") ), - - }) + TABLE_DIRECT.update( + { + "ifstmt30": ( + "%|if %c:\n%+%c%-", + (0, "testfalse_then"), + (1, "_ifstmts_jump30"), + ), + "ifnotstmt30": ( + "%|if not %c:\n%+%c%-", + (0, "testtrue_then"), + (1, "_ifstmts_jump30"), + ), + "try_except30": ( + "%|try:\n%+%c%-%c\n\n", + (1, "suite_stmts_opt"), + (4, "except_handler"), + ), + } + ) def n_comp_iter(node): if node[0] == "expr": @@ -235,7 +239,6 @@ def customize_for_version3(self, version): if (3, 2) <= version <= (3, 4): def n_call(node): - mapping = self._get_mapping(node) key = node for i in mapping[1:]: @@ -289,24 +292,23 @@ def customize_for_version3(self, version): self.n_call = n_call def n_mkfunc_annotate(node): - # Handling EXTENDED_ARG before MAKE_FUNCTION ... i = -1 if node[-2] == "EXTENDED_ARG" else 0 if self.version < (3, 3): - code = node[-2 + i] + code_node = node[-2 + i] elif self.version >= (3, 3) or node[-2] == "kwargs": # LOAD_CONST code object .. # LOAD_CONST 'x0' if >= 3.3 # EXTENDED_ARG # MAKE_FUNCTION .. - code = node[-3 + i] + code_node = node[-3 + i] elif node[-3] == "expr": - code = node[-3][0] + code_node = node[-3][0] else: # LOAD_CONST code object .. # MAKE_FUNCTION .. - code = node[-3] + code_node = node[-3] self.indent_more() for annotate_last in range(len(node) - 1, -1, -1): @@ -318,11 +320,15 @@ def customize_for_version3(self, version): # But when derived from funcdefdeco it hasn't Would like a better # way to distinquish. if self.f.getvalue()[-4:] == "def ": - self.write(code.attr.co_name) + self.write(get_code_name(code_node.attr)) # FIXME: handle and pass full annotate args make_function3_annotate( - self, node, is_lambda=False, code_node=code, annotate_last=annotate_last + self, + node, + is_lambda=False, + code_node=code_node, + annotate_last=annotate_last, ) if len(self.param_stack) > 1: @@ -339,7 +345,7 @@ def customize_for_version3(self, version): "tryelsestmtl3": ( "%|try:\n%+%c%-%c%|else:\n%+%c%-", (1, "suite_stmts_opt"), - 3, # "except_handler_else" or "except_handler" + 3, # "except_handler_else" or "except_handler" (5, "else_suitel"), ), "LOAD_CLASSDEREF": ("%{pattr}",), diff --git a/uncompyle6/semantics/customize36.py b/uncompyle6/semantics/customize36.py index 30bacc01..9b2c6ae9 100644 --- a/uncompyle6/semantics/customize36.py +++ b/uncompyle6/semantics/customize36.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022 by Rocky Bernstein +# Copyright (c) 2019-2023 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -25,6 +25,7 @@ from uncompyle6.semantics.consts import ( TABLE_DIRECT, TABLE_R, ) +from uncompyle6.util import get_code_name def escape_format(s): @@ -190,7 +191,7 @@ def customize_for_version36(self, version): code_node = build_class[1][1] else: code_node = build_class[1][0] - class_name = code_node.attr.co_name + class_name = get_code_name(code_node.attr) assert "mkfunc" == build_class[1] mkfunc = build_class[1] diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index 1c25bb71..bb245a06 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -63,38 +63,33 @@ The node position 0 will be associated with "import". # FIXME: DRY code with pysource -from __future__ import print_function - import re - -from uncompyle6.semantics import pysource -from uncompyle6 import parser -from uncompyle6.scanner import Token, Code, get_scanner -import uncompyle6.parser as python_parser -from uncompyle6.semantics.check_ast import checker - -from uncompyle6.show import maybe_show_asm, maybe_show_tree - -from uncompyle6.parsers.treenode import SyntaxTree - -from uncompyle6.semantics.pysource import ParserError, StringIO -from xdis import iscode -from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE - -from uncompyle6.semantics.consts import ( - INDENT_PER_LEVEL, - NONE, - PRECEDENCE, - TABLE_DIRECT, - escape, - MAP, - PASS, -) +import sys +from collections import namedtuple +from typing import Optional from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG from spark_parser.ast import GenericASTTraversalPruningException +from xdis import iscode +from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE -from collections import namedtuple +import uncompyle6.parser as python_parser +from uncompyle6 import parser +from uncompyle6.parsers.treenode import SyntaxTree +from uncompyle6.scanner import Code, Token, get_scanner +from uncompyle6.semantics import pysource +from uncompyle6.semantics.check_ast import checker +from uncompyle6.semantics.consts import ( + INDENT_PER_LEVEL, + MAP, + NONE, + PASS, + PRECEDENCE, + TABLE_DIRECT, + escape, +) +from uncompyle6.semantics.pysource import ParserError, StringIO +from uncompyle6.show import maybe_show_asm, maybe_show_tree NodeInfo = namedtuple("NodeInfo", "node start finish") ExtractInfo = namedtuple( @@ -149,7 +144,6 @@ TABLE_DIRECT_FRAGMENT = { class FragmentsWalker(pysource.SourceWalker, object): - MAP_DIRECT_FRAGMENT = () stacked_params = ("f", "indent", "is_lambda", "_globals") @@ -346,7 +340,6 @@ class FragmentsWalker(pysource.SourceWalker, object): self.prune() # stop recursing def n_return_if_stmt(self, node): - start = len(self.f.getvalue()) + len(self.indent) if self.params["is_lambda"]: node[0].parent = node @@ -667,7 +660,7 @@ class FragmentsWalker(pysource.SourceWalker, object): assert n == "comp_iter" # Find the comprehension body. It is the inner-most # node that is not list_.. . - while n == "comp_iter": # list_iter + while n == "comp_iter": # list_iter n = n[0] # recurse one step if n == "comp_for": if n[0] == "SETUP_LOOP": @@ -1123,8 +1116,9 @@ class FragmentsWalker(pysource.SourceWalker, object): n_classdefdeco2 = n_classdef - def gen_source(self, ast, name, customize, is_lambda=False, returnNone=False, - debug_opts=None): + def gen_source( + self, ast, name, customize, is_lambda=False, returnNone=False, debug_opts=None + ): """convert parse tree to Python source code""" rn = self.return_none @@ -1150,7 +1144,6 @@ class FragmentsWalker(pysource.SourceWalker, object): noneInNames=False, is_top_level_module=False, ): - # FIXME: DRY with pysource.py # assert isinstance(tokens[0], Token) @@ -1463,7 +1456,6 @@ class FragmentsWalker(pysource.SourceWalker, object): self.set_pos_info(node, start, len(self.f.getvalue())) def print_super_classes3(self, node): - # FIXME: wrap superclasses onto a node # as a custom rule start = len(self.f.getvalue()) @@ -1482,7 +1474,7 @@ class FragmentsWalker(pysource.SourceWalker, object): # FIXME: this doesn't handle positional and keyword args # properly. Need to do something more like that below # in the non-PYPY 3.6 case. - self.template_engine(('(%[0]{attr}=%c)', 1), node[n-1]) + self.template_engine(("(%[0]{attr}=%c)", 1), node[n - 1]) return else: kwargs = node[n - 1].attr @@ -1846,9 +1838,13 @@ class FragmentsWalker(pysource.SourceWalker, object): index = entry[arg] if isinstance(index, tuple): - assert node[index[0]] == index[1], ( - "at %s[%d], expected %s node; got %s" - % (node.kind, arg, node[index[0]].kind, index[1]) + assert ( + node[index[0]] == index[1] + ), "at %s[%d], expected %s node; got %s" % ( + node.kind, + arg, + node[index[0]].kind, + index[1], ) index = index[0] assert isinstance( @@ -1869,9 +1865,13 @@ class FragmentsWalker(pysource.SourceWalker, object): assert isinstance(tup, tuple) if len(tup) == 3: (index, nonterm_name, self.prec) = tup - assert node[index] == nonterm_name, ( - "at %s[%d], expected '%s' node; got '%s'" - % (node.kind, arg, nonterm_name, node[index].kind) + assert ( + node[index] == nonterm_name + ), "at %s[%d], expected '%s' node; got '%s'" % ( + node.kind, + arg, + nonterm_name, + node[index].kind, ) else: assert len(tup) == 2 @@ -1984,6 +1984,7 @@ class FragmentsWalker(pysource.SourceWalker, object): # DEFAULT_DEBUG_OPTS = {"asm": False, "tree": False, "grammar": False} + # This interface is deprecated def deparse_code( version, @@ -2074,7 +2075,9 @@ def code_deparse( ) is_top_level_module = co.co_name == "" - deparsed.ast = deparsed.build_ast(tokens, customize, co, is_top_level_module=is_top_level_module) + deparsed.ast = deparsed.build_ast( + tokens, customize, co, is_top_level_module=is_top_level_module + ) assert deparsed.ast == "stmts", "Should have parsed grammar start" @@ -2084,7 +2087,7 @@ def code_deparse( # convert leading '__doc__ = "..." into doc string assert deparsed.ast == "stmts" - (deparsed.mod_globs, nonlocals) = pysource.find_globals_and_nonlocals( + (deparsed.mod_globs, _) = pysource.find_globals_and_nonlocals( deparsed.ast, set(), set(), co, version ) @@ -2135,7 +2138,7 @@ def code_deparse_around_offset( offset, co, out=StringIO(), - version=None, + version=Optional[tuple], is_pypy=None, debug_opts=DEFAULT_DEBUG_OPTS, ): @@ -2147,7 +2150,7 @@ def code_deparse_around_offset( assert iscode(co) if version is None: - version = sysinfo2float() + version = sys.version_info[:3] if is_pypy is None: is_pypy = IS_PYPY @@ -2200,8 +2203,7 @@ def deparsed_find(tup, deparsed, code): """Return a NodeInfo nametuple for a fragment-deparsed `deparsed` at `tup`. `tup` is a name and offset tuple, `deparsed` is a fragment object - and `code` is instruction bytecode. -""" + and `code` is instruction bytecode.""" nodeInfo = None name, last_i = tup if not hasattr(deparsed, "offsets"): diff --git a/uncompyle6/semantics/n_actions.py b/uncompyle6/semantics/n_actions.py index d61cf517..e22588f3 100644 --- a/uncompyle6/semantics/n_actions.py +++ b/uncompyle6/semantics/n_actions.py @@ -25,7 +25,7 @@ from uncompyle6.semantics.consts import ( from uncompyle6.parsers.treenode import SyntaxTree from uncompyle6.scanners.tok import Token -from uncompyle6.util import better_repr +from uncompyle6.util import better_repr, get_code_name from uncompyle6.semantics.helper import ( find_code_node, @@ -1040,7 +1040,7 @@ class NonterminalActions: def n_mkfunc(self, node): code_node = find_code_node(node, -2) code = code_node.attr - self.write(code.co_name) + self.write(get_code_name(code)) self.indent_more() self.make_function(node, is_lambda=False, code_node=code_node) diff --git a/uncompyle6/util.py b/uncompyle6/util.py index 888ed368..79b4fbe6 100644 --- a/uncompyle6/util.py +++ b/uncompyle6/util.py @@ -3,8 +3,14 @@ # More could be done here though. from math import copysign +from xdis.codetype import UnicodeForPython3 from xdis.version_info import PYTHON_VERSION_TRIPLE +def get_code_name(code) -> str: + code_name = code.co_name + if isinstance(code_name, UnicodeForPython3): + return code_name.value.decode("utf-8") + return code_name def is_negative_zero(n): """Returns true if n is -0.0"""