You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-03 00:45:53 +08:00
handle long literal constants faster
This commit is contained in:
@@ -1,3 +0,0 @@
|
||||
# Long lists pose a slowdown in uncompiling.
|
||||
x = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||
print(x)
|
1813
test/simple_source/expression/05_long_literals.py
Normal file
1813
test/simple_source/expression/05_long_literals.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -56,6 +56,7 @@ class PythonParser(GenericASTBuilder):
|
||||
"_come_froms",
|
||||
"_stmts",
|
||||
"attributes",
|
||||
"add_consts",
|
||||
"come_froms",
|
||||
"except_stmts",
|
||||
"exprlist",
|
||||
|
@@ -319,6 +319,22 @@ class Python37BaseParser(PythonParser):
|
||||
"""
|
||||
self.addRule(rules_str, nop_func)
|
||||
|
||||
elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"):
|
||||
if opname == "BUILD_CONST_DICT":
|
||||
rule = f"""
|
||||
add_consts ::= ADD_VALUE*
|
||||
const_list ::= COLLECTION_START add_consts {opname}
|
||||
dict ::= const_list
|
||||
expr ::= dict
|
||||
"""
|
||||
else:
|
||||
rule = f"""
|
||||
add_consts ::= ADD_VALUE*
|
||||
const_list ::= COLLECTION_START add_consts {opname}
|
||||
expr ::= const_list
|
||||
"""
|
||||
self.addRule(rule, nop_func)
|
||||
|
||||
elif opname_base == "BUILD_CONST_KEY_MAP":
|
||||
kvlist_n = "expr " * (token.attr)
|
||||
rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname)
|
||||
|
@@ -22,6 +22,7 @@ This sets up opcodes Python's 3.7 and calls a generalized
|
||||
scanner routine for Python 3.
|
||||
"""
|
||||
|
||||
from typing import Tuple
|
||||
from uncompyle6.scanners.scanner37base import Scanner37Base
|
||||
|
||||
# bytecode verification, verify(), uses JUMP_OPs from here
|
||||
@@ -30,6 +31,8 @@ from xdis.opcodes import opcode_37 as opc
|
||||
# bytecode verification, verify(), uses JUMP_OPS from here
|
||||
JUMP_OPs = opc.JUMP_OPS
|
||||
|
||||
CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")
|
||||
|
||||
|
||||
class Scanner37(Scanner37Base):
|
||||
def __init__(self, show_asm=None, is_pypy: bool=False):
|
||||
@@ -39,9 +42,28 @@ class Scanner37(Scanner37Base):
|
||||
|
||||
pass
|
||||
|
||||
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
|
||||
def ingest(
|
||||
self, co, classname=None, code_objects={}, show_asm=None
|
||||
) -> Tuple[list, dict]:
|
||||
tokens, customize = Scanner37Base.ingest(self, co, classname, code_objects, show_asm)
|
||||
for t in tokens:
|
||||
new_tokens = []
|
||||
for i, t in enumerate(tokens):
|
||||
# things that smash new_tokens like BUILD_LIST have to come first.
|
||||
if t.op in (
|
||||
self.opc.BUILD_CONST_KEY_MAP,
|
||||
self.opc.BUILD_LIST,
|
||||
self.opc.BUILD_SET,
|
||||
):
|
||||
collection_type = (
|
||||
"DICT"
|
||||
if t.kind.startswith("BUILD_CONST_KEY_MAP")
|
||||
else t.kind.split("_")[1]
|
||||
)
|
||||
new_tokens = self.bound_collection(
|
||||
tokens, new_tokens, t, i, f"CONST_{collection_type}"
|
||||
)
|
||||
continue
|
||||
|
||||
# The lowest bit of flags indicates whether the
|
||||
# var-keyword argument is placed at the top of the stack
|
||||
if t.op == self.opc.CALL_FUNCTION_EX and t.attr & 1:
|
||||
@@ -59,8 +81,9 @@ class Scanner37(Scanner37Base):
|
||||
t.kind = "BUILD_MAP_UNPACK_WITH_CALL_%d" % t.attr
|
||||
elif not self.is_pypy and t.op == self.opc.BUILD_TUPLE_UNPACK_WITH_CALL:
|
||||
t.kind = "BUILD_TUPLE_UNPACK_WITH_CALL_%d" % t.attr
|
||||
pass
|
||||
return tokens, customize
|
||||
new_tokens.append(t)
|
||||
|
||||
return new_tokens, customize
|
||||
|
||||
if __name__ == "__main__":
|
||||
from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2015-2020 by Rocky Bernstein
|
||||
# Copyright (c) 2015-2020, 2022 by Rocky Bernstein
|
||||
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
|
||||
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
|
||||
#
|
||||
@@ -29,6 +29,8 @@ For example:
|
||||
Finally we save token information.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Set
|
||||
|
||||
from xdis import iscode, instruction_size, Instruction
|
||||
from xdis.bytecode import _get_const_info
|
||||
|
||||
@@ -45,6 +47,9 @@ import sys
|
||||
globals().update(op3.opmap)
|
||||
|
||||
|
||||
CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")
|
||||
|
||||
|
||||
class Scanner37Base(Scanner):
|
||||
def __init__(self, version, show_asm=None, is_pypy=False):
|
||||
super(Scanner37Base, self).__init__(version, show_asm, is_pypy)
|
||||
@@ -179,6 +184,80 @@ class Scanner37Base(Scanner):
|
||||
# self.varargs_ops = frozenset(self.opc.hasvargs)
|
||||
return
|
||||
|
||||
def bound_collection(
    self, tokens: list, next_tokens: list, t: "Token", i: int, collection_type: str
) -> list:
    """
    Try to fold the operands of a collection-building instruction into a
    compact ``COLLECTION_START ADD_VALUE* BUILD_<collection_type>`` token run.

    Parameters:
      tokens: the token list being scanned.
      next_tokens: the output tokens accumulated so far; never mutated.
          NOTE(review): the code assumes its last ``count`` entries
          correspond one-to-one to ``tokens[i - count:i]`` -- confirm
          against the caller.
      t: the collection-building token at ``tokens[i]``; ``t.attr`` is
          its operand count.
      i: index of ``t`` in ``tokens``.
      collection_type: one of the names in ``CONST_COLLECTIONS``.

    Returns a new list. When the collection has fewer than 5 operands, or
    any operand is not one of the simple load instructions listed below,
    the result is just ``next_tokens + [t]``. Otherwise the last ``count``
    entries of ``next_tokens`` are replaced by a ``COLLECTION_START``
    marker, one ``ADD_VALUE`` per operand, and a ``BUILD_<collection_type>``
    token carrying the attributes of ``t``.
    """
    count = t.attr
    assert isinstance(count, int)

    if collection_type == "CONST_DICT":
        # Constant dictionaries work via BUILD_CONST_KEY_MAP and
        # handle the values() like sets and lists.
        # However the keys() are a LOAD_CONST of the keys.
        # Adjust the count to account for this.
        count += 1

    # Check the bounds only after the dictionary adjustment; otherwise
    # "collection_start" below could become negative and silently index
    # from the end of "tokens".
    assert count <= i

    # For small collections don't bother.
    if count < 5:
        return next_tokens + [t]

    collection_start = i - count
    operands = tokens[collection_start:i]

    # Only simple load instructions can be folded.
    if any(
        operand.kind
        not in (
            "LOAD_CONST",
            "LOAD_FAST",
            "LOAD_GLOBAL",
            "LOAD_NAME",
        )
        for operand in operands
    ):
        return next_tokens + [t]

    collection_enum = CONST_COLLECTIONS.index(collection_type)

    # If we get here, every operand before tokens[i] is a simple load:
    # add a boundary marker and turn each load into an ADD_VALUE.
    new_tokens = next_tokens[:-count]
    start_offset = tokens[collection_start].offset
    new_tokens.append(
        Token(
            opname="COLLECTION_START",
            attr=collection_enum,
            pattr=collection_type,
            offset=f"{start_offset}_0",
            has_arg=True,
            opc=self.opc,
            has_extended_arg=False,
        )
    )
    for operand in operands:
        new_tokens.append(
            Token(
                opname="ADD_VALUE",
                attr=operand.attr,
                pattr=operand.pattr,
                offset=operand.offset,
                has_arg=True,
                linestart=operand.linestart,
                opc=self.opc,
                has_extended_arg=False,
            )
        )
    new_tokens.append(
        Token(
            opname=f"BUILD_{collection_type}",
            attr=t.attr,
            pattr=t.pattr,
            offset=t.offset,
            has_arg=t.has_arg,
            linestart=t.linestart,
            opc=t.opc,
            has_extended_arg=False,
        )
    )
    return new_tokens
|
||||
|
||||
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
|
||||
"""
|
||||
Pick out tokens from an uncompyle6 code object, and transform them,
|
||||
@@ -212,7 +291,7 @@ class Scanner37Base(Scanner):
|
||||
# show_asm = 'both'
|
||||
if show_asm in ("both", "before"):
|
||||
for instr in bytecode.get_instructions(co):
|
||||
print(instr.disassemble())
|
||||
print(instr.disassemble(self.opc))
|
||||
|
||||
# "customize" is in the process of going away here
|
||||
customize = {}
|
||||
@@ -316,6 +395,7 @@ class Scanner37Base(Scanner):
|
||||
# "loop" tag last so the grammar rule matches that properly.
|
||||
for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
|
||||
come_from_name = "COME_FROM"
|
||||
|
||||
opname = self.opname_for_offset(jump_offset)
|
||||
if opname == "EXTENDED_ARG":
|
||||
k = xdis.next_offset(op, self.opc, jump_offset)
|
||||
@@ -342,22 +422,6 @@ class Scanner37Base(Scanner):
|
||||
jump_idx += 1
|
||||
pass
|
||||
pass
|
||||
elif inst.offset in self.else_start:
|
||||
end_offset = self.else_start[inst.offset]
|
||||
j = tokens_append(
|
||||
j,
|
||||
Token(
|
||||
"ELSE",
|
||||
None,
|
||||
repr(end_offset),
|
||||
offset="%s" % (inst.offset),
|
||||
has_arg=True,
|
||||
opc=self.opc,
|
||||
has_extended_arg=inst.has_extended_arg,
|
||||
),
|
||||
)
|
||||
|
||||
pass
|
||||
|
||||
pattr = inst.argrepr
|
||||
opname = inst.opname
|
||||
@@ -444,17 +508,24 @@ class Scanner37Base(Scanner):
|
||||
opname = "%s_%d+%d" % (opname, before_args, after_args)
|
||||
|
||||
elif op == self.opc.JUMP_ABSOLUTE:
|
||||
# Further classify JUMP_ABSOLUTE into backward jumps
|
||||
# which are used in loops, and "CONTINUE" jumps which
|
||||
# may appear in a "continue" statement. The loop-type
|
||||
# and continue-type jumps will help us classify loop
|
||||
# boundaries The continue-type jumps help us get
|
||||
# "continue" statements with would otherwise be turned
|
||||
# into a "pass" statement because JUMPs are sometimes
|
||||
# ignored in rules as just boundary overhead. In
|
||||
# comprehensions we might sometimes classify JUMP_BACK
|
||||
# as CONTINUE, but that's okay since we add a grammar
|
||||
# rule for that.
|
||||
# Refine JUMP_ABSOLUTE further into:
|
||||
#
|
||||
# * "JUMP_LOOP" - which are used in loops. This is sometimes
|
||||
# found at the end of a looping construct
|
||||
# * "BREAK_LOOP" - which are used to break loops.
|
||||
# * "CONTINUE" - jumps which may appear in a "continue" statement.
|
||||
# It is okay to confuse this with JUMP_LOOP. The
|
||||
# grammar should tolerate this.
|
||||
# * "JUMP_FORWARD" - forward jumps that are not BREAK_LOOP jumps.
|
||||
#
|
||||
# The loop-type and continue-type jumps will help us
|
||||
# classify loop boundaries. The continue-type jumps
|
||||
# help us get "continue" statements which would
|
||||
# otherwise be turned into a "pass" statement because
|
||||
# JUMPs are sometimes ignored in rules as just
|
||||
# boundary overhead. Again, in comprehensions we might
|
||||
# sometimes classify JUMP_LOOP as CONTINUE, but that's
|
||||
# okay since grammar rules should tolerate that.
|
||||
pattr = argval
|
||||
target = inst.argval
|
||||
if target <= inst.offset:
|
||||
@@ -523,7 +594,7 @@ class Scanner37Base(Scanner):
|
||||
print()
|
||||
return tokens, customize
|
||||
|
||||
def find_jump_targets(self, debug):
|
||||
def find_jump_targets(self, debug: str) -> dict:
|
||||
"""
|
||||
Detect all offsets in a byte code which are jump targets
|
||||
where we might insert a COME_FROM instruction.
|
||||
@@ -538,18 +609,17 @@ class Scanner37Base(Scanner):
|
||||
self.structs = [{"type": "root", "start": 0, "end": n - 1}]
|
||||
|
||||
# All loop entry points
|
||||
self.loops = []
|
||||
self.loops: List[int] = []
|
||||
|
||||
# Map fixed jumps to their real destination
|
||||
self.fixed_jumps = {}
|
||||
self.fixed_jumps: Dict[int, int] = {}
|
||||
self.except_targets = {}
|
||||
self.ignore_if = set()
|
||||
self.ignore_if: Set[int] = set()
|
||||
self.build_statement_indices()
|
||||
self.else_start = {}
|
||||
|
||||
# Containers filled by detect_control_flow()
|
||||
self.not_continue = set()
|
||||
self.return_end_ifs = set()
|
||||
self.not_continue: Set[int] = set()
|
||||
self.return_end_ifs: Set[int] = set()
|
||||
self.setup_loop_targets = {} # target given setup_loop offset
|
||||
self.setup_loops = {} # setup_loop offset given target
|
||||
|
||||
@@ -655,9 +725,9 @@ class Scanner37Base(Scanner):
|
||||
):
|
||||
stmts.remove(stmt_offset)
|
||||
continue
|
||||
# Rewing ops till we encounter non-JUMP_ABSOLUTE one
|
||||
# Scan back bytecode ops till we encounter non-JUMP_ABSOLUTE op
|
||||
j = self.prev_op[stmt_offset]
|
||||
while code[j] == self.opc.JUMP_ABSOLUTE:
|
||||
while code[j] == self.opc.JUMP_ABSOLUTE and j > 0:
|
||||
j = self.prev_op[j]
|
||||
# If we got here, then it's list comprehension which
|
||||
# is not a statement too
|
||||
@@ -687,7 +757,9 @@ class Scanner37Base(Scanner):
|
||||
# Finish filling the list for last statement
|
||||
slist += [codelen] * (codelen - len(slist))
|
||||
|
||||
def detect_control_flow(self, offset, targets, inst_index):
|
||||
def detect_control_flow(
|
||||
self, offset: int, targets: Dict[Any, Any], inst_index: int
|
||||
):
|
||||
"""
|
||||
Detect type of block structures and their boundaries to fix optimized jumps
|
||||
in python2.3+
|
||||
@@ -698,9 +770,9 @@ class Scanner37Base(Scanner):
|
||||
op = inst.opcode
|
||||
|
||||
# Detect parent structure
|
||||
parent = self.structs[0]
|
||||
start = parent["start"]
|
||||
end = parent["end"]
|
||||
parent: Dict[str, Any] = self.structs[0]
|
||||
start: int = parent["start"]
|
||||
end: int = parent["end"]
|
||||
|
||||
# Pick inner-most parent for our offset
|
||||
for struct in self.structs:
|
||||
@@ -933,20 +1005,16 @@ class Scanner37Base(Scanner):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from uncompyle6 import PYTHON_VERSION
|
||||
from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str
|
||||
|
||||
if PYTHON_VERSION >= 3.7:
|
||||
if PYTHON_VERSION_TRIPLE[:2] == (3, 7):
|
||||
import inspect
|
||||
|
||||
co = inspect.currentframe().f_code
|
||||
from uncompyle6 import PYTHON_VERSION
|
||||
co = inspect.currentframe().f_code # type: ignore
|
||||
|
||||
tokens, customize = Scanner37Base(PYTHON_VERSION).ingest(co)
|
||||
tokens, customize = Scanner37Base(PYTHON_VERSION_TRIPLE).ingest(co)
|
||||
for t in tokens:
|
||||
print(t)
|
||||
else:
|
||||
print(
|
||||
"Need to be Python 3.7 or greater to demo; I am version {PYTHON_VERSION}."
|
||||
% PYTHON_VERSION
|
||||
)
|
||||
print(f"Need to be Python 3.7 to demo; I am version {version_tuple_to_str()}.")
|
||||
pass
|
||||
|
@@ -62,6 +62,8 @@ class Scanner38(Scanner37):
|
||||
print(jump_back_targets)
|
||||
loop_ends = []
|
||||
next_end = tokens[len(tokens) - 1].off2int() + 10
|
||||
|
||||
new_tokens = []
|
||||
for i, token in enumerate(tokens):
|
||||
opname = token.kind
|
||||
offset = token.offset
|
||||
@@ -76,6 +78,8 @@ class Scanner38(Scanner37):
|
||||
else tokens[len(tokens) - 1].off2int() + 10
|
||||
)
|
||||
|
||||
# things that smash new_tokens like BUILD_LIST have to come first.
|
||||
|
||||
if offset in jump_back_targets:
|
||||
next_end = off2int(jump_back_targets[offset], prefer_last=False)
|
||||
if self.debug:
|
||||
@@ -93,6 +97,7 @@ class Scanner38(Scanner37):
|
||||
if opname == "JUMP_ABSOLUTE" and jump_target <= next_end:
|
||||
# Not a forward-enough jump to break out of the next loop, so continue.
|
||||
# FIXME: Do we need "continue" detection?
|
||||
new_tokens.append(token)
|
||||
continue
|
||||
|
||||
# We also want to avoid confusing BREAK_LOOPS with parts of the
|
||||
@@ -123,8 +128,8 @@ class Scanner38(Scanner37):
|
||||
):
|
||||
token.kind = "BREAK_LOOP"
|
||||
pass
|
||||
pass
|
||||
return tokens, customize
|
||||
new_tokens.append(token)
|
||||
return new_tokens, customize
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@@ -282,6 +282,7 @@ TABLE_DIRECT = {
|
||||
"comp_if": (" if %c%c", 0, 2),
|
||||
"comp_if_not": (" if not %p%c", (0, "expr", PRECEDENCE["unary_not"]), 2),
|
||||
"comp_body": ("",), # ignore when recursing
|
||||
|
||||
"set_comp_body": ("%c", 0),
|
||||
"gen_comp_body": ("%c", 0),
|
||||
"dict_comp_body": ("%c:%c", 1, 0),
|
||||
|
@@ -277,8 +277,16 @@ def make_function36(self, node, is_lambda, nested=1, code_node=None):
|
||||
# FIXME: handle free_tup, ann_dict, and default_tup
|
||||
if kw_dict:
|
||||
assert kw_dict == "dict"
|
||||
defaults = [self.traverse(n, indent="") for n in kw_dict[:-2]]
|
||||
names = eval(self.traverse(kw_dict[-2]))
|
||||
const_list = kw_dict[0]
|
||||
if kw_dict[0] == "const_list":
|
||||
add_consts = const_list[1]
|
||||
assert add_consts == "add_consts"
|
||||
names = add_consts[-1].attr
|
||||
defaults = [v.pattr for v in add_consts[:-1]]
|
||||
else:
|
||||
defaults = [self.traverse(n, indent="") for n in kw_dict[:-2]]
|
||||
names = eval(self.traverse(kw_dict[-2]))
|
||||
|
||||
assert len(defaults) == len(names)
|
||||
# FIXME: possibly handle line breaks
|
||||
for i, n in enumerate(names):
|
||||
|
@@ -202,6 +202,68 @@ class NonterminalActions:
|
||||
|
||||
n_classdefdeco2 = n_classdef
|
||||
|
||||
def n_const_list(self, node):
    """
    Pretty-print a constant dict, list, or set.

    ``node[1]`` holds the flattened elements and ``node[2]`` is the
    building instruction whose ``kind`` suffix (``LIST``, ``SET`` or
    ``DICT``) selects the brackets. Every element written must be an
    ADD_VALUE token; its ``pattr`` is emitted as the value text. For a
    dict, the last element's ``pattr`` is the tuple of keys and the
    preceding elements are the corresponding values.

    Raises TypeError when ``node[2].kind`` names none of the supported
    collection types.
    """
    p = self.prec

    lastnodetype = node[2].kind
    flat_elems = node[1]
    is_dict = lastnodetype.endswith("DICT")

    if lastnodetype.endswith("LIST"):
        self.write("[")
        endchar = "]"
    elif lastnodetype.endswith("SET") or is_dict:
        self.write("{")
        endchar = "}"
    else:
        raise TypeError(
            f"Internal Error: n_const_list expects dict, list, or set; got {lastnodetype}"
        )

    self.indent_more(INDENT_PER_LEVEL)
    sep = ""

    if is_dict:
        # The final element carries the keys; all others are the values.
        keys = flat_elems[-1].pattr
        assert isinstance(keys, tuple)
        assert len(keys) == len(flat_elems) - 1
        values = flat_elems[:-1]
    else:
        keys = None
        values = flat_elems

    for i, elem in enumerate(values):
        # No debugger hook here: a malformed tree fails this assertion
        # instead of importing an optional third-party debugger.
        assert elem.kind == "ADD_VALUE"
        value = elem.pattr
        # NOTE(review): this listing was reconstructed from a rendering
        # that lost indentation; the "else" is attached to the inner
        # "if" here -- confirm against the repository.
        if elem.linestart is not None:
            if elem.linestart != self.line_number:
                # Element starts a new source line: break the output line.
                sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
                self.line_number = elem.linestart
            else:
                if sep != "":
                    sep += " "
        if is_dict:
            self.write(f"{sep} {repr(keys[i])}: {value}")
        else:
            self.write(sep, value)
        sep = ","

    self.write(endchar)
    self.indent_less(INDENT_PER_LEVEL)

    self.prec = p
    self.prune()
    return
|
||||
|
||||
def n_delete_subscript(self, node):
|
||||
if node[-2][0] == "build_list" and node[-2][0][-1].kind.startswith(
|
||||
"BUILD_TUPLE"
|
||||
@@ -498,6 +560,11 @@ class NonterminalActions:
|
||||
"""
|
||||
prettyprint a dict, list, set or tuple.
|
||||
"""
|
||||
if len(node) == 1 and node[0] == "const_list":
|
||||
self.preorder(node[0])
|
||||
self.prune()
|
||||
return
|
||||
|
||||
p = self.prec
|
||||
self.prec = PRECEDENCE["yield"] - 1
|
||||
lastnode = node.pop()
|
||||
@@ -547,7 +614,6 @@ class NonterminalActions:
|
||||
self.write("(")
|
||||
endchar = ")"
|
||||
else:
|
||||
# from trepan.api import debug; debug()
|
||||
raise TypeError(
|
||||
"Internal Error: n_build_list expects list, tuple, set, or unpack"
|
||||
)
|
||||
|
Reference in New Issue
Block a user