handle long literal constants faster

This commit is contained in:
rocky
2022-04-24 02:50:09 -04:00
parent 464801bcb3
commit 371138cfbc
10 changed files with 2061 additions and 63 deletions

View File

@@ -1,3 +0,0 @@
# Long lists pose a slowdown in uncompiling.
x = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
print(x)

File diff suppressed because it is too large Load Diff

View File

@@ -56,6 +56,7 @@ class PythonParser(GenericASTBuilder):
"_come_froms",
"_stmts",
"attributes",
"add_consts",
"come_froms",
"except_stmts",
"exprlist",

View File

@@ -319,6 +319,22 @@ class Python37BaseParser(PythonParser):
"""
self.addRule(rules_str, nop_func)
elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"):
if opname == "BUILD_CONST_DICT":
rule = f"""
add_consts ::= ADD_VALUE*
const_list ::= COLLECTION_START add_consts {opname}
dict ::= const_list
expr ::= dict
"""
else:
rule = f"""
add_consts ::= ADD_VALUE*
const_list ::= COLLECTION_START add_consts {opname}
expr ::= const_list
"""
self.addRule(rule, nop_func)
elif opname_base == "BUILD_CONST_KEY_MAP":
kvlist_n = "expr " * (token.attr)
rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname)

View File

@@ -22,6 +22,7 @@ This sets up opcodes Python's 3.7 and calls a generalized
scanner routine for Python 3.
"""
from typing import Tuple
from uncompyle6.scanners.scanner37base import Scanner37Base
# bytecode verification, verify(), uses JUMP_OPs from here
@@ -30,6 +31,8 @@ from xdis.opcodes import opcode_37 as opc
# bytecode verification, verify(), uses JUMP_OPS from here
JUMP_OPs = opc.JUMP_OPS
CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")
class Scanner37(Scanner37Base):
def __init__(self, show_asm=None, is_pypy: bool=False):
@@ -39,9 +42,28 @@ class Scanner37(Scanner37Base):
pass
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
def ingest(
self, co, classname=None, code_objects={}, show_asm=None
) -> Tuple[list, dict]:
tokens, customize = Scanner37Base.ingest(self, co, classname, code_objects, show_asm)
for t in tokens:
new_tokens = []
for i, t in enumerate(tokens):
# things that smash new_tokens like BUILD_LIST have to come first.
if t.op in (
self.opc.BUILD_CONST_KEY_MAP,
self.opc.BUILD_LIST,
self.opc.BUILD_SET,
):
collection_type = (
"DICT"
if t.kind.startswith("BUILD_CONST_KEY_MAP")
else t.kind.split("_")[1]
)
new_tokens = self.bound_collection(
tokens, new_tokens, t, i, f"CONST_{collection_type}"
)
continue
# The lowest bit of flags indicates whether the
# var-keyword argument is placed at the top of the stack
if t.op == self.opc.CALL_FUNCTION_EX and t.attr & 1:
@@ -59,8 +81,9 @@ class Scanner37(Scanner37Base):
t.kind = "BUILD_MAP_UNPACK_WITH_CALL_%d" % t.attr
elif not self.is_pypy and t.op == self.opc.BUILD_TUPLE_UNPACK_WITH_CALL:
t.kind = "BUILD_TUPLE_UNPACK_WITH_CALL_%d" % t.attr
pass
return tokens, customize
new_tokens.append(t)
return new_tokens, customize
if __name__ == "__main__":
from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2015-2020 by Rocky Bernstein
# Copyright (c) 2015-2020, 2022 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
#
@@ -29,6 +29,8 @@ For example:
Finally we save token information.
"""
from typing import Any, Dict, List, Set
from xdis import iscode, instruction_size, Instruction
from xdis.bytecode import _get_const_info
@@ -45,6 +47,9 @@ import sys
globals().update(op3.opmap)
CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")
class Scanner37Base(Scanner):
def __init__(self, version, show_asm=None, is_pypy=False):
super(Scanner37Base, self).__init__(version, show_asm, is_pypy)
@@ -179,6 +184,80 @@ class Scanner37Base(Scanner):
# self.varargs_ops = frozenset(self.opc.hasvargs)
return
def bound_collection(
    self, tokens: list, next_tokens: list, t: Token, i: int, collection_type: str
) -> list:
    """
    Try to rewrite a long run of simple loads feeding a collection-building
    instruction into a bracketed, flat token sequence:

        COLLECTION_START ADD_VALUE ... ADD_VALUE BUILD_<collection_type>

    Parameters:
      tokens:          the token list being scanned.
      next_tokens:     the output token list built so far. It is not
                       mutated; a new list is always returned.
                       NOTE(review): the slice below assumes the operand
                       tokens were appended to ``next_tokens`` unchanged
                       by the caller -- confirm against callers.
      t:               the collection-building token, i.e. ``tokens[i]``;
                       ``t.attr`` is the number of operands.
      i:               index of ``t`` in ``tokens``.
      collection_type: one of the names in CONST_COLLECTIONS
                       ("CONST_LIST", "CONST_SET" or "CONST_DICT").

    Returns a new token list. When the rewrite does not apply (too few
    operands, not enough preceding tokens, or an operand that is not a
    simple load), this is just ``next_tokens`` with ``t`` appended.
    """
    count = t.attr
    assert isinstance(count, int)
    assert count <= i

    if collection_type == "CONST_DICT":
        # Constant dictionaries work via BUILD_CONST_KEY_MAP and
        # handle the values() like sets and lists.
        # However the keys() are a LOAD_CONST of the keys;
        # adjust the count to account for this extra operand.
        count += 1

    # For small collections don't bother. Also bail out when there are
    # not enough preceding tokens: after the CONST_DICT adjustment above
    # "i - count" could otherwise go negative and silently index from the
    # end of "tokens".
    if count < 5 or count > i:
        return next_tokens + [t]

    collection_start = i - count
    operands = tokens[collection_start:i]

    simple_loads = ("LOAD_CONST", "LOAD_FAST", "LOAD_GLOBAL", "LOAD_NAME")
    if any(operand.kind not in simple_loads for operand in operands):
        return next_tokens + [t]

    collection_enum = CONST_COLLECTIONS.index(collection_type)

    # If we get here, all "count" tokens before tokens[i] are simple loads.
    # Drop them from the output, add a boundary marker, and re-emit each
    # load as an ADD_VALUE token.
    new_tokens = next_tokens[:-count]
    start_offset = operands[0].offset
    new_tokens.append(
        Token(
            opname="COLLECTION_START",
            attr=collection_enum,
            pattr=collection_type,
            offset=f"{start_offset}_0",
            has_arg=True,
            opc=self.opc,
            has_extended_arg=False,
        )
    )
    new_tokens.extend(
        Token(
            opname="ADD_VALUE",
            attr=operand.attr,
            pattr=operand.pattr,
            offset=operand.offset,
            has_arg=True,
            linestart=operand.linestart,
            opc=self.opc,
            has_extended_arg=False,
        )
        for operand in operands
    )
    # Finally the collection-building token itself, renamed to
    # BUILD_CONST_LIST, BUILD_CONST_SET or BUILD_CONST_DICT.
    new_tokens.append(
        Token(
            opname=f"BUILD_{collection_type}",
            attr=t.attr,
            pattr=t.pattr,
            offset=t.offset,
            has_arg=t.has_arg,
            linestart=t.linestart,
            opc=t.opc,
            has_extended_arg=False,
        )
    )
    return new_tokens
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
"""
Pick out tokens from an uncompyle6 code object, and transform them,
@@ -212,7 +291,7 @@ class Scanner37Base(Scanner):
# show_asm = 'both'
if show_asm in ("both", "before"):
for instr in bytecode.get_instructions(co):
print(instr.disassemble())
print(instr.disassemble(self.opc))
# "customize" is in the process of going away here
customize = {}
@@ -316,6 +395,7 @@ class Scanner37Base(Scanner):
# "loop" tag last so the grammar rule matches that properly.
for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
come_from_name = "COME_FROM"
opname = self.opname_for_offset(jump_offset)
if opname == "EXTENDED_ARG":
k = xdis.next_offset(op, self.opc, jump_offset)
@@ -342,22 +422,6 @@ class Scanner37Base(Scanner):
jump_idx += 1
pass
pass
elif inst.offset in self.else_start:
end_offset = self.else_start[inst.offset]
j = tokens_append(
j,
Token(
"ELSE",
None,
repr(end_offset),
offset="%s" % (inst.offset),
has_arg=True,
opc=self.opc,
has_extended_arg=inst.has_extended_arg,
),
)
pass
pattr = inst.argrepr
opname = inst.opname
@@ -444,17 +508,24 @@ class Scanner37Base(Scanner):
opname = "%s_%d+%d" % (opname, before_args, after_args)
elif op == self.opc.JUMP_ABSOLUTE:
# Further classify JUMP_ABSOLUTE into backward jumps
# which are used in loops, and "CONTINUE" jumps which
# may appear in a "continue" statement. The loop-type
# and continue-type jumps will help us classify loop
# boundaries The continue-type jumps help us get
# "continue" statements with would otherwise be turned
# into a "pass" statement because JUMPs are sometimes
# ignored in rules as just boundary overhead. In
# comprehensions we might sometimes classify JUMP_BACK
# as CONTINUE, but that's okay since we add a grammar
# rule for that.
# Refine JUMP_ABSOLUTE further into:
#
# * "JUMP_LOOP" - which is used in loops. This is sometimes
# found at the end of a looping construct
# * "BREAK_LOOP" - which is used to break out of loops.
# * "CONTINUE" - jumps which may appear in a "continue" statement.
# It is okay to confuse this with JUMP_LOOP. The
# grammar should tolerate this.
# * "JUMP_FORWARD" - forward jumps that are not BREAK_LOOP jumps.
#
# The loop-type and continue-type jumps will help us
# classify loop boundaries. The continue-type jumps
# help us get "continue" statements which would
# otherwise be turned into a "pass" statement because
# JUMPs are sometimes ignored in rules as just
# boundary overhead. Again, in comprehensions we might
# sometimes classify JUMP_LOOP as CONTINUE, but that's
# okay since grammar rules should tolerate that.
pattr = argval
target = inst.argval
if target <= inst.offset:
@@ -523,7 +594,7 @@ class Scanner37Base(Scanner):
print()
return tokens, customize
def find_jump_targets(self, debug):
def find_jump_targets(self, debug: str) -> dict:
"""
Detect all offsets in a byte code which are jump targets
where we might insert a COME_FROM instruction.
@@ -538,18 +609,17 @@ class Scanner37Base(Scanner):
self.structs = [{"type": "root", "start": 0, "end": n - 1}]
# All loop entry points
self.loops = []
self.loops: List[int] = []
# Map fixed jumps to their real destination
self.fixed_jumps = {}
self.fixed_jumps: Dict[int, int] = {}
self.except_targets = {}
self.ignore_if = set()
self.ignore_if: Set[int] = set()
self.build_statement_indices()
self.else_start = {}
# Containers filled by detect_control_flow()
self.not_continue = set()
self.return_end_ifs = set()
self.not_continue: Set[int] = set()
self.return_end_ifs: Set[int] = set()
self.setup_loop_targets = {} # target given setup_loop offset
self.setup_loops = {} # setup_loop offset given target
@@ -655,9 +725,9 @@ class Scanner37Base(Scanner):
):
stmts.remove(stmt_offset)
continue
# Rewing ops till we encounter non-JUMP_ABSOLUTE one
# Scan back bytecode ops till we encounter non-JUMP_ABSOLUTE op
j = self.prev_op[stmt_offset]
while code[j] == self.opc.JUMP_ABSOLUTE:
while code[j] == self.opc.JUMP_ABSOLUTE and j > 0:
j = self.prev_op[j]
# If we got here, then it's list comprehension which
# is not a statement too
@@ -687,7 +757,9 @@ class Scanner37Base(Scanner):
# Finish filling the list for last statement
slist += [codelen] * (codelen - len(slist))
def detect_control_flow(self, offset, targets, inst_index):
def detect_control_flow(
self, offset: int, targets: Dict[Any, Any], inst_index: int
):
"""
Detect type of block structures and their boundaries to fix optimized jumps
in python2.3+
@@ -698,9 +770,9 @@ class Scanner37Base(Scanner):
op = inst.opcode
# Detect parent structure
parent = self.structs[0]
start = parent["start"]
end = parent["end"]
parent: Dict[str, Any] = self.structs[0]
start: int = parent["start"]
end: int = parent["end"]
# Pick inner-most parent for our offset
for struct in self.structs:
@@ -933,20 +1005,16 @@ class Scanner37Base(Scanner):
if __name__ == "__main__":
from uncompyle6 import PYTHON_VERSION
from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str
if PYTHON_VERSION >= 3.7:
if PYTHON_VERSION_TRIPLE[:2] == (3, 7):
import inspect
co = inspect.currentframe().f_code
from uncompyle6 import PYTHON_VERSION
co = inspect.currentframe().f_code # type: ignore
tokens, customize = Scanner37Base(PYTHON_VERSION).ingest(co)
tokens, customize = Scanner37Base(PYTHON_VERSION_TRIPLE).ingest(co)
for t in tokens:
print(t)
else:
print(
"Need to be Python 3.7 or greater to demo; I am version {PYTHON_VERSION}."
% PYTHON_VERSION
)
print(f"Need to be Python 3.7 to demo; I am version {version_tuple_to_str()}.")
pass

View File

@@ -62,6 +62,8 @@ class Scanner38(Scanner37):
print(jump_back_targets)
loop_ends = []
next_end = tokens[len(tokens) - 1].off2int() + 10
new_tokens = []
for i, token in enumerate(tokens):
opname = token.kind
offset = token.offset
@@ -76,6 +78,8 @@ class Scanner38(Scanner37):
else tokens[len(tokens) - 1].off2int() + 10
)
# things that smash new_tokens like BUILD_LIST have to come first.
if offset in jump_back_targets:
next_end = off2int(jump_back_targets[offset], prefer_last=False)
if self.debug:
@@ -93,6 +97,7 @@ class Scanner38(Scanner37):
if opname == "JUMP_ABSOLUTE" and jump_target <= next_end:
# Not a forward-enough jump to break out of the next loop, so continue.
# FIXME: Do we need "continue" detection?
new_tokens.append(token)
continue
# We also want to avoid confusing BREAK_LOOPS with parts of the
@@ -123,8 +128,8 @@ class Scanner38(Scanner37):
):
token.kind = "BREAK_LOOP"
pass
pass
return tokens, customize
new_tokens.append(token)
return new_tokens, customize
if __name__ == "__main__":

View File

@@ -282,6 +282,7 @@ TABLE_DIRECT = {
"comp_if": (" if %c%c", 0, 2),
"comp_if_not": (" if not %p%c", (0, "expr", PRECEDENCE["unary_not"]), 2),
"comp_body": ("",), # ignore when recusing
"set_comp_body": ("%c", 0),
"gen_comp_body": ("%c", 0),
"dict_comp_body": ("%c:%c", 1, 0),

View File

@@ -277,8 +277,16 @@ def make_function36(self, node, is_lambda, nested=1, code_node=None):
# FIXME: handle free_tup, ann_dict, and default_tup
if kw_dict:
assert kw_dict == "dict"
const_list = kw_dict[0]
if kw_dict[0] == "const_list":
add_consts = const_list[1]
assert add_consts == "add_consts"
names = add_consts[-1].attr
defaults = [v.pattr for v in add_consts[:-1]]
else:
defaults = [self.traverse(n, indent="") for n in kw_dict[:-2]]
names = eval(self.traverse(kw_dict[-2]))
assert len(defaults) == len(names)
# FIXME: possibly handle line breaks
for i, n in enumerate(names):

View File

@@ -202,6 +202,68 @@ class NonterminalActions:
n_classdefdeco2 = n_classdef
def n_const_list(self, node):
    """
    Pretty-print a constant dict, list, or set.

    ``node[1]`` holds the flat sequence of ADD_VALUE tokens and ``node[2]``
    is the collection-building token; the suffix of its kind (LIST, SET or
    DICT) selects the brackets to use. For a dict, the last element of
    ``node[1]`` carries the tuple of keys in its ``pattr`` and the elements
    before it are the corresponding values.

    Raises TypeError if ``node[2]`` is not a list, set or dict builder.
    Ends by calling ``self.prune()``.
    """
    p = self.prec

    lastnodetype = node[2].kind
    flat_elems = node[1]
    is_dict = lastnodetype.endswith("DICT")

    if lastnodetype.endswith("LIST"):
        self.write("[")
        endchar = "]"
    elif lastnodetype.endswith("SET") or is_dict:
        self.write("{")
        endchar = "}"
    else:
        raise TypeError(
            f"Internal Error: n_const_list expects dict, list, or set; got {lastnodetype}"
        )

    self.indent_more(INDENT_PER_LEVEL)

    if is_dict:
        # The trailing element is the tuple of keys, not a value.
        keys = flat_elems[-1].pattr
        assert isinstance(keys, tuple)
        assert len(keys) == len(flat_elems) - 1
        values = flat_elems[:-1]
    else:
        keys = None
        values = flat_elems

    sep = ""
    for i, elem in enumerate(values):
        assert elem.kind == "ADD_VALUE"
        value = elem.pattr
        if elem.linestart is not None:
            if elem.linestart != self.line_number:
                # Element starts on a new source line: break the output
                # line here too and remember where we are.
                sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
                self.line_number = elem.linestart
            elif sep != "":
                sep += " "
        if is_dict:
            self.write(f"{sep} {repr(keys[i])}: {value}")
        else:
            self.write(sep, value)
        sep = ","

    self.write(endchar)
    self.indent_less(INDENT_PER_LEVEL)

    self.prec = p
    self.prune()
    return
def n_delete_subscript(self, node):
if node[-2][0] == "build_list" and node[-2][0][-1].kind.startswith(
"BUILD_TUPLE"
@@ -498,6 +560,11 @@ class NonterminalActions:
"""
prettyprint a dict, list, set or tuple.
"""
if len(node) == 1 and node[0] == "const_list":
self.preorder(node[0])
self.prune()
return
p = self.prec
self.prec = PRECEDENCE["yield"] - 1
lastnode = node.pop()
@@ -547,7 +614,6 @@ class NonterminalActions:
self.write("(")
endchar = ")"
else:
# from trepan.api import debug; debug()
raise TypeError(
"Internal Error: n_build_list expects list, tuple, set, or unpack"
)