handle long literal constants faster

This commit is contained in:
rocky
2022-04-24 02:50:09 -04:00
parent 464801bcb3
commit 371138cfbc
10 changed files with 2061 additions and 63 deletions

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2015-2020 by Rocky Bernstein
# Copyright (c) 2015-2020, 2022 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
#
@@ -29,6 +29,8 @@ For example:
Finally we save token information.
"""
from typing import Any, Dict, List, Set
from xdis import iscode, instruction_size, Instruction
from xdis.bytecode import _get_const_info
@@ -45,6 +47,9 @@ import sys
globals().update(op3.opmap)
CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")
class Scanner37Base(Scanner):
def __init__(self, version, show_asm=None, is_pypy=False):
super(Scanner37Base, self).__init__(version, show_asm, is_pypy)
@@ -179,6 +184,80 @@ class Scanner37Base(Scanner):
# self.varargs_ops = frozenset(self.opc.hasvargs)
return
def bound_collection(
self, tokens: list, next_tokens: list, t: Token, i: int, collection_type: str
):
count = t.attr
assert isinstance(count, int)
assert count <= i
if collection_type == "CONST_DICT":
# constant dictonaries work via BUILD_CONST_KEY_MAP and
# handle the values() like sets and lists.
# However the keys() are an LOAD_CONST of the keys.
# adjust offset to account for this
count += 1
# For small lists don't bother
if count < 5:
return next_tokens + [t]
collection_start = i - count
for j in range(collection_start, i):
if tokens[j].kind not in (
"LOAD_CONST",
"LOAD_FAST",
"LOAD_GLOBAL",
"LOAD_NAME",
):
return next_tokens + [t]
collection_enum = CONST_COLLECTIONS.index(collection_type)
# If we go there all instructions before tokens[i] are LOAD_CONST and we can replace
# add a boundary marker and change LOAD_CONST to something else
new_tokens = next_tokens[:-count]
start_offset = tokens[collection_start].offset
new_tokens.append(
Token(
opname="COLLECTION_START",
attr=collection_enum,
pattr=collection_type,
offset=f"{start_offset}_0",
has_arg=True,
opc=self.opc,
has_extended_arg=False,
)
)
for j in range(collection_start, i):
new_tokens.append(
Token(
opname="ADD_VALUE",
attr=tokens[j].attr,
pattr=tokens[j].pattr,
offset=tokens[j].offset,
has_arg=True,
linestart=tokens[j].linestart,
opc=self.opc,
has_extended_arg=False,
)
)
new_tokens.append(
Token(
opname=f"BUILD_{collection_type}",
attr=t.attr,
pattr=t.pattr,
offset=t.offset,
has_arg=t.has_arg,
linestart=t.linestart,
opc=t.opc,
has_extended_arg=False,
)
)
return new_tokens
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
"""
Pick out tokens from an uncompyle6 code object, and transform them,
@@ -212,7 +291,7 @@ class Scanner37Base(Scanner):
# show_asm = 'both'
if show_asm in ("both", "before"):
for instr in bytecode.get_instructions(co):
print(instr.disassemble())
print(instr.disassemble(self.opc))
# "customize" is in the process of going away here
customize = {}
@@ -316,6 +395,7 @@ class Scanner37Base(Scanner):
# "loop" tag last so the grammar rule matches that properly.
for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
come_from_name = "COME_FROM"
opname = self.opname_for_offset(jump_offset)
if opname == "EXTENDED_ARG":
k = xdis.next_offset(op, self.opc, jump_offset)
@@ -342,22 +422,6 @@ class Scanner37Base(Scanner):
jump_idx += 1
pass
pass
elif inst.offset in self.else_start:
end_offset = self.else_start[inst.offset]
j = tokens_append(
j,
Token(
"ELSE",
None,
repr(end_offset),
offset="%s" % (inst.offset),
has_arg=True,
opc=self.opc,
has_extended_arg=inst.has_extended_arg,
),
)
pass
pattr = inst.argrepr
opname = inst.opname
@@ -444,17 +508,24 @@ class Scanner37Base(Scanner):
opname = "%s_%d+%d" % (opname, before_args, after_args)
elif op == self.opc.JUMP_ABSOLUTE:
# Further classify JUMP_ABSOLUTE into backward jumps
# which are used in loops, and "CONTINUE" jumps which
# may appear in a "continue" statement. The loop-type
# and continue-type jumps will help us classify loop
# boundaries The continue-type jumps help us get
# "continue" statements with would otherwise be turned
# into a "pass" statement because JUMPs are sometimes
# ignored in rules as just boundary overhead. In
# comprehensions we might sometimes classify JUMP_BACK
# as CONTINUE, but that's okay since we add a grammar
# rule for that.
# Refine JUMP_ABSOLUTE further in into:
#
# * "JUMP_LOOP" - which are are used in loops. This is sometimes
# found at the end of a looping construct
# * "BREAK_LOOP" - which are are used to break loops.
# * "CONTINUE" - jumps which may appear in a "continue" statement.
# It is okay to confuse this with JUMP_LOOP. The
# grammar should tolerate this.
# * "JUMP_FORWARD - forward jumps that are not BREAK_LOOP jumps.
#
# The loop-type and continue-type jumps will help us
# classify loop boundaries The continue-type jumps
# help us get "continue" statements with would
# otherwise be turned into a "pass" statement because
# JUMPs are sometimes ignored in rules as just
# boundary overhead. Again, in comprehensions we might
# sometimes classify JUMP_LOOP as CONTINUE, but that's
# okay since grammar rules should tolerate that.
pattr = argval
target = inst.argval
if target <= inst.offset:
@@ -523,7 +594,7 @@ class Scanner37Base(Scanner):
print()
return tokens, customize
def find_jump_targets(self, debug):
def find_jump_targets(self, debug: str) -> dict:
"""
Detect all offsets in a byte code which are jump targets
where we might insert a COME_FROM instruction.
@@ -538,18 +609,17 @@ class Scanner37Base(Scanner):
self.structs = [{"type": "root", "start": 0, "end": n - 1}]
# All loop entry points
self.loops = []
self.loops: List[int] = []
# Map fixed jumps to their real destination
self.fixed_jumps = {}
self.fixed_jumps: Dict[int, int] = {}
self.except_targets = {}
self.ignore_if = set()
self.ignore_if: Set[int] = set()
self.build_statement_indices()
self.else_start = {}
# Containers filled by detect_control_flow()
self.not_continue = set()
self.return_end_ifs = set()
self.not_continue: Set[int] = set()
self.return_end_ifs: Set[int] = set()
self.setup_loop_targets = {} # target given setup_loop offset
self.setup_loops = {} # setup_loop offset given target
@@ -655,9 +725,9 @@ class Scanner37Base(Scanner):
):
stmts.remove(stmt_offset)
continue
# Rewing ops till we encounter non-JUMP_ABSOLUTE one
# Scan back bytecode ops till we encounter non-JUMP_ABSOLUTE op
j = self.prev_op[stmt_offset]
while code[j] == self.opc.JUMP_ABSOLUTE:
while code[j] == self.opc.JUMP_ABSOLUTE and j > 0:
j = self.prev_op[j]
# If we got here, then it's list comprehension which
# is not a statement too
@@ -687,7 +757,9 @@ class Scanner37Base(Scanner):
# Finish filling the list for last statement
slist += [codelen] * (codelen - len(slist))
def detect_control_flow(self, offset, targets, inst_index):
def detect_control_flow(
self, offset: int, targets: Dict[Any, Any], inst_index: int
):
"""
Detect type of block structures and their boundaries to fix optimized jumps
in python2.3+
@@ -698,9 +770,9 @@ class Scanner37Base(Scanner):
op = inst.opcode
# Detect parent structure
parent = self.structs[0]
start = parent["start"]
end = parent["end"]
parent: Dict[str, Any] = self.structs[0]
start: int = parent["start"]
end: int = parent["end"]
# Pick inner-most parent for our offset
for struct in self.structs:
@@ -933,20 +1005,16 @@ class Scanner37Base(Scanner):
if __name__ == "__main__":
from uncompyle6 import PYTHON_VERSION
from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str
if PYTHON_VERSION >= 3.7:
if PYTHON_VERSION_TRIPLE[:2] == (3, 7):
import inspect
co = inspect.currentframe().f_code
from uncompyle6 import PYTHON_VERSION
co = inspect.currentframe().f_code # type: ignore
tokens, customize = Scanner37Base(PYTHON_VERSION).ingest(co)
tokens, customize = Scanner37Base(PYTHON_VERSION_TRIPLE).ingest(co)
for t in tokens:
print(t)
else:
print(
"Need to be Python 3.7 or greater to demo; I am version {PYTHON_VERSION}."
% PYTHON_VERSION
)
print(f"Need to be Python 3.7 to demo; I am version {version_tuple_to_str()}.")
pass