Handle long dict litereals in 3.4- better...

We detect them in tokenization and turn this into pseudo instructions
This commit is contained in:
rocky
2024-07-15 09:46:48 -04:00
parent 58c1512dc5
commit 1644370165
5 changed files with 139 additions and 30 deletions

Binary file not shown.

Binary file not shown.

View File

@@ -297,9 +297,8 @@ class Scanner3(Scanner):
) )
return new_tokens return new_tokens
def bound_map_from_inst( # FIXME: consider moving to scanner35
self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int def bound_map_from_inst_35(self, insts: list, next_tokens: list, t: Token, i: int):
):
""" """
Try to a sequence of instruction that ends with a BUILD_MAP into Try to a sequence of instruction that ends with a BUILD_MAP into
a sequence that can be parsed much faster, but inserting the a sequence that can be parsed much faster, but inserting the
@@ -314,25 +313,19 @@ class Scanner3(Scanner):
if count < 5: if count < 5:
return None return None
if self.version >= (3, 5): # Newer Python BUILD_MAP argument's count is a
# Newer Python BUILD_MAP argument's count is a # key and value pair so it is multiplied by two.
# key and value pair so it is multiplied by two. collection_start = i - (count * 2)
collection_start = i - (count * 2) assert (count * 2) <= i
assert (count * 2) <= i
for j in range(collection_start, i, 2): for j in range(collection_start, i, 2):
if insts[j].opname not in ("LOAD_CONST",): if insts[j].opname not in ("LOAD_CONST",):
return None return None
if insts[j + 1].opname not in ("LOAD_CONST",): if insts[j + 1].opname not in ("LOAD_CONST",):
return None return None
collection_start = i - (2 * count) collection_start = i - (2 * count)
collection_enum = CONST_COLLECTIONS.index("CONST_MAP") collection_enum = CONST_COLLECTIONS.index("CONST_MAP")
# else: Older Python count is sum of all key and value pairs
# Each pair is added individually like:
# LOAD_CONST ("Max-Age")
# LOAD_CONST ("max-age")
# STORE_MAP
# If we get here, all instructions before tokens[i] are LOAD_CONST and # If we get here, all instructions before tokens[i] are LOAD_CONST and
# we can replace add a boundary marker and change LOAD_CONST to # we can replace add a boundary marker and change LOAD_CONST to
@@ -345,7 +338,7 @@ class Scanner3(Scanner):
attr=collection_enum, attr=collection_enum,
pattr="CONST_MAP", pattr="CONST_MAP",
offset="%s_0" % start_offset, offset="%s_0" % start_offset,
linestart=False, linestart=insts[collection_start].starts_line,
has_arg=True, has_arg=True,
has_extended_arg=False, has_extended_arg=False,
opc=self.opc, opc=self.opc,
@@ -363,6 +356,7 @@ class Scanner3(Scanner):
has_arg=True, has_arg=True,
has_extended_arg=False, has_extended_arg=False,
opc=self.opc, opc=self.opc,
optype="pseudo",
) )
) )
new_tokens.append( new_tokens.append(
@@ -375,7 +369,7 @@ class Scanner3(Scanner):
has_arg=True, has_arg=True,
has_extended_arg=False, has_extended_arg=False,
opc=self.opc, opc=self.opc,
optype=insts[j + 1].optype, optype="pseudo",
) )
) )
new_tokens.append( new_tokens.append(
@@ -388,7 +382,93 @@ class Scanner3(Scanner):
has_arg=t.has_arg, has_arg=t.has_arg,
has_extended_arg=False, has_extended_arg=False,
opc=t.opc, opc=t.opc,
optype=t.optype, optype="pseudo",
)
)
return new_tokens
def bound_map_from_inst_pre35(
self, insts: list, next_tokens: list, t: Token, i: int
):
"""
Try to a sequence of instruction that ends with a BUILD_MAP into
a sequence that can be parsed much faster, but inserting the
token boundary at the beginning of the sequence.
"""
count = t.attr
assert isinstance(count, int)
# For small lists don't bother
if count < 10:
return None
# Older Python BUILD_MAP argument's count is a
# key and value pair and STORE_MAP. So it is multiplied by three.
collection_end = i + 1 + count * 3
for j in range(i + 1, collection_end, 3):
if insts[j].opname not in ("LOAD_CONST",):
return None
if insts[j + 1].opname not in ("LOAD_CONST",):
return None
if insts[j + 2].opname not in ("STORE_MAP",):
return None
collection_enum = CONST_COLLECTIONS.index("CONST_MAP")
new_tokens = next_tokens[:i]
start_offset = insts[i].offset
new_tokens.append(
Token(
opname="COLLECTION_START",
attr=collection_enum,
pattr="CONST_MAP",
offset="%s_0" % start_offset,
linestart=insts[i].starts_line,
has_arg=True,
has_extended_arg=False,
opc=self.opc,
optype="pseudo",
)
)
for j in range(i + 1, collection_end, 3):
new_tokens.append(
Token(
opname="ADD_KEY",
attr=insts[j + 1].argval,
pattr=insts[j + 1].argrepr,
offset=insts[j + 1].offset,
linestart=insts[j + 1].starts_line,
has_arg=True,
has_extended_arg=False,
opc=self.opc,
optype="pseudo",
)
)
new_tokens.append(
Token(
opname="ADD_VALUE",
attr=insts[j].argval,
pattr=insts[j].argrepr,
offset=insts[j].offset,
linestart=insts[j].starts_line,
has_arg=True,
has_extended_arg=False,
opc=self.opc,
optype="pseudo",
)
)
new_tokens.append(
Token(
opname="BUILD_DICT_OLDER",
attr=t.attr,
pattr=t.pattr,
offset=t.offset,
linestart=t.linestart,
has_arg=t.has_arg,
has_extended_arg=False,
opc=t.opc,
optype="pseudo",
) )
) )
return new_tokens return new_tokens
@@ -494,8 +574,17 @@ class Scanner3(Scanner):
last_op_was_break = False last_op_was_break = False
new_tokens = [] new_tokens = []
skip_end_offset = None
for i, inst in enumerate(self.insts): for i, inst in enumerate(self.insts):
# BUILD_MAP for < 3.5 can skip *forward* in instructions and
# replace them. So we use the below to get up to the position
# scanned and replaced forward
if skip_end_offset and inst.offset <= skip_end_offset:
continue
skip_end_offset = None
opname = inst.opname opname = inst.opname
argval = inst.argval argval = inst.argval
pattr = inst.argrepr pattr = inst.argrepr
@@ -529,17 +618,37 @@ class Scanner3(Scanner):
if try_tokens is not None: if try_tokens is not None:
new_tokens = try_tokens new_tokens = try_tokens
continue continue
elif opname in ("BUILD_MAP",) and self.version >= (3, 5): elif opname in ("BUILD_MAP",):
try_tokens = self.bound_map_from_inst( bound_map_from_insts_fn = (
self.bound_map_from_inst_35
if self.version >= (3, 5)
else self.bound_map_from_inst_pre35
)
try_tokens = bound_map_from_insts_fn(
self.insts, self.insts,
new_tokens, new_tokens,
inst,
t, t,
i, i,
) )
if try_tokens is not None: if try_tokens is not None:
new_tokens = try_tokens if self.version < (3, 5):
continue assert try_tokens[-1] == "BUILD_DICT_OLDER"
prev_offset = inst.offset
for j in range(i, len(self.insts)):
if self.insts[j].opname == "STORE_NAME":
new_tokens = try_tokens
skip_end_offset = prev_offset
# Set a hacky sentinal to indicate skipping to the
# next instruction
opname = "EXTENDED_ARG"
break
prev_offset = self.insts[j].offset
pass
pass
else:
new_tokens = try_tokens
continue
pass
argval = inst.argval argval = inst.argval
op = inst.opcode op = inst.opcode

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2019-2022 by Rocky Bernstein # Copyright (c) 2019-2022, 2024 by Rocky Bernstein
# #
# This program is free software: you can redistribute it and/or modify # This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by # it under the terms of the GNU General Public License as published by

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2016-2021, 2023 by Rocky Bernstein # Copyright (c) 2016-2021, 2023-2024 by Rocky Bernstein
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com> # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 1999 John Aycock # Copyright (c) 1999 John Aycock
# #