Start handling BUILD_MAP (a class of dict)

This commit is contained in:
rocky
2022-04-26 15:37:42 -04:00
parent 81ff994a41
commit c25fa61e33
5 changed files with 135 additions and 3 deletions

View File

@@ -61,6 +61,7 @@ class PythonParser(GenericASTBuilder):
"except_stmts", "except_stmts",
"exprlist", "exprlist",
"importlist", "importlist",
"key_value_pairs",
"kvlist", "kvlist",
"kwargs", "kwargs",
"l_stmts", "l_stmts",

View File

@@ -748,6 +748,12 @@ class Python3Parser(PythonParser):
kvlist_n = "expr " * (token.attr) kvlist_n = "expr " * (token.attr)
rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname) rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname)
self.addRule(rule, nop_func) self.addRule(rule, nop_func)
elif opname.startswith("BUILD_DICT_OLDER"):
rule = """dict ::= COLLECTION_START key_value_pairs BUILD_DICT_OLDER
key_value_pairs ::= key_value_pair+
key_value_pair ::= ADD_KEY ADD_VALUE
"""
self.addRule(rule, nop_func)
elif opname.startswith("BUILD_LIST_UNPACK"): elif opname.startswith("BUILD_LIST_UNPACK"):
v = token.attr v = token.attr
rule = "build_list_unpack ::= %s%s" % ("expr " * v, opname) rule = "build_list_unpack ::= %s%s" % ("expr " * v, opname)

View File

@@ -84,7 +84,7 @@ def long(num):
return num return num
CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT") CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT", "CONST_MAP")
class Code(object): class Code(object):

View File

@@ -210,6 +210,10 @@ class Scanner3(Scanner):
def bound_collection_from_inst( def bound_collection_from_inst(
self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int, collection_type: str self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int, collection_type: str
) -> Optional[list]: ) -> Optional[list]:
"""
Try to convert a sequence of instructions that ends with a BUILD_xxx into a sequence that can
be parsed much faster, by inserting a token boundary at the beginning of the sequence.
"""
count = t.attr count = t.attr
assert isinstance(count, int) assert isinstance(count, int)
@@ -285,6 +289,94 @@ class Scanner3(Scanner):
) )
return new_tokens return new_tokens
def bound_map_from_inst(
    self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int
) -> Optional[list]:
    """
    Try to convert a sequence of instructions that ends with a BUILD_MAP into
    a sequence that can be parsed much faster, by inserting a token boundary
    at the beginning of the sequence.

    :param insts: instruction list; ``insts[i]`` is the BUILD_MAP instruction.
    :param next_tokens: tokens produced so far. The last ``2 * count`` of them
        are dropped and replaced.
        NOTE(review): this presumes exactly one token was emitted for each of
        the preceding LOAD_CONST instructions -- confirm against the caller.
    :param inst: the BUILD_MAP instruction. Not used by this method.
    :param t: the BUILD_MAP token; ``t.attr`` is the number of key/value pairs.
    :param i: index of the BUILD_MAP instruction in ``insts``.
    :return: a new token list ending in
        ``COLLECTION_START (ADD_KEY ADD_VALUE)* BUILD_DICT_OLDER``, or ``None``
        when the rewrite does not apply: fewer than 5 pairs, not enough
        preceding instructions, or a key/value that is not a LOAD_CONST.
    """
    count = t.attr
    assert isinstance(count, int)

    # For small maps don't bother.
    if count < 5:
        return None

    # Every entry is built from two instructions (key, then value), so the map
    # needs 2 * count instructions in front of BUILD_MAP. Decline rather than
    # fail when there are not that many.
    pair_insts = 2 * count
    if pair_insts > i:
        return None

    collection_start = i - pair_insts

    # Only constant keys and constant values are handled.
    if any(insts[j].opname != "LOAD_CONST" for j in range(collection_start, i)):
        return None

    collection_enum = CONST_COLLECTIONS.index("CONST_MAP")

    # If we get here, all of the instructions feeding insts[i] are LOAD_CONST,
    # so we can add a boundary marker and change each LOAD_CONST into
    # ADD_KEY / ADD_VALUE.
    new_tokens = next_tokens[:-pair_insts]
    start_offset = insts[collection_start].offset
    new_tokens.append(
        Token(
            opname="COLLECTION_START",
            attr=collection_enum,
            pattr="CONST_MAP",
            # Synthetic offset: the marker has no instruction of its own.
            offset=f"{start_offset}_0",
            linestart=False,
            has_arg=True,
            has_extended_arg=False,
            opc=self.opc,
        )
    )

    for j in range(collection_start, i, 2):
        # insts[j] loads the key and insts[j + 1] loads its value.
        for opname, const_inst in (("ADD_KEY", insts[j]), ("ADD_VALUE", insts[j + 1])):
            new_tokens.append(
                Token(
                    opname=opname,
                    attr=const_inst.argval,
                    pattr=const_inst.argrepr,
                    offset=const_inst.offset,
                    linestart=const_inst.starts_line,
                    has_arg=True,
                    has_extended_arg=False,
                    opc=self.opc,
                )
            )

    # Re-emit the BUILD_MAP itself under the name the grammar rule expects.
    new_tokens.append(
        Token(
            opname="BUILD_DICT_OLDER",
            attr=t.attr,
            pattr=t.pattr,
            offset=t.offset,
            linestart=t.linestart,
            has_arg=t.has_arg,
            has_extended_arg=False,
            opc=t.opc,
        )
    )
    return new_tokens
def ingest(self, co, classname=None, code_objects={}, show_asm=None def ingest(self, co, classname=None, code_objects={}, show_asm=None
) -> Tuple[list, dict]: ) -> Tuple[list, dict]:
""" """
@@ -411,6 +503,15 @@ class Scanner3(Scanner):
if try_tokens is not None: if try_tokens is not None:
new_tokens = try_tokens new_tokens = try_tokens
continue continue
elif opname in (
"BUILD_MAP",
):
try_tokens = self.bound_map_from_inst(
self.insts, new_tokens, inst, t, i,
)
if try_tokens is not None:
new_tokens = try_tokens
continue
argval = inst.argval argval = inst.argval
op = inst.opcode op = inst.opcode

View File

@@ -244,8 +244,6 @@ class NonterminalActions:
sep = "," sep = ","
else: else:
for elem in flat_elems: for elem in flat_elems:
if elem.kind != "ADD_VALUE":
from trepan.api import debug; debug()
assert elem.kind == "ADD_VALUE" assert elem.kind == "ADD_VALUE"
value = elem.pattr value = elem.pattr
if elem.linestart is not None: if elem.linestart is not None:
@@ -395,6 +393,32 @@ class NonterminalActions:
template = ("**%C", (0, kwargs, ", **")) template = ("**%C", (0, kwargs, ", **"))
self.template_engine(template, node) self.template_engine(template, node)
sep = "" sep = ""
if node[0].kind == "COLLECTION_START":
key_value_pairs = node[1]
for key_value_pair in key_value_pairs:
key, value = key_value_pair
if key.linestart is not None:
line_number = key.linestart
if line_number != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
self.line_number = line_number
self.write(sep)
self.write(key.pattr)
self.write(": ")
if value.linestart is not None:
line_number = value.linestart
if line_number != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
self.line_number = line_number
else:
sep += " "
pass
self.write(value.pattr)
sep = ", "
pass
if sep.startswith(",\n"):
self.write(sep[1:])
pass
pass pass
else: else: