Handle long 2.x bytecode literals more efficiently

This commit is contained in:
rocky
2022-04-27 13:47:56 -04:00
parent cfd6166d8d
commit 8e5faa933f
7 changed files with 153 additions and 15 deletions

Binary file not shown.

View File

@@ -312,6 +312,14 @@ class Python2Parser(PythonParser):
opname_base = opname[: opname.rfind("_")] opname_base = opname[: opname.rfind("_")]
if opname in ("BUILD_CONST_LIST", "BUILD_CONST_SET"):
rule = """
add_consts ::= ADD_VALUE*
const_list ::= COLLECTION_START add_consts %s
expr ::= const_list
""" % opname
self.addRule(rule, nop_func)
# The order of opname listed is roughly sorted below # The order of opname listed is roughly sorted below
if opname_base in ("BUILD_LIST", "BUILD_SET", "BUILD_TUPLE"): if opname_base in ("BUILD_LIST", "BUILD_SET", "BUILD_TUPLE"):
# We do this complicated test to speed up parsing of # We do this complicated test to speed up parsing of

View File

@@ -748,18 +748,37 @@ class Python3Parser(PythonParser):
kvlist_n = "expr " * (token.attr) kvlist_n = "expr " * (token.attr)
rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname) rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname)
self.addRule(rule, nop_func) self.addRule(rule, nop_func)
elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"):
if opname == "BUILD_CONST_DICT":
rule = """
add_consts ::= ADD_VALUE*
const_list ::= COLLECTION_START add_consts %s
dict ::= const_list
expr ::= dict
""" % opname
else:
rule = """
add_consts ::= ADD_VALUE*
const_list ::= COLLECTION_START add_consts %s
expr ::= const_list
""" % opname
self.addRule(rule, nop_func)
elif opname.startswith("BUILD_DICT_OLDER"): elif opname.startswith("BUILD_DICT_OLDER"):
rule = """dict ::= COLLECTION_START key_value_pairs BUILD_DICT_OLDER rule = """dict ::= COLLECTION_START key_value_pairs BUILD_DICT_OLDER
key_value_pairs ::= key_value_pair+ key_value_pairs ::= key_value_pair+
key_value_pair ::= ADD_KEY ADD_VALUE key_value_pair ::= ADD_KEY ADD_VALUE
""" """
self.addRule(rule, nop_func) self.addRule(rule, nop_func)
elif opname.startswith("BUILD_LIST_UNPACK"): elif opname.startswith("BUILD_LIST_UNPACK"):
v = token.attr v = token.attr
rule = "build_list_unpack ::= %s%s" % ("expr " * v, opname) rule = "build_list_unpack ::= %s%s" % ("expr " * v, opname)
self.addRule(rule, nop_func) self.addRule(rule, nop_func)
rule = "expr ::= build_list_unpack" rule = "expr ::= build_list_unpack"
self.addRule(rule, nop_func) self.addRule(rule, nop_func)
elif opname_base in ("BUILD_MAP", "BUILD_MAP_UNPACK"): elif opname_base in ("BUILD_MAP", "BUILD_MAP_UNPACK"):
kvlist_n = "kvlist_%s" % token.attr kvlist_n = "kvlist_%s" % token.attr
if opname == "BUILD_MAP_n": if opname == "BUILD_MAP_n":

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2016, 2018-2021 by Rocky Bernstein # Copyright (c) 2016, 2018-2022 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org> # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com> # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 1999 John Aycock # Copyright (c) 1999 John Aycock
@@ -24,7 +24,6 @@ scanners, e.g. for Python 2.7 or 3.4.
from typing import Optional from typing import Optional
from array import array from array import array
from collections import namedtuple from collections import namedtuple
from sys import intern # noqa
from uncompyle6.scanners.tok import Token from uncompyle6.scanners.tok import Token
from xdis.version_info import IS_PYPY, version_tuple_to_str from xdis.version_info import IS_PYPY, version_tuple_to_str
@@ -125,6 +124,80 @@ class Scanner(object):
# FIXME: This weird Python2 behavior is not Python3 # FIXME: This weird Python2 behavior is not Python3
self.resetTokenClass() self.resetTokenClass()
def bound_collection_from_tokens(
    self, tokens, t, i, collection_type
):
    """
    Try to fold the operand tokens of a collection-building instruction
    into a COLLECTION_START / ADD_VALUE* / BUILD_<collection_type> run.

    Parameters:
      tokens: list of tokens produced so far; the ``count`` entries
              ending just before index ``i`` are taken as the operands.
      t: token for the BUILD_xxx instruction; ``t.attr`` is its
         operand count.
      i: index in ``tokens`` at which ``t`` would be placed.
      collection_type: a name found in CONST_COLLECTIONS, e.g.
         "CONST_LIST", "CONST_SET" or "CONST_DICT".

    Returns a new token list with the operands rewritten, or None when
    no rewrite is done: the collection is too small, there are not
    enough preceding tokens, or some operand is not one of the simple
    LOAD_xxx kinds checked below. Callers treat None as "emit ``t``
    the normal way".
    """
    count = t.attr
    assert isinstance(count, int)

    assert count <= i

    if collection_type == "CONST_DICT":
        # Constant dictionaries work via BUILD_CONST_KEY_MAP and
        # handle the values() like sets and lists.
        # However the keys() are a LOAD_CONST of the keys.
        # Adjust the count to account for this.
        count += 1

    # The adjustment above can push count past i. Without this check
    # collection_start would go negative and tokens[-1] would silently
    # wrap around to the end of the list.
    if count > i:
        return None

    # For small lists don't bother
    if count < 5:
        return None

    collection_start = i - count

    for j in range(collection_start, i):
        if tokens[j].kind not in (
            "LOAD_CONST",
            "LOAD_FAST",
            "LOAD_GLOBAL",
            "LOAD_NAME",
        ):
            return None

    collection_enum = CONST_COLLECTIONS.index(collection_type)

    # If we get here, every operand token is one of the simple LOAD_xxx
    # kinds tested above. Add a boundary marker and turn each operand
    # into an ADD_VALUE token.
    # NOTE(review): the slice below drops the *last* count tokens, which
    # is the range checked above only when i == len(tokens) -- confirm
    # that every caller passes i that way.
    new_tokens = tokens[:-count]
    start_offset = tokens[collection_start].offset
    new_tokens.append(
        Token(
            opname="COLLECTION_START",
            attr=collection_enum,
            pattr=collection_type,
            offset="%s_0" % start_offset,
            has_arg=True,
            opc=self.opc,
            has_extended_arg=False,
        )
    )
    for j in range(collection_start, i):
        new_tokens.append(
            Token(
                opname="ADD_VALUE",
                attr=tokens[j].attr,
                pattr=tokens[j].pattr,
                offset=tokens[j].offset,
                has_arg=True,
                linestart=tokens[j].linestart,
                opc=self.opc,
                has_extended_arg=False,
            )
        )
    # Finally the build instruction itself, renamed to the constant form.
    new_tokens.append(
        Token(
            opname="BUILD_%s" % collection_type,
            attr=t.attr,
            pattr=t.pattr,
            offset=t.offset,
            has_arg=t.has_arg,
            linestart=t.linestart,
            opc=t.opc,
            has_extended_arg=False,
        )
    )
    return new_tokens
def build_instructions(self, co): def build_instructions(self, co):
""" """
Create a list of instructions (a structured object rather than Create a list of instructions (a structured object rather than

View File

@@ -200,7 +200,6 @@ class Scanner2(Scanner):
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take. cause specific rules for the specific number of arguments they take.
""" """
if not show_asm: if not show_asm:
show_asm = self.show_asm show_asm = self.show_asm
@@ -212,7 +211,7 @@ class Scanner2(Scanner):
print(instr.disassemble()) print(instr.disassemble())
# list of tokens/instructions # list of tokens/instructions
tokens = [] new_tokens = []
# "customize" is in the process of going away here # "customize" is in the process of going away here
customize = {} customize = {}
@@ -289,7 +288,7 @@ class Scanner2(Scanner):
if come_from_type not in ("LOOP", "EXCEPT"): if come_from_type not in ("LOOP", "EXCEPT"):
come_from_name = "COME_FROM_%s" % come_from_type come_from_name = "COME_FROM_%s" % come_from_type
pass pass
tokens.append( new_tokens.append(
Token( Token(
come_from_name, come_from_name,
jump_offset, jump_offset,
@@ -313,6 +312,24 @@ class Scanner2(Scanner):
if op == self.opc.EXTENDED_ARG: if op == self.opc.EXTENDED_ARG:
extended_arg += self.extended_arg_val(oparg) extended_arg += self.extended_arg_val(oparg)
continue continue
# Note: name used to match on rather than op since
# BUILD_SET isn't in earlier Pythons.
if op_name in (
"BUILD_LIST",
"BUILD_SET",
):
t = Token(
op_name, oparg, pattr, offset, self.linestarts.get(offset, None), op, has_arg, self.opc
)
collection_type = op_name.split("_")[1]
next_tokens = self.bound_collection_from_tokens(
new_tokens, t, len(new_tokens), "CONST_%s" % collection_type
)
if next_tokens is not None:
new_tokens = next_tokens
continue
if op in self.opc.CONST_OPS: if op in self.opc.CONST_OPS:
const = co.co_consts[oparg] const = co.co_consts[oparg]
if iscode(const): if iscode(const):
@@ -347,12 +364,12 @@ class Scanner2(Scanner):
elif op in self.opc.JREL_OPS: elif op in self.opc.JREL_OPS:
# use instead: hasattr(self, 'patch_continue'): ? # use instead: hasattr(self, 'patch_continue'): ?
if self.version[:2] == (2, 7): if self.version[:2] == (2, 7):
self.patch_continue(tokens, offset, op) self.patch_continue(new_tokens, offset, op)
pattr = repr(offset + 3 + oparg) pattr = repr(offset + 3 + oparg)
elif op in self.opc.JABS_OPS: elif op in self.opc.JABS_OPS:
# use instead: hasattr(self, 'patch_continue'): ? # use instead: hasattr(self, 'patch_continue'): ?
if self.version[:2] == (2, 7): if self.version[:2] == (2, 7):
self.patch_continue(tokens, offset, op) self.patch_continue(new_tokens, offset, op)
pattr = repr(oparg) pattr = repr(oparg)
elif op in self.opc.LOCAL_OPS: elif op in self.opc.LOCAL_OPS:
pattr = varnames[oparg] pattr = varnames[oparg]
@@ -433,13 +450,13 @@ class Scanner2(Scanner):
linestart = self.linestarts.get(offset, None) linestart = self.linestarts.get(offset, None)
if offset not in replace: if offset not in replace:
tokens.append( new_tokens.append(
Token( Token(
op_name, oparg, pattr, offset, linestart, op, has_arg, self.opc op_name, oparg, pattr, offset, linestart, op, has_arg, self.opc
) )
) )
else: else:
tokens.append( new_tokens.append(
Token( Token(
replace[offset], replace[offset],
oparg, oparg,
@@ -455,10 +472,10 @@ class Scanner2(Scanner):
pass pass
if show_asm in ("both", "after"): if show_asm in ("both", "after"):
for t in tokens: for t in new_tokens:
print(t.format(line_prefix="")) print(t.format(line_prefix=""))
print() print()
return tokens, customize return new_tokens, customize
def build_statement_indices(self): def build_statement_indices(self):
code = self.code code = self.code

View File

@@ -123,7 +123,9 @@ class Scanner26(scan.Scanner2):
i = self.next_stmt[i] i = self.next_stmt[i]
extended_arg = 0 extended_arg = 0
i = -1
for offset in self.op_range(0, codelen): for offset in self.op_range(0, codelen):
i += 1
op = self.code[offset] op = self.code[offset]
op_name = self.opname[op] op_name = self.opname[op]
oparg = None; pattr = None oparg = None; pattr = None
@@ -156,8 +158,28 @@ class Scanner26(scan.Scanner2):
oparg = self.get_argument(offset) + extended_arg oparg = self.get_argument(offset) + extended_arg
extended_arg = 0 extended_arg = 0
if op == self.opc.EXTENDED_ARG: if op == self.opc.EXTENDED_ARG:
extended_arg = oparg * L65536 extended_arg += self.extended_arg_val(oparg)
continue continue
# Note: name used to match on rather than op since
# BUILD_SET isn't in earlier Pythons.
if op_name in (
"BUILD_LIST",
"BUILD_SET",
):
t = Token(
op_name, oparg, pattr, offset, self.linestarts.get(offset, None), op, has_arg, self.opc
)
collection_type = op_name.split("_")[1]
next_tokens = self.bound_collection_from_tokens(
tokens, t, i, "CONST_%s" % collection_type
)
if next_tokens is not None:
tokens = next_tokens
continue
if op in self.opc.CONST_OPS: if op in self.opc.CONST_OPS:
const = co.co_consts[oparg] const = co.co_consts[oparg]
# We can't use inspect.iscode() because we may be # We can't use inspect.iscode() because we may be

View File

@@ -24,8 +24,7 @@ scanner routine for Python 3.
from typing import Tuple from typing import Tuple
from uncompyle6.scanner import CONST_COLLECTIONS from uncompyle6.scanner import CONST_COLLECTIONS, Token
from uncompyle6.scanners.tok import Token
from uncompyle6.scanners.scanner37base import Scanner37Base from uncompyle6.scanners.scanner37base import Scanner37Base
# bytecode verification, verify(), uses JUMP_OPs from here # bytecode verification, verify(), uses JUMP_OPs from here