You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-04 09:22:40 +08:00
Handle BUILD_{LIST,SET} more efficiently
This commit is contained in:
BIN
test/bytecode_2.7_run/05_long_literals.pyc
Normal file
BIN
test/bytecode_2.7_run/05_long_literals.pyc
Normal file
Binary file not shown.
@@ -310,6 +310,14 @@ class Python2Parser(PythonParser):
|
||||
|
||||
opname_base = opname[: opname.rfind("_")]
|
||||
|
||||
if opname in ("BUILD_CONST_LIST", "BUILD_CONST_SET"):
|
||||
rule = """
|
||||
add_consts ::= ADD_VALUE*
|
||||
const_list ::= COLLECTION_START add_consts %s
|
||||
expr ::= const_list
|
||||
""" % opname
|
||||
self.addRule(rule, nop_func)
|
||||
|
||||
# The order of opname listed is roughly sorted below
|
||||
if opname_base in ("BUILD_LIST", "BUILD_SET", "BUILD_TUPLE"):
|
||||
# We do this complicated test to speed up parsing of
|
||||
|
@@ -750,18 +750,37 @@ class Python3Parser(PythonParser):
|
||||
kvlist_n = "expr " * (token.attr)
|
||||
rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname)
|
||||
self.addRule(rule, nop_func)
|
||||
|
||||
elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"):
|
||||
if opname == "BUILD_CONST_DICT":
|
||||
rule = """
|
||||
add_consts ::= ADD_VALUE*
|
||||
const_list ::= COLLECTION_START add_consts %s
|
||||
dict ::= const_list
|
||||
expr ::= dict
|
||||
""" % opname
|
||||
else:
|
||||
rule = """
|
||||
add_consts ::= ADD_VALUE*
|
||||
const_list ::= COLLECTION_START add_consts %s
|
||||
expr ::= const_list
|
||||
""" % opname
|
||||
self.addRule(rule, nop_func)
|
||||
|
||||
elif opname.startswith("BUILD_DICT_OLDER"):
|
||||
rule = """dict ::= COLLECTION_START key_value_pairs BUILD_DICT_OLDER
|
||||
key_value_pairs ::= key_value_pair+
|
||||
key_value_pair ::= ADD_KEY ADD_VALUE
|
||||
"""
|
||||
self.addRule(rule, nop_func)
|
||||
|
||||
elif opname.startswith("BUILD_LIST_UNPACK"):
|
||||
v = token.attr
|
||||
rule = "build_list_unpack ::= %s%s" % ("expr " * v, opname)
|
||||
self.addRule(rule, nop_func)
|
||||
rule = "expr ::= build_list_unpack"
|
||||
self.addRule(rule, nop_func)
|
||||
|
||||
elif opname_base in ("BUILD_MAP", "BUILD_MAP_UNPACK"):
|
||||
kvlist_n = "kvlist_%s" % token.attr
|
||||
if opname == "BUILD_MAP_n":
|
||||
@@ -822,22 +841,6 @@ class Python3Parser(PythonParser):
|
||||
rule = "starred ::= %s %s" % ("expr " * v, opname)
|
||||
self.addRule(rule, nop_func)
|
||||
|
||||
elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"):
|
||||
if opname == "BUILD_CONST_DICT":
|
||||
rule = """
|
||||
add_consts ::= ADD_VALUE*
|
||||
const_list ::= COLLECTION_START add_consts %s
|
||||
dict ::= const_list
|
||||
expr ::= dict
|
||||
""" % opname
|
||||
else:
|
||||
rule = """
|
||||
add_consts ::= ADD_VALUE*
|
||||
const_list ::= COLLECTION_START add_consts %s
|
||||
expr ::= const_list
|
||||
""" % opname
|
||||
self.addRule(rule, nop_func)
|
||||
|
||||
elif opname_base in (
|
||||
"BUILD_LIST",
|
||||
"BUILD_SET",
|
||||
|
@@ -128,8 +128,8 @@ class Scanner(object):
|
||||
# FIXME: This weird Python2 behavior is not Python3
|
||||
self.resetTokenClass()
|
||||
|
||||
def bound_collection(
|
||||
self, tokens, next_tokens, t, i, collection_type
|
||||
def bound_collection_from_tokens(
|
||||
self, tokens, t, i, collection_type
|
||||
):
|
||||
count = t.attr
|
||||
assert isinstance(count, int)
|
||||
@@ -145,7 +145,7 @@ class Scanner(object):
|
||||
|
||||
# For small lists don't bother
|
||||
if count < 5:
|
||||
return next_tokens + [t]
|
||||
return None
|
||||
|
||||
collection_start = i - count
|
||||
|
||||
@@ -156,13 +156,13 @@ class Scanner(object):
|
||||
"LOAD_GLOBAL",
|
||||
"LOAD_NAME",
|
||||
):
|
||||
return next_tokens + [t]
|
||||
return None
|
||||
|
||||
collection_enum = CONST_COLLECTIONS.index(collection_type)
|
||||
|
||||
# If we go there all instructions before tokens[i] are LOAD_CONST and we can replace
|
||||
# add a boundary marker and change LOAD_CONST to something else
|
||||
new_tokens = next_tokens[:-count]
|
||||
new_tokens = tokens[:-count]
|
||||
start_offset = tokens[collection_start].offset
|
||||
new_tokens.append(
|
||||
Token(
|
||||
|
@@ -134,96 +134,6 @@ class Scanner2(Scanner):
|
||||
]
|
||||
)
|
||||
|
||||
def bound_collection_from_tokens(
    self, tokens, t, i, c, collection_type):
    """
    Try to replace a sequence of instructions that ends with a constant-map
    build instruction with a sequence that can be parsed much faster, by
    inserting a token boundary at the beginning of the sequence.

    Parameters:
      tokens:          list of tokens produced so far; the ``2 * count``
                       entries before index ``i`` are the candidate
                       key/value loads.
      t:               the collection-building token; ``t.attr`` is the
                       number of key/value pairs.
      i:               index just past the last candidate load in ``tokens``.
      c:               unused; kept so existing callers keep working.
      collection_type: opname given to the final, rewritten build token.

    Returns a new token list with the loads replaced by COLLECTION_START,
    ADD_KEY/ADD_VALUE pairs and the rewritten build token, or None when the
    sequence is too short, does not fit before ``i``, or contains anything
    other than LOAD_CONST.
    """
    count = t.attr
    assert isinstance(count, int)

    # For small collections don't bother.
    if count < 5:
        return None

    # Each entry contributes two loads (key and value). If they cannot all
    # fit before position i there is nothing to rewrite; bail out instead of
    # asserting so that a negative start index can never wrap around.
    span = 2 * count
    if span > i:
        return None

    collection_start = i - span

    # Every instruction in the candidate span has to be a constant load.
    for j in range(collection_start, i):
        if tokens[j].opname != "LOAD_CONST":
            return None

    collection_enum = CONST_COLLECTIONS.index("CONST_MAP")

    # If we get here, all instructions before tokens[i] are LOAD_CONST, so we
    # can add a boundary marker and change LOAD_CONST to something else.
    # NOTE(review): the slice below drops the *last* ``span`` tokens, which
    # matches the checked span only when i == len(tokens) -- confirm callers.
    new_tokens = tokens[:-span]
    start_offset = tokens[collection_start].offset
    new_tokens.append(
        Token(
            opname="COLLECTION_START",
            attr=collection_enum,
            pattr="CONST_MAP",
            offset="%s_0" % start_offset,
            linestart=False,
            has_arg=True,
            has_extended_arg=False,
            opc=self.opc,
        )
    )

    # NOTE(review): argval/argrepr/starts_line look like instruction
    # attributes rather than Token attributes -- verify against the Token
    # class before relying on this path.
    for j in range(collection_start, i, 2):
        for opname, source in (
            ("ADD_KEY", tokens[j]),
            ("ADD_VALUE", tokens[j + 1]),
        ):
            new_tokens.append(
                Token(
                    opname=opname,
                    attr=source.argval,
                    pattr=source.argrepr,
                    offset=source.offset,
                    linestart=source.starts_line,
                    has_arg=True,
                    has_extended_arg=False,
                    opc=self.opc,
                )
            )

    # Finally re-emit the build instruction under its new opname.
    new_tokens.append(
        Token(
            opname=collection_type,
            attr=t.attr,
            pattr=t.pattr,
            offset=t.offset,
            linestart=t.linestart,
            has_arg=t.has_arg,
            has_extended_arg=False,
            opc=t.opc,
        )
    )
    return new_tokens
|
||||
|
||||
@staticmethod
|
||||
def extended_arg_val(arg):
|
||||
"""Return integer value of an EXTENDED_ARG operand.
|
||||
@@ -287,7 +197,6 @@ class Scanner2(Scanner):
|
||||
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
|
||||
cause specific rules for the specific number of arguments they take.
|
||||
"""
|
||||
|
||||
if not show_asm:
|
||||
show_asm = self.show_asm
|
||||
|
||||
@@ -400,9 +309,24 @@ class Scanner2(Scanner):
|
||||
if op == self.opc.EXTENDED_ARG:
|
||||
extended_arg += self.extended_arg_val(oparg)
|
||||
continue
|
||||
###
|
||||
# Start here: look for BUILD_LIST
|
||||
###
|
||||
|
||||
# Note: name used to match on rather than op since
|
||||
# BUILD_SET isn't in earlier Pythons.
|
||||
if op_name in (
|
||||
"BUILD_LIST",
|
||||
"BUILD_SET",
|
||||
):
|
||||
t = Token(
|
||||
op_name, oparg, pattr, offset, self.linestarts.get(offset, None), op, has_arg, self.opc
|
||||
)
|
||||
collection_type = op_name.split("_")[1]
|
||||
next_tokens = self.bound_collection_from_tokens(
|
||||
new_tokens, t, len(new_tokens), "CONST_%s" % collection_type
|
||||
)
|
||||
if next_tokens is not None:
|
||||
new_tokens = next_tokens
|
||||
continue
|
||||
|
||||
if op in self.opc.CONST_OPS:
|
||||
const = co.co_consts[oparg]
|
||||
if iscode(const):
|
||||
|
@@ -121,7 +121,9 @@ class Scanner26(scan.Scanner2):
|
||||
i = self.next_stmt[i]
|
||||
|
||||
extended_arg = 0
|
||||
i = -1
|
||||
for offset in self.op_range(0, codelen):
|
||||
i += 1
|
||||
op = self.code[offset]
|
||||
op_name = self.opname[op]
|
||||
oparg = None; pattr = None
|
||||
@@ -154,8 +156,28 @@ class Scanner26(scan.Scanner2):
|
||||
oparg = self.get_argument(offset) + extended_arg
|
||||
extended_arg = 0
|
||||
if op == self.opc.EXTENDED_ARG:
|
||||
extended_arg = oparg * L65536
|
||||
extended_arg += self.extended_arg_val(oparg)
|
||||
continue
|
||||
|
||||
|
||||
# Note: name used to match on rather than op since
|
||||
# BUILD_SET isn't in earlier Pythons.
|
||||
if op_name in (
|
||||
"BUILD_LIST",
|
||||
"BUILD_SET",
|
||||
):
|
||||
t = Token(
|
||||
op_name, oparg, pattr, offset, self.linestarts.get(offset, None), op, has_arg, self.opc
|
||||
)
|
||||
|
||||
collection_type = op_name.split("_")[1]
|
||||
next_tokens = self.bound_collection_from_tokens(
|
||||
tokens, t, i, "CONST_%s" % collection_type
|
||||
)
|
||||
if next_tokens is not None:
|
||||
tokens = next_tokens
|
||||
continue
|
||||
|
||||
if op in self.opc.CONST_OPS:
|
||||
const = co.co_consts[oparg]
|
||||
# We can't use inspect.iscode() because we may be
|
||||
|
@@ -22,8 +22,6 @@ This sets up opcodes Python's 3.7 and calls a generalized
|
||||
scanner routine for Python 3.
|
||||
"""
|
||||
|
||||
from uncompyle6.scanner import CONST_COLLECTIONS
|
||||
from uncompyle6.scanners.tok import Token
|
||||
from uncompyle6.scanners.scanner37base import Scanner37Base
|
||||
|
||||
# bytecode verification, verify(), uses JUMP_OPs from here
|
||||
@@ -40,83 +38,6 @@ class Scanner37(Scanner37Base):
|
||||
|
||||
pass
|
||||
|
||||
def bound_collection_from_tokens(
    self, tokens, next_tokens, t, i, collection_type
):
    """
    Rewrite a run of simple loads that feeds a collection-building token
    into a COLLECTION_START / ADD_VALUE... / BUILD_<collection_type> run.

    Parameters:
      tokens:          the token list being scanned; the ``count`` entries
                       before index ``i`` are the candidate loads.
      next_tokens:     the output token list built so far.
      t:               the collection-building token; ``t.attr`` is the
                       element count.
      i:               index of ``t`` in ``tokens``.
      collection_type: one of the names in CONST_COLLECTIONS, for example
                       "CONST_DICT".

    Returns a new list. When no rewrite applies (fewer than 5 loads, the
    span does not fit before ``i``, or a token in the span is not one of
    the accepted load kinds) this is ``next_tokens + [t]``.
    """
    count = t.attr
    assert isinstance(count, int)

    assert count <= i

    if collection_type == "CONST_DICT":
        # Constant dictionaries work via BUILD_CONST_KEY_MAP and
        # handle the values() like sets and lists.
        # However the keys() are a LOAD_CONST of the keys.
        # Adjust the count to account for this.
        count += 1

    # For small lists don't bother.
    if count < 5:
        return next_tokens + [t]

    collection_start = i - count
    if collection_start < 0:
        # The extra CONST_DICT load can push the start before the first
        # token; a negative index would silently wrap to the end of the
        # list, so leave the sequence untouched instead.
        return next_tokens + [t]

    simple_loads = (
        "LOAD_CODE",
        "LOAD_CONST",
        "LOAD_FAST",
        "LOAD_GLOBAL",
        "LOAD_NAME",
        "LOAD_STR",
    )
    for j in range(collection_start, i):
        if tokens[j].kind not in simple_loads:
            return next_tokens + [t]

    collection_enum = CONST_COLLECTIONS.index(collection_type)

    # If we get here, all instructions before tokens[i] are simple loads, so
    # we can add a boundary marker and change the loads to something else.
    new_tokens = next_tokens[:-count]
    start_offset = tokens[collection_start].offset
    new_tokens.append(
        Token(
            opname="COLLECTION_START",
            attr=collection_enum,
            pattr=collection_type,
            offset="%s_0" % start_offset,
            linestart=False,
            has_arg=True,
            has_extended_arg=False,
            opc=self.opc,
        )
    )
    for source in tokens[collection_start:i]:
        new_tokens.append(
            Token(
                opname="ADD_VALUE",
                attr=source.attr,
                pattr=source.pattr,
                offset=source.offset,
                linestart=source.linestart,
                has_arg=True,
                has_extended_arg=False,
                opc=self.opc,
            )
        )

    # Finally re-emit the build instruction under its BUILD_CONST_* name.
    new_tokens.append(
        Token(
            opname="BUILD_%s" % collection_type,
            attr=t.attr,
            pattr=t.pattr,
            offset=t.offset,
            linestart=t.linestart,
            has_arg=t.has_arg,
            has_extended_arg=False,
            opc=t.opc,
        )
    )
    return new_tokens
|
||||
|
||||
def ingest(
|
||||
self, co, classname=None, code_objects={}, show_asm=None
|
||||
):
|
||||
@@ -151,9 +72,11 @@ class Scanner37(Scanner37Base):
|
||||
collection_type = "DICT"
|
||||
else:
|
||||
collection_type = t.kind.split("_")[1]
|
||||
new_tokens = self.bound_collection(
|
||||
tokens, new_tokens, t, i, "CONST_%s" % collection_type
|
||||
next_tokens = self.bound_collection_from_tokens(
|
||||
new_tokens, t, i, "CONST_%s" % collection_type
|
||||
)
|
||||
if next_tokens is not None:
|
||||
new_tokens = next_tokens
|
||||
continue
|
||||
|
||||
# The lowest bit of flags indicates whether the
|
||||
|
Reference in New Issue
Block a user