You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-04 01:09:52 +08:00
Handle long 2.x bytecode literals more efficiently
This commit is contained in:
BIN
test/bytecode_2.7_run/05_long_literals.pyc
Normal file
BIN
test/bytecode_2.7_run/05_long_literals.pyc
Normal file
Binary file not shown.
@@ -312,6 +312,14 @@ class Python2Parser(PythonParser):
|
|||||||
|
|
||||||
opname_base = opname[: opname.rfind("_")]
|
opname_base = opname[: opname.rfind("_")]
|
||||||
|
|
||||||
|
if opname in ("BUILD_CONST_LIST", "BUILD_CONST_SET"):
|
||||||
|
rule = """
|
||||||
|
add_consts ::= ADD_VALUE*
|
||||||
|
const_list ::= COLLECTION_START add_consts %s
|
||||||
|
expr ::= const_list
|
||||||
|
""" % opname
|
||||||
|
self.addRule(rule, nop_func)
|
||||||
|
|
||||||
# The order of opname listed is roughly sorted below
|
# The order of opname listed is roughly sorted below
|
||||||
if opname_base in ("BUILD_LIST", "BUILD_SET", "BUILD_TUPLE"):
|
if opname_base in ("BUILD_LIST", "BUILD_SET", "BUILD_TUPLE"):
|
||||||
# We do this complicated test to speed up parsing of
|
# We do this complicated test to speed up parsing of
|
||||||
|
@@ -748,18 +748,37 @@ class Python3Parser(PythonParser):
|
|||||||
kvlist_n = "expr " * (token.attr)
|
kvlist_n = "expr " * (token.attr)
|
||||||
rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname)
|
rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname)
|
||||||
self.addRule(rule, nop_func)
|
self.addRule(rule, nop_func)
|
||||||
|
|
||||||
|
elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"):
|
||||||
|
if opname == "BUILD_CONST_DICT":
|
||||||
|
rule = """
|
||||||
|
add_consts ::= ADD_VALUE*
|
||||||
|
const_list ::= COLLECTION_START add_consts %s
|
||||||
|
dict ::= const_list
|
||||||
|
expr ::= dict
|
||||||
|
""" % opname
|
||||||
|
else:
|
||||||
|
rule = """
|
||||||
|
add_consts ::= ADD_VALUE*
|
||||||
|
const_list ::= COLLECTION_START add_consts %s
|
||||||
|
expr ::= const_list
|
||||||
|
""" % opname
|
||||||
|
self.addRule(rule, nop_func)
|
||||||
|
|
||||||
elif opname.startswith("BUILD_DICT_OLDER"):
|
elif opname.startswith("BUILD_DICT_OLDER"):
|
||||||
rule = """dict ::= COLLECTION_START key_value_pairs BUILD_DICT_OLDER
|
rule = """dict ::= COLLECTION_START key_value_pairs BUILD_DICT_OLDER
|
||||||
key_value_pairs ::= key_value_pair+
|
key_value_pairs ::= key_value_pair+
|
||||||
key_value_pair ::= ADD_KEY ADD_VALUE
|
key_value_pair ::= ADD_KEY ADD_VALUE
|
||||||
"""
|
"""
|
||||||
self.addRule(rule, nop_func)
|
self.addRule(rule, nop_func)
|
||||||
|
|
||||||
elif opname.startswith("BUILD_LIST_UNPACK"):
|
elif opname.startswith("BUILD_LIST_UNPACK"):
|
||||||
v = token.attr
|
v = token.attr
|
||||||
rule = "build_list_unpack ::= %s%s" % ("expr " * v, opname)
|
rule = "build_list_unpack ::= %s%s" % ("expr " * v, opname)
|
||||||
self.addRule(rule, nop_func)
|
self.addRule(rule, nop_func)
|
||||||
rule = "expr ::= build_list_unpack"
|
rule = "expr ::= build_list_unpack"
|
||||||
self.addRule(rule, nop_func)
|
self.addRule(rule, nop_func)
|
||||||
|
|
||||||
elif opname_base in ("BUILD_MAP", "BUILD_MAP_UNPACK"):
|
elif opname_base in ("BUILD_MAP", "BUILD_MAP_UNPACK"):
|
||||||
kvlist_n = "kvlist_%s" % token.attr
|
kvlist_n = "kvlist_%s" % token.attr
|
||||||
if opname == "BUILD_MAP_n":
|
if opname == "BUILD_MAP_n":
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
# Copyright (c) 2016, 2018-2021 by Rocky Bernstein
|
# Copyright (c) 2016, 2018-2022 by Rocky Bernstein
|
||||||
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
|
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
|
||||||
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
|
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
|
||||||
# Copyright (c) 1999 John Aycock
|
# Copyright (c) 1999 John Aycock
|
||||||
@@ -24,7 +24,6 @@ scanners, e.g. for Python 2.7 or 3.4.
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
from array import array
|
from array import array
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from sys import intern # noqa
|
|
||||||
|
|
||||||
from uncompyle6.scanners.tok import Token
|
from uncompyle6.scanners.tok import Token
|
||||||
from xdis.version_info import IS_PYPY, version_tuple_to_str
|
from xdis.version_info import IS_PYPY, version_tuple_to_str
|
||||||
@@ -125,6 +124,80 @@ class Scanner(object):
|
|||||||
# FIXME: This weird Python2 behavior is not Python3
|
# FIXME: This weird Python2 behavior is not Python3
|
||||||
self.resetTokenClass()
|
self.resetTokenClass()
|
||||||
|
|
||||||
|
def bound_collection_from_tokens(self, tokens, t, i, collection_type):
    """
    Try to fold the operand loads of a collection-building instruction into
    a ``COLLECTION_START ADD_VALUE* BUILD_<collection_type>`` token run, so
    that long literals are matched by one short grammar rule.

    :param tokens: list of tokens emitted so far.
    :param t: token for the collection-building instruction; ``t.attr``
              is used as the number of operands.
    :param i: index one past the last operand in ``tokens``.
              NOTE(review): the slice below drops the *last* ``count``
              tokens, which is the inspected window only when
              ``i == len(tokens)`` -- confirm every caller passes that.
    :param collection_type: suffix used for the new opname and looked up
              in ``CONST_COLLECTIONS``, e.g. ``"CONST_LIST"``.
    :return: a new token list with the operands rewritten, or ``None``
             when the collection is left untouched (fewer than 5
             operands, not enough preceding tokens, or an operand that
             is not a simple load).
    """
    count = t.attr
    assert isinstance(count, int)

    assert count <= i

    if collection_type == "CONST_DICT":
        # Constant dictionaries work via BUILD_CONST_KEY_MAP and
        # handle the values() like sets and lists.
        # However the keys() are a single LOAD_CONST of the key tuple;
        # widen the window by one to account for it.
        count += 1

    # For small collections don't bother.
    if count < 5:
        return None

    collection_start = i - count

    # The CONST_DICT adjustment above happens after the "count <= i" check,
    # so the window can start at -1.  A negative index would silently wrap
    # around to the end of the list; leave the tokens alone instead.
    if collection_start < 0:
        return None

    simple_loads = ("LOAD_CONST", "LOAD_FAST", "LOAD_GLOBAL", "LOAD_NAME")
    window = range(collection_start, i)
    for j in window:
        if tokens[j].kind not in simple_loads:
            return None

    collection_enum = CONST_COLLECTIONS.index(collection_type)

    # If we get here, every token in the window is a simple load.  Drop
    # those tokens, add a boundary marker, and re-emit each load as ADD_VALUE.
    new_tokens = tokens[:-count]
    start_offset = tokens[collection_start].offset
    new_tokens.append(
        Token(
            opname="COLLECTION_START",
            attr=collection_enum,
            pattr=collection_type,
            # Synthetic offset: the marker has no bytecode of its own.
            offset="%s_0" % start_offset,
            has_arg=True,
            opc=self.opc,
            has_extended_arg=False,
        )
    )
    new_tokens.extend(
        Token(
            opname="ADD_VALUE",
            attr=tokens[j].attr,
            pattr=tokens[j].pattr,
            offset=tokens[j].offset,
            has_arg=True,
            linestart=tokens[j].linestart,
            opc=self.opc,
            has_extended_arg=False,
        )
        for j in window
    )
    # Finally the collection-building instruction itself, renamed.
    new_tokens.append(
        Token(
            opname="BUILD_%s" % collection_type,
            attr=t.attr,
            pattr=t.pattr,
            offset=t.offset,
            has_arg=t.has_arg,
            linestart=t.linestart,
            opc=t.opc,
            has_extended_arg=False,
        )
    )
    return new_tokens
|
||||||
|
|
||||||
def build_instructions(self, co):
|
def build_instructions(self, co):
|
||||||
"""
|
"""
|
||||||
Create a list of instructions (a structured object rather than
|
Create a list of instructions (a structured object rather than
|
||||||
|
@@ -200,7 +200,6 @@ class Scanner2(Scanner):
|
|||||||
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
|
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
|
||||||
cause specific rules for the specific number of arguments they take.
|
cause specific rules for the specific number of arguments they take.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if not show_asm:
|
if not show_asm:
|
||||||
show_asm = self.show_asm
|
show_asm = self.show_asm
|
||||||
|
|
||||||
@@ -212,7 +211,7 @@ class Scanner2(Scanner):
|
|||||||
print(instr.disassemble())
|
print(instr.disassemble())
|
||||||
|
|
||||||
# list of tokens/instructions
|
# list of tokens/instructions
|
||||||
tokens = []
|
new_tokens = []
|
||||||
|
|
||||||
# "customize" is in the process of going away here
|
# "customize" is in the process of going away here
|
||||||
customize = {}
|
customize = {}
|
||||||
@@ -289,7 +288,7 @@ class Scanner2(Scanner):
|
|||||||
if come_from_type not in ("LOOP", "EXCEPT"):
|
if come_from_type not in ("LOOP", "EXCEPT"):
|
||||||
come_from_name = "COME_FROM_%s" % come_from_type
|
come_from_name = "COME_FROM_%s" % come_from_type
|
||||||
pass
|
pass
|
||||||
tokens.append(
|
new_tokens.append(
|
||||||
Token(
|
Token(
|
||||||
come_from_name,
|
come_from_name,
|
||||||
jump_offset,
|
jump_offset,
|
||||||
@@ -313,6 +312,24 @@ class Scanner2(Scanner):
|
|||||||
if op == self.opc.EXTENDED_ARG:
|
if op == self.opc.EXTENDED_ARG:
|
||||||
extended_arg += self.extended_arg_val(oparg)
|
extended_arg += self.extended_arg_val(oparg)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Note: we match on the opcode name rather than on op since
|
||||||
|
# BUILD_SET isn't in earlier Pythons.
|
||||||
|
if op_name in (
|
||||||
|
"BUILD_LIST",
|
||||||
|
"BUILD_SET",
|
||||||
|
):
|
||||||
|
t = Token(
|
||||||
|
op_name, oparg, pattr, offset, self.linestarts.get(offset, None), op, has_arg, self.opc
|
||||||
|
)
|
||||||
|
collection_type = op_name.split("_")[1]
|
||||||
|
next_tokens = self.bound_collection_from_tokens(
|
||||||
|
new_tokens, t, len(new_tokens), "CONST_%s" % collection_type
|
||||||
|
)
|
||||||
|
if next_tokens is not None:
|
||||||
|
new_tokens = next_tokens
|
||||||
|
continue
|
||||||
|
|
||||||
if op in self.opc.CONST_OPS:
|
if op in self.opc.CONST_OPS:
|
||||||
const = co.co_consts[oparg]
|
const = co.co_consts[oparg]
|
||||||
if iscode(const):
|
if iscode(const):
|
||||||
@@ -347,12 +364,12 @@ class Scanner2(Scanner):
|
|||||||
elif op in self.opc.JREL_OPS:
|
elif op in self.opc.JREL_OPS:
|
||||||
# use instead: hasattr(self, 'patch_continue'): ?
|
# use instead: hasattr(self, 'patch_continue'): ?
|
||||||
if self.version[:2] == (2, 7):
|
if self.version[:2] == (2, 7):
|
||||||
self.patch_continue(tokens, offset, op)
|
self.patch_continue(new_tokens, offset, op)
|
||||||
pattr = repr(offset + 3 + oparg)
|
pattr = repr(offset + 3 + oparg)
|
||||||
elif op in self.opc.JABS_OPS:
|
elif op in self.opc.JABS_OPS:
|
||||||
# use instead: hasattr(self, 'patch_continue'): ?
|
# use instead: hasattr(self, 'patch_continue'): ?
|
||||||
if self.version[:2] == (2, 7):
|
if self.version[:2] == (2, 7):
|
||||||
self.patch_continue(tokens, offset, op)
|
self.patch_continue(new_tokens, offset, op)
|
||||||
pattr = repr(oparg)
|
pattr = repr(oparg)
|
||||||
elif op in self.opc.LOCAL_OPS:
|
elif op in self.opc.LOCAL_OPS:
|
||||||
pattr = varnames[oparg]
|
pattr = varnames[oparg]
|
||||||
@@ -433,13 +450,13 @@ class Scanner2(Scanner):
|
|||||||
linestart = self.linestarts.get(offset, None)
|
linestart = self.linestarts.get(offset, None)
|
||||||
|
|
||||||
if offset not in replace:
|
if offset not in replace:
|
||||||
tokens.append(
|
new_tokens.append(
|
||||||
Token(
|
Token(
|
||||||
op_name, oparg, pattr, offset, linestart, op, has_arg, self.opc
|
op_name, oparg, pattr, offset, linestart, op, has_arg, self.opc
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
tokens.append(
|
new_tokens.append(
|
||||||
Token(
|
Token(
|
||||||
replace[offset],
|
replace[offset],
|
||||||
oparg,
|
oparg,
|
||||||
@@ -455,10 +472,10 @@ class Scanner2(Scanner):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
if show_asm in ("both", "after"):
|
if show_asm in ("both", "after"):
|
||||||
for t in tokens:
|
for t in new_tokens:
|
||||||
print(t.format(line_prefix=""))
|
print(t.format(line_prefix=""))
|
||||||
print()
|
print()
|
||||||
return tokens, customize
|
return new_tokens, customize
|
||||||
|
|
||||||
def build_statement_indices(self):
|
def build_statement_indices(self):
|
||||||
code = self.code
|
code = self.code
|
||||||
|
@@ -123,7 +123,9 @@ class Scanner26(scan.Scanner2):
|
|||||||
i = self.next_stmt[i]
|
i = self.next_stmt[i]
|
||||||
|
|
||||||
extended_arg = 0
|
extended_arg = 0
|
||||||
|
i = -1
|
||||||
for offset in self.op_range(0, codelen):
|
for offset in self.op_range(0, codelen):
|
||||||
|
i += 1
|
||||||
op = self.code[offset]
|
op = self.code[offset]
|
||||||
op_name = self.opname[op]
|
op_name = self.opname[op]
|
||||||
oparg = None; pattr = None
|
oparg = None; pattr = None
|
||||||
@@ -156,8 +158,28 @@ class Scanner26(scan.Scanner2):
|
|||||||
oparg = self.get_argument(offset) + extended_arg
|
oparg = self.get_argument(offset) + extended_arg
|
||||||
extended_arg = 0
|
extended_arg = 0
|
||||||
if op == self.opc.EXTENDED_ARG:
|
if op == self.opc.EXTENDED_ARG:
|
||||||
extended_arg = oparg * L65536
|
extended_arg += self.extended_arg_val(oparg)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
||||||
|
# Note: we match on the opcode name rather than on op since
|
||||||
|
# BUILD_SET isn't in earlier Pythons.
|
||||||
|
if op_name in (
|
||||||
|
"BUILD_LIST",
|
||||||
|
"BUILD_SET",
|
||||||
|
):
|
||||||
|
t = Token(
|
||||||
|
op_name, oparg, pattr, offset, self.linestarts.get(offset, None), op, has_arg, self.opc
|
||||||
|
)
|
||||||
|
|
||||||
|
collection_type = op_name.split("_")[1]
|
||||||
|
next_tokens = self.bound_collection_from_tokens(
|
||||||
|
tokens, t, i, "CONST_%s" % collection_type
|
||||||
|
)
|
||||||
|
if next_tokens is not None:
|
||||||
|
tokens = next_tokens
|
||||||
|
continue
|
||||||
|
|
||||||
if op in self.opc.CONST_OPS:
|
if op in self.opc.CONST_OPS:
|
||||||
const = co.co_consts[oparg]
|
const = co.co_consts[oparg]
|
||||||
# We can't use inspect.iscode() because we may be
|
# We can't use inspect.iscode() because we may be
|
||||||
|
@@ -24,8 +24,7 @@ scanner routine for Python 3.
|
|||||||
|
|
||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
|
|
||||||
from uncompyle6.scanner import CONST_COLLECTIONS
|
from uncompyle6.scanner import CONST_COLLECTIONS, Token
|
||||||
from uncompyle6.scanners.tok import Token
|
|
||||||
from uncompyle6.scanners.scanner37base import Scanner37Base
|
from uncompyle6.scanners.scanner37base import Scanner37Base
|
||||||
|
|
||||||
# bytecode verification, verify(), uses JUMP_OPs from here
|
# bytecode verification, verify(), uses JUMP_OPs from here
|
||||||
|
Reference in New Issue
Block a user