Merge branch 'python-3.3-to-3.5' into python-2.4

rocky
2022-04-26 02:46:29 -04:00
6 changed files with 243 additions and 51 deletions


@@ -816,6 +816,22 @@ class Python3Parser(PythonParser):
                rule = "starred ::= %s %s" % ("expr " * v, opname)
                self.addRule(rule, nop_func)
            elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"):
                if opname == "BUILD_CONST_DICT":
                    rule = """
                        add_consts ::= ADD_VALUE*
                        const_list ::= COLLECTION_START add_consts %s
                        dict       ::= const_list
                        expr       ::= dict
                    """ % opname
                else:
                    rule = """
                        add_consts ::= ADD_VALUE*
                        const_list ::= COLLECTION_START add_consts %s
                        expr       ::= const_list
                    """ % opname
                self.addRule(rule, nop_func)
            elif opname_base in (
                "BUILD_LIST",
                "BUILD_SET",


@@ -40,16 +40,17 @@ if PYTHON_VERSION_TRIPLE < (2, 6):
else:
    from collections import namedtuple

from xdis import iscode, instruction_size
from xdis import iscode, instruction_size, Instruction
from xdis.bytecode import _get_const_info

from uncompyle6.scanner import Token, parse_fn_counts
from uncompyle6.scanners.tok import Token
from uncompyle6.scanner import parse_fn_counts

import xdis

# Get all the opcodes into globals
import xdis.opcodes.opcode_33 as op3

from uncompyle6.scanner import Scanner
from uncompyle6.scanner import Scanner, CONST_COLLECTIONS

import sys
@@ -207,17 +208,96 @@ class Scanner3(Scanner):
        #     self.varargs_ops = frozenset(self.opc.hasvargs)
        return

    def ingest(self, co, classname=None, code_objects={}, show_asm=None):

    def bound_collection_from_inst(
        self, insts: list, next_tokens: list, inst: Instruction, t: Token,
        i: int, collection_type: str
    ):
        count = t.attr
        assert isinstance(count, int)
        assert count <= i

        if collection_type == "CONST_DICT":
            # Constant dictionaries work via BUILD_CONST_KEY_MAP and
            # handle the values() like sets and lists.
            # However the keys() are a LOAD_CONST of the keys.
            # Adjust the count to account for this.
            count += 1

        # For small lists, don't bother.
        if count < 5:
            return None

        collection_start = i - count

        for j in range(collection_start, i):
            if insts[j].opname not in (
                "LOAD_CONST",
                "LOAD_FAST",
                "LOAD_GLOBAL",
                "LOAD_NAME",
            ):
                return None

        collection_enum = CONST_COLLECTIONS.index(collection_type)

        # If we get here, all instructions before tokens[i] are LOAD_CONSTs, and we
        # can replace them: add a boundary marker and change each LOAD_CONST to an
        # ADD_VALUE token.
        new_tokens = next_tokens[:-count]
        start_offset = insts[collection_start].offset
        new_tokens.append(
            Token(
                opname="COLLECTION_START",
                attr=collection_enum,
                pattr=collection_type,
                offset="%s_0" % start_offset,
                linestart=False,
                has_arg=True,
                has_extended_arg=False,
                opc=self.opc,
            )
        )
        for j in range(collection_start, i):
            new_tokens.append(
                Token(
                    opname="ADD_VALUE",
                    attr=insts[j].argval,
                    pattr=insts[j].argrepr,
                    offset=insts[j].offset,
                    linestart=insts[j].starts_line,
                    has_arg=True,
                    has_extended_arg=False,
                    opc=self.opc,
                )
            )
        new_tokens.append(
            Token(
                opname="BUILD_%s" % collection_type,
                attr=t.attr,
                pattr=t.pattr,
                offset=t.offset,
                linestart=t.linestart,
                has_arg=t.has_arg,
                has_extended_arg=False,
                opc=t.opc,
            )
        )
        return new_tokens
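
To make the transformation concrete, here is a small sketch (not part of this diff, and assuming a CPython 3.7-era compiler, where a literal list is built from individual LOAD_CONSTs):

import dis

# For x = [1, 2, 3, 4, 5] the compiler emits five LOAD_CONSTs followed
# by BUILD_LIST 5 -- exactly the run bound_collection_from_inst() scans:
dis.dis(compile("x = [1, 2, 3, 4, 5]", "<list>", "exec"))

# bound_collection_from_inst() rewrites that run of tokens into:
#   COLLECTION_START 'CONST_LIST', ADD_VALUE 1 ... ADD_VALUE 5,
#   BUILD_CONST_LIST 5
# which the add_consts/const_list grammar rules added in the parser match.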
    def ingest(self, co, classname=None, code_objects={}, show_asm=None):
        """
        Pick out tokens from an uncompyle6 code object, and transform them,
        Create "tokens" from the bytecode of a Python code object. Largely these
        are the opcode name, but in some cases that has been modified to make
        parsing easier.
        returning a list of uncompyle6 Token's.

        The transformations are made to assist the deparsing grammar.
        Specifically:
        Some transformations are made to assist the deparsing grammar:
          - various types of LOAD_CONST's are categorized in terms of what they load
          - COME_FROM instructions are added to assist parsing control structures
          - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
          - some EXTENDED_ARGS instructions are removed
          - operands with stack argument counts or flag masks are appended to the
            opcode name, e.g.:
              * BUILD_LIST, BUILD_SET
              * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
          - EXTENDED_ARGS instructions are removed

        Also, when we encounter certain tokens, we add them to a set which will cause
        custom grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or
        BUILD_LIST
@@ -237,9 +317,6 @@ class Scanner3(Scanner):
            for instr in bytecode.get_instructions(co):
                print(instr.disassemble())

        # list of tokens/instructions
        tokens = []

        # "customize" is in the process of going away here
        customize = {}
@@ -254,6 +331,7 @@ class Scanner3(Scanner):
        n = len(self.insts)
        for i, inst in enumerate(self.insts):
            opname = inst.opname

            # We need to detect the difference between:
            #     raise AssertionError
            # and
@@ -264,7 +342,7 @@ class Scanner3(Scanner):
            if self.version[:2] == (3, 0):
                # Like 2.6, 3.0 doesn't have POP_JUMP_IF... so we have
                # to go through more machinations
                assert_can_follow = inst.opname == "POP_TOP" and i + 1 < n
                assert_can_follow = opname == "POP_TOP" and i + 1 < n
                if assert_can_follow:
                    prev_inst = self.insts[i - 1]
                    assert_can_follow = (
@@ -273,7 +351,7 @@ class Scanner3(Scanner):
                        jump_if_inst = prev_inst
            else:
                assert_can_follow = (
                    inst.opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE")
                    opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE")
                    and i + 1 < n
                )
                jump_if_inst = inst
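
The distinction drawn above can be seen directly with dis. A minimal sketch (not part of this diff, assuming a CPython 3.7-era interpreter where assert compiles to a POP_JUMP_IF_TRUE guard):

import dis

# "assert x" guards the AssertionError raise with POP_JUMP_IF_TRUE; a bare
# "raise AssertionError" has no such jump. The preceding jump is what lets
# the scanner rename the AssertionError load to LOAD_ASSERT.
dis.dis(compile("assert x", "<assert>", "exec"))
dis.dis(compile("raise AssertionError", "<raise>", "exec"))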
@@ -297,13 +375,48 @@ class Scanner3(Scanner):
        # print("XXX2", jump_targets)
        last_op_was_break = False
        new_tokens = []
        for i, inst in enumerate(self.insts):
            opname = inst.opname
            argval = inst.argval
            pattr = inst.argrepr

            t = Token(
                opname=opname,
                attr=argval,
                pattr=pattr,
                offset=inst.offset,
                linestart=inst.starts_line,
                op=inst.opcode,
                has_arg=inst.has_arg,
                has_extended_arg=inst.has_extended_arg,
                opc=self.opc,
            )

            # Things that smash new_tokens, like BUILD_LIST, have to come first.
            if opname in (
                "BUILD_CONST_KEY_MAP",
                "BUILD_LIST",
                "BUILD_SET",
            ):
                collection_type = (
                    "DICT"
                    if opname.startswith("BUILD_CONST_KEY_MAP")
                    else opname.split("_")[1]
                )
                try_tokens = self.bound_collection_from_inst(
                    self.insts, new_tokens, inst, t, i, "CONST_%s" % collection_type
                )
                if try_tokens is not None:
                    new_tokens = try_tokens
                    continue

            argval = inst.argval
            op = inst.opcode

            if inst.opname == "EXTENDED_ARG":
            if opname == "EXTENDED_ARG":
                # FIXME: The EXTENDED_ARG is used to signal annotation
                # parameters
                if i + 1 < n and self.insts[i + 1].opcode != self.opc.MAKE_FUNCTION:
@@ -319,18 +432,18 @@ class Scanner3(Scanner):
                # "loop" tag last so the grammar rule matches that properly.
                for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
                    come_from_name = "COME_FROM"

                    opname = self.opname_for_offset(jump_offset)
                    if opname == "EXTENDED_ARG":
                    come_from_opname = self.opname_for_offset(jump_offset)
                    if come_from_opname == "EXTENDED_ARG":
                        j = xdis.next_offset(op, self.opc, jump_offset)
                        opname = self.opname_for_offset(j)
                        come_from_opname = self.opname_for_offset(j)

                    if opname.startswith("SETUP_"):
                        come_from_type = opname[len("SETUP_") :]
                    if come_from_opname.startswith("SETUP_"):
                        come_from_type = come_from_opname[len("SETUP_") :]
                        come_from_name = "COME_FROM_%s" % come_from_type
                        pass
                    elif inst.offset in self.except_targets:
                        come_from_name = "COME_FROM_EXCEPT_CLAUSE"
                    tokens.append(
                    new_tokens.append(
                        Token(
                            come_from_name,
                            jump_offset,
@@ -345,7 +458,7 @@ class Scanner3(Scanner):
                    pass
            elif inst.offset in self.else_start:
                end_offset = self.else_start[inst.offset]
                tokens.append(
                new_tokens.append(
                    Token(
                        "ELSE",
                        None,
@@ -358,9 +471,6 @@ class Scanner3(Scanner):
                pass

            pattr = inst.argrepr
            opname = inst.opname

            if op in self.opc.CONST_OPS:
                const = argval
                if iscode(const):
@@ -428,7 +538,7 @@ class Scanner3(Scanner):
                    pass
                opname = "%s_%d" % (opname, pos_args)
                attr = (pos_args, name_pair_args, annotate_args)
                tokens.append(
                new_tokens.append(
                    Token(
                        opname=opname,
                        attr=attr,
@@ -514,12 +624,12 @@ class Scanner3(Scanner):
                    # the "continue" is not on a new line.
                    # There are other situations where we don't catch
                    # CONTINUE as well.
                    if tokens[-1].kind == "JUMP_BACK" and tokens[-1].attr <= argval:
                        if tokens[-2].kind == "BREAK_LOOP":
                            del tokens[-1]
                    if new_tokens[-1].kind == "JUMP_BACK" and new_tokens[-1].attr <= argval:
                        if new_tokens[-2].kind == "BREAK_LOOP":
                            del new_tokens[-1]
                        else:
                            # intern is used because we are changing the *previous* token
                            tokens[-1].kind = intern("CONTINUE")
                            new_tokens[-1].kind = intern("CONTINUE")
                if last_op_was_break and opname == "CONTINUE":
                    last_op_was_break = False
                    continue
@@ -533,25 +643,17 @@ class Scanner3(Scanner):
                    opname = "LOAD_ASSERT"

            last_op_was_break = opname == "BREAK_LOOP"
            tokens.append(
                Token(
                    opname=opname,
                    attr=argval,
                    pattr=pattr,
                    offset=inst.offset,
                    linestart=inst.starts_line,
                    op=op,
                    has_arg=inst.has_arg,
                    opc=self.opc,
                )
            )
            t.kind = opname
            t.attr = argval
            t.pattr = pattr
            new_tokens.append(t)
            pass

        if show_asm in ("both", "after"):
            for t in tokens:
            for t in new_tokens:
                print(t.format(line_prefix=""))
            print()

        return tokens, customize
        return new_tokens, customize

    def find_jump_targets(self, debug):
        """


@@ -22,6 +22,8 @@ This sets up opcodes for Python 3.7 and calls a generalized
scanner routine for Python 3.
"""

from uncompyle6.scanner import CONST_COLLECTIONS
from uncompyle6.scanners.tok import Token
from uncompyle6.scanners.scanner37base import Scanner37Base

# bytecode verification, verify(), uses JUMP_OPs from here
@@ -30,9 +32,6 @@ from xdis.opcodes import opcode_37 as opc
# bytecode verification, verify(), uses JUMP_OPS from here
JUMP_OPs = opc.JUMP_OPS

CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")

class Scanner37(Scanner37Base):
    def __init__(self, show_asm=None, is_pypy=False):
        Scanner37Base.__init__(self, (3, 7), show_asm)
@@ -41,6 +40,81 @@ class Scanner37(Scanner37Base):
        pass

    def bound_collection_from_tokens(
        self, tokens: list, next_tokens: list, t: Token, i: int, collection_type: str
    ) -> list:
        count = t.attr
        assert isinstance(count, int)
        assert count <= i

        if collection_type == "CONST_DICT":
            # Constant dictionaries work via BUILD_CONST_KEY_MAP and
            # handle the values() like sets and lists.
            # However the keys() are a LOAD_CONST of the keys.
            # Adjust the count to account for this.
            count += 1

        # For small lists, don't bother.
        if count < 5:
            return next_tokens + [t]

        collection_start = i - count

        for j in range(collection_start, i):
            if tokens[j].kind not in (
                "LOAD_CONST",
                "LOAD_FAST",
                "LOAD_GLOBAL",
                "LOAD_NAME",
            ):
                return next_tokens + [t]

        collection_enum = CONST_COLLECTIONS.index(collection_type)

        # If we get here, all instructions before tokens[i] are LOAD_CONSTs, and we
        # can replace them: add a boundary marker and change each LOAD_CONST to an
        # ADD_VALUE token.
        new_tokens = next_tokens[:-count]
        start_offset = tokens[collection_start].offset
        new_tokens.append(
            Token(
                opname="COLLECTION_START",
                attr=collection_enum,
                pattr=collection_type,
                offset="%s_0" % start_offset,
                linestart=False,
                has_arg=True,
                has_extended_arg=False,
                opc=self.opc,
            )
        )
        for j in range(collection_start, i):
            new_tokens.append(
                Token(
                    opname="ADD_VALUE",
                    attr=tokens[j].attr,
                    pattr=tokens[j].pattr,
                    offset=tokens[j].offset,
                    linestart=tokens[j].linestart,
                    has_arg=True,
                    has_extended_arg=False,
                    opc=self.opc,
                )
            )
        new_tokens.append(
            Token(
                opname="BUILD_%s" % collection_type,
                attr=t.attr,
                pattr=t.pattr,
                offset=t.offset,
                linestart=t.linestart,
                has_arg=t.has_arg,
                has_extended_arg=False,
                opc=t.opc,
            )
        )
        return new_tokens
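
The count += 1 for CONST_DICT above reflects how BUILD_CONST_KEY_MAP lays out its operands: the values are pushed first, and the keys arrive as one extra LOAD_CONST of a tuple. A quick way to see this (a sketch, assuming a CPython 3.7-era interpreter, where this opcode exists):

import dis

# Five values are loaded, then ONE more LOAD_CONST holds the key tuple
# ('a', 'b', 'c', 'd', 'e'), then BUILD_CONST_KEY_MAP 5 -- so the run of
# constant loads is count + 1 instructions long.
dis.dis(compile("{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}", "<dict>", "eval"))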
    def ingest(
        self, co, classname=None, code_objects={}, show_asm=None
    ):


@@ -227,7 +227,7 @@ class NonterminalActions:
            self.indent_more(INDENT_PER_LEVEL)
            sep = ""

            if is_dict:
                keys = flat_elems[-1].pattr
                keys = flat_elems[-1].attr
                assert isinstance(keys, tuple)
                assert len(keys) == len(flat_elems) - 1
                for i, elem in enumerate(flat_elems[:-1]):
@@ -724,8 +724,8 @@ class NonterminalActions:
    def n_import_from(self, node):
        relative_path_index = 0
        if self.version >= (2, 5):
            if node[relative_path_index].pattr > 0:
                node[2].pattr = ("." * node[relative_path_index].pattr) + node[2].pattr
            if node[relative_path_index].attr > 0:
                node[2].pattr = ("." * node[relative_path_index].attr) + node[2].pattr
            if self.version > (2, 7):
                if isinstance(node[1].pattr, tuple):
                    imports = node[1].pattr
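
These pattr-to-attr switches likely matter because, per the Token construction earlier in this commit, attr carries the decoded value (inst.argval) while pattr is its display form (inst.argrepr); arithmetic and comparisons need the real value, and the same reasoning applies to the keys = flat_elems[-1].attr fix above. A tiny sketch of what the fixed n_import_from computes (names invented for illustration):

# level stands in for node[relative_path_index].attr, module for
# node[2].pattr. For "from ..pkg import mod" the relative-import level
# becomes leading dots on the module name:
level, module = 2, "pkg"
print("." * level + module)  # -> "..pkg"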