Merge branch 'python-3.3-to-3.5' into python-2.4

Author: rocky
Date:   2022-04-26 02:46:29 -04:00

6 changed files with 243 additions and 51 deletions

Binary file not shown.

View File

@@ -816,6 +816,22 @@ class Python3Parser(PythonParser):
                 rule = "starred ::= %s %s" % ("expr " * v, opname)
                 self.addRule(rule, nop_func)
+            elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"):
+                if opname == "BUILD_CONST_DICT":
+                    rule = """
+                        add_consts ::= ADD_VALUE*
+                        const_list ::= COLLECTION_START add_consts %s
+                        dict ::= const_list
+                        expr ::= dict
+                    """ % opname
+                else:
+                    rule = """
+                        add_consts ::= ADD_VALUE*
+                        const_list ::= COLLECTION_START add_consts %s
+                        expr ::= const_list
+                    """ % opname
+                self.addRule(rule, nop_func)
             elif opname_base in (
                 "BUILD_LIST",
                 "BUILD_SET",

View File

@@ -40,16 +40,17 @@ if PYTHON_VERSION_TRIPLE < (2, 6):
 else:
     from collections import namedtuple

-from xdis import iscode, instruction_size
+from xdis import iscode, instruction_size, Instruction
 from xdis.bytecode import _get_const_info

-from uncompyle6.scanner import Token, parse_fn_counts
+from uncompyle6.scanners.tok import Token
+from uncompyle6.scanner import parse_fn_counts

 import xdis

 # Get all the opcodes into globals
 import xdis.opcodes.opcode_33 as op3

-from uncompyle6.scanner import Scanner
+from uncompyle6.scanner import Scanner, CONST_COLLECTIONS

 import sys

@@ -207,17 +208,96 @@ class Scanner3(Scanner):
         #     self.varargs_ops = frozenset(self.opc.hasvargs)
         return

-    def ingest(self, co, classname=None, code_objects={}, show_asm=None):
+    def bound_collection_from_inst(
+        self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int, collection_type: str
+    ):
+        count = t.attr
+        assert isinstance(count, int)
+        assert count <= i
+
+        if collection_type == "CONST_DICT":
+            # Constant dictionaries work via BUILD_CONST_KEY_MAP and
+            # handle the values() like sets and lists.
+            # However, the keys() are a LOAD_CONST of the keys.
+            # Adjust the count to account for this.
+            count += 1
+
+        # For small lists don't bother
+        if count < 5:
+            return None
+
+        collection_start = i - count
+
+        for j in range(collection_start, i):
+            if insts[j].opname not in (
+                "LOAD_CONST",
+                "LOAD_FAST",
+                "LOAD_GLOBAL",
+                "LOAD_NAME",
+            ):
+                return None
+
+        collection_enum = CONST_COLLECTIONS.index(collection_type)
+
+        # If we get here, all instructions before tokens[i] are LOAD_CONSTs, so we
+        # can add a boundary marker and change each LOAD_CONST to an ADD_VALUE.
+        new_tokens = next_tokens[:-count]
+        start_offset = insts[collection_start].offset
+        new_tokens.append(
+            Token(
+                opname="COLLECTION_START",
+                attr=collection_enum,
+                pattr=collection_type,
+                offset="%s_0" % start_offset,
+                linestart=False,
+                has_arg=True,
+                has_extended_arg=False,
+                opc=self.opc,
+            )
+        )
+        for j in range(collection_start, i):
+            new_tokens.append(
+                Token(
+                    opname="ADD_VALUE",
+                    attr=insts[j].argval,
+                    pattr=insts[j].argrepr,
+                    offset=insts[j].offset,
+                    linestart=insts[j].starts_line,
+                    has_arg=True,
+                    has_extended_arg=False,
+                    opc=self.opc,
+                )
+            )
+        new_tokens.append(
+            Token(
+                opname="BUILD_%s" % collection_type,
+                attr=t.attr,
+                pattr=t.pattr,
+                offset=t.offset,
+                linestart=t.linestart,
+                has_arg=t.has_arg,
+                has_extended_arg=False,
+                opc=t.opc,
+            )
+        )
+        return new_tokens
+
+    def ingest(self, co, classname=None, code_objects={}, show_asm=None
+    ):
         """
-        Pick out tokens from an uncompyle6 code object, and transform them,
+        Create "tokens" from the bytecode of a Python code object. Largely these
+        are the opcode names, but in some cases the names have been modified to
+        make parsing easier,
         returning a list of uncompyle6 Token's.

-        The transformations are made to assist the deparsing grammar.
-        Specificially:
+        Some transformations are made to assist the deparsing grammar:

         - various types of LOAD_CONST's are categorized in terms of what they load
         - COME_FROM instructions are added to assist parsing control structures
-        - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
-        - some EXTENDED_ARGS instructions are removed
+        - operands with stack argument counts or flag masks are appended to the
+          opcode name, e.g.:
+          * BUILD_LIST, BUILD_SET
+          * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
+        - EXTENDED_ARGS instructions are removed

         Also, when we encounter certain tokens, we add them to a set which will cause custom
         grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
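
Note: as an illustration of the rewrite bound_collection_from_inst performs (token names are from the hunk above; values are made up), for x = [1, 2, 3, 4, 5]:

    # Before: one token per instruction.
    before = ["LOAD_CONST"] * 5 + ["BUILD_LIST"]
    # After: a boundary marker, one ADD_VALUE per element, and a renamed build.
    after = ["COLLECTION_START"] + ["ADD_VALUE"] * 5 + ["BUILD_CONST_LIST"]
    assert len(after) == len(before) + 1

Collections shorter than five elements, or containing anything other than LOAD_CONST/LOAD_FAST/LOAD_GLOBAL/LOAD_NAME, are left alone: the method returns None and ingest() falls through to its normal handling.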
@@ -237,9 +317,6 @@ class Scanner3(Scanner):
             for instr in bytecode.get_instructions(co):
                 print(instr.disassemble())

-        # list of tokens/instructions
-        tokens = []
-
         # "customize" is in the process of going away here
         customize = {}
@@ -254,6 +331,7 @@ class Scanner3(Scanner):
         n = len(self.insts)
         for i, inst in enumerate(self.insts):
+            opname = inst.opname

             # We need to detect the difference between:
             #     raise AssertionError
             # and
@@ -264,7 +342,7 @@ class Scanner3(Scanner):
             if self.version[:2] == (3, 0):
                 # Like 2.6, 3.0 doesn't have POP_JUMP_IF... so we have
                 # to go through more machinations
-                assert_can_follow = inst.opname == "POP_TOP" and i + 1 < n
+                assert_can_follow = opname == "POP_TOP" and i + 1 < n
                 if assert_can_follow:
                     prev_inst = self.insts[i - 1]
                     assert_can_follow = (
@@ -273,7 +351,7 @@ class Scanner3(Scanner):
                     jump_if_inst = prev_inst
             else:
                 assert_can_follow = (
-                    inst.opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE")
+                    opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE")
                     and i + 1 < n
                 )
                 jump_if_inst = inst
@@ -297,13 +375,48 @@ class Scanner3(Scanner):
         # print("XXX2", jump_targets)
         last_op_was_break = False
+        new_tokens = []

         for i, inst in enumerate(self.insts):
+            opname = inst.opname
+            argval = inst.argval
+            pattr = inst.argrepr
+
+            t = Token(
+                opname=opname,
+                attr=argval,
+                pattr=pattr,
+                offset=inst.offset,
+                linestart=inst.starts_line,
+                op=inst.opcode,
+                has_arg=inst.has_arg,
+                has_extended_arg=inst.has_extended_arg,
+                opc=self.opc,
+            )
+
+            # Things that smash new_tokens, like BUILD_LIST, have to come first.
+            if opname in (
+                "BUILD_CONST_KEY_MAP",
+                "BUILD_LIST",
+                "BUILD_SET",
+            ):
+                collection_type = (
+                    "DICT"
+                    if opname.startswith("BUILD_CONST_KEY_MAP")
+                    else opname.split("_")[1]
+                )
+                try_tokens = self.bound_collection_from_inst(
+                    self.insts, new_tokens, inst, t, i, "CONST_%s" % collection_type
+                )
+                if try_tokens is not None:
+                    new_tokens = try_tokens
+                    continue
+
             argval = inst.argval
             op = inst.opcode

-            if inst.opname == "EXTENDED_ARG":
+            if opname == "EXTENDED_ARG":
                 # FIXME: The EXTENDED_ARG is used to signal annotation
                 # parameters
                 if i + 1 < n and self.insts[i + 1].opcode != self.opc.MAKE_FUNCTION:
@@ -319,18 +432,18 @@ class Scanner3(Scanner):
                 # "loop" tag last so the grammar rule matches that properly.
                 for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
                     come_from_name = "COME_FROM"
-                    opname = self.opname_for_offset(jump_offset)
-                    if opname == "EXTENDED_ARG":
+                    come_from_opname = self.opname_for_offset(jump_offset)
+                    if come_from_opname == "EXTENDED_ARG":
                         j = xdis.next_offset(op, self.opc, jump_offset)
-                        opname = self.opname_for_offset(j)
+                        come_from_opname = self.opname_for_offset(j)

-                    if opname.startswith("SETUP_"):
-                        come_from_type = opname[len("SETUP_") :]
+                    if come_from_opname.startswith("SETUP_"):
+                        come_from_type = come_from_opname[len("SETUP_") :]
                         come_from_name = "COME_FROM_%s" % come_from_type
                         pass
                     elif inst.offset in self.except_targets:
                         come_from_name = "COME_FROM_EXCEPT_CLAUSE"
-                    tokens.append(
+                    new_tokens.append(
                         Token(
                             come_from_name,
                             jump_offset,
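
Note: the come_from_opname rename fixes latent shadowing. The loop body now sets opname = inst.opname once per instruction, and reusing opname for the COME_FROM lookup would clobber that value for the rest of the iteration. A hypothetical reduction of the hazard:

    opname = "BUILD_LIST"        # current instruction's name, set at loop top
    for jump_offset in (10, 20):
        opname = "SETUP_LOOP"    # inner lookup reuses the name...
    print(opname)                # ...and later tests see "SETUP_LOOP", not "BUILD_LIST"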
@@ -345,7 +458,7 @@ class Scanner3(Scanner):
                     pass
             elif inst.offset in self.else_start:
                 end_offset = self.else_start[inst.offset]
-                tokens.append(
+                new_tokens.append(
                     Token(
                         "ELSE",
                         None,
@@ -358,9 +471,6 @@ class Scanner3(Scanner):
                 pass

-            pattr = inst.argrepr
-            opname = inst.opname
-
             if op in self.opc.CONST_OPS:
                 const = argval
                 if iscode(const):
@@ -428,7 +538,7 @@ class Scanner3(Scanner):
                     pass
                 opname = "%s_%d" % (opname, pos_args)
                 attr = (pos_args, name_pair_args, annotate_args)
-                tokens.append(
+                new_tokens.append(
                     Token(
                         opname=opname,
                         attr=attr,
@@ -514,12 +624,12 @@ class Scanner3(Scanner):
                     # the "continue" is not on a new line.
                     # There are other situations where we don't catch
                     # CONTINUE as well.
-                    if tokens[-1].kind == "JUMP_BACK" and tokens[-1].attr <= argval:
-                        if tokens[-2].kind == "BREAK_LOOP":
-                            del tokens[-1]
+                    if new_tokens[-1].kind == "JUMP_BACK" and new_tokens[-1].attr <= argval:
+                        if new_tokens[-2].kind == "BREAK_LOOP":
+                            del new_tokens[-1]
                         else:
                             # intern is used because we are changing the *previous* token
-                            tokens[-1].kind = intern("CONTINUE")
+                            new_tokens[-1].kind = intern("CONTINUE")
                 if last_op_was_break and opname == "CONTINUE":
                     last_op_was_break = False
                     continue
@@ -533,25 +643,17 @@ class Scanner3(Scanner):
                     opname = "LOAD_ASSERT"

             last_op_was_break = opname == "BREAK_LOOP"
-            tokens.append(
-                Token(
-                    opname=opname,
-                    attr=argval,
-                    pattr=pattr,
-                    offset=inst.offset,
-                    linestart=inst.starts_line,
-                    op=op,
-                    has_arg=inst.has_arg,
-                    opc=self.opc,
-                )
-            )
+            t.kind = opname
+            t.attr = argval
+            t.pattr = pattr
+            new_tokens.append(t)
             pass

         if show_asm in ("both", "after"):
-            for t in tokens:
+            for t in new_tokens:
                 print(t.format(line_prefix=""))
             print()
-        return tokens, customize
+        return new_tokens, customize

     def find_jump_targets(self, debug):
         """

View File

@@ -22,6 +22,8 @@ This sets up opcodes Python's 3.7 and calls a generalized
 scanner routine for Python 3.
 """

+from uncompyle6.scanner import CONST_COLLECTIONS
+from uncompyle6.scanners.tok import Token
+
 from uncompyle6.scanners.scanner37base import Scanner37Base

 # bytecode verification, verify(), uses JUMP_OPs from here
@@ -30,9 +32,6 @@ from xdis.opcodes import opcode_37 as opc
 # bytecode verification, verify(), uses JUMP_OPS from here
 JUMP_OPs = opc.JUMP_OPS

-CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")
-
 class Scanner37(Scanner37Base):
     def __init__(self, show_asm=None, is_pypy=False):
         Scanner37Base.__init__(self, (3, 7), show_asm)
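
Note: the tuple itself is unchanged; it only moves (per the new import above, presumably into uncompyle6/scanner.py) so that Scanner3 and Scanner37 share one definition. COLLECTION_START's attr is an index into it:

    CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")
    assert CONST_COLLECTIONS.index("CONST_DICT") == 2  # stored as the token's attr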
@@ -41,6 +40,81 @@ class Scanner37(Scanner37Base):
         pass

+    def bound_collection_from_tokens(
+        self, tokens: list, next_tokens: list, t: Token, i: int, collection_type: str
+    ) -> list:
+        count = t.attr
+        assert isinstance(count, int)
+        assert count <= i
+
+        if collection_type == "CONST_DICT":
+            # Constant dictionaries work via BUILD_CONST_KEY_MAP and
+            # handle the values() like sets and lists.
+            # However, the keys() are a LOAD_CONST of the keys.
+            # Adjust the count to account for this.
+            count += 1
+
+        # For small lists don't bother
+        if count < 5:
+            return next_tokens + [t]
+
+        collection_start = i - count
+
+        for j in range(collection_start, i):
+            if tokens[j].kind not in (
+                "LOAD_CONST",
+                "LOAD_FAST",
+                "LOAD_GLOBAL",
+                "LOAD_NAME",
+            ):
+                return next_tokens + [t]
+
+        collection_enum = CONST_COLLECTIONS.index(collection_type)
+
+        # If we get here, all instructions before tokens[i] are LOAD_CONSTs, so we
+        # can add a boundary marker and change each LOAD_CONST to an ADD_VALUE.
+        new_tokens = next_tokens[:-count]
+        start_offset = tokens[collection_start].offset
+        new_tokens.append(
+            Token(
+                opname="COLLECTION_START",
+                attr=collection_enum,
+                pattr=collection_type,
+                offset="%s_0" % start_offset,
+                linestart=False,
+                has_arg=True,
+                has_extended_arg=False,
+                opc=self.opc,
+            )
+        )
+        for j in range(collection_start, i):
+            new_tokens.append(
+                Token(
+                    opname="ADD_VALUE",
+                    attr=tokens[j].attr,
+                    pattr=tokens[j].pattr,
+                    offset=tokens[j].offset,
+                    linestart=tokens[j].linestart,
+                    has_arg=True,
+                    has_extended_arg=False,
+                    opc=self.opc,
+                )
+            )
+        new_tokens.append(
+            Token(
+                opname="BUILD_%s" % collection_type,
+                attr=t.attr,
+                pattr=t.pattr,
+                offset=t.offset,
+                linestart=t.linestart,
+                has_arg=t.has_arg,
+                has_extended_arg=False,
+                opc=t.opc,
+            )
+        )
+        return new_tokens
+
     def ingest(
         self, co, classname=None, code_objects={}, show_asm=None
     ):
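
Note: the two new methods signal "no rewrite" differently. Scanner3's instruction-based variant returns None and its caller checks before continuing (see the scanner3.py hunk above), while this token-based variant returns next_tokens + [t], so its caller can presumably assign the result unconditionally. A toy contrast (function names hypothetical):

    def rewrite_or_none(elems):            # Scanner3-style contract
        return None if len(elems) < 5 else ["rewritten"]

    def rewrite_or_passthrough(elems, t):  # Scanner37-style contract
        return elems + [t] if len(elems) < 5 else ["rewritten"]

    result = rewrite_or_none([1, 2])                        # caller must test for None
    tokens = rewrite_or_passthrough([1, 2], "BUILD_LIST")   # assign unconditionally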

View File

@@ -227,7 +227,7 @@ class NonterminalActions:
             self.indent_more(INDENT_PER_LEVEL)
             sep = ""
             if is_dict:
-                keys = flat_elems[-1].pattr
+                keys = flat_elems[-1].attr
                 assert isinstance(keys, tuple)
                 assert len(keys) == len(flat_elems) - 1
                 for i, elem in enumerate(flat_elems[:-1]):
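
Note: the keys fix follows from how BUILD_CONST_KEY_MAP is tokenized: the keys arrive as a single tuple constant whose decoded value is in the token's attr, while pattr is only the display string. Illustration with made-up values:

    keys_attr = ("a", "b")        # decoded constant: what .attr holds
    keys_pattr = repr(keys_attr)  # "('a', 'b')": what .pattr holds
    assert isinstance(keys_attr, tuple)
    assert not isinstance(keys_pattr, tuple)  # the old code's assert would fail here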
@@ -724,8 +724,8 @@ class NonterminalActions:
     def n_import_from(self, node):
         relative_path_index = 0
         if self.version >= (2, 5):
-            if node[relative_path_index].pattr > 0:
-                node[2].pattr = ("." * node[relative_path_index].pattr) + node[2].pattr
+            if node[relative_path_index].attr > 0:
+                node[2].pattr = ("." * node[relative_path_index].attr) + node[2].pattr
             if self.version > (2, 7):
                 if isinstance(node[1].pattr, tuple):
                     imports = node[1].pattr
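
Note: the same attr-versus-pattr distinction drives the n_import_from fix: the relative-import level is an int in attr, while pattr is its string form, and both the > 0 comparison and the "." * level multiplication need the int. A minimal stand-in (Tok is hypothetical, not uncompyle6's Token):

    class Tok:
        def __init__(self, level):
            self.attr = level         # decoded operand, e.g. 2 for `from .. import mod`
            self.pattr = repr(level)  # display string "2"

    t = Tok(2)
    assert t.attr > 0         # int comparison works
    print("." * t.attr)       # ".."
    # "." * t.pattr  -> TypeError: can't multiply sequence by non-int
    # t.pattr > 0    -> TypeError on Python 3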