merge from master

This commit is contained in:
rocky
2022-05-14 09:40:53 -04:00
parent 4f6d3a3d7e
commit b8856993d2
6 changed files with 65 additions and 43 deletions

View File

@@ -64,7 +64,7 @@ class Python14Parser(Python15Parser):
if opname_base == "UNPACK_VARARG":
if token.attr > 1:
self.addRule(f"star_args ::= RESERVE_FAST {opname} args_store", nop_func)
self.addRule("star_args ::= RESERVE_FAST %s args_store" % opname, nop_func)
def reduce_is_invalid(self, rule, ast, tokens, first, last):

View File

@@ -127,8 +127,8 @@ class Scanner(object):
# FIXME: This weird Python2 behavior is not Python3
self.resetTokenClass()
def bound_collection(
self, tokens: list, next_tokens: list, t: Token, i: int, collection_type: str
def bound_collection_from_tokens(
self, tokens, t, i, collection_type
):
count = t.attr
assert isinstance(count, int)
@@ -144,7 +144,7 @@ class Scanner(object):
# For small lists don't bother
if count < 5:
return next_tokens + [t]
return None
collection_start = i - count
@@ -155,13 +155,13 @@ class Scanner(object):
"LOAD_GLOBAL",
"LOAD_NAME",
):
return next_tokens + [t]
return None
collection_enum = CONST_COLLECTIONS.index(collection_type)
# If we get here, all instructions before tokens[i] are LOAD_CONST, so we can
# add a boundary marker and change LOAD_CONST to something else.
new_tokens = next_tokens[:-count]
new_tokens = tokens[:-count]
start_offset = tokens[collection_start].offset
new_tokens.append(
Token(

View File

@@ -200,6 +200,7 @@ class Scanner2(Scanner):
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take.
"""
if not show_asm:
show_asm = self.show_asm
@@ -1441,3 +1442,15 @@ class Scanner2(Scanner):
instr_offsets = filtered
filtered = []
return instr_offsets
if __name__ == "__main__":
import inspect
from xdis.version_info import PYTHON_VERSION_TRIPLE
co = inspect.currentframe().f_code
tokens, customize = Scanner2(PYTHON_VERSION_TRIPLE).ingest(co)
for t in tokens:
print(t)
pass

View File

@@ -26,6 +26,7 @@ import sys
import uncompyle6.scanners.scanner2 as scan
# bytecode verification, verify(), uses JUMP_OPs from here
from xdis import iscode
from xdis.opcodes import opcode_26
from xdis.bytecode import _get_const_info
@@ -72,7 +73,7 @@ class Scanner26(scan.Scanner2):
bytecode = self.build_instructions(co)
# show_asm = 'after'
if show_asm in ('both', 'before'):
if show_asm in ("both", "before"):
for instr in bytecode.get_instructions(co):
print(instr.disassemble())
@@ -81,7 +82,7 @@ class Scanner26(scan.Scanner2):
customize = {}
if self.is_pypy:
customize['PyPy'] = 1
customize["PyPy"] = 0
codelen = len(self.code)
@@ -93,6 +94,7 @@ class Scanner26(scan.Scanner2):
# 'LOAD_ASSERT' is used in assert statements.
self.load_asserts = set()
for i in self.op_range(0, codelen):
# We need to detect the difference between:
# raise AssertionError
# and
@@ -115,9 +117,9 @@ class Scanner26(scan.Scanner2):
# Distinguish "print ..." from "print ...,"
if self.code[last_stmt] == self.opc.PRINT_ITEM:
if self.code[i] == self.opc.PRINT_ITEM:
replace[i] = 'PRINT_ITEM_CONT'
replace[i] = "PRINT_ITEM_CONT"
elif self.code[i] == self.opc.PRINT_NEWLINE:
replace[i] = 'PRINT_NEWLINE_CONT'
replace[i] = "PRINT_NEWLINE_CONT"
last_stmt = i
i = self.next_stmt[i]
@@ -181,29 +183,25 @@ class Scanner26(scan.Scanner2):
if op in self.opc.CONST_OPS:
const = co.co_consts[oparg]
# We can't use inspect.iscode() because we may be
# using a different version of Python than the
# one that this was byte-compiled on. So the code
# types may mismatch.
if hasattr(const, 'co_name'):
if iscode(const):
oparg = const
if const.co_name == '<lambda>':
assert op_name == 'LOAD_CONST'
op_name = 'LOAD_LAMBDA'
if const.co_name == "<lambda>":
assert op_name == "LOAD_CONST"
op_name = "LOAD_LAMBDA"
elif const.co_name == self.genexpr_name:
op_name = 'LOAD_GENEXPR'
elif const.co_name == '<dictcomp>':
op_name = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>':
op_name = 'LOAD_SETCOMP'
op_name = "LOAD_GENEXPR"
elif const.co_name == "<dictcomp>":
op_name = "LOAD_DICTCOMP"
elif const.co_name == "<setcomp>":
op_name = "LOAD_SETCOMP"
else:
op_name = "LOAD_CODE"
# verify uses 'pattr' for comparison, since 'attr'
# verify() uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used
# for comparison (todo: think about changing this)
# pattr = 'code_object @ 0x%x %s->%s' % \
# pattr = 'code_object @ 0x%x %s->%s' %\
# (id(const), const.co_filename, const.co_name)
pattr = '<code_object ' + const.co_name + '>'
pattr = "<code_object " + const.co_name + ">"
else:
if oparg < len(co.co_consts):
argval, _ = _get_const_info(oparg, co.co_consts)
@@ -235,6 +233,7 @@ class Scanner26(scan.Scanner2):
pattr = self.opc.cmp_op[oparg]
elif op in self.opc.FREE_OPS:
pattr = free[oparg]
if op in self.varargs_ops:
# CE - Hack for >= 2.5
# Now all values loaded via LOAD_CLOSURE are packed into
@@ -285,25 +284,36 @@ class Scanner26(scan.Scanner2):
elif op == self.opc.LOAD_GLOBAL:
if offset in self.load_asserts:
op_name = 'LOAD_ASSERT'
op_name = "LOAD_ASSERT"
elif op == self.opc.RETURN_VALUE:
if offset in self.return_end_ifs:
op_name = 'RETURN_END_IF'
op_name = "RETURN_END_IF"
linestart = self.linestarts.get(offset, None)
if offset not in replace:
tokens.append(Token(
op_name, oparg, pattr, offset, linestart, op,
has_arg, self.opc))
tokens.append(
Token(
op_name, oparg, pattr, offset, linestart, op, has_arg, self.opc
)
)
else:
tokens.append(Token(
replace[offset], oparg, pattr, offset, linestart, op,
has_arg, self.opc))
tokens.append(
Token(
replace[offset],
oparg,
pattr,
offset,
linestart,
op,
has_arg,
self.opc,
)
)
pass
pass
if show_asm in ('both', 'after'):
if show_asm in ("both", "after"):
for t in tokens:
print(t.format(line_prefix=""))
print()

View File

@@ -156,7 +156,7 @@ class Scanner37(Scanner37Base):
if t.kind.startswith("BUILD_CONST_KEY_MAP")
else t.kind.split("_")[1]
)
new_tokens = self.bound_collection(
new_tokens = self.bound_collection_from_tokens(
tokens, new_tokens, t, i, "CONST_%s" % collection_type
)
continue

View File

@@ -22,8 +22,6 @@ This sets up opcodes Python's 3.8 and calls a generalized
scanner routine for Python 3.7 and up.
"""
from typing import Dict, Tuple
from uncompyle6.scanners.tok import off2int
from uncompyle6.scanners.scanner37 import Scanner37
from uncompyle6.scanners.scanner37base import Scanner37Base
@@ -45,7 +43,7 @@ class Scanner38(Scanner37):
def ingest(
self, co, classname=None, code_objects={}, show_asm=None
) -> Tuple[list, dict]:
) -> tuple:
"""
Create "tokens" from the bytecode of a Python code object. Largely these
are the opcode name, but in some cases that has been modified to make parsing
@@ -73,7 +71,7 @@ class Scanner38(Scanner37):
# The value is where the loop ends. In current Python,
# JUMP_BACKS are always to loops. And blocks are ordered so that the
# JUMP_BACK with the highest offset will be where the range ends.
jump_back_targets: Dict[int, int] = {}
jump_back_targets = {}
for token in tokens:
if token.kind == "JUMP_BACK":
jump_back_targets[token.attr] = token.offset
@@ -92,7 +90,7 @@ class Scanner38(Scanner37):
if offset == next_end:
loop_ends.pop()
if self.debug:
print(f"{' ' * len(loop_ends)}remove loop offset {offset}")
print("%sremove loop offset %s" % (" " * len(loop_ends), offset))
pass
next_end = (
loop_ends[-1]
@@ -106,7 +104,8 @@ class Scanner38(Scanner37):
next_end = off2int(jump_back_targets[offset], prefer_last=False)
if self.debug:
print(
f"{' ' * len(loop_ends)}adding loop offset {offset} ending at {next_end}"
"%sadding loop offset %s ending at %s"
% (" " * len(loop_ends), offset, next_end)
)
loop_ends.append(next_end)
@@ -165,4 +164,4 @@ if __name__ == "__main__":
print(t.format())
pass
else:
print(f"Need to be Python 3.8 to demo; I am version {version_tuple_to_str()}.")
print("Need to be Python 3.8 to demo; I am version %s." % version_tuple_to_str())