Merge branch 'master' into python-3.3-to-3.5

This commit is contained in:
rocky
2022-04-27 04:00:21 -04:00
9 changed files with 150 additions and 8 deletions

Binary file not shown.

View File

@@ -31,8 +31,6 @@ import xdis.magics as magics
# ----- configure this for your needs # ----- configure this for your needs
python_versions = [v for v in magics.python_versions if re.match("^[0-9.]+$", v)] python_versions = [v for v in magics.python_versions if re.match("^[0-9.]+$", v)]
print(python_versions)
sys.exit(0)
# FIXME: we should remove Python versions that we don't support. # FIXME: we should remove Python versions that we don't support.
# These include Jython, and Python bytecode changes pre release. # These include Jython, and Python bytecode changes pre release.

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
# Mode: -*- python -*- # Mode: -*- python -*-
# #
# Copyright (c) 2015-2016, 2018, 2020 by Rocky Bernstein <rb@dustyfeet.com> # Copyright (c) 2015-2016, 2018, 2020, 2022 by Rocky Bernstein <rb@dustyfeet.com>
# #
from __future__ import print_function from __future__ import print_function
import sys, os, getopt import sys, os, getopt
@@ -16,11 +16,18 @@ Usage:
{0} [OPTIONS]... FILE {0} [OPTIONS]... FILE
{0} [--help | -h | -V | --version] {0} [--help | -h | -V | --version]
Disassemble FILE with the instruction mangling that is done to Disassemble/Tokenize FILE in the way that is done to
assist uncompyle6 in parsing the instruction stream. For example assist uncompyle6 in parsing the instruction stream. For example
instructions with variable-length arguments like CALL_FUNCTION and instructions with variable-length arguments like CALL_FUNCTION and
BUILD_LIST have argument counts appended to the instruction name, and BUILD_LIST have argument counts appended to the instruction name, and
COME_FROM instructions are inserted into the instruction stream. COME_FROM pseudo instructions are inserted into the instruction stream.
Bit flag values encoded in an operand are expanded, and EXTENDED_ARG
values are folded into the following instruction operand.
Like the parser, you may find this more high-level and/or helpful.
However if you want a true disassembler see the Standard built-in
Python library module "dis", or pydisasm from the cross-version
Python bytecode package "xdis".
Examples: Examples:
{0} foo.pyc {0} foo.pyc

View File

@@ -61,6 +61,7 @@ class PythonParser(GenericASTBuilder):
"except_stmts", "except_stmts",
"exprlist", "exprlist",
"importlist", "importlist",
"key_value_pairs",
"kvlist", "kvlist",
"kwargs", "kwargs",
"l_stmts", "l_stmts",

View File

@@ -748,6 +748,12 @@ class Python3Parser(PythonParser):
kvlist_n = "expr " * (token.attr) kvlist_n = "expr " * (token.attr)
rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname) rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname)
self.addRule(rule, nop_func) self.addRule(rule, nop_func)
elif opname.startswith("BUILD_DICT_OLDER"):
rule = """dict ::= COLLECTION_START key_value_pairs BUILD_DICT_OLDER
key_value_pairs ::= key_value_pair+
key_value_pair ::= ADD_KEY ADD_VALUE
"""
self.addRule(rule, nop_func)
elif opname.startswith("BUILD_LIST_UNPACK"): elif opname.startswith("BUILD_LIST_UNPACK"):
v = token.attr v = token.attr
rule = "build_list_unpack ::= %s%s" % ("expr " * v, opname) rule = "build_list_unpack ::= %s%s" % ("expr " * v, opname)

View File

@@ -84,7 +84,7 @@ def long(num):
return num return num
CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT") CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT", "CONST_MAP")
class Code(object): class Code(object):

View File

@@ -208,6 +208,10 @@ class Scanner3(Scanner):
def bound_collection_from_inst( def bound_collection_from_inst(
self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int, collection_type: str self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int, collection_type: str
): ):
"""
Try to replace a sequence of instructions that ends with a BUILD_xxx with a sequence that can
be parsed much faster, by inserting a token boundary at the beginning of the sequence.
"""
count = t.attr count = t.attr
assert isinstance(count, int) assert isinstance(count, int)
@@ -228,10 +232,13 @@ class Scanner3(Scanner):
for j in range(collection_start, i): for j in range(collection_start, i):
if insts[j].opname not in ( if insts[j].opname not in (
"LOAD_ASSERT",
"LOAD_CODE",
"LOAD_CONST", "LOAD_CONST",
"LOAD_FAST", "LOAD_FAST",
"LOAD_GLOBAL", "LOAD_GLOBAL",
"LOAD_NAME", "LOAD_NAME",
"LOAD_STR",
): ):
return None return None
@@ -280,6 +287,94 @@ class Scanner3(Scanner):
) )
return new_tokens return new_tokens
def bound_map_from_inst(
    self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int
):
    """
    Try to replace a run of constant key/value loads that ends with a
    BUILD_MAP by a token sequence that can be parsed much faster, by
    inserting a COLLECTION_START boundary token at the beginning of the run.

    The rewritten sequence is:

        COLLECTION_START (ADD_KEY ADD_VALUE)* BUILD_DICT_OLDER

    :param insts: the instruction list being scanned.
    :param next_tokens: tokens produced so far; it is not modified.
    :param inst: the BUILD_MAP instruction (unused here; kept so the
        signature parallels bound_collection_from_inst).
    :param t: the token for the BUILD_MAP instruction; ``t.attr`` is taken
        as the number of key/value pairs.
    :param i: index of the BUILD_MAP instruction in ``insts``.
    :return: a new token list with the rewritten sequence appended, or
        None when the rewrite does not apply and the caller should
        tokenize the instruction in the usual way.
    """
    count = t.attr
    assert isinstance(count, int)

    # For small dictionaries don't bother.
    if count < 5:
        return None

    # Every entry needs two instructions: one for the key, one for the value.
    # Decline (rather than fail an assertion) when there are not that many
    # instructions before the BUILD_MAP, e.g. when the operand is only a
    # size hint and the entries are not laid out in front of it.
    pair_insts = 2 * count
    if pair_insts > i:
        return None

    collection_start = i - pair_insts

    # Only handle the case where every key and every value is a LOAD_CONST.
    for j in range(collection_start, i):
        if insts[j].opname != "LOAD_CONST":
            return None

    collection_enum = CONST_COLLECTIONS.index("CONST_MAP")

    # If we get here, all 2 * count instructions before insts[i] are
    # LOAD_CONST, so we can add a boundary marker and change each LOAD_CONST
    # into ADD_KEY or ADD_VALUE.
    # NOTE(review): this assumes next_tokens ends with exactly one token per
    # instruction in the run -- confirm against the caller.
    new_tokens = next_tokens[:-pair_insts]
    start_offset = insts[collection_start].offset
    new_tokens.append(
        Token(
            opname="COLLECTION_START",
            attr=collection_enum,
            pattr="CONST_MAP",
            offset="%s_0" % start_offset,
            linestart=False,
            has_arg=True,
            has_extended_arg=False,
            opc=self.opc,
        )
    )

    # Instructions alternate key, value, key, value, ...
    for j in range(collection_start, i, 2):
        for opname, load_inst in (
            ("ADD_KEY", insts[j]),
            ("ADD_VALUE", insts[j + 1]),
        ):
            new_tokens.append(
                Token(
                    opname=opname,
                    attr=load_inst.argval,
                    pattr=load_inst.argrepr,
                    offset=load_inst.offset,
                    linestart=load_inst.starts_line,
                    has_arg=True,
                    has_extended_arg=False,
                    opc=self.opc,
                )
            )

    # Close the sequence with the renamed BUILD_MAP token.
    new_tokens.append(
        Token(
            opname="BUILD_DICT_OLDER",
            attr=t.attr,
            pattr=t.pattr,
            offset=t.offset,
            linestart=t.linestart,
            has_arg=t.has_arg,
            has_extended_arg=False,
            opc=t.opc,
        )
    )
    return new_tokens
def ingest(self, co, classname=None, code_objects={}, show_asm=None def ingest(self, co, classname=None, code_objects={}, show_asm=None
): ):
""" """
@@ -406,6 +501,15 @@ class Scanner3(Scanner):
if try_tokens is not None: if try_tokens is not None:
new_tokens = try_tokens new_tokens = try_tokens
continue continue
elif opname in (
"BUILD_MAP",
):
try_tokens = self.bound_map_from_inst(
self.insts, new_tokens, inst, t, i,
)
if try_tokens is not None:
new_tokens = try_tokens
continue
argval = inst.argval argval = inst.argval
op = inst.opcode op = inst.opcode

View File

@@ -63,10 +63,12 @@ class Scanner37(Scanner37Base):
for j in range(collection_start, i): for j in range(collection_start, i):
if tokens[j].kind not in ( if tokens[j].kind not in (
"LOAD_CODE",
"LOAD_CONST", "LOAD_CONST",
"LOAD_FAST", "LOAD_FAST",
"LOAD_GLOBAL", "LOAD_GLOBAL",
"LOAD_NAME", "LOAD_NAME",
"LOAD_STR",
): ):
return next_tokens + [t] return next_tokens + [t]

View File

@@ -244,8 +244,6 @@ class NonterminalActions:
sep = "," sep = ","
else: else:
for elem in flat_elems: for elem in flat_elems:
if elem.kind != "ADD_VALUE":
from trepan.api import debug; debug()
assert elem.kind == "ADD_VALUE" assert elem.kind == "ADD_VALUE"
value = elem.pattr value = elem.pattr
if elem.linestart is not None: if elem.linestart is not None:
@@ -395,6 +393,32 @@ class NonterminalActions:
template = ("**%C", (0, kwargs, ", **")) template = ("**%C", (0, kwargs, ", **"))
self.template_engine(template, node) self.template_engine(template, node)
sep = "" sep = ""
if node[0].kind == "COLLECTION_START":
key_value_pairs = node[1]
for key_value_pair in key_value_pairs:
key, value = key_value_pair
if key.linestart is not None:
line_number = key.linestart
if line_number != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
self.line_number = line_number
self.write(sep)
self.write(key.pattr)
self.write(": ")
if value.linestart is not None:
line_number = value.linestart
if line_number != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
self.line_number = line_number
else:
sep += " "
pass
self.write(value.pattr)
sep = ", "
pass
if sep.startswith(",\n"):
self.write(sep[1:])
pass
pass pass
else: else: