2.6 scanner show -A headers now

This commit is contained in:
rocky
2024-03-13 21:39:35 -04:00
parent bf59e3c65e
commit daf54d2740

View File

@@ -23,27 +23,31 @@ use in deparsing.
""" """
import sys import sys
import uncompyle6.scanners.scanner2 as scan
# bytecode verification, verify(), uses JUMP_OPs from here # bytecode verification, verify(), uses JUMP_OPs from here
from xdis import iscode from xdis import iscode
from xdis.opcodes import opcode_26
from xdis.bytecode import _get_const_info from xdis.bytecode import _get_const_info
from xdis.opcodes import opcode_26
import uncompyle6.scanners.scanner2 as scan
from uncompyle6.scanner import Token from uncompyle6.scanner import Token
intern = sys.intern intern = sys.intern
JUMP_OPS = opcode_26.JUMP_OPS JUMP_OPS = opcode_26.JUMP_OPS
class Scanner26(scan.Scanner2): class Scanner26(scan.Scanner2):
def __init__(self, show_asm=False): def __init__(self, show_asm=False):
super(Scanner26, self).__init__((2, 6), show_asm) super(Scanner26, self).__init__((2, 6), show_asm)
# "setup" opcodes # "setup" opcodes
self.setup_ops = frozenset([ self.setup_ops = frozenset(
self.opc.SETUP_EXCEPT, self.opc.SETUP_FINALLY, [
]) self.opc.SETUP_EXCEPT,
self.opc.SETUP_FINALLY,
]
)
return return
@@ -76,8 +80,9 @@ class Scanner26(scan.Scanner2):
# show_asm = 'after' # show_asm = 'after'
if show_asm in ("both", "before"): if show_asm in ("both", "before"):
print("\n# ---- before tokenization:")
for instr in bytecode.get_instructions(co): for instr in bytecode.get_instructions(co):
print(instr.disassemble()) print(instr.disassemble(self.opc))
# Container for tokens # Container for tokens
tokens = [] tokens = []
@@ -96,17 +101,18 @@ class Scanner26(scan.Scanner2):
# 'LOAD_ASSERT' is used in assert statements. # 'LOAD_ASSERT' is used in assert statements.
self.load_asserts = set() self.load_asserts = set()
for i in self.op_range(0, codelen): for i in self.op_range(0, codelen):
# We need to detect the difference between: # We need to detect the difference between:
# raise AssertionError # raise AssertionError
# and # and
# assert ... # assert ...
if (self.code[i] == self.opc.JUMP_IF_TRUE and if (
i + 4 < codelen and self.code[i] == self.opc.JUMP_IF_TRUE
self.code[i+3] == self.opc.POP_TOP and and i + 4 < codelen
self.code[i+4] == self.opc.LOAD_GLOBAL): and self.code[i + 3] == self.opc.POP_TOP
if names[self.get_argument(i+4)] == 'AssertionError': and self.code[i + 4] == self.opc.LOAD_GLOBAL
self.load_asserts.add(i+4) ):
if names[self.get_argument(i + 4)] == "AssertionError":
self.load_asserts.add(i + 4)
jump_targets = self.find_jump_targets(show_asm) jump_targets = self.find_jump_targets(show_asm)
# contains (code, [addrRefToCode]) # contains (code, [addrRefToCode])
@@ -131,7 +137,8 @@ class Scanner26(scan.Scanner2):
i += 1 i += 1
op = self.code[offset] op = self.code[offset]
op_name = self.opname[op] op_name = self.opname[op]
oparg = None; pattr = None oparg = None
pattr = None
if offset in jump_targets: if offset in jump_targets:
jump_idx = 0 jump_idx = 0
@@ -142,28 +149,37 @@ class Scanner26(scan.Scanner2):
# properly. For example, a "loop" with an "if" nested in it should have the # properly. For example, a "loop" with an "if" nested in it should have the
# "loop" tag last so the grammar rule matches that properly. # "loop" tag last so the grammar rule matches that properly.
last_jump_offset = -1 last_jump_offset = -1
for jump_offset in sorted(jump_targets[offset], reverse=True): for jump_offset in sorted(jump_targets[offset], reverse=True):
if jump_offset != last_jump_offset: if jump_offset != last_jump_offset:
tokens.append(Token( tokens.append(
'COME_FROM', jump_offset, repr(jump_offset), Token(
offset="%s_%d" % (offset, jump_idx), "COME_FROM",
has_arg = True)) jump_offset,
repr(jump_offset),
offset="%s_%d" % (offset, jump_idx),
has_arg=True,
)
)
jump_idx += 1 jump_idx += 1
last_jump_offset = jump_offset last_jump_offset = jump_offset
elif offset in self.thens: elif offset in self.thens:
tokens.append(Token( tokens.append(
'THEN', None, self.thens[offset], Token(
offset="%s_0" % offset, "THEN",
has_arg = True)) None,
self.thens[offset],
offset="%s_0" % offset,
has_arg=True,
)
)
has_arg = (op >= self.opc.HAVE_ARGUMENT) has_arg = op >= self.opc.HAVE_ARGUMENT
if has_arg: if has_arg:
oparg = self.get_argument(offset) + extended_arg oparg = self.get_argument(offset) + extended_arg
extended_arg = 0 extended_arg = 0
if op == self.opc.EXTENDED_ARG: if op == self.opc.EXTENDED_ARG:
extended_arg += self.extended_arg_val(oparg) extended_arg += self.extended_arg_val(oparg)
continue continue
# Note: name used to match on rather than op since # Note: name used to match on rather than op since
# BUILD_SET isn't in earlier Pythons. # BUILD_SET isn't in earlier Pythons.
@@ -172,7 +188,14 @@ class Scanner26(scan.Scanner2):
"BUILD_SET", "BUILD_SET",
): ):
t = Token( t = Token(
op_name, oparg, pattr, offset, self.linestarts.get(offset, None), op, has_arg, self.opc op_name,
oparg,
pattr,
offset,
self.linestarts.get(offset, None),
op,
has_arg,
self.opc,
) )
collection_type = op_name.split("_")[1] collection_type = op_name.split("_")[1]
@@ -221,8 +244,8 @@ class Scanner26(scan.Scanner2):
# FIXME: this is a hack to catch stuff like: # FIXME: this is a hack to catch stuff like:
# if x: continue # if x: continue
# the "continue" is not on a new line. # the "continue" is not on a new line.
if len(tokens) and tokens[-1].kind == 'JUMP_BACK': if len(tokens) and tokens[-1].kind == "JUMP_BACK":
tokens[-1].kind = intern('CONTINUE') tokens[-1].kind = intern("CONTINUE")
elif op in self.opc.JABS_OPS: elif op in self.opc.JABS_OPS:
pattr = repr(oparg) pattr = repr(oparg)
@@ -240,17 +263,23 @@ class Scanner26(scan.Scanner2):
# CE - Hack for >= 2.5 # CE - Hack for >= 2.5
# Now all values loaded via LOAD_CLOSURE are packed into # Now all values loaded via LOAD_CLOSURE are packed into
# a tuple before calling MAKE_CLOSURE. # a tuple before calling MAKE_CLOSURE.
if (self.version >= (2, 5) and op == self.opc.BUILD_TUPLE and if (
self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE): self.version >= (2, 5)
and op == self.opc.BUILD_TUPLE
and self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE
):
continue continue
else: else:
op_name = '%s_%d' % (op_name, oparg) op_name = "%s_%d" % (op_name, oparg)
customize[op_name] = oparg customize[op_name] = oparg
elif self.version > (2, 0) and op == self.opc.CONTINUE_LOOP: elif self.version > (2, 0) and op == self.opc.CONTINUE_LOOP:
customize[op_name] = 0 customize[op_name] = 0
elif op_name in """ elif (
op_name
in """
CONTINUE_LOOP EXEC_STMT LOAD_LISTCOMP LOAD_SETCOMP CONTINUE_LOOP EXEC_STMT LOAD_LISTCOMP LOAD_SETCOMP
""".split(): """.split()
):
customize[op_name] = 0 customize[op_name] = 0
elif op == self.opc.JUMP_ABSOLUTE: elif op == self.opc.JUMP_ABSOLUTE:
# Further classify JUMP_ABSOLUTE into backward jumps # Further classify JUMP_ABSOLUTE into backward jumps
@@ -266,23 +295,24 @@ class Scanner26(scan.Scanner2):
# rule for that. # rule for that.
target = self.get_target(offset) target = self.get_target(offset)
if target <= offset: if target <= offset:
op_name = 'JUMP_BACK' op_name = "JUMP_BACK"
if (offset in self.stmts if offset in self.stmts and self.code[offset + 3] not in (
and self.code[offset+3] not in (self.opc.END_FINALLY, self.opc.END_FINALLY,
self.opc.POP_BLOCK)): self.opc.POP_BLOCK,
if ((offset in self.linestarts and ):
tokens[-1].kind == 'JUMP_BACK') if (
or offset not in self.not_continue): offset in self.linestarts and tokens[-1].kind == "JUMP_BACK"
op_name = 'CONTINUE' ) or offset not in self.not_continue:
op_name = "CONTINUE"
else: else:
# FIXME: this is a hack to catch stuff like: # FIXME: this is a hack to catch stuff like:
# if x: continue # if x: continue
# the "continue" is not on a new line. # the "continue" is not on a new line.
if tokens[-1].kind == 'JUMP_BACK': if tokens[-1].kind == "JUMP_BACK":
# We need 'intern' since we have # We need 'intern' since we have
# already processed the previous # already processed the previous
# token. # token.
tokens[-1].kind = intern('CONTINUE') tokens[-1].kind = intern("CONTINUE")
elif op == self.opc.LOAD_GLOBAL: elif op == self.opc.LOAD_GLOBAL:
if offset in self.load_asserts: if offset in self.load_asserts:
@@ -316,6 +346,7 @@ class Scanner26(scan.Scanner2):
pass pass
if show_asm in ("both", "after"): if show_asm in ("both", "after"):
print("\n# ---- after tokenization:")
for t in tokens: for t in tokens:
print(t.format(line_prefix="")) print(t.format(line_prefix=""))
print() print()