DRY Python3 scanner code. Some cross version handling fixed.

Some Python 3.2 and 3.3 deparse fixes.
This commit is contained in:
rocky
2015-12-27 04:32:46 -05:00
parent 4640e7dece
commit 44cd349cc7
47 changed files with 1016 additions and 953 deletions

View File

@@ -11,34 +11,26 @@ for later use in deparsing.
from __future__ import print_function
import dis, inspect
from collections import namedtuple
from array import array
import uncompyle6.scanners.scanner3 as scan3
from uncompyle6 import PYTHON_VERSION
from uncompyle6.scanner import Token
import uncompyle6.opcodes.opcode_34
# Get all the opcodes into globals
JUMP_OPs = uncompyle6.opcodes.opcode_34.JUMP_OPs
globals().update(dis.opmap)
import uncompyle6.opcodes.opcode_34
# verify uses JUMP_OPs from here
JUMP_OPs = uncompyle6.opcodes.opcode_34.JUMP_OPs
from uncompyle6.opcodes.opcode_34 import *
import uncompyle6.scanner as scan
import uncompyle6.scanners.scanner33 as scan33
class Scanner34(scan.Scanner):
    """Scanner variant that registers itself with the base class as version 3.4."""

    def __init__(self):
        # The base initializer receives the version number as a float.
        scan.Scanner.__init__(self, 3.4)  # check

    def get_argument(self, bytecode, pos):
        """
        Return the two-byte, little-endian operand stored right after
        the opcode located at <pos> in <bytecode>.
        """
        low_byte = bytecode[pos + 1]
        high_byte = bytecode[pos + 2]
        return low_byte + high_byte * 256
class Scanner34(scan3.Scanner3):
def disassemble(self, co, classname=None):
fn = self.disassemble_built_in if PYTHON_VERSION == 3.4 \
else self.disassemble_cross_version
else self.disassemble_generic
return fn(co, classname)
def disassemble_built_in(self, co, classname=None):
@@ -167,255 +159,7 @@ class Scanner34(scan.Scanner):
pass
return tokens, {}
# FIXME Create and move to scanner3
def disassemble_cross_version(self, co, classname=None):
    """
    Disassemble <co> by delegating to a freshly created
    scan33.Scanner33 instance; <classname> is passed through
    unchanged and the delegate's result is returned as is.
    """
    return scan33.Scanner33().disassemble(co, classname)
# FIXME Create and move to scanner3
def build_lines_data(self, code_obj):
    """
    Generate various line-related helper data.

    Sets three attributes on self:
      linestarts        -- dict mapping offset -> line number, only for
                           offsets which start a line
      linestart_offsets -- plain set with offsets of first ops on a line
      lines             -- 'list-map' indexed by op offset; each entry is
                           a LineTuple(l_no, next) holding the line number
                           of that offset and the offset of the first op
                           on the following line (code length for the
                           last line)

    Reads self.code for the total code length.
    """
    # Offset: lineno pairs, only for offsets which start line.
    # Locally we use list for more convenient iteration using indices
    linestarts = list(dis.findlinestarts(code_obj))
    if not linestarts:
        # A code object may carry no line information at all; indexing
        # linestarts[0] would then raise IndexError. Attribute the whole
        # body to the first line of the code object instead.
        linestarts = [(0, code_obj.co_firstlineno)]
    self.linestarts = dict(linestarts)
    self.linestart_offsets = {start for (start, _) in linestarts}

    self.lines = lines = []
    LineTuple = namedtuple('LineTuple', ['l_no', 'next'])

    # Walk through the available linestarts and fill the data for all
    # code offsets that lie before each following linestart
    _, prev_line_no = linestarts[0]
    offset = 0
    for start_offset, line_no in linestarts[1:]:
        if start_offset > offset:
            entry = LineTuple(prev_line_no, start_offset)
            lines.extend([entry] * (start_offset - offset))
            offset = start_offset
        prev_line_no = line_no

    # Fill remaining offsets with reference to last line number
    # and code length as start offset of following non-existing line
    codelen = len(self.code)
    if codelen > offset:
        lines.extend([LineTuple(prev_line_no, codelen)] * (codelen - offset))
# FIXME Create and move to scanner3
def build_prev_op(self):
    """
    Build self.prev_op, a 'list-map' that is indexed by the offset of
    an op and yields the offset of the op preceding it.

    The list starts with a single 0; afterwards every op contributes
    its own offset once per byte it occupies (as given by op_size).
    """
    bytecode = self.code
    prev_offsets = self.prev_op = [0]
    for offset in self.op_range(0, len(bytecode)):
        width = self.op_size(bytecode[offset])
        prev_offsets.extend([offset] * width)
# FIXME Create and move to scanner3
def op_size(self, op):
    """
    Give the number of bytes taken by opcode <op> together with its
    argument: 1 for opcodes below dis.HAVE_ARGUMENT, 3 for all others.
    """
    return 1 if op < dis.HAVE_ARGUMENT else 3
def find_jump_targets(self):
    """
    Collect every offset in the byte code that is the target of a jump.

    Returns a dict which maps each target offset to the list of
    offsets of the ops jumping there. The approach follows
    dis.findlabels(), except that the jump sources are recorded
    per target.

    As a side effect the structure bookkeeping attributes (structs,
    fixed_jumps, ignore_if, not_continue, return_end_ifs) are reset,
    build_statement_indices() is run, and detect_structure() is
    invoked for every op.
    """
    bytecode = self.code
    n_bytes = len(bytecode)

    self.structs = [{'type': 'root',
                     'start': 0,
                     'end': n_bytes - 1}]

    # Fixed jumps mapped to their real destination
    self.fixed_jumps = {}
    self.ignore_if = set()
    self.build_statement_indices()

    # detect_structure() fills these two
    self.not_continue = set()
    self.return_end_ifs = set()

    jump_sources = {}
    for offset in self.op_range(0, n_bytes):
        opcode = bytecode[offset]

        # Structure detection may register a fixed jump for this offset,
        # so it has to run before fixed_jumps is consulted below
        self.detect_structure(offset)

        if opcode < dis.HAVE_ARGUMENT:
            # Argument-less op: only END_FINALLY with a fixed jump counts
            if opcode == END_FINALLY and offset in self.fixed_jumps:
                destination = self.fixed_jumps[offset]
                jump_sources.setdefault(destination, []).append(offset)
            continue

        destination = self.fixed_jumps.get(offset)
        operand = bytecode[offset + 1] + bytecode[offset + 2] * 256

        if destination is None:
            if opcode in dis.hasjrel and opcode != FOR_ITER:
                # Relative jumps count from the end of the 3-byte op
                destination = offset + 3 + operand
            elif (opcode in dis.hasjabs
                  and opcode in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP)
                  and operand > offset):
                # Of the absolute jumps only forward *_OR_POP ones are kept
                destination = operand

        if destination is not None and destination != -1:
            jump_sources.setdefault(destination, []).append(offset)

    return jump_sources
def build_statement_indices(self):
    """
    Work out which op offsets start a statement.

    Sets two attributes on self:
      stmts     -- set of offsets regarded as statement starts
      next_stmt -- 'list-map' which, indexed by an op offset, gives the
                   offset of the start of the next statement (code
                   length once past the last statement)

    Reads self.code, self.prev_op and self.lines, and calls
    self.all_instr(), self.op_range(), self.op_size() and
    self.get_target().
    """
    code = self.code
    start = 0
    end = codelen = len(code)
    # Opcodes which on their own mark a statement
    statement_opcodes = {
        SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
        SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH,
        POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
        STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
        STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
        RETURN_VALUE, RAISE_VARARGS, POP_TOP, PRINT_EXPR,
        JUMP_ABSOLUTE
    }
    # Pairs of consecutive opcodes which together mark a statement;
    # offsets found this way are also recorded in pass_stmts below
    statement_opcode_sequences = [(POP_JUMP_IF_FALSE, JUMP_FORWARD), (POP_JUMP_IF_FALSE, JUMP_ABSOLUTE),
                                  (POP_JUMP_IF_TRUE, JUMP_FORWARD), (POP_JUMP_IF_TRUE, JUMP_ABSOLUTE)]
    # Opcodes skipped over when looking backwards for a FOR_ITER
    designator_ops = {
        STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
        STORE_SUBSCR, UNPACK_SEQUENCE, JUMP_ABSOLUTE
    }
    # Compose preliminary list of indices with statements,
    # using plain statement opcodes
    # (presumably all_instr returns the offsets in ascending order,
    # since prelim is used unsorted further down -- TODO confirm)
    prelim = self.all_instr(start, end, statement_opcodes)
    # Initialize final container with statements with
    # preliminary data
    stmts = self.stmts = set(prelim)
    # Same for opcode sequences
    pass_stmts = set()
    for sequence in statement_opcode_sequences:
        for i in self.op_range(start, end-(len(sequence)+1)):
            match = True
            for elem in sequence:
                if elem != code[i]:
                    match = False
                    break
                # Step to the next op; rebinding i here does not
                # influence the iteration of the enclosing for loop
                i += self.op_size(code[i])
            if match is True:
                # i now points just past the sequence; step back to the
                # offset of its last op and record that one
                i = self.prev_op[i]
                stmts.add(i)
                pass_stmts.add(i)
    # Initialize statement list with the full data we've gathered so far
    if pass_stmts:
        stmt_offset_list = list(stmts)
        stmt_offset_list.sort()
    else:
        stmt_offset_list = prelim
    # 'List-map' which contains offset of start of
    # next statement, when op offset is passed as index
    self.next_stmt = slist = []
    # NOTE(review): with the initial -1 the first same-line comparison
    # below reads self.lines[-1], i.e. the last entry -- confirm intended
    last_stmt_offset = -1
    i = 0
    # Go through all statement offsets
    for stmt_offset in stmt_offset_list:
        # Process absolute jumps, but do not remove 'pass' statements
        # from the set
        if code[stmt_offset] == JUMP_ABSOLUTE and stmt_offset not in pass_stmts:
            # If absolute jump occurs in forward direction or it takes off from the
            # same line as previous statement, this is not a statement
            target = self.get_target(stmt_offset)
            if target > stmt_offset or self.lines[last_stmt_offset].l_no == self.lines[stmt_offset].l_no:
                stmts.remove(stmt_offset)
                continue
            # Rewind ops till we encounter a non-JUMP_ABSOLUTE one
            j = self.prev_op[stmt_offset]
            while code[j] == JUMP_ABSOLUTE:
                j = self.prev_op[j]
            # If we got here, then it's list comprehension which
            # is not a statement too
            if code[j] == LIST_APPEND:
                stmts.remove(stmt_offset)
                continue
        # Exclude ROT_TWO + POP_TOP
        elif code[stmt_offset] == POP_TOP and code[self.prev_op[stmt_offset]] == ROT_TWO:
            stmts.remove(stmt_offset)
            continue
        # Exclude FOR_ITER + designators
        elif code[stmt_offset] in designator_ops:
            j = self.prev_op[stmt_offset]
            while code[j] in designator_ops:
                j = self.prev_op[j]
            if code[j] == FOR_ITER:
                stmts.remove(stmt_offset)
                continue
        # Extend the map with the offset of the current statement, once
        # for every offset since the start of the previously kept statement
        slist += [stmt_offset] * (stmt_offset-i)
        last_stmt_offset = stmt_offset
        i = stmt_offset
    # Finish filling the list for last statement
    slist += [codelen] * (codelen-len(slist))
# FIXME Create and move to scanner3
def get_target(self, offset):
    """
    Compute the offset targeted by the op found at <offset>.

    The two bytes following the opcode are read as a little-endian
    number. For ops listed in dis.hasjrel that number is taken relative
    to the end of the 3-byte op; otherwise it is returned unchanged.
    """
    bytecode = self.code
    opcode = bytecode[offset]
    operand = bytecode[offset + 1] + bytecode[offset + 2] * 256
    if opcode in dis.hasjrel:
        return operand + (offset + 3)
    return operand
def next_except_jump(self, start):
    """
    Locate the next jump produced by an 'except SomeException:' clause
    of a try...except...else construct, beginning the search at <start>.

    Returns the offset of that jump, or None when there is none. The
    returned offset is also added to self.not_continue; on the DUP_TOP
    path the matching POP_JUMP_IF_FALSE is added to self.ignore_if.
    """
    bytecode = self.code

    if bytecode[start] == DUP_TOP:
        match_jump = self.first_instr(start, len(bytecode), POP_JUMP_IF_FALSE)
        if match_jump:
            # The op right before the target of the exception test
            handler_exit = self.prev_op[self.get_target(match_jump)]
            self.ignore_if.add(match_jump)
            self.not_continue.add(handler_exit)
            return handler_exit

    # Otherwise search for the END_FINALLY at this nesting level: one
    # seen while the counts of nested setups and END_FINALLYs are equal
    finally_seen = 0
    setups_seen = 0
    for offset in self.op_range(start, len(bytecode)):
        opcode = bytecode[offset]
        if opcode == END_FINALLY:
            if finally_seen == setups_seen:
                previous = self.prev_op[offset]
                assert bytecode[previous] in (JUMP_ABSOLUTE, JUMP_FORWARD, RETURN_VALUE)
                self.not_continue.add(previous)
                return previous
            finally_seen += 1
        elif opcode in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY):
            setups_seen += 1
    return None
# FIXME Create and move to scanner3
# FIXME: merge with scanner3 code
def detect_structure(self, offset):
"""
Detect structures and their boundaries to fix optimizied jumps
@@ -598,41 +342,32 @@ class Scanner34(scan.Scanner):
else:
self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)
def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
def next_except_jump(self, start):
"""
Find offsets of all requested <instr> between <start> and <end>,
optionally <target>ing specified offset, and return list found
<instr> offsets which are not within any POP_JUMP_IF_TRUE jumps.
Return the next jump that was generated by an except SomeException:
construct in a try...except...else clause or None if not found.
"""
# Find all offsets of requested instructions
instr_offsets = self.all_instr(start, end, instr, target, include_beyond_target)
# Get all POP_JUMP_IF_TRUE (or) offsets
pjit_offsets = self.all_instr(start, end, POP_JUMP_IF_TRUE)
filtered = []
for pjit_offset in pjit_offsets:
pjit_tgt = self.get_target(pjit_offset) - 3
for instr_offset in instr_offsets:
if instr_offset <= pjit_offset or instr_offset >= pjit_tgt:
filtered.append(instr_offset)
instr_offsets = filtered
filtered = []
return instr_offsets
def remove_mid_line_ifs(self, ifs):
    """
    Return the offsets from <ifs> with the mid-line ones dropped.

    An offset is dropped when the op three bytes further on sits on the
    same line and the last op of that line is POP_JUMP_IF_TRUE or
    POP_JUMP_IF_FALSE. All other offsets are kept in their given order.
    """
    kept = []
    for jump_offset in ifs:
        here = self.lines[jump_offset]
        if here.l_no == self.lines[jump_offset + 3].l_no:
            # here.next is the first offset of the following line, so
            # its predecessor is the last op on the current line
            last_on_line = self.code[self.prev_op[here.next]]
            if last_on_line in (POP_JUMP_IF_TRUE, POP_JUMP_IF_FALSE):
                continue
        kept.append(jump_offset)
    return kept
if self.code[start] == DUP_TOP:
except_match = self.first_instr(start, len(self.code), POP_JUMP_IF_FALSE)
if except_match:
jmp = self.prev_op[self.get_target(except_match)]
self.ignore_if.add(except_match)
self.not_continue.add(jmp)
return jmp
count_END_FINALLY = 0
count_SETUP_ = 0
for i in self.op_range(start, len(self.code)):
op = self.code[i]
if op == END_FINALLY:
if count_END_FINALLY == count_SETUP_:
assert self.code[self.prev_op[i]] in (JUMP_ABSOLUTE, JUMP_FORWARD, RETURN_VALUE)
self.not_continue.add(self.prev_op[i])
return self.prev_op[i]
count_END_FINALLY += 1
elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY):
count_SETUP_ += 1
if __name__ == "__main__":
co = inspect.currentframe().f_code