DRY instruction building code...

There is a little more that could be done with  self.offset2inst_index
This commit is contained in:
rocky
2018-04-03 04:41:36 -04:00
parent 96d8daeae9
commit fad43feb3d
6 changed files with 233 additions and 282 deletions

View File

@@ -1,8 +1,6 @@
#!/usr/bin/env python
from uncompyle6 import PYTHON_VERSION, IS_PYPY
from uncompyle6.scanner import get_scanner
from xdis.bytecode import Bytecode
from array import array
def bug(state, slotstate):
if state:
if slotstate is not None:
@@ -24,16 +22,13 @@ def bug_loop(disassemble, tb=None):
def test_if_in_for():
code = bug.__code__
scan = get_scanner(PYTHON_VERSION)
print(PYTHON_VERSION)
if 2.7 <= PYTHON_VERSION <= 3.0 and not IS_PYPY:
n = scan.setup_code(code)
bytecode = Bytecode(code, scan.opc)
scan.build_lines_data(code, n)
bytecode = scan.build_instructions(code)
scan.lines = scan.build_lines_data(code)
scan.insts = list(bytecode)
scan.offset2inst_index = {}
for i, inst in enumerate(scan.insts):
scan.offset2inst_index[inst.offset] = i
scan.build_prev_op(n)
fjt = scan.find_jump_targets(False)
## FIXME: the data below is wrong.
@@ -48,11 +43,9 @@ def test_if_in_for():
# {'start': 62, 'end': 63, 'type': 'for-else'}]
code = bug_loop.__code__
n = scan.setup_code(code)
bytecode = Bytecode(code, scan.opc)
scan.build_lines_data(code, n)
bytecode = scan.build_instructions(code)
scan.lines = scan.build_lines_data(code)
scan.insts = list(bytecode)
scan.build_prev_op(n)
scan.offset2inst_index = {}
for i, inst in enumerate(scan.insts):
scan.offset2inst_index[inst.offset] = i
@@ -69,10 +62,8 @@ def test_if_in_for():
{'start': 48, 'end': 67, 'type': 'while-loop'}]
elif 3.2 < PYTHON_VERSION <= 3.4:
bytecode = Bytecode(code, scan.opc)
scan.code = array('B', code.co_code)
bytecode = scan.build_instructions(code)
scan.lines = scan.build_lines_data(code)
scan.build_prev_op()
scan.insts = list(bytecode)
scan.offset2inst_index = {}
for i, inst in enumerate(scan.insts):
@@ -86,5 +77,6 @@ def test_if_in_for():
{'end': 59, 'type': 'for-loop', 'start': 31},
{'end': 63, 'type': 'for-else', 'start': 62}]
else:
assert True, "FIXME: should note fixed"
print("FIXME: should fix for %s" % PYTHON_VERSION)
assert True
return

View File

@@ -23,12 +23,15 @@ scanners, e.g. for Python 2.7 or 3.4.
from __future__ import print_function
from array import array
from collections import namedtuple
import sys
from uncompyle6 import PYTHON3, IS_PYPY
from uncompyle6.scanners.tok import Token
import xdis
from xdis.bytecode import instruction_size, extended_arg_val, next_offset
from xdis.bytecode import (
Bytecode, instruction_size, extended_arg_val, next_offset)
from xdis.magics import canonic_python_version
from xdis.util import code2num
@@ -90,11 +93,72 @@ class Scanner(object):
# FIXME: This weird Python2 behavior is not Python3
self.resetTokenClass()
def opname_for_offset(self, offset):
return self.opc.opname[self.code[offset]]
def build_instructions(self, co):
"""
Create a list of instructions (a structured object rather than
an array of bytes) and store that in self.insts
"""
# FIXME: remove this when all subsidiary functions have been removed.
# We should be able to get everything from the self.insts list.
self.code = array('B', co.co_code)
def op_name(self, op):
return self.opc.opname[op]
bytecode = Bytecode(co, self.opc)
self.build_prev_op()
self.insts = self.remove_extended_args(list(bytecode))
return bytecode
def build_lines_data(self, code_obj):
"""
Generate various line-related helper data.
"""
# Offset: lineno pairs, only for offsets which start line.
# Locally we use list for more convenient iteration using indices
linestarts = list(self.opc.findlinestarts(code_obj))
self.linestarts = dict(linestarts)
# Plain set with offsets of first ops on line.
# FIXME: we probably could do without
self.linestart_offsets = set(a for (a, _) in linestarts)
# 'List-map' which shows line number of current op and offset of
# first op on following line, given offset of op as index
lines = []
LineTuple = namedtuple('LineTuple', ['l_no', 'next'])
# Iterate through available linestarts, and fill
# the data for all code offsets encountered until
# last linestart offset
_, prev_line_no = linestarts[0]
offset = 0
for start_offset, line_no in linestarts[1:]:
while offset < start_offset:
lines.append(LineTuple(prev_line_no, start_offset))
offset += 1
prev_line_no = line_no
# Fill remaining offsets with reference to last line number
# and code length as start offset of following non-existing line
codelen = len(self.code)
while offset < codelen:
lines.append(LineTuple(prev_line_no, codelen))
offset += 1
return lines
def build_prev_op(self):
"""
Compose 'list-map' which allows to jump to previous
op, given offset of current op as index.
"""
code = self.code
codelen = len(code)
# 2.x uses prev 3.x uses prev_op. Sigh
# Until we get this sorted out.
self.prev = self.prev_op = [0]
for offset in self.op_range(0, codelen):
op = code[offset]
for _ in range(instruction_size(op, self.opc)):
self.prev_op.append(offset)
def is_jump_forward(self, offset):
"""
@@ -332,6 +396,12 @@ class Scanner(object):
return result
def opname_for_offset(self, offset):
return self.opc.opname[self.code[offset]]
def op_name(self, op):
return self.opc.opname[op]
def op_range(self, start, end):
"""
Iterate through positions of opcodes, skipping
@@ -341,6 +411,42 @@ class Scanner(object):
yield start
start += instruction_size(self.code[start], self.opc)
def remove_extended_args(self, instructions):
"""Go through instructions removing extended ARG.
get_instruction_bytes previously adjusted the operand values
to account for these"""
new_instructions = []
last_was_extarg = False
n = len(instructions)
for i, inst in enumerate(instructions):
if (inst.opname == 'EXTENDED_ARG' and
i+1 < n and instructions[i+1].opname != 'MAKE_FUNCTION'):
last_was_extarg = True
starts_line = inst.starts_line
is_jump_target = inst.is_jump_target
offset = inst.offset
continue
if last_was_extarg:
# j = self.stmts.index(inst.offset)
# self.lines[j] = offset
new_inst= inst._replace(starts_line=starts_line,
is_jump_target=is_jump_target,
offset=offset)
inst = new_inst
if i < n:
new_prev = self.prev_op[instructions[i].offset]
j = instructions[i+1].offset
old_prev = self.prev_op[j]
while self.prev_op[j] == old_prev and j < n:
self.prev_op[j] = new_prev
j += 1
last_was_extarg = False
new_instructions.append(inst)
return new_instructions
def remove_mid_line_ifs(self, ifs):
"""
Go through passed offsets, filtering ifs
@@ -413,7 +519,7 @@ def get_scanner(version, is_pypy=False, show_asm=None):
if __name__ == "__main__":
import inspect, uncompyle6
co = inspect.currentframe().f_code
scanner = get_scanner('2.7.13', True)
scanner = get_scanner(sys.version[:5], False)
# scanner = get_scanner('2.7.13', True)
# scanner = get_scanner(sys.version[:5], False)
scanner = get_scanner(uncompyle6.PYTHON_VERSION, IS_PYPY, True)
tokens, customize = scanner.ingest(co, {})
tokens, customize = scanner.ingest(co, {}, show_asm='after')

View File

@@ -35,17 +35,15 @@ Finally we save token information.
from __future__ import print_function
from collections import namedtuple
from array import array
from copy import copy
from xdis.code import iscode
from xdis.bytecode import (
Bytecode, op_has_argument, instruction_size,
op_has_argument, instruction_size,
_get_const_info)
from xdis.util import code2num
from uncompyle6.scanner import Scanner
from uncompyle6.scanner import Scanner, Token
class Scanner2(Scanner):
def __init__(self, version, show_asm=None, is_pypy=False):
@@ -57,6 +55,57 @@ class Scanner2(Scanner):
self.genexpr_name = '<genexpr>'
self.load_asserts = set([])
# Create opcode classification sets
# Note: super initilization above initializes self.opc
# Ops that start SETUP_ ... We will COME_FROM with these names
# Some blocks and END_ statements. And they can start
# a new statement
self.statement_opcodes = frozenset([
self.opc.SETUP_LOOP, self.opc.BREAK_LOOP,
self.opc.SETUP_FINALLY, self.opc.END_FINALLY,
self.opc.SETUP_EXCEPT, self.opc.POP_BLOCK,
self.opc.STORE_FAST, self.opc.DELETE_FAST,
self.opc.STORE_DEREF, self.opc.STORE_GLOBAL,
self.opc.DELETE_GLOBAL, self.opc.STORE_NAME,
self.opc.DELETE_NAME, self.opc.STORE_ATTR,
self.opc.DELETE_ATTR, self.opc.STORE_SUBSCR,
self.opc.DELETE_SUBSCR, self.opc.RETURN_VALUE,
self.opc.RAISE_VARARGS, self.opc.POP_TOP,
self.opc.PRINT_EXPR, self.opc.PRINT_ITEM,
self.opc.PRINT_NEWLINE, self.opc.PRINT_ITEM_TO,
self.opc.PRINT_NEWLINE_TO, self.opc.CONTINUE_LOOP,
self.opc.JUMP_ABSOLUTE, self.opc.EXEC_STMT,
])
# Opcodes that can start a "store" non-terminal.
# FIXME: JUMP_ABSOLUTE is weird. What's up with that?
self.designator_ops = frozenset([
self.opc.STORE_FAST, self.opc.STORE_NAME,
self.opc.STORE_GLOBAL, self.opc.STORE_DEREF, self.opc.STORE_ATTR,
self.opc.STORE_SLICE_0, self.opc.STORE_SLICE_1, self.opc.STORE_SLICE_2,
self.opc.STORE_SLICE_3, self.opc.STORE_SUBSCR, self.opc.UNPACK_SEQUENCE,
self.opc.JUMP_ABSOLUTE
])
# Python 2.7 has POP_JUMP_IF_{TRUE,FALSE}_OR_POP but < 2.7 doesn't
# Add an empty set make processing more uniform.
self.pop_jump_if_or_pop = frozenset([])
# opcodes with expect a variable number pushed values whose
# count is in the opcode. For parsing we generally change the
# opcode name to include that number.
self.varargs_ops = frozenset([
self.opc.BUILD_LIST, self.opc.BUILD_TUPLE,
self.opc.BUILD_SLICE, self.opc.UNPACK_SEQUENCE,
self.opc.MAKE_FUNCTION, self.opc.CALL_FUNCTION,
self.opc.MAKE_CLOSURE, self.opc.CALL_FUNCTION_VAR,
self.opc.CALL_FUNCTION_KW, self.opc.CALL_FUNCTION_VAR_KW,
self.opc.DUP_TOPX, self.opc.RAISE_VARARGS])
@staticmethod
def unmangle_name(name, classname):
"""Remove __ from the end of _name_ if it starts with __classname__
@@ -106,7 +155,8 @@ class Scanner2(Scanner):
if not show_asm:
show_asm = self.show_asm
bytecode = Bytecode(co, self.opc)
bytecode = self.build_instructions(co)
# show_asm = 'after'
if show_asm in ('both', 'before'):
for instr in bytecode.get_instructions(co):
@@ -117,18 +167,12 @@ class Scanner2(Scanner):
# "customize" is in the process of going away here
customize = {}
if self.is_pypy:
customize['PyPy'] = 0
Token = self.Token # shortcut
codelen = len(self.code)
self.lines = self.build_lines_data(co)
codelen = self.setup_code(co)
self.build_lines_data(co, codelen)
self.build_prev_op(codelen)
self.insts = list(bytecode)
self.offset2inst_index = {}
for i, inst in enumerate(self.insts):
self.offset2inst_index[inst.offset] = i
@@ -314,7 +358,7 @@ class Scanner2(Scanner):
if (offset in self.stmts and
self.code[offset+3] not in (self.opc.END_FINALLY,
self.opc.POP_BLOCK)):
if ((offset in self.linestartoffsets and
if ((offset in self.linestart_offsets and
self.code[self.prev[offset]] == self.opc.JUMP_ABSOLUTE)
or self.code[target] == self.opc.FOR_ITER
or offset not in self.not_continue):
@@ -327,10 +371,7 @@ class Scanner2(Scanner):
if offset in self.return_end_ifs:
op_name = 'RETURN_END_IF'
if offset in self.linestartoffsets:
linestart = self.linestartoffsets[offset]
else:
linestart = None
linestart = self.linestarts.get(offset, None)
if offset not in replace:
tokens.append(Token(
@@ -349,63 +390,6 @@ class Scanner2(Scanner):
print()
return tokens, customize
def setup_code(self, co):
"""
Creates Python-independent bytecode structure (byte array) in
self.code and records previous instruction in self.prev
The size of self.code is returned
"""
self.code = array('B', co.co_code)
n = -1
for i in self.op_range(0, len(self.code)):
if self.code[i] in (self.opc.RETURN_VALUE, self.opc.END_FINALLY):
n = i + 1
pass
pass
assert n > -1, "Didn't find RETURN_VALUE or END_FINALLY"
self.code = array('B', co.co_code[:n])
return n
def build_prev_op(self, n):
self.prev = [0]
# mapping addresses of instruction & argument
for i in self.op_range(0, n):
op = self.code[i]
self.prev.append(i)
if op_has_argument(op, self.opc):
self.prev.append(i)
self.prev.append(i)
pass
pass
def build_lines_data(self, co, n):
"""
Initializes self.lines and self.linesstartoffsets
"""
self.lines = []
linetuple = namedtuple('linetuple', ['l_no', 'next'])
# self.linestarts is a tuple of (offset, line number).
# Turn that in a has that we can index
self.linestarts = list(self.opc.findlinestarts(co))
self.linestartoffsets = {}
for offset, lineno in self.linestarts:
self.linestartoffsets[offset] = lineno
j = 0
(prev_start_byte, prev_line_no) = self.linestarts[0]
for (start_byte, line_no) in self.linestarts[1:]:
while j < start_byte:
self.lines.append(linetuple(prev_line_no, start_byte))
j += 1
prev_line_no = start_byte
while j < n:
self.lines.append(linetuple(prev_line_no, n))
j+=1
return
def build_statement_indices(self):
code = self.code
start = 0
@@ -972,7 +956,8 @@ class Scanner2(Scanner):
'end': pre_rtarget})
# FIXME: this is yet another case were we need dominators.
if pre_rtarget not in self.linestartoffsets or self.version < 2.7:
if (pre_rtarget not in self.linestart_offsets
or self.version < 2.7):
self.not_continue.add(pre_rtarget)
if rtarget < end_offset:
@@ -1161,6 +1146,19 @@ class Scanner2(Scanner):
return targets
def patch_continue(self, tokens, offset, op):
if op in (self.opc.JUMP_FORWARD, self.opc.JUMP_ABSOLUTE):
# FIXME: this is a hack to catch stuff like:
# for ...
# try: ...
# except: continue
# the "continue" is not on a new line.
n = len(tokens)
if (n > 2 and
tokens[-1].kind == 'JUMP_BACK' and
self.code[offset+3] == self.opc.END_FINALLY):
tokens[-1].kind = intern('CONTINUE')
# FIXME: combine with scanner3.py code and put into scanner.py
def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
"""
@@ -1200,3 +1198,17 @@ class Scanner2(Scanner):
instr_offsets = filtered
filtered = []
return instr_offsets
if __name__ == "__main__":
from uncompyle6 import PYTHON_VERSION
if 2.0 <= PYTHON_VERSION < 3.0:
import inspect
co = inspect.currentframe().f_code
from uncompyle6 import PYTHON_VERSION
tokens, customize = Scanner2(PYTHON_VERSION).ingest(co)
for t in tokens:
print(t)
else:
print("Need to be Python 2.x to demo; I am %s." %
PYTHON_VERSION)
pass

View File

@@ -32,59 +32,21 @@ from uncompyle6.scanner import L65536
# bytecode verification, verify(), uses JUMP_OPs from here
from xdis.opcodes import opcode_26
from xdis.bytecode import Bytecode
from xdis.bytecode import _get_const_info
from uncompyle6.scanner import Token
JUMP_OPS = opcode_26.JUMP_OPS
class Scanner26(scan.Scanner2):
def __init__(self, show_asm=False):
super(Scanner26, self).__init__(2.6, show_asm)
self.statement_opcodes = frozenset([
self.opc.SETUP_LOOP, self.opc.BREAK_LOOP,
self.opc.SETUP_FINALLY, self.opc.END_FINALLY,
self.opc.SETUP_EXCEPT, self.opc.POP_BLOCK,
self.opc.STORE_FAST, self.opc.DELETE_FAST,
self.opc.STORE_DEREF, self.opc.STORE_GLOBAL,
self.opc.DELETE_GLOBAL, self.opc.STORE_NAME,
self.opc.DELETE_NAME, self.opc.STORE_ATTR,
self.opc.DELETE_ATTR, self.opc.STORE_SUBSCR,
self.opc.DELETE_SUBSCR, self.opc.RETURN_VALUE,
self.opc.RAISE_VARARGS, self.opc.POP_TOP,
self.opc.PRINT_EXPR, self.opc.PRINT_ITEM,
self.opc.PRINT_NEWLINE, self.opc.PRINT_ITEM_TO,
self.opc.PRINT_NEWLINE_TO, self.opc.CONTINUE_LOOP,
self.opc.JUMP_ABSOLUTE, self.opc.EXEC_STMT,
])
# "setup" opcodes
self.setup_ops = frozenset([
self.opc.SETUP_EXCEPT, self.opc.SETUP_FINALLY,
])
# opcodes with expect a variable number pushed values whose
# count is in the opcode. For parsing we generally change the
# opcode name to include that number.
self.varargs_ops = frozenset([
self.opc.BUILD_LIST, self.opc.BUILD_TUPLE,
self.opc.BUILD_SLICE, self.opc.UNPACK_SEQUENCE,
self.opc.MAKE_FUNCTION, self.opc.CALL_FUNCTION,
self.opc.MAKE_CLOSURE, self.opc.CALL_FUNCTION_VAR,
self.opc.CALL_FUNCTION_KW, self.opc.CALL_FUNCTION_VAR_KW,
self.opc.DUP_TOPX, self.opc.RAISE_VARARGS])
# opcodes that store values into a variable
self.designator_ops = frozenset([
self.opc.STORE_FAST, self.opc.STORE_NAME,
self.opc.STORE_GLOBAL, self.opc.STORE_DEREF, self.opc.STORE_ATTR,
self.opc.STORE_SLICE_0, self.opc.STORE_SLICE_1, self.opc.STORE_SLICE_2,
self.opc.STORE_SLICE_3, self.opc.STORE_SUBSCR, self.opc.UNPACK_SEQUENCE,
self.opc.JUMP_ABSOLUTE
])
# Python 2.7 has POP_JUMP_IF_{TRUE,FALSE}_OR_POP but < 2.7 doesn't
# Add an empty set make processing more uniform.
self.pop_jump_if_or_pop = frozenset([])
return
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
@@ -106,7 +68,8 @@ class Scanner26(scan.Scanner2):
if not show_asm:
show_asm = self.show_asm
bytecode = Bytecode(co, self.opc)
bytecode = self.build_instructions(co)
# show_asm = 'after'
if show_asm in ('both', 'before'):
for instr in bytecode.get_instructions(co):
@@ -119,12 +82,9 @@ class Scanner26(scan.Scanner2):
if self.is_pypy:
customize['PyPy'] = 1
Token = self.Token # shortcut
codelen = len(self.code)
codelen = self.setup_code(co)
self.build_lines_data(co, codelen)
self.build_prev_op(codelen)
self.lines = self.build_lines_data(co)
self.insts = list(bytecode)
self.offset2inst_index = {}
@@ -288,7 +248,7 @@ class Scanner26(scan.Scanner2):
if (offset in self.stmts
and self.code[offset+3] not in (self.opc.END_FINALLY,
self.opc.POP_BLOCK)):
if ((offset in self.linestartoffsets and
if ((offset in self.linestart_offsets and
tokens[-1].kind == 'JUMP_BACK')
or offset not in self.not_continue):
op_name = 'CONTINUE'
@@ -309,10 +269,7 @@ class Scanner26(scan.Scanner2):
if offset in self.return_end_ifs:
op_name = 'RETURN_END_IF'
if offset in self.linestartoffsets:
linestart = self.linestartoffsets[offset]
else:
linestart = None
linestart = self.linestarts.get(offset, None)
if offset not in replace:
tokens.append(Token(

View File

@@ -25,28 +25,15 @@ class Scanner27(Scanner2):
super(Scanner27, self).__init__(2.7, show_asm, is_pypy)
# opcodes that start statements
self.statement_opcodes = frozenset([
self.opc.SETUP_LOOP, self.opc.BREAK_LOOP,
self.opc.SETUP_FINALLY, self.opc.END_FINALLY,
self.opc.SETUP_EXCEPT,
self.opc.POP_BLOCK, self.opc.STORE_FAST, self.opc.DELETE_FAST,
self.opc.STORE_DEREF, self.opc.STORE_GLOBAL,
self.opc.DELETE_GLOBAL, self.opc.STORE_NAME,
self.opc.DELETE_NAME, self.opc.STORE_ATTR,
self.opc.DELETE_ATTR, self.opc.STORE_SUBSCR,
self.opc.DELETE_SUBSCR, self.opc.RETURN_VALUE,
self.opc.RAISE_VARARGS, self.opc.POP_TOP,
self.opc.PRINT_EXPR, self.opc.PRINT_ITEM,
self.opc.PRINT_NEWLINE, self.opc.PRINT_ITEM_TO,
self.opc.PRINT_NEWLINE_TO, self.opc.CONTINUE_LOOP,
self.opc.JUMP_ABSOLUTE, self.opc.EXEC_STMT,
# New in 2.7
self.opc.SETUP_WITH,
self.opc.STORE_SLICE_0, self.opc.STORE_SLICE_1,
self.opc.STORE_SLICE_2, self.opc.STORE_SLICE_3,
self.opc.DELETE_SLICE_0, self.opc.DELETE_SLICE_1,
self.opc.DELETE_SLICE_2, self.opc.DELETE_SLICE_3,
])
self.statement_opcodes = frozenset(
self.statement_opcodes | set([
# New in 2.7
self.opc.SETUP_WITH,
self.opc.STORE_SLICE_0, self.opc.STORE_SLICE_1,
self.opc.STORE_SLICE_2, self.opc.STORE_SLICE_3,
self.opc.DELETE_SLICE_0, self.opc.DELETE_SLICE_1,
self.opc.DELETE_SLICE_2, self.opc.DELETE_SLICE_3,
]))
# opcodes which expect a variable number pushed values and whose
# count is in the opcode. For parsing we generally change the
@@ -85,19 +72,6 @@ class Scanner27(Scanner2):
return
def patch_continue(self, tokens, offset, op):
if op in (self.opc.JUMP_FORWARD, self.opc.JUMP_ABSOLUTE):
# FIXME: this is a hack to catch stuff like:
# for ...
# try: ...
# except: continue
# the "continue" is not on a new line.
n = len(tokens)
if (n > 2 and
tokens[-1].kind == 'JUMP_BACK' and
self.code[offset+3] == self.opc.END_FINALLY):
tokens[-1].kind = intern('CONTINUE')
pass
if __name__ == "__main__":

View File

@@ -35,11 +35,8 @@ Finally we save token information.
from __future__ import print_function
from collections import namedtuple
from array import array
from xdis.code import iscode
from xdis.bytecode import Bytecode, instruction_size, _get_const_info
from xdis.bytecode import instruction_size, _get_const_info
from uncompyle6.scanner import Token, parse_fn_counts
import xdis
@@ -100,7 +97,7 @@ class Scanner3(Scanner):
self.statement_opcodes = frozenset(statement_opcodes) | self.setup_ops_no_loop
# Opcodes that can start a designator non-terminal.
# Opcodes that can start a "store" non-terminal.
# FIXME: JUMP_ABSOLUTE is weird. What's up with that?
self.designator_ops = frozenset([
self.opc.STORE_FAST, self.opc.STORE_NAME, self.opc.STORE_GLOBAL,
@@ -154,42 +151,6 @@ class Scanner3(Scanner):
# FIXME: remove the above in favor of:
# self.varargs_ops = frozenset(self.opc.hasvargs)
def remove_extended_args(self, instructions):
"""Go through instructions removing extended ARG.
get_instruction_bytes previously adjusted the operand values
to account for these"""
new_instructions = []
last_was_extarg = False
n = len(instructions)
for i, inst in enumerate(instructions):
if (inst.opname == 'EXTENDED_ARG' and
i+1 < n and instructions[i+1].opname != 'MAKE_FUNCTION'):
last_was_extarg = True
starts_line = inst.starts_line
is_jump_target = inst.is_jump_target
offset = inst.offset
continue
if last_was_extarg:
# j = self.stmts.index(inst.offset)
# self.lines[j] = offset
new_inst= inst._replace(starts_line=starts_line,
is_jump_target=is_jump_target,
offset=offset)
inst = new_inst
if i < n:
new_prev = self.prev_op[instructions[i].offset]
j = instructions[i+1].offset
old_prev = self.prev_op[j]
while self.prev_op[j] == old_prev and j < n:
self.prev_op[j] = new_prev
j += 1
last_was_extarg = False
new_instructions.append(inst)
return new_instructions
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
"""
Pick out tokens from an uncompyle6 code object, and transform them,
@@ -207,13 +168,12 @@ class Scanner3(Scanner):
cause specific rules for the specific number of arguments they take.
"""
# FIXME: remove this when all subsidiary functions have been removed.
# We should be able to get everything from the self.insts list.
self.code = array('B', co.co_code)
if not show_asm:
show_asm = self.show_asm
bytecode = Bytecode(co, self.opc)
show_asm = self.show_asm if not show_asm else show_asm
# show_asm = 'both'
bytecode = self.build_instructions(co)
# show_asm = 'after'
if show_asm in ('both', 'before'):
for instr in bytecode.get_instructions(co):
print(instr.disassemble())
@@ -228,14 +188,11 @@ class Scanner3(Scanner):
customize['PyPy'] = 0
self.lines = self.build_lines_data(co)
self.build_prev_op()
# FIXME: put as its own method?
# Scan for assertions. Later we will
# turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
# 'LOAD_ASSERT' is used in assert statements.
self.load_asserts = set()
self.insts = self.remove_extended_args(list(bytecode))
self.offset2inst_index = {}
n = len(self.insts)
@@ -482,53 +439,6 @@ class Scanner3(Scanner):
print()
return tokens, customize
def build_lines_data(self, code_obj):
"""
Generate various line-related helper data.
"""
# Offset: lineno pairs, only for offsets which start line.
# Locally we use list for more convenient iteration using indices
linestarts = list(self.opc.findlinestarts(code_obj))
self.linestarts = dict(linestarts)
# Plain set with offsets of first ops on line
self.linestart_offsets = set(a for (a, _) in linestarts)
# 'List-map' which shows line number of current op and offset of
# first op on following line, given offset of op as index
lines = []
LineTuple = namedtuple('LineTuple', ['l_no', 'next'])
# Iterate through available linestarts, and fill
# the data for all code offsets encountered until
# last linestart offset
_, prev_line_no = linestarts[0]
offset = 0
for start_offset, line_no in linestarts[1:]:
while offset < start_offset:
lines.append(LineTuple(prev_line_no, start_offset))
offset += 1
prev_line_no = line_no
# Fill remaining offsets with reference to last line number
# and code length as start offset of following non-existing line
codelen = len(self.code)
while offset < codelen:
lines.append(LineTuple(prev_line_no, codelen))
offset += 1
return lines
def build_prev_op(self):
"""
Compose 'list-map' which allows to jump to previous
op, given offset of current op as index.
"""
code = self.code
codelen = len(code)
# 2.x uses prev 3.x uses prev_op. Sigh
# Until we get this sorted out.
self.prev = self.prev_op = [0]
for offset in self.op_range(0, codelen):
op = code[offset]
for _ in range(instruction_size(op, self.opc)):
self.prev_op.append(offset)
def find_jump_targets(self, debug):
"""
Detect all offsets in a byte code which are jump targets