Merge branch 'master' into extended_args

This commit is contained in:
rocky
2018-02-25 12:30:57 -05:00
3 changed files with 80 additions and 82 deletions

View File

@@ -17,8 +17,8 @@ import sys
from uncompyle6 import PYTHON3, IS_PYPY from uncompyle6 import PYTHON3, IS_PYPY
from uncompyle6.scanners.tok import Token from uncompyle6.scanners.tok import Token
import xdis import xdis
from xdis.bytecode import op_size, extended_arg_val from xdis.bytecode import instruction_size, extended_arg_val, next_offset
from xdis.magics import py_str2float, canonic_python_version from xdis.magics import canonic_python_version
from xdis.util import code2num from xdis.util import code2num
# The byte code versions we support. # The byte code versions we support.
@@ -98,12 +98,20 @@ class Scanner(object):
# FIXME 0 isn't always correct # FIXME 0 isn't always correct
return offset < self.get_target(offset, 0) return offset < self.get_target(offset, 0)
def get_target(self, pos, op=None): def get_target(self, offset, extended_arg=0):
if op is None: """
op = self.code[pos] Get next instruction offset for op located at given <offset>.
target = self.get_argument(pos) NOTE: extended_arg is no longer used
if op in self.opc.JREL_OPS: """
target += pos + 3 # instructions can get moved as a result of EXTENDED_ARGS removal
if offset not in self.offset2inst_index:
offset -= instruction_size(self.opc.EXTENDED_ARG, self.opc)
inst = self.insts[self.offset2inst_index[offset]]
if inst.opcode in self.opc.JREL_OPS | self.opc.JABS_OPS:
target = inst.argval
else:
# No jump offset, so use fall-through offset
target = next_offset(inst.opcode, self.opc, inst.offset)
return target return target
def get_argument(self, pos): def get_argument(self, pos):
@@ -269,7 +277,7 @@ class Scanner(object):
""" """
while start < end: while start < end:
yield start yield start
start += op_size(self.code[start], self.opc) start += instruction_size(self.code[start], self.opc)
def remove_mid_line_ifs(self, ifs): def remove_mid_line_ifs(self, ifs):
""" """

View File

@@ -26,7 +26,7 @@ from collections import namedtuple
from array import array from array import array
from xdis.code import iscode from xdis.code import iscode
from xdis.bytecode import Bytecode, op_has_argument, op_size, instruction_size from xdis.bytecode import Bytecode, op_has_argument, instruction_size
from xdis.util import code2num from xdis.util import code2num
from uncompyle6.scanner import Scanner from uncompyle6.scanner import Scanner
@@ -72,13 +72,14 @@ class Scanner2(Scanner):
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
""" """
Pick out tokens from an uncompyle6 code object, and transform them, Pick out tokens from an uncompyle6 code object, and transform them,
returning a list of uncompyle6 'Token's. returning a list of uncompyle6 Token's.
The transformations are made to assist the deparsing grammar. The transformations are made to assist the deparsing grammar.
Specifically: Specifically:
- various types of LOAD_CONST's are categorized in terms of what they load - various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures - COME_FROM instructions are added to assist parsing control structures
- MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- some EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
@@ -112,6 +113,7 @@ class Scanner2(Scanner):
self.insts = list(bytecode) self.insts = list(bytecode)
self.offset2inst_index = {} self.offset2inst_index = {}
n = len(self.insts)
for i, inst in enumerate(self.insts): for i, inst in enumerate(self.insts):
self.offset2inst_index[inst.offset] = i self.offset2inst_index[inst.offset] = i
@@ -141,8 +143,10 @@ class Scanner2(Scanner):
if names[self.get_argument(i+3)] == 'AssertionError': if names[self.get_argument(i+3)] == 'AssertionError':
self.load_asserts.add(i+3) self.load_asserts.add(i+3)
# Get jump targets
# Format: {target offset: [jump offsets]}
jump_targets = self.find_jump_targets(show_asm) jump_targets = self.find_jump_targets(show_asm)
# contains (code, [addrRefToCode]) # print("XXX2", jump_targets)
last_stmt = self.next_stmt[0] last_stmt = self.next_stmt[0]
i = self.next_stmt[last_stmt] i = self.next_stmt[last_stmt]
@@ -383,7 +387,7 @@ class Scanner2(Scanner):
if elem != code[i]: if elem != code[i]:
match = False match = False
break break
i += op_size(code[i], self.opc) i += instruction_size(code[i], self.opc)
if match: if match:
i = self.prev[i] i = self.prev[i]
@@ -629,7 +633,7 @@ class Scanner2(Scanner):
'start': jump_back_offset+3, 'start': jump_back_offset+3,
'end': loop_end_offset}) 'end': loop_end_offset})
elif op == self.opc.SETUP_EXCEPT: elif op == self.opc.SETUP_EXCEPT:
start = offset + op_size(op, self.opc) start = offset + instruction_size(op, self.opc)
target = self.get_target(offset, op) target = self.get_target(offset, op)
end_offset = self.restrict_to_parent(target, parent) end_offset = self.restrict_to_parent(target, parent)
if target != end_offset: if target != end_offset:
@@ -653,7 +657,7 @@ class Scanner2(Scanner):
setup_except_nest -= 1 setup_except_nest -= 1
elif self.code[end_finally_offset] == self.opc.SETUP_EXCEPT: elif self.code[end_finally_offset] == self.opc.SETUP_EXCEPT:
setup_except_nest += 1 setup_except_nest += 1
end_finally_offset += op_size(code[end_finally_offset], self.opc) end_finally_offset += instruction_size(code[end_finally_offset], self.opc)
pass pass
# Add the except blocks # Add the except blocks
@@ -866,7 +870,7 @@ class Scanner2(Scanner):
else: else:
# We still have the case in 2.7 that the next instruction # We still have the case in 2.7 that the next instruction
# is a jump to a SETUP_LOOP target. # is a jump to a SETUP_LOOP target.
next_offset = target + op_size(self.code[target], self.opc) next_offset = target + instruction_size(self.code[target], self.opc)
next_op = self.code[next_offset] next_op = self.code[next_offset]
if self.op_name(next_op) == 'JUMP_FORWARD': if self.op_name(next_op) == 'JUMP_FORWARD':
jump_target = self.get_target(next_offset, next_op) jump_target = self.get_target(next_offset, next_op)

View File

@@ -25,9 +25,8 @@ from __future__ import print_function
from collections import namedtuple from collections import namedtuple
from array import array from array import array
from uncompyle6.scanner import Scanner
from xdis.code import iscode from xdis.code import iscode
from xdis.bytecode import Bytecode, instruction_size, next_offset from xdis.bytecode import Bytecode, instruction_size
from uncompyle6.scanner import Token, parse_fn_counts from uncompyle6.scanner import Token, parse_fn_counts
import xdis import xdis
@@ -35,6 +34,8 @@ import xdis
# Get all the opcodes into globals # Get all the opcodes into globals
import xdis.opcodes.opcode_33 as op3 import xdis.opcodes.opcode_33 as op3
from uncompyle6.scanner import Scanner
import sys import sys
from uncompyle6 import PYTHON3 from uncompyle6 import PYTHON3
if PYTHON3: if PYTHON3:
@@ -42,38 +43,6 @@ if PYTHON3:
globals().update(op3.opmap) globals().update(op3.opmap)
def remove_extended_args(instructions, prev_op):
    """Go through instructions removing EXTENDED_ARG.

    get_instruction_bytes previously adjusted the operand values
    to account for these.

    An EXTENDED_ARG entry is dropped unless it is the final instruction
    or is immediately followed by MAKE_FUNCTION.  The instruction after a
    dropped EXTENDED_ARG inherits its ``starts_line``, ``is_jump_target``
    and ``offset`` fields.

    ``instructions`` is a sequence of namedtuple-like objects providing
    ``opname``, ``starts_line``, ``is_jump_target``, ``offset`` and
    ``_replace``.  ``prev_op`` is a list that is indexed by offset here and
    may be modified in place.

    Returns the new list of instructions.
    """
    new_instructions = []
    last_was_extarg = False
    n = len(instructions)
    for i, inst in enumerate(instructions):
        if (inst.opname == 'EXTENDED_ARG' and
                i + 1 < n and
                instructions[i + 1].opname != 'MAKE_FUNCTION'):
            # Remember the fields the following instruction takes over.
            last_was_extarg = True
            starts_line = inst.starts_line
            is_jump_target = inst.is_jump_target
            offset = inst.offset
            continue
        if last_was_extarg:
            inst = inst._replace(starts_line=starts_line,
                                 is_jump_target=is_jump_target,
                                 offset=offset)
            # Only look ahead when a following instruction exists; the
            # previous "i < n" test was always true and let
            # instructions[i+1] run off the end of the list.
            if i + 1 < n:
                j = instructions[i + 1].offset
                old_prev = prev_op[j]
                # NOTE(review): prev_op[i] uses the instruction index while
                # j is a byte offset, and the bound n is an instruction
                # count -- both kept as-is; confirm the intended units.
                limit = min(n, len(prev_op))
                # Check the bound before indexing so we never read past
                # the end of prev_op.
                while j < limit and prev_op[j] == old_prev:
                    prev_op[j] = prev_op[i]
                    j += 1
            last_was_extarg = False
        new_instructions.append(inst)
    return new_instructions
class Scanner3(Scanner): class Scanner3(Scanner):
def __init__(self, version, show_asm=None, is_pypy=False): def __init__(self, version, show_asm=None, is_pypy=False):
@@ -172,6 +141,41 @@ class Scanner3(Scanner):
# FIXME: remove the above in favor of: # FIXME: remove the above in favor of:
# self.varargs_ops = frozenset(self.opc.hasvargs) # self.varargs_ops = frozenset(self.opc.hasvargs)
def remove_extended_args(self, instructions):
    """Go through instructions removing EXTENDED_ARG.

    get_instruction_bytes previously adjusted the operand values
    to account for these.

    An EXTENDED_ARG entry is dropped unless it is the final instruction
    or is immediately followed by MAKE_FUNCTION.  The instruction after a
    dropped EXTENDED_ARG inherits its ``starts_line``, ``is_jump_target``
    and ``offset`` fields.

    ``instructions`` is a sequence of namedtuple-like objects providing
    ``opname``, ``starts_line``, ``is_jump_target``, ``offset`` and
    ``_replace``.  ``self.prev_op`` is indexed by offset here and may be
    modified in place.

    Returns the new list of instructions.
    """
    prev_op = self.prev_op
    new_instructions = []
    last_was_extarg = False
    n = len(instructions)
    for i, inst in enumerate(instructions):
        if (inst.opname == 'EXTENDED_ARG' and
                i + 1 < n and
                instructions[i + 1].opname != 'MAKE_FUNCTION'):
            # Remember the fields the following instruction takes over.
            last_was_extarg = True
            starts_line = inst.starts_line
            is_jump_target = inst.is_jump_target
            offset = inst.offset
            continue
        if last_was_extarg:
            inst = inst._replace(starts_line=starts_line,
                                 is_jump_target=is_jump_target,
                                 offset=offset)
            # Only look ahead when a following instruction exists; the
            # previous "i < n" test was always true and let
            # instructions[i+1] run off the end of the list.
            if i + 1 < n:
                j = instructions[i + 1].offset
                old_prev = prev_op[j]
                # NOTE(review): prev_op[i] uses the instruction index while
                # j is a byte offset, and the bound n is an instruction
                # count -- both kept as-is; confirm the intended units.
                limit = min(n, len(prev_op))
                # Check the bound before indexing so we never read past
                # the end of prev_op.
                while j < limit and prev_op[j] == old_prev:
                    prev_op[j] = prev_op[i]
                    j += 1
            last_was_extarg = False
        new_instructions.append(inst)
    return new_instructions
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
""" """
Pick out tokens from an uncompyle6 code object, and transform them, Pick out tokens from an uncompyle6 code object, and transform them,
@@ -203,17 +207,13 @@ class Scanner3(Scanner):
# list of tokens/instructions # list of tokens/instructions
tokens = [] tokens = []
# "customize" is a dict whose keys are nonterminals # "customize" is in the process of going away here
# and the value is the argument stack entries for that
# nonterminal. The count is a little hokey. It is mostly
# not used, but sometimes it is.
# "customize" is a dict whose keys are nonterminals
customize = {} customize = {}
if self.is_pypy: if self.is_pypy:
customize['PyPy'] = 0 customize['PyPy'] = 0
self.build_lines_data(co) self.lines = self.build_lines_data(co)
self.build_prev_op() self.build_prev_op()
# FIXME: put as its own method? # FIXME: put as its own method?
@@ -221,7 +221,7 @@ class Scanner3(Scanner):
# turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'. # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
# 'LOAD_ASSERT' is used in assert statements. # 'LOAD_ASSERT' is used in assert statements.
self.load_asserts = set() self.load_asserts = set()
self.insts = remove_extended_args(list(bytecode), self.prev_op) self.insts = self.remove_extended_args(list(bytecode))
self.offset2inst_index = {} self.offset2inst_index = {}
n = len(self.insts) n = len(self.insts)
@@ -450,7 +450,7 @@ class Scanner3(Scanner):
if show_asm in ('both', 'after'): if show_asm in ('both', 'after'):
for t in tokens: for t in tokens:
print(t) print(t.format(line_prefix='L.'))
print() print()
return tokens, customize return tokens, customize
@@ -466,7 +466,7 @@ class Scanner3(Scanner):
self.linestart_offsets = set(a for (a, _) in linestarts) self.linestart_offsets = set(a for (a, _) in linestarts)
# 'List-map' which shows line number of current op and offset of # 'List-map' which shows line number of current op and offset of
# first op on following line, given offset of op as index # first op on following line, given offset of op as index
self.lines = lines = [] lines = []
LineTuple = namedtuple('LineTuple', ['l_no', 'next']) LineTuple = namedtuple('LineTuple', ['l_no', 'next'])
# Iterate through available linestarts, and fill # Iterate through available linestarts, and fill
# the data for all code offsets encountered until # the data for all code offsets encountered until
@@ -484,6 +484,7 @@ class Scanner3(Scanner):
while offset < codelen: while offset < codelen:
lines.append(LineTuple(prev_line_no, codelen)) lines.append(LineTuple(prev_line_no, codelen))
offset += 1 offset += 1
return lines
def build_prev_op(self): def build_prev_op(self):
""" """
@@ -653,27 +654,12 @@ class Scanner3(Scanner):
# Finish filling the list for last statement # Finish filling the list for last statement
slist += [codelen] * (codelen-len(slist)) slist += [codelen] * (codelen-len(slist))
def get_target(self, offset, extended_arg=0):
    """
    Return the offset that control reaches from the instruction at <offset>.

    For an opcode in JREL_OPS or JABS_OPS this is the instruction's
    argval; for anything else it is the fall-through offset.
    NOTE: extended_arg is accepted but no longer used.
    """
    instruction = self.insts[self.offset2inst_index[offset]]
    jump_opcodes = self.opc.JREL_OPS | self.opc.JABS_OPS
    if instruction.opcode not in jump_opcodes:
        # Not a jump: control simply falls through to the next instruction
        return next_offset(instruction.opcode, self.opc, instruction.offset)
    return instruction.argval
def detect_control_flow(self, offset, targets, inst_index): def detect_control_flow(self, offset, targets, inst_index):
""" """
Detect structures and their boundaries to fix optimized jumps Detect type of block structures and their boundaries to fix optimized jumps
in python2.3+ in python2.3+
""" """
# TODO: check the struct boundaries more precisely -Dan
code = self.code code = self.code
op = self.insts[inst_index].opcode op = self.insts[inst_index].opcode