Sync python2 and python3 scanner/ingest code more

This commit is contained in:
rocky
2018-02-25 09:42:04 -05:00
parent 6e2ca8f53d
commit 8c0f256b78
3 changed files with 37 additions and 46 deletions

View File

@@ -17,8 +17,8 @@ import sys
from uncompyle6 import PYTHON3, IS_PYPY
from uncompyle6.scanners.tok import Token
import xdis
from xdis.bytecode import op_size, extended_arg_val
from xdis.magics import py_str2float, canonic_python_version
from xdis.bytecode import op_size, extended_arg_val, next_offset
from xdis.magics import canonic_python_version
from xdis.util import code2num
# The byte code versions we support.
@@ -98,12 +98,17 @@ class Scanner(object):
# FIXME 0 isn't always correct
return offset < self.get_target(offset, 0)
def get_target(self, pos, op=None):
if op is None:
op = self.code[pos]
target = self.get_argument(pos)
if op in self.opc.JREL_OPS:
target += pos + 3
def get_target(self, offset, extended_arg=0):
    """
    Return the offset control goes to from the instruction at <offset>.

    When the instruction's opcode is in JREL_OPS or JABS_OPS, the
    instruction's argval is returned (it is taken as the jump target).
    Otherwise the fall-through offset computed by next_offset() is
    returned.

    NOTE: extended_arg is no longer used
    """
    inst = self.insts[self.offset2inst_index[offset]]
    jump_ops = self.opc.JREL_OPS | self.opc.JABS_OPS
    if inst.opcode not in jump_ops:
        # Not a jump instruction, so control falls through to the
        # instruction that follows this one.
        return next_offset(inst.opcode, self.opc, inst.offset)
    return inst.argval
def get_argument(self, pos):

View File

@@ -26,7 +26,7 @@ from collections import namedtuple
from array import array
from xdis.code import iscode
from xdis.bytecode import Bytecode, op_has_argument, op_size, instruction_size
from xdis.bytecode import Bytecode, op_has_argument, instruction_size
from xdis.util import code2num
from uncompyle6.scanner import Scanner
@@ -72,13 +72,14 @@ class Scanner2(Scanner):
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
"""
Pick out tokens from an uncompyle6 code object, and transform them,
returning a list of uncompyle6 'Token's.
returning a list of uncompyle6 Token's.
The transformations are made to assist the deparsing grammar.
Specifically:
- various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures
- MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- some EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
@@ -112,6 +113,7 @@ class Scanner2(Scanner):
self.insts = list(bytecode)
self.offset2inst_index = {}
n = len(self.insts)
for i, inst in enumerate(self.insts):
self.offset2inst_index[inst.offset] = i
@@ -141,8 +143,10 @@ class Scanner2(Scanner):
if names[self.get_argument(i+3)] == 'AssertionError':
self.load_asserts.add(i+3)
# Get jump targets
# Format: {target offset: [jump offsets]}
jump_targets = self.find_jump_targets(show_asm)
# contains (code, [addrRefToCode])
# print("XXX2", jump_targets)
last_stmt = self.next_stmt[0]
i = self.next_stmt[last_stmt]
@@ -383,7 +387,7 @@ class Scanner2(Scanner):
if elem != code[i]:
match = False
break
i += op_size(code[i], self.opc)
i += instruction_size(code[i], self.opc)
if match:
i = self.prev[i]
@@ -629,7 +633,7 @@ class Scanner2(Scanner):
'start': jump_back_offset+3,
'end': loop_end_offset})
elif op == self.opc.SETUP_EXCEPT:
start = offset + op_size(op, self.opc)
start = offset + instruction_size(op, self.opc)
target = self.get_target(offset, op)
end_offset = self.restrict_to_parent(target, parent)
if target != end_offset:
@@ -653,7 +657,7 @@ class Scanner2(Scanner):
setup_except_nest -= 1
elif self.code[end_finally_offset] == self.opc.SETUP_EXCEPT:
setup_except_nest += 1
end_finally_offset += op_size(code[end_finally_offset], self.opc)
end_finally_offset += instruction_size(code[end_finally_offset], self.opc)
pass
# Add the except blocks
@@ -866,7 +870,7 @@ class Scanner2(Scanner):
else:
# We still have the case in 2.7 that the next instruction
# is a jump to a SETUP_LOOP target.
next_offset = target + op_size(self.code[target], self.opc)
next_offset = target + instruction_size(self.code[target], self.opc)
next_op = self.code[next_offset]
if self.op_name(next_op) == 'JUMP_FORWARD':
jump_target = self.get_target(next_offset, next_op)

View File

@@ -1,6 +1,6 @@
# Copyright (c) 2015-2018 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 2015-2018 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
"""
Python 3 Generic bytecode scanner/deparser
@@ -25,9 +25,8 @@ from __future__ import print_function
from collections import namedtuple
from array import array
from uncompyle6.scanner import Scanner
from xdis.code import iscode
from xdis.bytecode import Bytecode, instruction_size, next_offset
from xdis.bytecode import Bytecode, instruction_size
from uncompyle6.scanner import Token, parse_fn_counts
import xdis
@@ -35,6 +34,8 @@ import xdis
# Get all the opcodes into globals
import xdis.opcodes.opcode_33 as op3
from uncompyle6.scanner import Scanner
import sys
from uncompyle6 import PYTHON3
if PYTHON3:
@@ -171,11 +172,7 @@ class Scanner3(Scanner):
# list of tokens/instructions
tokens = []
# "customize" is a dict whose keys are nonterminals
# and the value is the argument stack entries for that
# nonterminal. The count is a little hoaky. It is mostly
# not used, but sometimes it is.
# "customize" is a dict whose keys are nonterminals
# "customize" is in the process of going away here
customize = {}
if self.is_pypy:
@@ -417,7 +414,7 @@ class Scanner3(Scanner):
if show_asm in ('both', 'after'):
for t in tokens:
print(t)
print(t.format(line_prefix='L.'))
print()
return tokens, customize
@@ -620,34 +617,19 @@ class Scanner3(Scanner):
# Finish filling the list for last statement
slist += [codelen] * (codelen-len(slist))
def get_target(self, offset, extended_arg=0):
    """
    Return the offset control goes to from the instruction at <offset>.

    When the instruction's opcode is in JREL_OPS or JABS_OPS, the
    instruction's argval is returned (it is taken as the jump target).
    Otherwise the fall-through offset computed by next_offset() is
    returned.

    NOTE: extended_arg is no longer used
    """
    inst = self.insts[self.offset2inst_index[offset]]
    jump_ops = self.opc.JREL_OPS | self.opc.JABS_OPS
    if inst.opcode not in jump_ops:
        # Not a jump instruction, so control falls through to the
        # instruction that follows this one.
        return next_offset(inst.opcode, self.opc, inst.offset)
    return inst.argval
def detect_control_flow(self, offset, targets, inst_index):
"""
Detect structures and their boundaries to fix optimized jumps
Detect type of block structures and their boundaries to fix optimized jumps
in python2.3+
"""
# TODO: check the struct boundaries more precisely -Dan
code = self.code
op = self.insts[inst_index].opcode
# Detect parent structure
parent = self.structs[0]
start = parent['start']
end = parent['end']
start = parent['start']
end = parent['end']
# Pick inner-most parent for our offset
for struct in self.structs:
@@ -655,8 +637,8 @@ class Scanner3(Scanner):
current_end = struct['end']
if ((current_start <= offset < current_end)
and (current_start >= start and current_end <= end)):
start = current_start
end = current_end
start = current_start
end = current_end
parent = struct
if op == self.opc.SETUP_LOOP: