Sync python2 and python3 scanner/ingest code more

This commit is contained in:
rocky
2018-02-25 09:42:04 -05:00
parent 6e2ca8f53d
commit 8c0f256b78
3 changed files with 37 additions and 46 deletions

View File

@@ -17,8 +17,8 @@ import sys
from uncompyle6 import PYTHON3, IS_PYPY from uncompyle6 import PYTHON3, IS_PYPY
from uncompyle6.scanners.tok import Token from uncompyle6.scanners.tok import Token
import xdis import xdis
from xdis.bytecode import op_size, extended_arg_val from xdis.bytecode import op_size, extended_arg_val, next_offset
from xdis.magics import py_str2float, canonic_python_version from xdis.magics import canonic_python_version
from xdis.util import code2num from xdis.util import code2num
# The byte code versions we support. # The byte code versions we support.
@@ -98,12 +98,17 @@ class Scanner(object):
# FIXME 0 isn't always correct # FIXME 0 isn't always correct
return offset < self.get_target(offset, 0) return offset < self.get_target(offset, 0)
def get_target(self, pos, op=None): def get_target(self, offset, extended_arg=0):
if op is None: """
op = self.code[pos] Get next instruction offset for op located at given <offset>.
target = self.get_argument(pos) NOTE: extended_arg is no longer used
if op in self.opc.JREL_OPS: """
target += pos + 3 inst = self.insts[self.offset2inst_index[offset]]
if inst.opcode in self.opc.JREL_OPS | self.opc.JABS_OPS:
target = inst.argval
else:
# No jump offset, so use fall-through offset
target = next_offset(inst.opcode, self.opc, inst.offset)
return target return target
def get_argument(self, pos): def get_argument(self, pos):

View File

@@ -26,7 +26,7 @@ from collections import namedtuple
from array import array from array import array
from xdis.code import iscode from xdis.code import iscode
from xdis.bytecode import Bytecode, op_has_argument, op_size, instruction_size from xdis.bytecode import Bytecode, op_has_argument, instruction_size
from xdis.util import code2num from xdis.util import code2num
from uncompyle6.scanner import Scanner from uncompyle6.scanner import Scanner
@@ -72,13 +72,14 @@ class Scanner2(Scanner):
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
""" """
Pick out tokens from an uncompyle6 code object, and transform them, Pick out tokens from an uncompyle6 code object, and transform them,
returning a list of uncompyle6 'Token's. returning a list of uncompyle6 Token's.
The transformations are made to assist the deparsing grammar. The transformations are made to assist the deparsing grammar.
Specifically: Specifically:
- various types of LOAD_CONST's are categorized in terms of what they load - various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures - COME_FROM instructions are added to assist parsing control structures
- MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- some EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
@@ -112,6 +113,7 @@ class Scanner2(Scanner):
self.insts = list(bytecode) self.insts = list(bytecode)
self.offset2inst_index = {} self.offset2inst_index = {}
n = len(self.insts)
for i, inst in enumerate(self.insts): for i, inst in enumerate(self.insts):
self.offset2inst_index[inst.offset] = i self.offset2inst_index[inst.offset] = i
@@ -141,8 +143,10 @@ class Scanner2(Scanner):
if names[self.get_argument(i+3)] == 'AssertionError': if names[self.get_argument(i+3)] == 'AssertionError':
self.load_asserts.add(i+3) self.load_asserts.add(i+3)
# Get jump targets
# Format: {target offset: [jump offsets]}
jump_targets = self.find_jump_targets(show_asm) jump_targets = self.find_jump_targets(show_asm)
# contains (code, [addrRefToCode]) # print("XXX2", jump_targets)
last_stmt = self.next_stmt[0] last_stmt = self.next_stmt[0]
i = self.next_stmt[last_stmt] i = self.next_stmt[last_stmt]
@@ -383,7 +387,7 @@ class Scanner2(Scanner):
if elem != code[i]: if elem != code[i]:
match = False match = False
break break
i += op_size(code[i], self.opc) i += instruction_size(code[i], self.opc)
if match: if match:
i = self.prev[i] i = self.prev[i]
@@ -629,7 +633,7 @@ class Scanner2(Scanner):
'start': jump_back_offset+3, 'start': jump_back_offset+3,
'end': loop_end_offset}) 'end': loop_end_offset})
elif op == self.opc.SETUP_EXCEPT: elif op == self.opc.SETUP_EXCEPT:
start = offset + op_size(op, self.opc) start = offset + instruction_size(op, self.opc)
target = self.get_target(offset, op) target = self.get_target(offset, op)
end_offset = self.restrict_to_parent(target, parent) end_offset = self.restrict_to_parent(target, parent)
if target != end_offset: if target != end_offset:
@@ -653,7 +657,7 @@ class Scanner2(Scanner):
setup_except_nest -= 1 setup_except_nest -= 1
elif self.code[end_finally_offset] == self.opc.SETUP_EXCEPT: elif self.code[end_finally_offset] == self.opc.SETUP_EXCEPT:
setup_except_nest += 1 setup_except_nest += 1
end_finally_offset += op_size(code[end_finally_offset], self.opc) end_finally_offset += instruction_size(code[end_finally_offset], self.opc)
pass pass
# Add the except blocks # Add the except blocks
@@ -866,7 +870,7 @@ class Scanner2(Scanner):
else: else:
# We still have the case in 2.7 that the next instruction # We still have the case in 2.7 that the next instruction
# is a jump to a SETUP_LOOP target. # is a jump to a SETUP_LOOP target.
next_offset = target + op_size(self.code[target], self.opc) next_offset = target + instruction_size(self.code[target], self.opc)
next_op = self.code[next_offset] next_op = self.code[next_offset]
if self.op_name(next_op) == 'JUMP_FORWARD': if self.op_name(next_op) == 'JUMP_FORWARD':
jump_target = self.get_target(next_offset, next_op) jump_target = self.get_target(next_offset, next_op)

View File

@@ -1,6 +1,6 @@
# Copyright (c) 2015-2018 by Rocky Bernstein # Copyright (c) 2015-2018 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org> # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com> # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
""" """
Python 3 Generic bytecode scanner/deparser Python 3 Generic bytecode scanner/deparser
@@ -25,9 +25,8 @@ from __future__ import print_function
from collections import namedtuple from collections import namedtuple
from array import array from array import array
from uncompyle6.scanner import Scanner
from xdis.code import iscode from xdis.code import iscode
from xdis.bytecode import Bytecode, instruction_size, next_offset from xdis.bytecode import Bytecode, instruction_size
from uncompyle6.scanner import Token, parse_fn_counts from uncompyle6.scanner import Token, parse_fn_counts
import xdis import xdis
@@ -35,6 +34,8 @@ import xdis
# Get all the opcodes into globals # Get all the opcodes into globals
import xdis.opcodes.opcode_33 as op3 import xdis.opcodes.opcode_33 as op3
from uncompyle6.scanner import Scanner
import sys import sys
from uncompyle6 import PYTHON3 from uncompyle6 import PYTHON3
if PYTHON3: if PYTHON3:
@@ -171,11 +172,7 @@ class Scanner3(Scanner):
# list of tokens/instructions # list of tokens/instructions
tokens = [] tokens = []
# "customize" is a dict whose keys are nonterminals # "customize" is in the process of going away here
# and the value is the argument stack entries for that
# nonterminal. The count is a little hoaky. It is mostly
# not used, but sometimes it is.
# "customize" is a dict whose keys are nonterminals
customize = {} customize = {}
if self.is_pypy: if self.is_pypy:
@@ -417,7 +414,7 @@ class Scanner3(Scanner):
if show_asm in ('both', 'after'): if show_asm in ('both', 'after'):
for t in tokens: for t in tokens:
print(t) print(t.format(line_prefix='L.'))
print() print()
return tokens, customize return tokens, customize
@@ -620,34 +617,19 @@ class Scanner3(Scanner):
# Finish filling the list for last statement # Finish filling the list for last statement
slist += [codelen] * (codelen-len(slist)) slist += [codelen] * (codelen-len(slist))
def get_target(self, offset, extended_arg=0):
"""
Get next instruction offset for op located at given <offset>.
NOTE: extended_arg is no longer used
"""
inst = self.insts[self.offset2inst_index[offset]]
if inst.opcode in self.opc.JREL_OPS | self.opc.JABS_OPS:
target = inst.argval
else:
# No jump offset, so use fall-through offset
target = next_offset(inst.opcode, self.opc, inst.offset)
return target
def detect_control_flow(self, offset, targets, inst_index): def detect_control_flow(self, offset, targets, inst_index):
""" """
Detect structures and their boundaries to fix optimized jumps Detect type of block structures and their boundaries to fix optimized jumps
in python2.3+ in python2.3+
""" """
# TODO: check the struct boundaries more precisely -Dan
code = self.code code = self.code
op = self.insts[inst_index].opcode op = self.insts[inst_index].opcode
# Detect parent structure # Detect parent structure
parent = self.structs[0] parent = self.structs[0]
start = parent['start'] start = parent['start']
end = parent['end'] end = parent['end']
# Pick inner-most parent for our offset # Pick inner-most parent for our offset
for struct in self.structs: for struct in self.structs:
@@ -655,8 +637,8 @@ class Scanner3(Scanner):
current_end = struct['end'] current_end = struct['end']
if ((current_start <= offset < current_end) if ((current_start <= offset < current_end)
and (current_start >= start and current_end <= end)): and (current_start >= start and current_end <= end)):
start = current_start start = current_start
end = current_end end = current_end
parent = struct parent = struct
if op == self.opc.SETUP_LOOP: if op == self.opc.SETUP_LOOP: