You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-04 01:09:52 +08:00
Merge branch 'master' into extended_args
This commit is contained in:
@@ -17,8 +17,8 @@ import sys
|
|||||||
from uncompyle6 import PYTHON3, IS_PYPY
|
from uncompyle6 import PYTHON3, IS_PYPY
|
||||||
from uncompyle6.scanners.tok import Token
|
from uncompyle6.scanners.tok import Token
|
||||||
import xdis
|
import xdis
|
||||||
from xdis.bytecode import op_size, extended_arg_val
|
from xdis.bytecode import instruction_size, extended_arg_val, next_offset
|
||||||
from xdis.magics import py_str2float, canonic_python_version
|
from xdis.magics import canonic_python_version
|
||||||
from xdis.util import code2num
|
from xdis.util import code2num
|
||||||
|
|
||||||
# The byte code versions we support.
|
# The byte code versions we support.
|
||||||
@@ -98,12 +98,20 @@ class Scanner(object):
|
|||||||
# FIXME 0 isn't always correct
|
# FIXME 0 isn't always correct
|
||||||
return offset < self.get_target(offset, 0)
|
return offset < self.get_target(offset, 0)
|
||||||
|
|
||||||
def get_target(self, pos, op=None):
|
def get_target(self, offset, extended_arg=0):
|
||||||
if op is None:
|
"""
|
||||||
op = self.code[pos]
|
Get next instruction offset for op located at given <offset>.
|
||||||
target = self.get_argument(pos)
|
NOTE: extended_arg is no longer used
|
||||||
if op in self.opc.JREL_OPS:
|
"""
|
||||||
target += pos + 3
|
# instructions can get moved as a result of EXTENDED_ARGS removal
|
||||||
|
if offset not in self.offset2inst_index:
|
||||||
|
offset -= instruction_size(self.opc.EXTENDED_ARG, self.opc)
|
||||||
|
inst = self.insts[self.offset2inst_index[offset]]
|
||||||
|
if inst.opcode in self.opc.JREL_OPS | self.opc.JABS_OPS:
|
||||||
|
target = inst.argval
|
||||||
|
else:
|
||||||
|
# No jump offset, so use fall-through offset
|
||||||
|
target = next_offset(inst.opcode, self.opc, inst.offset)
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def get_argument(self, pos):
|
def get_argument(self, pos):
|
||||||
@@ -269,7 +277,7 @@ class Scanner(object):
|
|||||||
"""
|
"""
|
||||||
while start < end:
|
while start < end:
|
||||||
yield start
|
yield start
|
||||||
start += op_size(self.code[start], self.opc)
|
start += instruction_size(self.code[start], self.opc)
|
||||||
|
|
||||||
def remove_mid_line_ifs(self, ifs):
|
def remove_mid_line_ifs(self, ifs):
|
||||||
"""
|
"""
|
||||||
|
@@ -26,7 +26,7 @@ from collections import namedtuple
|
|||||||
from array import array
|
from array import array
|
||||||
|
|
||||||
from xdis.code import iscode
|
from xdis.code import iscode
|
||||||
from xdis.bytecode import Bytecode, op_has_argument, op_size, instruction_size
|
from xdis.bytecode import Bytecode, op_has_argument, instruction_size
|
||||||
from xdis.util import code2num
|
from xdis.util import code2num
|
||||||
|
|
||||||
from uncompyle6.scanner import Scanner
|
from uncompyle6.scanner import Scanner
|
||||||
@@ -72,13 +72,14 @@ class Scanner2(Scanner):
|
|||||||
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
|
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
|
||||||
"""
|
"""
|
||||||
Pick out tokens from an uncompyle6 code object, and transform them,
|
Pick out tokens from an uncompyle6 code object, and transform them,
|
||||||
returning a list of uncompyle6 'Token's.
|
returning a list of uncompyle6 Token's.
|
||||||
|
|
||||||
The transformations are made to assist the deparsing grammar.
|
The transformations are made to assist the deparsing grammar.
|
||||||
Specifically:
|
Specifically:
|
||||||
- various types of LOAD_CONST's are categorized in terms of what they load
|
- various types of LOAD_CONST's are categorized in terms of what they load
|
||||||
- COME_FROM instructions are added to assist parsing control structures
|
- COME_FROM instructions are added to assist parsing control structures
|
||||||
- MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
|
- MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
|
||||||
|
- some EXTENDED_ARGS instructions are removed
|
||||||
|
|
||||||
Also, when we encounter certain tokens, we add them to a set which will cause custom
|
Also, when we encounter certain tokens, we add them to a set which will cause custom
|
||||||
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
|
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
|
||||||
@@ -112,6 +113,7 @@ class Scanner2(Scanner):
|
|||||||
|
|
||||||
self.insts = list(bytecode)
|
self.insts = list(bytecode)
|
||||||
self.offset2inst_index = {}
|
self.offset2inst_index = {}
|
||||||
|
n = len(self.insts)
|
||||||
for i, inst in enumerate(self.insts):
|
for i, inst in enumerate(self.insts):
|
||||||
self.offset2inst_index[inst.offset] = i
|
self.offset2inst_index[inst.offset] = i
|
||||||
|
|
||||||
@@ -141,8 +143,10 @@ class Scanner2(Scanner):
|
|||||||
if names[self.get_argument(i+3)] == 'AssertionError':
|
if names[self.get_argument(i+3)] == 'AssertionError':
|
||||||
self.load_asserts.add(i+3)
|
self.load_asserts.add(i+3)
|
||||||
|
|
||||||
|
# Get jump targets
|
||||||
|
# Format: {target offset: [jump offsets]}
|
||||||
jump_targets = self.find_jump_targets(show_asm)
|
jump_targets = self.find_jump_targets(show_asm)
|
||||||
# contains (code, [addrRefToCode])
|
# print("XXX2", jump_targets)
|
||||||
|
|
||||||
last_stmt = self.next_stmt[0]
|
last_stmt = self.next_stmt[0]
|
||||||
i = self.next_stmt[last_stmt]
|
i = self.next_stmt[last_stmt]
|
||||||
@@ -383,7 +387,7 @@ class Scanner2(Scanner):
|
|||||||
if elem != code[i]:
|
if elem != code[i]:
|
||||||
match = False
|
match = False
|
||||||
break
|
break
|
||||||
i += op_size(code[i], self.opc)
|
i += instruction_size(code[i], self.opc)
|
||||||
|
|
||||||
if match:
|
if match:
|
||||||
i = self.prev[i]
|
i = self.prev[i]
|
||||||
@@ -629,7 +633,7 @@ class Scanner2(Scanner):
|
|||||||
'start': jump_back_offset+3,
|
'start': jump_back_offset+3,
|
||||||
'end': loop_end_offset})
|
'end': loop_end_offset})
|
||||||
elif op == self.opc.SETUP_EXCEPT:
|
elif op == self.opc.SETUP_EXCEPT:
|
||||||
start = offset + op_size(op, self.opc)
|
start = offset + instruction_size(op, self.opc)
|
||||||
target = self.get_target(offset, op)
|
target = self.get_target(offset, op)
|
||||||
end_offset = self.restrict_to_parent(target, parent)
|
end_offset = self.restrict_to_parent(target, parent)
|
||||||
if target != end_offset:
|
if target != end_offset:
|
||||||
@@ -653,7 +657,7 @@ class Scanner2(Scanner):
|
|||||||
setup_except_nest -= 1
|
setup_except_nest -= 1
|
||||||
elif self.code[end_finally_offset] == self.opc.SETUP_EXCEPT:
|
elif self.code[end_finally_offset] == self.opc.SETUP_EXCEPT:
|
||||||
setup_except_nest += 1
|
setup_except_nest += 1
|
||||||
end_finally_offset += op_size(code[end_finally_offset], self.opc)
|
end_finally_offset += instruction_size(code[end_finally_offset], self.opc)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Add the except blocks
|
# Add the except blocks
|
||||||
@@ -866,7 +870,7 @@ class Scanner2(Scanner):
|
|||||||
else:
|
else:
|
||||||
# We still have the case in 2.7 that the next instruction
|
# We still have the case in 2.7 that the next instruction
|
||||||
# is a jump to a SETUP_LOOP target.
|
# is a jump to a SETUP_LOOP target.
|
||||||
next_offset = target + op_size(self.code[target], self.opc)
|
next_offset = target + instruction_size(self.code[target], self.opc)
|
||||||
next_op = self.code[next_offset]
|
next_op = self.code[next_offset]
|
||||||
if self.op_name(next_op) == 'JUMP_FORWARD':
|
if self.op_name(next_op) == 'JUMP_FORWARD':
|
||||||
jump_target = self.get_target(next_offset, next_op)
|
jump_target = self.get_target(next_offset, next_op)
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
# Copyright (c) 2015-2018 by Rocky Bernstein
|
# Copyright (c) 2015-2018 by Rocky Bernstein
|
||||||
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
|
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
|
||||||
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
|
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
|
||||||
"""
|
"""
|
||||||
Python 3 Generic bytecode scanner/deparser
|
Python 3 Generic bytecode scanner/deparser
|
||||||
|
|
||||||
@@ -25,9 +25,8 @@ from __future__ import print_function
|
|||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from array import array
|
from array import array
|
||||||
|
|
||||||
from uncompyle6.scanner import Scanner
|
|
||||||
from xdis.code import iscode
|
from xdis.code import iscode
|
||||||
from xdis.bytecode import Bytecode, instruction_size, next_offset
|
from xdis.bytecode import Bytecode, instruction_size
|
||||||
|
|
||||||
from uncompyle6.scanner import Token, parse_fn_counts
|
from uncompyle6.scanner import Token, parse_fn_counts
|
||||||
import xdis
|
import xdis
|
||||||
@@ -35,6 +34,8 @@ import xdis
|
|||||||
# Get all the opcodes into globals
|
# Get all the opcodes into globals
|
||||||
import xdis.opcodes.opcode_33 as op3
|
import xdis.opcodes.opcode_33 as op3
|
||||||
|
|
||||||
|
from uncompyle6.scanner import Scanner
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
from uncompyle6 import PYTHON3
|
from uncompyle6 import PYTHON3
|
||||||
if PYTHON3:
|
if PYTHON3:
|
||||||
@@ -42,38 +43,6 @@ if PYTHON3:
|
|||||||
|
|
||||||
globals().update(op3.opmap)
|
globals().update(op3.opmap)
|
||||||
|
|
||||||
def remove_extended_args(instructions, prev_op):
|
|
||||||
"""Go through instructions removing extended ARG.
|
|
||||||
get_instruction_bytes previously adjusted the operand values
|
|
||||||
to account for these"""
|
|
||||||
new_instructions = []
|
|
||||||
last_was_extarg = False
|
|
||||||
n = len(instructions)
|
|
||||||
for i, inst in enumerate(instructions):
|
|
||||||
if (inst.opname == 'EXTENDED_ARG' and
|
|
||||||
i+1 < n and instructions[i+1].opname != 'MAKE_FUNCTION'):
|
|
||||||
last_was_extarg = True
|
|
||||||
starts_line = inst.starts_line
|
|
||||||
is_jump_target = inst.is_jump_target
|
|
||||||
offset = inst.offset
|
|
||||||
continue
|
|
||||||
if last_was_extarg:
|
|
||||||
new_inst= inst._replace(starts_line=starts_line,
|
|
||||||
is_jump_target=is_jump_target,
|
|
||||||
offset=offset)
|
|
||||||
inst = new_inst
|
|
||||||
if i < n:
|
|
||||||
j = instructions[i+1].offset
|
|
||||||
old_prev = prev_op[j]
|
|
||||||
while prev_op[j] == old_prev and j < n:
|
|
||||||
prev_op[j] = prev_op[i]
|
|
||||||
j += 1
|
|
||||||
|
|
||||||
last_was_extarg = False
|
|
||||||
new_instructions.append(inst)
|
|
||||||
return new_instructions
|
|
||||||
|
|
||||||
|
|
||||||
class Scanner3(Scanner):
|
class Scanner3(Scanner):
|
||||||
|
|
||||||
def __init__(self, version, show_asm=None, is_pypy=False):
|
def __init__(self, version, show_asm=None, is_pypy=False):
|
||||||
@@ -172,6 +141,41 @@ class Scanner3(Scanner):
|
|||||||
# FIXME: remove the above in favor of:
|
# FIXME: remove the above in favor of:
|
||||||
# self.varargs_ops = frozenset(self.opc.hasvargs)
|
# self.varargs_ops = frozenset(self.opc.hasvargs)
|
||||||
|
|
||||||
|
def remove_extended_args(self, instructions):
|
||||||
|
"""Go through instructions removing extended ARG.
|
||||||
|
get_instruction_bytes previously adjusted the operand values
|
||||||
|
to account for these"""
|
||||||
|
new_instructions = []
|
||||||
|
last_was_extarg = False
|
||||||
|
n = len(instructions)
|
||||||
|
for i, inst in enumerate(instructions):
|
||||||
|
if (inst.opname == 'EXTENDED_ARG' and
|
||||||
|
i+1 < n and instructions[i+1].opname != 'MAKE_FUNCTION'):
|
||||||
|
last_was_extarg = True
|
||||||
|
starts_line = inst.starts_line
|
||||||
|
is_jump_target = inst.is_jump_target
|
||||||
|
offset = inst.offset
|
||||||
|
continue
|
||||||
|
if last_was_extarg:
|
||||||
|
|
||||||
|
# j = self.stmts.index(inst.offset)
|
||||||
|
# self.lines[j] = offset
|
||||||
|
|
||||||
|
new_inst= inst._replace(starts_line=starts_line,
|
||||||
|
is_jump_target=is_jump_target,
|
||||||
|
offset=offset)
|
||||||
|
inst = new_inst
|
||||||
|
if i < n:
|
||||||
|
j = instructions[i+1].offset
|
||||||
|
old_prev = self.prev_op[j]
|
||||||
|
while self.prev_op[j] == old_prev and j < n:
|
||||||
|
self.prev_op[j] = self.prev_op[i]
|
||||||
|
j += 1
|
||||||
|
|
||||||
|
last_was_extarg = False
|
||||||
|
new_instructions.append(inst)
|
||||||
|
return new_instructions
|
||||||
|
|
||||||
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
|
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
|
||||||
"""
|
"""
|
||||||
Pick out tokens from an uncompyle6 code object, and transform them,
|
Pick out tokens from an uncompyle6 code object, and transform them,
|
||||||
@@ -203,17 +207,13 @@ class Scanner3(Scanner):
|
|||||||
# list of tokens/instructions
|
# list of tokens/instructions
|
||||||
tokens = []
|
tokens = []
|
||||||
|
|
||||||
# "customize" is a dict whose keys are nonterminals
|
# "customize" is in the process of going away here
|
||||||
# and the value is the argument stack entries for that
|
|
||||||
# nonterminal. The count is a little hokey. It is mostly
|
|
||||||
# not used, but sometimes it is.
|
|
||||||
# "customize" is a dict whose keys are nonterminals
|
|
||||||
customize = {}
|
customize = {}
|
||||||
|
|
||||||
if self.is_pypy:
|
if self.is_pypy:
|
||||||
customize['PyPy'] = 0
|
customize['PyPy'] = 0
|
||||||
|
|
||||||
self.build_lines_data(co)
|
self.lines = self.build_lines_data(co)
|
||||||
self.build_prev_op()
|
self.build_prev_op()
|
||||||
|
|
||||||
# FIXME: put as its own method?
|
# FIXME: put as its own method?
|
||||||
@@ -221,7 +221,7 @@ class Scanner3(Scanner):
|
|||||||
# turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
|
# turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
|
||||||
# 'LOAD_ASSERT' is used in assert statements.
|
# 'LOAD_ASSERT' is used in assert statements.
|
||||||
self.load_asserts = set()
|
self.load_asserts = set()
|
||||||
self.insts = remove_extended_args(list(bytecode), self.prev_op)
|
self.insts = self.remove_extended_args(list(bytecode))
|
||||||
|
|
||||||
self.offset2inst_index = {}
|
self.offset2inst_index = {}
|
||||||
n = len(self.insts)
|
n = len(self.insts)
|
||||||
@@ -450,7 +450,7 @@ class Scanner3(Scanner):
|
|||||||
|
|
||||||
if show_asm in ('both', 'after'):
|
if show_asm in ('both', 'after'):
|
||||||
for t in tokens:
|
for t in tokens:
|
||||||
print(t)
|
print(t.format(line_prefix='L.'))
|
||||||
print()
|
print()
|
||||||
return tokens, customize
|
return tokens, customize
|
||||||
|
|
||||||
@@ -466,7 +466,7 @@ class Scanner3(Scanner):
|
|||||||
self.linestart_offsets = set(a for (a, _) in linestarts)
|
self.linestart_offsets = set(a for (a, _) in linestarts)
|
||||||
# 'List-map' which shows line number of current op and offset of
|
# 'List-map' which shows line number of current op and offset of
|
||||||
# first op on following line, given offset of op as index
|
# first op on following line, given offset of op as index
|
||||||
self.lines = lines = []
|
lines = []
|
||||||
LineTuple = namedtuple('LineTuple', ['l_no', 'next'])
|
LineTuple = namedtuple('LineTuple', ['l_no', 'next'])
|
||||||
# Iterate through available linestarts, and fill
|
# Iterate through available linestarts, and fill
|
||||||
# the data for all code offsets encountered until
|
# the data for all code offsets encountered until
|
||||||
@@ -484,6 +484,7 @@ class Scanner3(Scanner):
|
|||||||
while offset < codelen:
|
while offset < codelen:
|
||||||
lines.append(LineTuple(prev_line_no, codelen))
|
lines.append(LineTuple(prev_line_no, codelen))
|
||||||
offset += 1
|
offset += 1
|
||||||
|
return lines
|
||||||
|
|
||||||
def build_prev_op(self):
|
def build_prev_op(self):
|
||||||
"""
|
"""
|
||||||
@@ -653,34 +654,19 @@ class Scanner3(Scanner):
|
|||||||
# Finish filling the list for last statement
|
# Finish filling the list for last statement
|
||||||
slist += [codelen] * (codelen-len(slist))
|
slist += [codelen] * (codelen-len(slist))
|
||||||
|
|
||||||
def get_target(self, offset, extended_arg=0):
|
|
||||||
"""
|
|
||||||
Get next instruction offset for op located at given <offset>.
|
|
||||||
NOTE: extended_arg is no longer used
|
|
||||||
"""
|
|
||||||
inst = self.insts[self.offset2inst_index[offset]]
|
|
||||||
if inst.opcode in self.opc.JREL_OPS | self.opc.JABS_OPS:
|
|
||||||
target = inst.argval
|
|
||||||
else:
|
|
||||||
# No jump offset, so use fall-through offset
|
|
||||||
target = next_offset(inst.opcode, self.opc, inst.offset)
|
|
||||||
return target
|
|
||||||
|
|
||||||
def detect_control_flow(self, offset, targets, inst_index):
|
def detect_control_flow(self, offset, targets, inst_index):
|
||||||
"""
|
"""
|
||||||
Detect structures and their boundaries to fix optimized jumps
|
Detect type of block structures and their boundaries to fix optimized jumps
|
||||||
in python2.3+
|
in python2.3+
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# TODO: check the struct boundaries more precisely -Dan
|
|
||||||
|
|
||||||
code = self.code
|
code = self.code
|
||||||
op = self.insts[inst_index].opcode
|
op = self.insts[inst_index].opcode
|
||||||
|
|
||||||
# Detect parent structure
|
# Detect parent structure
|
||||||
parent = self.structs[0]
|
parent = self.structs[0]
|
||||||
start = parent['start']
|
start = parent['start']
|
||||||
end = parent['end']
|
end = parent['end']
|
||||||
|
|
||||||
# Pick inner-most parent for our offset
|
# Pick inner-most parent for our offset
|
||||||
for struct in self.structs:
|
for struct in self.structs:
|
||||||
@@ -688,8 +674,8 @@ class Scanner3(Scanner):
|
|||||||
current_end = struct['end']
|
current_end = struct['end']
|
||||||
if ((current_start <= offset < current_end)
|
if ((current_start <= offset < current_end)
|
||||||
and (current_start >= start and current_end <= end)):
|
and (current_start >= start and current_end <= end)):
|
||||||
start = current_start
|
start = current_start
|
||||||
end = current_end
|
end = current_end
|
||||||
parent = struct
|
parent = struct
|
||||||
|
|
||||||
if op == self.opc.SETUP_LOOP:
|
if op == self.opc.SETUP_LOOP:
|
||||||
|
Reference in New Issue
Block a user