Fix up Python 2.x's ability to get code from Python 3.x's bytecode

This commit is contained in:
rocky
2015-12-27 19:34:44 -05:00
parent d774222eb1
commit 820fdb4771
13 changed files with 58 additions and 72 deletions

Binary file not shown.

View File

@@ -52,7 +52,7 @@ def load_file(filename):
fp.close() fp.close()
return co return co
def load_module(filename): def load_module(filename, code_objects={}):
""" """
load a module without importing it. load a module without importing it.
load_module(filename: string): version, magic_int, code_object load_module(filename: string): version, magic_int, code_object
@@ -100,7 +100,7 @@ def load_module(filename):
bytecode = fp.read() bytecode = fp.read()
co = marshal.loads(bytecode) co = marshal.loads(bytecode)
else: else:
co = uncompyle6.marsh.load_code(fp, magic_int) co = uncompyle6.marsh.load_code(fp, magic_int, code_objects)
pass pass
return version, timestamp, magic_int, co return version, timestamp, magic_int, co

View File

@@ -8,7 +8,7 @@ from uncompyle6.semantics import pysource
from uncompyle6.load import load_module from uncompyle6.load import load_module
def uncompyle(version, co, out=None, showasm=False, showast=False, def uncompyle(version, co, out=None, showasm=False, showast=False,
timestamp=None, showgrammar=False): timestamp=None, showgrammar=False, code_objects={}):
""" """
disassembles and deparses a given code block 'co' disassembles and deparses a given code block 'co'
""" """
@@ -26,7 +26,8 @@ def uncompyle(version, co, out=None, showasm=False, showast=False,
file=real_out) file=real_out)
try: try:
pysource.deparse_code(version, co, out, showasm, showast, showgrammar) pysource.deparse_code(version, co, out, showasm, showast, showgrammar,
code_objects=code_objects)
except pysource.WalkerError as e: except pysource.WalkerError as e:
# deparsing failed # deparsing failed
if real_out != out: if real_out != out:
@@ -40,15 +41,17 @@ def uncompyle_file(filename, outstream=None, showasm=False, showast=False,
""" """
filename = check_object_path(filename) filename = check_object_path(filename)
version, timestamp, magic_int, co = load_module(filename) code_objects = {}
version, timestamp, magic_int, co = load_module(filename, code_objects)
if type(co) == list: if type(co) == list:
for con in co: for con in co:
uncompyle(version, con, outstream, showasm, showast, uncompyle(version, con, outstream, showasm, showast,
timestamp, showgrammar) timestamp, showgrammar, code_objects=code_objects)
else: else:
uncompyle(version, co, outstream, showasm, showast, uncompyle(version, co, outstream, showasm, showast,
timestamp, showgrammar) timestamp, showgrammar, code_objects=code_objects)
co = None co = None
def main(in_base, out_base, files, codes, outfile=None, def main(in_base, out_base, files, codes, outfile=None,

View File

@@ -30,7 +30,7 @@ if PYTHON3:
def compat_str(s): def compat_str(s):
return s.decode('utf-8', errors='ignore') if PYTHON3 else str(s) return s.decode('utf-8', errors='ignore') if PYTHON3 else str(s)
def load_code(fp, magic_int): def load_code(fp, magic_int, code_objects={}):
""" """
marshal.load() written in Python. When the Python bytecode magic loaded is the marshal.load() written in Python. When the Python bytecode magic loaded is the
same magic for the running Python interpreter, we can simply use the same magic for the running Python interpreter, we can simply use the
@@ -47,9 +47,9 @@ def load_code(fp, magic_int):
raise TypeError("File %s doesn't smell like Python bytecode" % fp.name) raise TypeError("File %s doesn't smell like Python bytecode" % fp.name)
fp.seek(seek_pos) fp.seek(seek_pos)
return load_code_internal(fp, magic_int) return load_code_internal(fp, magic_int, code_objects=code_objects)
def load_code_internal(fp, magic_int, bytes_for_s=False): def load_code_internal(fp, magic_int, bytes_for_s=False, code_objects={}):
global internStrings global internStrings
b1 = fp.read(1) b1 = fp.read(1)
@@ -69,16 +69,17 @@ def load_code_internal(fp, magic_int, bytes_for_s=False):
if 3000 < magic_int < 20121: if 3000 < magic_int < 20121:
fp.read(4) fp.read(4)
co_code = load_code_internal(fp, magic_int, bytes_for_s=True) co_code = load_code_internal(fp, magic_int, bytes_for_s=True,
co_consts = load_code_internal(fp, magic_int) code_objects=code_objects)
co_names = load_code_internal(fp, magic_int) co_consts = load_code_internal(fp, magic_int, code_objects=code_objects)
co_varnames = load_code_internal(fp, magic_int) co_names = load_code_internal(fp, magic_int, code_objects=code_objects)
co_freevars = load_code_internal(fp, magic_int) co_varnames = load_code_internal(fp, magic_int, code_objects=code_objects)
co_cellvars = load_code_internal(fp, magic_int) co_freevars = load_code_internal(fp, magic_int, code_objects=code_objects)
co_filename = load_code_internal(fp, magic_int) co_cellvars = load_code_internal(fp, magic_int, code_objects=code_objects)
co_filename = load_code_internal(fp, magic_int, code_objects=code_objects)
co_name = load_code_internal(fp, magic_int) co_name = load_code_internal(fp, magic_int)
co_firstlineno = unpack('i', fp.read(4))[0] co_firstlineno = unpack('i', fp.read(4))[0]
co_lnotab = load_code_internal(fp, magic_int) co_lnotab = load_code_internal(fp, magic_int, code_objects=code_objects)
# The Python3 code object is different than Python2's which # The Python3 code object is different than Python2's which
# we are reading if we get here. # we are reading if we get here.
# Also various parameters which were strings are now # Also various parameters which were strings are now
@@ -87,13 +88,13 @@ def load_code_internal(fp, magic_int, bytes_for_s=False):
if PYTHON_MAGIC_INT > 3020: if PYTHON_MAGIC_INT > 3020:
# In later Python3 magic_ints, there is a # In later Python3 magic_ints, there is a
# kwonlyargcount parameter which we set to 0. # kwonlyargcount parameter which we set to 0.
return Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags, code = Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags,
co_code, co_code,
co_consts, co_names, co_varnames, co_filename, co_name, co_consts, co_names, co_varnames, co_filename, co_name,
co_firstlineno, bytes(co_lnotab, encoding='utf-8'), co_firstlineno, bytes(co_lnotab, encoding='utf-8'),
co_freevars, co_cellvars) co_freevars, co_cellvars)
else: else:
return Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags, code = Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags,
co_code, co_code,
co_consts, co_names, co_varnames, co_filename, co_name, co_consts, co_names, co_varnames, co_filename, co_name,
co_firstlineno, bytes(co_lnotab, encoding='utf-8'), co_firstlineno, bytes(co_lnotab, encoding='utf-8'),
@@ -107,9 +108,11 @@ def load_code_internal(fp, magic_int, bytes_for_s=False):
co_varnames = tuple([str(s) if s else None for s in co_varnames]) co_varnames = tuple([str(s) if s else None for s in co_varnames])
co_filename = str(co_filename) co_filename = str(co_filename)
co_name = str(co_name) co_name = str(co_name)
return Code(co_argcount, co_nlocals, co_stacksize, co_flags, co_code, code = Code(co_argcount, co_nlocals, co_stacksize, co_flags, co_code,
co_consts, co_names, co_varnames, co_filename, co_name, co_consts, co_names, co_varnames, co_filename, co_name,
co_firstlineno, co_lnotab, co_freevars, co_cellvars) co_firstlineno, co_lnotab, co_freevars, co_cellvars)
code_objects[str(code)] = code
return code
# const type # const type
elif marshalType == '.': elif marshalType == '.':
@@ -177,7 +180,7 @@ def load_code_internal(fp, magic_int, bytes_for_s=False):
tuplesize = unpack('i', fp.read(4))[0] tuplesize = unpack('i', fp.read(4))[0]
ret = tuple() ret = tuple()
while tuplesize > 0: while tuplesize > 0:
ret += load_code_internal(fp, magic_int), ret += load_code_internal(fp, magic_int, code_objects=code_objects),
tuplesize -= 1 tuplesize -= 1
return ret return ret
elif marshalType == '[': elif marshalType == '[':

View File

@@ -32,14 +32,6 @@ else:
from uncompyle6.opcodes import opcode_25, opcode_26, opcode_27, opcode_32, opcode_33, opcode_34 from uncompyle6.opcodes import opcode_25, opcode_26, opcode_27, opcode_32, opcode_33, opcode_34
class GenericPythonCode:
'''
Class for representing code-like objects across different versions of
Python.
'''
def __init__(self):
return
class Code: class Code:
''' '''
Class for representing code-objects. Class for representing code-objects.
@@ -321,7 +313,7 @@ if __name__ == "__main__":
import inspect, uncompyle6 import inspect, uncompyle6
co = inspect.currentframe().f_code co = inspect.currentframe().f_code
scanner = get_scanner(uncompyle6.PYTHON_VERSION) scanner = get_scanner(uncompyle6.PYTHON_VERSION)
tokens, customize = scanner.disassemble(co) tokens, customize = scanner.disassemble(co, {})
print('-' * 30) print('-' * 30)
for t in tokens: for t in tokens:
print(t) print(t)

View File

@@ -23,7 +23,7 @@ class Scanner25(scan.Scanner):
def __init__(self): def __init__(self):
scan.Scanner.__init__(self, 2.5) # check scan.Scanner.__init__(self, 2.5) # check
def disassemble(self, co, classname=None): def disassemble(self, co, classname=None, code_objects={}):
''' '''
Disassemble a code object, returning a list of 'Token'. Disassemble a code object, returning a list of 'Token'.

View File

@@ -22,7 +22,7 @@ class Scanner26(scan.Scanner):
def __init__(self): def __init__(self):
scan.Scanner.__init__(self, 2.5) # check scan.Scanner.__init__(self, 2.5) # check
def disassemble(self, co, classname=None): def disassemble(self, co, classname=None, code_objects={}):
''' '''
Disassemble a code object, returning a list of 'Token'. Disassemble a code object, returning a list of 'Token'.

View File

@@ -24,7 +24,7 @@ class Scanner27(scan.Scanner):
def __init__(self): def __init__(self):
scan.Scanner.__init__(self, 2.7) # check scan.Scanner.__init__(self, 2.7) # check
def disassemble(self, co, classname=None): def disassemble(self, co, classname=None, code_objects={}):
""" """
Disassemble a code object, returning a list of 'Token'. Disassemble a code object, returning a list of 'Token'.
The main part of this procedure is modelled after The main part of this procedure is modelled after
@@ -341,7 +341,7 @@ class Scanner27(scan.Scanner):
def detect_structure(self, pos, op=None): def detect_structure(self, pos, op=None):
''' '''
Detect type of block structures and their boundaries to fix optimizied jumps Detect type of block structures and their boundaries to fix optimized jumps
in python2.3+ in python2.3+
''' '''

View File

@@ -1,6 +1,6 @@
# Copyright (c) 2015 by Rocky Bernstein # Copyright (c) 2015 by Rocky Bernstein
""" """
Python 3 Generic ytecode scanner/deparser Python 3 Generic bytecode scanner/deparser
This overlaps various Python3's dis module, but it can be run from This overlaps various Python3's dis module, but it can be run from
Python 2 and other versions of Python. Also, we save token information Python 2 and other versions of Python. Also, we save token information
@@ -9,7 +9,7 @@ for later use in deparsing.
from __future__ import print_function from __future__ import print_function
import dis, re import dis
from collections import namedtuple from collections import namedtuple
from array import array from array import array
@@ -28,7 +28,7 @@ class Scanner3(scan.Scanner):
def __init__(self): def __init__(self):
scan.Scanner.__init__(self, PYTHON_VERSION) scan.Scanner.__init__(self, PYTHON_VERSION)
def disassemble_generic(self, co, classname=None): def disassemble_generic(self, co, classname=None, code_objects={}):
""" """
Convert code object <co> into a sequence of tokens. Convert code object <co> into a sequence of tokens.
@@ -41,6 +41,7 @@ class Scanner3(scan.Scanner):
codelen = len(code) codelen = len(code)
self.build_lines_data(co) self.build_lines_data(co)
self.build_prev_op() self.build_prev_op()
self.code_objects = code_objects
# self.lines contains (block,addrLastInstr) # self.lines contains (block,addrLastInstr)
if classname: if classname:
@@ -117,18 +118,12 @@ class Scanner3(scan.Scanner):
if op in hasconst: if op in hasconst:
const = co.co_consts[oparg] const = co.co_consts[oparg]
if not PYTHON3 and isinstance(const, str): if not PYTHON3 and isinstance(const, str):
m = re.search('^<code object (.*) ' if const in code_objects:
'at 0x(.*), file "(.*)", line (.*)>', const) const = code_objects[const]
if m: # Not sure if we can inspect.iscode() because we may be
const = scan.GenericPythonCode()
const.co_name = m.group(1)
const.co_filenaame = m.group(3)
const.co_firstlineno = m.group(4)
pass
# We can't use inspect.iscode() because we may be
# using a different version of Python than the # using a different version of Python than the
# one that this was byte-compiled on. So the code # one that this was byte-compiled on. Is probably okay,
# types may mismatch. # but we'll use hasattr instead here.
if hasattr(const, 'co_name'): if hasattr(const, 'co_name'):
oparg = const oparg = const
if const.co_name == '<lambda>': if const.co_name == '<lambda>':
@@ -425,11 +420,15 @@ class Scanner3(scan.Scanner):
def detect_structure(self, offset): def detect_structure(self, offset):
""" """
Detect structures and their boundaries to fix optimizied jumps Detect structures and their boundaries to fix optimized jumps
in python2.3+ in python2.3+
""" """
# TODO: check the struct boundaries more precisely -Dan
code = self.code code = self.code
op = code[offset] op = code[offset]
# Detect parent structure # Detect parent structure
parent = self.structs[0] parent = self.structs[0]
start = parent['start'] start = parent['start']

View File

@@ -17,8 +17,8 @@ JUMP_OPs = uncompyle6.opcodes.opcode_34.JUMP_OPs
class Scanner32(scan3.Scanner3): class Scanner32(scan3.Scanner3):
def disassemble(self, co, classname=None): def disassemble(self, co, classname=None, code_objects={}):
return self.disassemble_generic(co, classname) return self.disassemble_generic(co, classname, code_objects=code_objects)
if __name__ == "__main__": if __name__ == "__main__":
import inspect import inspect

View File

@@ -17,8 +17,8 @@ JUMP_OPs = uncompyle6.opcodes.opcode_33.JUMP_OPs
class Scanner33(scan3.Scanner3): class Scanner33(scan3.Scanner3):
def disassemble(self, co, classname=None): def disassemble(self, co, classname=None, code_objects={}):
return self.disassemble_generic(co, classname) return self.disassemble_generic(co, classname, code_objects=code_objects)
if __name__ == "__main__": if __name__ == "__main__":
import inspect import inspect

View File

@@ -28,12 +28,13 @@ from uncompyle6.opcodes.opcode_34 import *
class Scanner34(scan3.Scanner3): class Scanner34(scan3.Scanner3):
def disassemble(self, co, classname=None): def disassemble(self, co, classname=None, code_objects={}):
fn = self.disassemble_built_in if PYTHON_VERSION == 3.4 \ fn = self.disassemble_built_in if PYTHON_VERSION == 3.4 \
else self.disassemble_generic else self.disassemble_generic
return fn(co, classname) return fn(co, classname, code_objects=code_objects)
def disassemble_built_in(self, co, classname=None): def disassemble_built_in(self, co, classname=None,
code_objects={}):
# Container for tokens # Container for tokens
tokens = [] tokens = []
customize = {} customize = {}

View File

@@ -67,7 +67,7 @@ from uncompyle6 import PYTHON3
from uncompyle6.parser import get_python_parser from uncompyle6.parser import get_python_parser
from uncompyle6.parsers.astnode import AST from uncompyle6.parsers.astnode import AST
from uncompyle6.parsers.spark import GenericASTTraversal, DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG from uncompyle6.parsers.spark import GenericASTTraversal, DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
from uncompyle6.scanner import Code, GenericPythonCode, get_scanner from uncompyle6.scanner import Code, get_scanner
from uncompyle6.scanners.tok import Token, NoneToken from uncompyle6.scanners.tok import Token, NoneToken
import uncompyle6.parser as python_parser import uncompyle6.parser as python_parser
@@ -981,10 +981,6 @@ class Walker(GenericASTTraversal, object):
self.prec = 27 self.prec = 27
code = node[code_index].attr code = node[code_index].attr
if isinstance(code, GenericPythonCode):
self.write(' for i_am in ["Python 2-3 deparsing limitation"]')
return
assert inspect.iscode(code) assert inspect.iscode(code)
code = Code(code, self.scanner, self.currentclass) code = Code(code, self.scanner, self.currentclass)
@@ -1031,10 +1027,6 @@ class Walker(GenericASTTraversal, object):
self.prec = 27 self.prec = 27
code = node[code_index].attr code = node[code_index].attr
if isinstance(code, GenericPythonCode):
self.write(' for i_am in ["Python 2-3 deparsing limitation"]')
return
assert inspect.iscode(code) assert inspect.iscode(code)
code = Code(code, self.scanner, self.currentclass) code = Code(code, self.scanner, self.currentclass)
# assert isinstance(code, Code) # assert isinstance(code, Code)
@@ -1454,10 +1446,6 @@ class Walker(GenericASTTraversal, object):
defparams = node[:node[-1].attr] defparams = node[:node[-1].attr]
code = node[code_index].attr code = node[code_index].attr
if isinstance(code, GenericPythonCode):
self.write('(limitation="Cross Python 2/3 deparsing")')
return
assert inspect.iscode(code) assert inspect.iscode(code)
code = Code(code, self.scanner, self.currentclass) code = Code(code, self.scanner, self.currentclass)
# assert isinstance(code, Code) # assert isinstance(code, Code)
@@ -1631,7 +1619,7 @@ class Walker(GenericASTTraversal, object):
return ast return ast
def deparse_code(version, co, out=sys.stdout, showasm=False, showast=False, def deparse_code(version, co, out=sys.stdout, showasm=False, showast=False,
showgrammar=False): showgrammar=False, code_objects={}):
""" """
disassembles and deparses a given code block 'co' disassembles and deparses a given code block 'co'
""" """
@@ -1640,7 +1628,7 @@ def deparse_code(version, co, out=sys.stdout, showasm=False, showast=False,
# store final output stream for case of error # store final output stream for case of error
scanner = get_scanner(version) scanner = get_scanner(version)
tokens, customize = scanner.disassemble(co) tokens, customize = scanner.disassemble(co, code_objects=code_objects)
if showasm: if showasm:
for t in tokens: for t in tokens:
print(t) print(t)