You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-03 00:45:53 +08:00
Fix up Python 2.x's ability to get code from Python 3.x's bytecode
This commit is contained in:
BIN
test/bytecode_3.2/05_list_comprehension.pyc
Normal file
BIN
test/bytecode_3.2/05_list_comprehension.pyc
Normal file
Binary file not shown.
@@ -52,7 +52,7 @@ def load_file(filename):
|
||||
fp.close()
|
||||
return co
|
||||
|
||||
def load_module(filename):
|
||||
def load_module(filename, code_objects={}):
|
||||
"""
|
||||
load a module without importing it.
|
||||
load_module(filename: string): version, magic_int, code_object
|
||||
@@ -100,7 +100,7 @@ def load_module(filename):
|
||||
bytecode = fp.read()
|
||||
co = marshal.loads(bytecode)
|
||||
else:
|
||||
co = uncompyle6.marsh.load_code(fp, magic_int)
|
||||
co = uncompyle6.marsh.load_code(fp, magic_int, code_objects)
|
||||
pass
|
||||
|
||||
return version, timestamp, magic_int, co
|
||||
|
@@ -8,7 +8,7 @@ from uncompyle6.semantics import pysource
|
||||
from uncompyle6.load import load_module
|
||||
|
||||
def uncompyle(version, co, out=None, showasm=False, showast=False,
|
||||
timestamp=None, showgrammar=False):
|
||||
timestamp=None, showgrammar=False, code_objects={}):
|
||||
"""
|
||||
disassembles and deparses a given code block 'co'
|
||||
"""
|
||||
@@ -26,7 +26,8 @@ def uncompyle(version, co, out=None, showasm=False, showast=False,
|
||||
file=real_out)
|
||||
|
||||
try:
|
||||
pysource.deparse_code(version, co, out, showasm, showast, showgrammar)
|
||||
pysource.deparse_code(version, co, out, showasm, showast, showgrammar,
|
||||
code_objects=code_objects)
|
||||
except pysource.WalkerError as e:
|
||||
# deparsing failed
|
||||
if real_out != out:
|
||||
@@ -40,15 +41,17 @@ def uncompyle_file(filename, outstream=None, showasm=False, showast=False,
|
||||
"""
|
||||
|
||||
filename = check_object_path(filename)
|
||||
version, timestamp, magic_int, co = load_module(filename)
|
||||
code_objects = {}
|
||||
version, timestamp, magic_int, co = load_module(filename, code_objects)
|
||||
|
||||
|
||||
if type(co) == list:
|
||||
for con in co:
|
||||
uncompyle(version, con, outstream, showasm, showast,
|
||||
timestamp, showgrammar)
|
||||
timestamp, showgrammar, code_objects=code_objects)
|
||||
else:
|
||||
uncompyle(version, co, outstream, showasm, showast,
|
||||
timestamp, showgrammar)
|
||||
timestamp, showgrammar, code_objects=code_objects)
|
||||
co = None
|
||||
|
||||
def main(in_base, out_base, files, codes, outfile=None,
|
||||
|
@@ -30,7 +30,7 @@ if PYTHON3:
|
||||
def compat_str(s):
|
||||
return s.decode('utf-8', errors='ignore') if PYTHON3 else str(s)
|
||||
|
||||
def load_code(fp, magic_int):
|
||||
def load_code(fp, magic_int, code_objects={}):
|
||||
"""
|
||||
marshal.load() written in Python. When the Python bytecode magic loaded is the
|
||||
same magic for the running Python interpreter, we can simply use the
|
||||
@@ -47,9 +47,9 @@ def load_code(fp, magic_int):
|
||||
raise TypeError("File %s doesn't smell like Python bytecode" % fp.name)
|
||||
|
||||
fp.seek(seek_pos)
|
||||
return load_code_internal(fp, magic_int)
|
||||
return load_code_internal(fp, magic_int, code_objects=code_objects)
|
||||
|
||||
def load_code_internal(fp, magic_int, bytes_for_s=False):
|
||||
def load_code_internal(fp, magic_int, bytes_for_s=False, code_objects={}):
|
||||
global internStrings
|
||||
|
||||
b1 = fp.read(1)
|
||||
@@ -69,16 +69,17 @@ def load_code_internal(fp, magic_int, bytes_for_s=False):
|
||||
if 3000 < magic_int < 20121:
|
||||
fp.read(4)
|
||||
|
||||
co_code = load_code_internal(fp, magic_int, bytes_for_s=True)
|
||||
co_consts = load_code_internal(fp, magic_int)
|
||||
co_names = load_code_internal(fp, magic_int)
|
||||
co_varnames = load_code_internal(fp, magic_int)
|
||||
co_freevars = load_code_internal(fp, magic_int)
|
||||
co_cellvars = load_code_internal(fp, magic_int)
|
||||
co_filename = load_code_internal(fp, magic_int)
|
||||
co_code = load_code_internal(fp, magic_int, bytes_for_s=True,
|
||||
code_objects=code_objects)
|
||||
co_consts = load_code_internal(fp, magic_int, code_objects=code_objects)
|
||||
co_names = load_code_internal(fp, magic_int, code_objects=code_objects)
|
||||
co_varnames = load_code_internal(fp, magic_int, code_objects=code_objects)
|
||||
co_freevars = load_code_internal(fp, magic_int, code_objects=code_objects)
|
||||
co_cellvars = load_code_internal(fp, magic_int, code_objects=code_objects)
|
||||
co_filename = load_code_internal(fp, magic_int, code_objects=code_objects)
|
||||
co_name = load_code_internal(fp, magic_int)
|
||||
co_firstlineno = unpack('i', fp.read(4))[0]
|
||||
co_lnotab = load_code_internal(fp, magic_int)
|
||||
co_lnotab = load_code_internal(fp, magic_int, code_objects=code_objects)
|
||||
# The Python3 code object is different than Python2's which
|
||||
# we are reading if we get here.
|
||||
# Also various parameters which were strings are now
|
||||
@@ -87,13 +88,13 @@ def load_code_internal(fp, magic_int, bytes_for_s=False):
|
||||
if PYTHON_MAGIC_INT > 3020:
|
||||
# In later Python3 magic_ints, there is a
|
||||
# kwonlyargcount parameter which we set to 0.
|
||||
return Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags,
|
||||
code = Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags,
|
||||
co_code,
|
||||
co_consts, co_names, co_varnames, co_filename, co_name,
|
||||
co_firstlineno, bytes(co_lnotab, encoding='utf-8'),
|
||||
co_freevars, co_cellvars)
|
||||
else:
|
||||
return Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags,
|
||||
code = Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags,
|
||||
co_code,
|
||||
co_consts, co_names, co_varnames, co_filename, co_name,
|
||||
co_firstlineno, bytes(co_lnotab, encoding='utf-8'),
|
||||
@@ -107,9 +108,11 @@ def load_code_internal(fp, magic_int, bytes_for_s=False):
|
||||
co_varnames = tuple([str(s) if s else None for s in co_varnames])
|
||||
co_filename = str(co_filename)
|
||||
co_name = str(co_name)
|
||||
return Code(co_argcount, co_nlocals, co_stacksize, co_flags, co_code,
|
||||
code = Code(co_argcount, co_nlocals, co_stacksize, co_flags, co_code,
|
||||
co_consts, co_names, co_varnames, co_filename, co_name,
|
||||
co_firstlineno, co_lnotab, co_freevars, co_cellvars)
|
||||
code_objects[str(code)] = code
|
||||
return code
|
||||
|
||||
# const type
|
||||
elif marshalType == '.':
|
||||
@@ -177,7 +180,7 @@ def load_code_internal(fp, magic_int, bytes_for_s=False):
|
||||
tuplesize = unpack('i', fp.read(4))[0]
|
||||
ret = tuple()
|
||||
while tuplesize > 0:
|
||||
ret += load_code_internal(fp, magic_int),
|
||||
ret += load_code_internal(fp, magic_int, code_objects=code_objects),
|
||||
tuplesize -= 1
|
||||
return ret
|
||||
elif marshalType == '[':
|
||||
|
@@ -32,14 +32,6 @@ else:
|
||||
from uncompyle6.opcodes import opcode_25, opcode_26, opcode_27, opcode_32, opcode_33, opcode_34
|
||||
|
||||
|
||||
class GenericPythonCode:
|
||||
'''
|
||||
Class for representing code-like objects across different versions of
|
||||
Python.
|
||||
'''
|
||||
def __init__(self):
|
||||
return
|
||||
|
||||
class Code:
|
||||
'''
|
||||
Class for representing code-objects.
|
||||
@@ -321,7 +313,7 @@ if __name__ == "__main__":
|
||||
import inspect, uncompyle6
|
||||
co = inspect.currentframe().f_code
|
||||
scanner = get_scanner(uncompyle6.PYTHON_VERSION)
|
||||
tokens, customize = scanner.disassemble(co)
|
||||
tokens, customize = scanner.disassemble(co, {})
|
||||
print('-' * 30)
|
||||
for t in tokens:
|
||||
print(t)
|
||||
|
@@ -23,7 +23,7 @@ class Scanner25(scan.Scanner):
|
||||
def __init__(self):
|
||||
scan.Scanner.__init__(self, 2.5) # check
|
||||
|
||||
def disassemble(self, co, classname=None):
|
||||
def disassemble(self, co, classname=None, code_objects={}):
|
||||
'''
|
||||
Disassemble a code object, returning a list of 'Token'.
|
||||
|
||||
|
@@ -22,7 +22,7 @@ class Scanner26(scan.Scanner):
|
||||
def __init__(self):
|
||||
scan.Scanner.__init__(self, 2.5) # check
|
||||
|
||||
def disassemble(self, co, classname=None):
|
||||
def disassemble(self, co, classname=None, code_objects={}):
|
||||
'''
|
||||
Disassemble a code object, returning a list of 'Token'.
|
||||
|
||||
|
@@ -24,7 +24,7 @@ class Scanner27(scan.Scanner):
|
||||
def __init__(self):
|
||||
scan.Scanner.__init__(self, 2.7) # check
|
||||
|
||||
def disassemble(self, co, classname=None):
|
||||
def disassemble(self, co, classname=None, code_objects={}):
|
||||
"""
|
||||
Disassemble a code object, returning a list of 'Token'.
|
||||
The main part of this procedure is modelled after
|
||||
@@ -341,7 +341,7 @@ class Scanner27(scan.Scanner):
|
||||
|
||||
def detect_structure(self, pos, op=None):
|
||||
'''
|
||||
Detect type of block structures and their boundaries to fix optimizied jumps
|
||||
Detect type of block structures and their boundaries to fix optimized jumps
|
||||
in python2.3+
|
||||
'''
|
||||
|
||||
|
@@ -1,6 +1,6 @@
|
||||
# Copyright (c) 2015 by Rocky Bernstein
|
||||
"""
|
||||
Python 3 Generic ytecode scanner/deparser
|
||||
Python 3 Generic bytecode scanner/deparser
|
||||
|
||||
This overlaps various Python3's dis module, but it can be run from
|
||||
Python 2 and other versions of Python. Also, we save token information
|
||||
@@ -9,7 +9,7 @@ for later use in deparsing.
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import dis, re
|
||||
import dis
|
||||
from collections import namedtuple
|
||||
from array import array
|
||||
|
||||
@@ -28,7 +28,7 @@ class Scanner3(scan.Scanner):
|
||||
def __init__(self):
|
||||
scan.Scanner.__init__(self, PYTHON_VERSION)
|
||||
|
||||
def disassemble_generic(self, co, classname=None):
|
||||
def disassemble_generic(self, co, classname=None, code_objects={}):
|
||||
"""
|
||||
Convert code object <co> into a sequence of tokens.
|
||||
|
||||
@@ -41,6 +41,7 @@ class Scanner3(scan.Scanner):
|
||||
codelen = len(code)
|
||||
self.build_lines_data(co)
|
||||
self.build_prev_op()
|
||||
self.code_objects = code_objects
|
||||
|
||||
# self.lines contains (block,addrLastInstr)
|
||||
if classname:
|
||||
@@ -117,18 +118,12 @@ class Scanner3(scan.Scanner):
|
||||
if op in hasconst:
|
||||
const = co.co_consts[oparg]
|
||||
if not PYTHON3 and isinstance(const, str):
|
||||
m = re.search('^<code object (.*) '
|
||||
'at 0x(.*), file "(.*)", line (.*)>', const)
|
||||
if m:
|
||||
const = scan.GenericPythonCode()
|
||||
const.co_name = m.group(1)
|
||||
const.co_filenaame = m.group(3)
|
||||
const.co_firstlineno = m.group(4)
|
||||
pass
|
||||
# We can't use inspect.iscode() because we may be
|
||||
if const in code_objects:
|
||||
const = code_objects[const]
|
||||
# Not sure if we can use inspect.iscode() because we may be
|
||||
# using a different version of Python than the
|
||||
# one that this was byte-compiled on. So the code
|
||||
# types may mismatch.
|
||||
# one that this was byte-compiled on. It is probably okay,
|
||||
# but we'll use hasattr instead here.
|
||||
if hasattr(const, 'co_name'):
|
||||
oparg = const
|
||||
if const.co_name == '<lambda>':
|
||||
@@ -425,11 +420,15 @@ class Scanner3(scan.Scanner):
|
||||
|
||||
def detect_structure(self, offset):
|
||||
"""
|
||||
Detect structures and their boundaries to fix optimizied jumps
|
||||
Detect structures and their boundaries to fix optimized jumps
|
||||
in python2.3+
|
||||
"""
|
||||
|
||||
# TODO: check the struct boundaries more precisely -Dan
|
||||
|
||||
code = self.code
|
||||
op = code[offset]
|
||||
|
||||
# Detect parent structure
|
||||
parent = self.structs[0]
|
||||
start = parent['start']
|
||||
|
@@ -17,8 +17,8 @@ JUMP_OPs = uncompyle6.opcodes.opcode_34.JUMP_OPs
|
||||
|
||||
class Scanner32(scan3.Scanner3):
|
||||
|
||||
def disassemble(self, co, classname=None):
|
||||
return self.disassemble_generic(co, classname)
|
||||
def disassemble(self, co, classname=None, code_objects={}):
|
||||
return self.disassemble_generic(co, classname, code_objects=code_objects)
|
||||
|
||||
if __name__ == "__main__":
|
||||
import inspect
|
||||
|
@@ -17,8 +17,8 @@ JUMP_OPs = uncompyle6.opcodes.opcode_33.JUMP_OPs
|
||||
|
||||
class Scanner33(scan3.Scanner3):
|
||||
|
||||
def disassemble(self, co, classname=None):
|
||||
return self.disassemble_generic(co, classname)
|
||||
def disassemble(self, co, classname=None, code_objects={}):
|
||||
return self.disassemble_generic(co, classname, code_objects=code_objects)
|
||||
|
||||
if __name__ == "__main__":
|
||||
import inspect
|
||||
|
@@ -28,12 +28,13 @@ from uncompyle6.opcodes.opcode_34 import *
|
||||
|
||||
class Scanner34(scan3.Scanner3):
|
||||
|
||||
def disassemble(self, co, classname=None):
|
||||
def disassemble(self, co, classname=None, code_objects={}):
|
||||
fn = self.disassemble_built_in if PYTHON_VERSION == 3.4 \
|
||||
else self.disassemble_generic
|
||||
return fn(co, classname)
|
||||
return fn(co, classname, code_objects=code_objects)
|
||||
|
||||
def disassemble_built_in(self, co, classname=None):
|
||||
def disassemble_built_in(self, co, classname=None,
|
||||
code_objects={}):
|
||||
# Container for tokens
|
||||
tokens = []
|
||||
customize = {}
|
||||
|
@@ -67,7 +67,7 @@ from uncompyle6 import PYTHON3
|
||||
from uncompyle6.parser import get_python_parser
|
||||
from uncompyle6.parsers.astnode import AST
|
||||
from uncompyle6.parsers.spark import GenericASTTraversal, DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
|
||||
from uncompyle6.scanner import Code, GenericPythonCode, get_scanner
|
||||
from uncompyle6.scanner import Code, get_scanner
|
||||
from uncompyle6.scanners.tok import Token, NoneToken
|
||||
import uncompyle6.parser as python_parser
|
||||
|
||||
@@ -981,10 +981,6 @@ class Walker(GenericASTTraversal, object):
|
||||
self.prec = 27
|
||||
code = node[code_index].attr
|
||||
|
||||
if isinstance(code, GenericPythonCode):
|
||||
self.write(' for i_am in ["Python 2-3 deparsing limitation"]')
|
||||
return
|
||||
|
||||
assert inspect.iscode(code)
|
||||
code = Code(code, self.scanner, self.currentclass)
|
||||
|
||||
@@ -1031,10 +1027,6 @@ class Walker(GenericASTTraversal, object):
|
||||
self.prec = 27
|
||||
code = node[code_index].attr
|
||||
|
||||
if isinstance(code, GenericPythonCode):
|
||||
self.write(' for i_am in ["Python 2-3 deparsing limitation"]')
|
||||
return
|
||||
|
||||
assert inspect.iscode(code)
|
||||
code = Code(code, self.scanner, self.currentclass)
|
||||
# assert isinstance(code, Code)
|
||||
@@ -1454,10 +1446,6 @@ class Walker(GenericASTTraversal, object):
|
||||
defparams = node[:node[-1].attr]
|
||||
code = node[code_index].attr
|
||||
|
||||
if isinstance(code, GenericPythonCode):
|
||||
self.write('(limitation="Cross Python 2/3 deparsing")')
|
||||
return
|
||||
|
||||
assert inspect.iscode(code)
|
||||
code = Code(code, self.scanner, self.currentclass)
|
||||
# assert isinstance(code, Code)
|
||||
@@ -1631,7 +1619,7 @@ class Walker(GenericASTTraversal, object):
|
||||
return ast
|
||||
|
||||
def deparse_code(version, co, out=sys.stdout, showasm=False, showast=False,
|
||||
showgrammar=False):
|
||||
showgrammar=False, code_objects={}):
|
||||
"""
|
||||
disassembles and deparses a given code block 'co'
|
||||
"""
|
||||
@@ -1640,7 +1628,7 @@ def deparse_code(version, co, out=sys.stdout, showasm=False, showast=False,
|
||||
# store final output stream for case of error
|
||||
scanner = get_scanner(version)
|
||||
|
||||
tokens, customize = scanner.disassemble(co)
|
||||
tokens, customize = scanner.disassemble(co, code_objects=code_objects)
|
||||
if showasm:
|
||||
for t in tokens:
|
||||
print(t)
|
||||
|
Reference in New Issue
Block a user