You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-02 16:44:46 +08:00
README.rst: note addition of pydisassemble
Remove duplicate disassembly printing from scanners and put common code in caller(s). Show source-code line numbers in disassembly output and fix alignment of byte offsets. disas.py: workaround Python 2/3 different layouts before we get to bytecodes in a code object.
This commit is contained in:
2
PKG-INFO
2
PKG-INFO
@@ -2,7 +2,7 @@ Metadata-Version: 2.0
|
||||
Name: uncompyle6
|
||||
Version: 2.0.1
|
||||
Summary: Python byte-code to source-code converter
|
||||
Home-page: http://github.com/rocky/uncompyle6
|
||||
Home-page: http://github.com/rocky/python-uncompyle6
|
||||
Author: Rocky
|
||||
Author-email: rb@dustyfeet.com
|
||||
License: GPLv3
|
||||
|
48
README.rst
48
README.rst
@@ -1,8 +1,7 @@
|
||||
uncompyle6
|
||||
==========
|
||||
|
||||
A CPython 2.x and possibly 3.x byte-code disassembler and
|
||||
a decompiler.
|
||||
A Python 2.x and possibly 3.x byte-code decompiler.
|
||||
|
||||
This is written in Python 2.7 but is Python3 compatible.
|
||||
|
||||
@@ -10,46 +9,34 @@ This is written in Python 2.7 but is Python3 compatible.
|
||||
Introduction
|
||||
------------
|
||||
|
||||
'uncompyle6' converts Python byte-code back into equivalent Python
|
||||
_uncompyle6_ converts Python byte-code back into equivalent Python
|
||||
source code. It accepts byte-codes from Python version 2.5 to 2.7.
|
||||
It runs on Python 2.7 and, with a little more work, on Python 3 as well.
|
||||
It runs on Python 2.7 and with a little more work Python 3.
|
||||
|
||||
The generated source is fairly readable: docstrings, lists, tuples and
|
||||
hashes are somewhat pretty-printed.
|
||||
|
||||
'uncompyle6' is based on John Aycock's generic small languages
|
||||
compiler 'spark' (http://pages.cpsc.ucalgary.ca/~aycock/spark/) and his
|
||||
_uncompyle6_ is based on John Aycock's generic small languages
|
||||
compiler 'spark' (http://www.csr.uvic.ca/~aycock/python/) and his
|
||||
prior work on a tool called 'decompyle'. This was improved by Hartmut Goebel
|
||||
http://www.crazy-compilers.com
|
||||
`http://www.crazy-compilers.com/`_
|
||||
|
||||
In order to decompile a program, we need to be able to disassemble
|
||||
it first. And this process may be useful in and of itself. So we provide a
|
||||
utility for just that piece as well.
|
||||
# Additional note (3 July 2004):
|
||||
|
||||
'pydisassemble' gives a CPython disassembly of Python byte-code. How
|
||||
is this different than what Python already provides via the "dis"
|
||||
module? Here, we can cross disassemble bytecodes from different
|
||||
versions of CPython than the version of CPython that is doing the
|
||||
disassembly.
|
||||
This software is no longer available from the original website.
|
||||
However http://www.crazy-compilers.com/decompyle/ provides a
|
||||
decompilation service.
|
||||
|
||||
'pydisassemble' works on the same versions as 'uncompyle6' and handles the
|
||||
same sets of CPython bytecode versions.
|
||||
|
||||
*Note from 3 July 2004:*
|
||||
|
||||
This software was originally available from http://www.crazy-compilers.com;
|
||||
http://www.crazy-compilers.com/decompyle/ provides a decompilation service.
|
||||
|
||||
*Note (5 June 2012):*
|
||||
# Additional note (5 June 2012):
|
||||
|
||||
The decompilation of python bytecode 2.5 & 2.6 is based on the work of
|
||||
Eloi Vanderbeken. bytecode is translated to a pseudo 2.7 python bytecode
|
||||
and then decompiled.
|
||||
|
||||
*Note (12 Dec 2016):*
|
||||
# Additional note (12 Dec 2016):
|
||||
|
||||
This project will be used to deparse fragments of code inside my
|
||||
trepan_ debuggers_. For that, I need to record text fragments for all
|
||||
I will be using this to deparse fragments of code inside my trepan_
|
||||
debuggers_. For that, I need to record text fragments for all
|
||||
byte-code offsets (of interest). This purpose although largely
|
||||
compatible with the original intention is yet a little bit different.
|
||||
|
||||
@@ -80,8 +67,6 @@ Installation
|
||||
|
||||
This uses setup.py, so it follows the standard Python routine:
|
||||
|
||||
::
|
||||
|
||||
python setup.py install # may need sudo
|
||||
# or if you have pyenv:
|
||||
python setup.py develop
|
||||
@@ -103,18 +88,15 @@ Usage
|
||||
|
||||
Run
|
||||
|
||||
::
|
||||
|
||||
./scripts/uncompyle6 -h
|
||||
|
||||
|
||||
for usage help
|
||||
|
||||
|
||||
Known Bugs/Restrictions
|
||||
-----------------------
|
||||
|
||||
Support for Python 3 bytecode and syntax is lacking.
|
||||
Support Python 3 bytecode and syntax is lacking.
|
||||
|
||||
.. _trepan: https://pypi.python.org/pypi/trepan
|
||||
.. _debuggers: https://pypi.python.org/pypi/trepan3k
|
||||
|
@@ -38,11 +38,11 @@ def disassemble_code(version, co, out=None):
|
||||
assert isinstance(co, types.CodeType)
|
||||
|
||||
# store final output stream for case of error
|
||||
__real_out = out or sys.stdout
|
||||
print('# Python %s' % version, file=__real_out)
|
||||
real_out = out or sys.stdout
|
||||
print('# Python %s' % version, file=real_out)
|
||||
if co.co_filename:
|
||||
print('# Embedded file name: %s' % co.co_filename,
|
||||
file=__real_out)
|
||||
file=real_out)
|
||||
|
||||
# Pick up appropriate scanner
|
||||
if version == 2.7:
|
||||
@@ -63,6 +63,11 @@ def disassemble_code(version, co, out=None):
|
||||
scanner.setShowAsm(True, out)
|
||||
tokens, customize = scanner.disassemble(co)
|
||||
|
||||
for t in tokens:
|
||||
print(t, file=real_out)
|
||||
print(file=out)
|
||||
|
||||
|
||||
|
||||
def disassemble_file(filename, outstream=None, showasm=False, showast=False):
|
||||
"""
|
||||
|
@@ -81,11 +81,11 @@ def load_module(filename):
|
||||
|
||||
# print version
|
||||
fp.read(4) # timestamp
|
||||
magic_int = magics.magic2int(magic)
|
||||
|
||||
if version == PYTHON_VERSION:
|
||||
magic_int = magics.magic2int(magic)
|
||||
# Note: a higher magic number doesn't necessarily mean a later
|
||||
# release. At Pyton 3.0 the magic number decreased
|
||||
# release. At Python 3.0 the magic number decreased
|
||||
# significantly. Hence the range below. Also note
|
||||
# inclusion of the size info, occurred within a
|
||||
# Python major/minor release. Hence the test on the
|
||||
@@ -95,7 +95,7 @@ def load_module(filename):
|
||||
bytecode = fp.read()
|
||||
co = marshal.loads(bytecode)
|
||||
else:
|
||||
co = disas.load(fp)
|
||||
co = disas.load(fp, magic_int)
|
||||
pass
|
||||
|
||||
return version, co
|
||||
@@ -108,11 +108,11 @@ def uncompyle(version, co, out=None, showasm=False, showast=False):
|
||||
assert isinstance(co, types.CodeType)
|
||||
|
||||
# store final output stream for case of error
|
||||
__real_out = out or sys.stdout
|
||||
print('# Python %s' % version, file=__real_out)
|
||||
real_out = out or sys.stdout
|
||||
print('# Python %s' % version, file=real_out)
|
||||
if co.co_filename:
|
||||
print('# Embedded file name: %s' % co.co_filename,
|
||||
file=__real_out)
|
||||
file=real_out)
|
||||
|
||||
# Pick up appropriate scanner
|
||||
if version == 2.7:
|
||||
@@ -133,12 +133,17 @@ def uncompyle(version, co, out=None, showasm=False, showast=False):
|
||||
scanner.setShowAsm(showasm, out)
|
||||
tokens, customize = scanner.disassemble(co)
|
||||
|
||||
if showasm:
|
||||
for t in tokens:
|
||||
print(t, file=real_out)
|
||||
print(file=out)
|
||||
|
||||
# Build AST from disassembly.
|
||||
walk = walker.Walker(out, scanner, showast=showast)
|
||||
try:
|
||||
ast = walk.build_ast(tokens, customize)
|
||||
except walker.ParserError as e : # parser failed, dump disassembly
|
||||
print(e, file=__real_out)
|
||||
print(e, file=real_out)
|
||||
raise
|
||||
del tokens # save memory
|
||||
|
||||
|
@@ -36,7 +36,7 @@ def marshalLoad(fp):
|
||||
internStrings = []
|
||||
return load(fp)
|
||||
|
||||
def load(fp):
|
||||
def load(fp, magic_int):
|
||||
"""
|
||||
marshal.load() written in Python. When the Python bytecode magic loaded is the
|
||||
same magic for the running Python interpreter, we can simply use the
|
||||
@@ -51,27 +51,34 @@ def load(fp):
|
||||
if marshalType == 'c':
|
||||
Code = types.CodeType
|
||||
|
||||
# FIXME If 'i' is deprecated, what would we use?
|
||||
co_argcount = unpack('i', fp.read(4))[0]
|
||||
co_nlocals = unpack('i', fp.read(4))[0]
|
||||
co_stacksize = unpack('i', fp.read(4))[0]
|
||||
co_flags = unpack('i', fp.read(4))[0]
|
||||
co_code = load(fp)
|
||||
co_consts = load(fp)
|
||||
co_names = load(fp)
|
||||
co_varnames = load(fp)
|
||||
co_freevars = load(fp)
|
||||
co_cellvars = load(fp)
|
||||
co_filename = load(fp)
|
||||
co_name = load(fp)
|
||||
# FIXME: somewhere between Python 2.7 and python 3.2 there's
|
||||
# another 4 bytes before we get to the bytecode. What's going on?
|
||||
# Again, because magic ints decreased between python 2.7 and 3.0 we need
|
||||
# a range here.
|
||||
if 3000 < magic_int < 20121:
|
||||
fp.read(4)
|
||||
co_code = load(fp, magic_int)
|
||||
co_consts = load(fp, magic_int)
|
||||
co_names = load(fp, magic_int)
|
||||
co_varnames = load(fp, magic_int)
|
||||
co_freevars = load(fp, magic_int)
|
||||
co_cellvars = load(fp, magic_int)
|
||||
co_filename = load(fp, magic_int)
|
||||
co_name = load(fp, magic_int)
|
||||
co_firstlineno = unpack('i', fp.read(4))[0]
|
||||
co_lnotab = load(fp)
|
||||
co_lnotab = load(fp, magic_int)
|
||||
# The Python3 code object is different than Python2's which
|
||||
# we are reading if we get here.
|
||||
# Also various parameters which were strings are now
|
||||
# bytes (which is probably more logical).
|
||||
if PYTHON3:
|
||||
if PYTHON_MAGIC_INT > 3020:
|
||||
# In later Python3 versions, there is a
|
||||
# In later Python3 magic_ints, there is a
|
||||
# kwonlyargcount parameter which we set to 0.
|
||||
return Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags,
|
||||
bytes(co_code, encoding='utf-8'),
|
||||
@@ -152,7 +159,7 @@ def load(fp):
|
||||
tuplesize = unpack('i', fp.read(4))[0]
|
||||
ret = tuple()
|
||||
while tuplesize > 0:
|
||||
ret += load(fp),
|
||||
ret += load(fp, magic_int),
|
||||
tuplesize -= 1
|
||||
return ret
|
||||
elif marshalType == '[':
|
||||
|
@@ -21,7 +21,7 @@ if (sys.version_info > (3, 0)):
|
||||
else:
|
||||
L65536 = long(65536)
|
||||
|
||||
from uncompyle6.opcodes import opcode_25, opcode_26, opcode_27, opcode_34
|
||||
from uncompyle6.opcodes import opcode_25, opcode_26, opcode_27, opcode_32, opcode_34
|
||||
|
||||
|
||||
class Token:
|
||||
@@ -31,7 +31,7 @@ class Token:
|
||||
A byte-code token is equivalent to the contents of one line
|
||||
as output by dis.dis().
|
||||
'''
|
||||
def __init__(self, type_, attr=None, pattr=None, offset=-1, linestart=False):
|
||||
def __init__(self, type_, attr=None, pattr=None, offset=-1, linestart=None):
|
||||
self.type = intern(type_)
|
||||
self.attr = attr
|
||||
self.pattr = pattr
|
||||
@@ -51,9 +51,9 @@ class Token:
|
||||
def __str__(self):
|
||||
pattr = self.pattr
|
||||
if self.linestart:
|
||||
return '\n%s\t%-17s %r' % (self.offset, self.type, pattr)
|
||||
return '\n%4d %6s\t%-17s %r' % (self.linestart, self.offset, self.type, pattr)
|
||||
else:
|
||||
return '%s\t%-17s %r' % (self.offset, self.type, pattr)
|
||||
return ' %6s\t%-17s %r' % (self.offset, self.type, pattr)
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self.type)
|
||||
|
@@ -35,8 +35,14 @@ class Scanner25(scan.Scanner):
|
||||
if self.code[i] in (RETURN_VALUE, END_FINALLY):
|
||||
n = i + 1
|
||||
self.code = array('B', co.co_code[:n])
|
||||
# linestarts contains bloc code adresse (addr,block)
|
||||
|
||||
# linestarts is a tuple of (offset, line number).
|
||||
# Turn that into a hash that we can index
|
||||
self.linestarts = list(dis.findlinestarts(co))
|
||||
linestartoffsets = {}
|
||||
for offset, lineno in self.linestarts:
|
||||
linestartoffsets[offset] = lineno
|
||||
|
||||
self.prev = [0]
|
||||
|
||||
# class and names
|
||||
@@ -72,7 +78,13 @@ class Scanner25(scan.Scanner):
|
||||
linestarts = self.linestarts
|
||||
self.lines = []
|
||||
linetuple = namedtuple('linetuple', ['l_no', 'next'])
|
||||
linestartoffsets = {a for (a, _) in linestarts}
|
||||
|
||||
# linestarts is a tuple of (offset, line number).
|
||||
# Turn that into a hash that we can index
|
||||
linestartoffsets = {}
|
||||
for offset, lineno in linestarts:
|
||||
linestartoffsets[offset] = lineno
|
||||
|
||||
(prev_start_byte, prev_line_no) = linestarts[0]
|
||||
for (start_byte, line_no) in linestarts[1:]:
|
||||
while j < start_byte:
|
||||
@@ -202,16 +214,16 @@ class Scanner25(scan.Scanner):
|
||||
if offset in self.return_end_ifs:
|
||||
op_name = 'RETURN_END_IF'
|
||||
|
||||
if offset not in replace:
|
||||
rv.append(Token(op_name, oparg, pattr, offset, linestart = offset in linestartoffsets))
|
||||
if offset in linestartoffsets:
|
||||
linestart = linestartoffsets[offset]
|
||||
else:
|
||||
rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets))
|
||||
linestart = None
|
||||
|
||||
if offset not in replace:
|
||||
rv.append(Token(op_name, oparg, pattr, offset, linestart))
|
||||
else:
|
||||
rv.append(Token(replace[offset], oparg, pattr, offset, linestart))
|
||||
|
||||
if self.showasm:
|
||||
out = self.out # shortcut
|
||||
for t in rv:
|
||||
print >>out, t
|
||||
print >>out
|
||||
return rv, customize
|
||||
|
||||
def getOpcodeToDel(self, i):
|
||||
|
@@ -13,7 +13,7 @@ from operator import itemgetter
|
||||
|
||||
from uncompyle6.opcodes.opcode_26 import *
|
||||
import dis
|
||||
import scanner as scan
|
||||
import uncompyle6.scanner as scan
|
||||
|
||||
class Scanner26(scan.Scanner):
|
||||
def __init__(self):
|
||||
@@ -71,7 +71,13 @@ class Scanner26(scan.Scanner):
|
||||
linestarts = self.linestarts
|
||||
self.lines = []
|
||||
linetuple = namedtuple('linetuple', ['l_no', 'next'])
|
||||
linestartoffsets = {a for (a, _) in linestarts}
|
||||
|
||||
# linestarts is a tuple of (offset, line number).
|
||||
# Turn that into a hash that we can index
|
||||
linestartoffsets = {}
|
||||
for offset, lineno in linestarts:
|
||||
linestartoffsets[offset] = lineno
|
||||
|
||||
(prev_start_byte, prev_line_no) = linestarts[0]
|
||||
for (start_byte, line_no) in linestarts[1:]:
|
||||
while j < start_byte:
|
||||
@@ -202,16 +208,16 @@ class Scanner26(scan.Scanner):
|
||||
if offset in self.return_end_ifs:
|
||||
op_name = 'RETURN_END_IF'
|
||||
|
||||
if offset not in replace:
|
||||
rv.append(Token(op_name, oparg, pattr, offset, linestart = offset in linestartoffsets))
|
||||
if offset in linestartoffsets:
|
||||
linestart = linestartoffsets[offset]
|
||||
else:
|
||||
rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets))
|
||||
linestart = None
|
||||
|
||||
if offset not in replace:
|
||||
rv.append(Token(op_name, oparg, pattr, offset, linestart))
|
||||
else:
|
||||
rv.append(Token(replace[offset], oparg, pattr, offset, linestart))
|
||||
|
||||
if self.showasm:
|
||||
out = self.out # shortcut
|
||||
for t in rv:
|
||||
print >>out, t
|
||||
print >>out
|
||||
return rv, customize
|
||||
|
||||
def getOpcodeToDel(self, i):
|
||||
|
@@ -46,10 +46,16 @@ class Scanner27(scan.Scanner):
|
||||
|
||||
self.lines = []
|
||||
linetuple = namedtuple('linetuple', ['l_no', 'next'])
|
||||
|
||||
j = 0
|
||||
# linestarts contains bloc code adresse (addr,block)
|
||||
|
||||
# linestarts is a tuple of (offset, line number).
|
||||
# Turn that into a hash that we can index
|
||||
linestarts = list(dis.findlinestarts(co))
|
||||
linestartoffsets = {a for (a, _) in linestarts}
|
||||
linestartoffsets = {}
|
||||
for offset, lineno in linestarts:
|
||||
linestartoffsets[offset] = lineno
|
||||
|
||||
(prev_start_byte, prev_line_no) = linestarts[0]
|
||||
for (start_byte, line_no) in linestarts[1:]:
|
||||
while j < start_byte:
|
||||
@@ -190,16 +196,16 @@ class Scanner27(scan.Scanner):
|
||||
if offset in self.return_end_ifs:
|
||||
op_name = 'RETURN_END_IF'
|
||||
|
||||
if offset not in replace:
|
||||
rv.append(Token(op_name, oparg, pattr, offset, linestart = offset in linestartoffsets))
|
||||
if offset in linestartoffsets:
|
||||
linestart = linestartoffsets[offset]
|
||||
else:
|
||||
rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets))
|
||||
linestart = None
|
||||
|
||||
if offset not in replace:
|
||||
rv.append(Token(op_name, oparg, pattr, offset, linestart))
|
||||
else:
|
||||
rv.append(Token(replace[offset], oparg, pattr, offset, linestart))
|
||||
|
||||
if self.showasm:
|
||||
out = self.out # shortcut
|
||||
for t in rv:
|
||||
print(t, file=out)
|
||||
print(file=out)
|
||||
return rv, customize
|
||||
|
||||
def op_size(self, op):
|
||||
|
@@ -11,7 +11,7 @@ from __future__ import print_function
|
||||
import dis, marshal
|
||||
from collections import namedtuple
|
||||
|
||||
from uncompyle6.scanner import Token
|
||||
from uncompyle6.scanner import Token, L65536
|
||||
|
||||
|
||||
# Get all the opcodes into globals
|
||||
@@ -20,7 +20,7 @@ from uncompyle6.opcodes.opcode_27 import *
|
||||
import uncompyle6.scanner as scan
|
||||
|
||||
|
||||
class Scanner34(scan.Scanner):
|
||||
class Scanner32(scan.Scanner):
|
||||
def __init__(self):
|
||||
self.Token = scan.Scanner.__init__(self, 3.2) # check
|
||||
|
||||
@@ -62,14 +62,19 @@ class Scanner34(scan.Scanner):
|
||||
# w/o touching arguments
|
||||
current_token = Token(dis.opname[op])
|
||||
current_token.offset = offset
|
||||
current_token.linestart = True if offset in self.linestarts else False
|
||||
|
||||
if offset in self.linestarts:
|
||||
current_token.linestart = self.linestarts[offset]
|
||||
else:
|
||||
current_token.linestart = None
|
||||
|
||||
if op >= dis.HAVE_ARGUMENT:
|
||||
# Calculate op's argument value based on its argument and
|
||||
# preceding extended argument, if any
|
||||
oparg = code[offset+1] + code[offset+2]*256 + extended_arg
|
||||
extended_arg = 0
|
||||
if op == dis.EXTENDED_ARG:
|
||||
extended_arg = oparg*65536
|
||||
extended_arg = oparg * L65536
|
||||
|
||||
# Fill token's attr/pattr fields
|
||||
current_token.attr = oparg
|
||||
@@ -88,6 +93,7 @@ class Scanner34(scan.Scanner):
|
||||
free = co.co_cellvars + co.co_freevars
|
||||
current_token.pattr = free[oparg]
|
||||
tokens.append(current_token)
|
||||
|
||||
return tokens, customize
|
||||
|
||||
def build_lines_data(self, code_obj):
|
||||
|
@@ -11,8 +11,7 @@ from __future__ import print_function
|
||||
import dis, marshal
|
||||
from collections import namedtuple
|
||||
|
||||
from uncompyle6.scanner import Token
|
||||
|
||||
from uncompyle6.scanner import Token, L65536
|
||||
|
||||
# Get all the opcodes into globals
|
||||
globals().update(dis.opmap)
|
||||
@@ -62,14 +61,19 @@ class Scanner34(scan.Scanner):
|
||||
# w/o touching arguments
|
||||
current_token = Token(dis.opname[op])
|
||||
current_token.offset = offset
|
||||
current_token.linestart = True if offset in self.linestarts else False
|
||||
|
||||
if offset in self.linestarts:
|
||||
current_token.linestart = self.linestarts[offset]
|
||||
else:
|
||||
current_token.linestart = None
|
||||
|
||||
if op >= dis.HAVE_ARGUMENT:
|
||||
# Calculate op's argument value based on its argument and
|
||||
# preceding extended argument, if any
|
||||
oparg = code[offset+1] + code[offset+2]*256 + extended_arg
|
||||
extended_arg = 0
|
||||
if op == dis.EXTENDED_ARG:
|
||||
extended_arg = oparg*65536
|
||||
extended_arg = oparg * L65536
|
||||
|
||||
# Fill token's attr/pattr fields
|
||||
current_token.attr = oparg
|
||||
|
Reference in New Issue
Block a user