Add a standalone disassemble utility. __init__.py: Shun using 0, 1 for booleans

This commit is contained in:
rocky
2015-12-14 22:01:58 -05:00
parent 2d5191ba67
commit ae42f61ef0
7 changed files with 257 additions and 43 deletions

View File

@@ -62,7 +62,7 @@ DISTCLEAN_FILES = build dist *.pyc
#: Remove ALL derived files
distclean: clean
-rm -fr $(DISTCLEAN_FILES) || true
-rm -fvr $(DISTCLEAN_FILES) || true
-find . -name \*.pyc -exec rm -v {} \;
-find . -name \*.egg-info -exec rm -vr {} \;

View File

@@ -1,7 +1,8 @@
uncompyle6
==========
A Python 2.x and possibly 3.x byte-code decompiler.
A CPython 2.x and possibly 3.x byte-code disassembler and
decompiler.
This is written in Python 2.7 but is Python3 compatible.
@@ -21,22 +22,34 @@ compiler 'spark' (http://pages.cpsc.ucalgary.ca/~aycock/spark/) and his
prior work on a tool called 'decompyle'. This was improved by Hartmut Goebel
http://www.crazy-compilers.com
*Additional note (3 July 2004):*
In order to decompile a program, we need to be able to disassemble
it first. And this process may be useful in and of itself. So we provide a
utility for just that piece as well.
This software is no longer available from the original website.
However http://www.crazy-compilers.com/decompyle/ provides a
decompilation service.
'pydisassemble' gives a CPython disassembly of Python byte-code. How
is this different than what Python already provides via the "dis"
module? Here, we can cross disassemble bytecodes from different
versions of CPython than the version of CPython that is doing the
disassembly.
*Additional note (5 June 2012):*
'pydisassemble' works on the same versions as 'uncompyle6' and handles the
same sets of CPython bytecode versions.
*Note from 3 July 2004:*
This software was originally available from http://www.crazy-compilers.com;
http://www.crazy-compilers.com/decompyle/ provides a decompilation service.
*Note (5 June 2012):*
The decompilation of python bytecode 2.5 & 2.6 is based on the work of
Eloi Vanderbeken. bytecode is translated to a pseudo 2.7 python bytecode
and then decompiled.
*Additional note (12 Dec 2016):*
*Note (12 Dec 2016):*
This will be used to deparse fragments of code inside my trepan_
debuggers_. For that, I need to record text fragements for all
This project will be used to deparse fragments of code inside my
trepan_ debuggers_. For that, I need to record text fragments for all
byte-code offsets (of interest). This purpose, although largely
compatible with the original intention, is yet a little bit different.

196
scripts/pydissassemble Executable file
View File

@@ -0,0 +1,196 @@
#!/usr/bin/env python
# Mode: -*- python -*-
#
# Copyright (c) 2015 by Rocky Bernstein <rb@dustyfeet.com>
#
"""
Usage: pydisassemble [OPTIONS]... FILE
Examples:
pydisassemble foo.pyc
pydisassemble foo.py
pydisassemble -o foo.pydis foo.pyc
pydisassemble -o /tmp foo.pyc
Options:
-o <path> output decompiled files to this path:
if multiple input files are decompiled, the common prefix
is stripped from these names and the remainder appended to
<path>
--help show this message
"""
from __future__ import print_function
# One-line usage summary.  It lists only the options this script's getopt
# spec ('ho:' plus '--help') accepts, and spells the tool name the way the
# module docstring does.
Usage_short = "pydisassemble [--help] [-o <path>] FILE..."
import sys, os, getopt, time, types
import os.path
import uncompyle6
def disassemble_code(version, co, out=None):
    """
    Disassemble a given code block 'co'.

    version  bytecode version of 'co'; one of 2.5, 2.6, 2.7, 3.2 or 3.4
    co       code object to disassemble
    out      stream that receives the output (default: sys.stdout)

    Raises ValueError when there is no scanner for 'version'.
    """
    assert isinstance(co, types.CodeType)

    # Reject unknown versions before writing anything; otherwise 'scanner'
    # would be left unbound and the failure would surface as an unrelated
    # error further down.
    if version not in (2.5, 2.6, 2.7, 3.2, 3.4):
        raise ValueError('Unsupported Python bytecode version: %s' % version)

    real_out = out or sys.stdout
    print('# Python %s' % version, file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename,
              file=real_out)

    # Pick up appropriate scanner
    if version == 2.7:
        import uncompyle6.scanners.scanner27 as scan
        scanner = scan.Scanner27()
    elif version == 2.6:
        import uncompyle6.scanners.scanner26 as scan
        scanner = scan.Scanner26()
    elif version == 2.5:
        import uncompyle6.scanners.scanner25 as scan
        scanner = scan.Scanner25()
    elif version == 3.2:
        import uncompyle6.scanners.scanner32 as scan
        scanner = scan.Scanner32()
    elif version == 3.4:
        import uncompyle6.scanners.scanner34 as scan
        scanner = scan.Scanner34()

    # Hand the scanner the same stream the header went to.
    # NOTE(review): presumably setShowAsm(True, stream) makes disassemble()
    # print the instruction listing to that stream -- confirm in scanner.py.
    scanner.setShowAsm(True, real_out)
    # The return value of disassemble() is not used here.
    scanner.disassemble(co)
def disassemble_file(filename, outstream=None, showasm=False, showast=False):
    """
    disassemble Python byte-code file (.pyc)

    filename   file handed to uncompyle6.load_module()
    outstream  stream passed on to disassemble_code() (None means stdout
               there)
    showasm    unused; kept so the signature parallels uncompyle_file()
    showast    unused; kept so the signature parallels uncompyle_file()
    """
    version, co = uncompyle6.load_module(filename)
    # The loaded result is either one code object or a list of them;
    # normalise so both cases share one loop.
    code_objects = co if isinstance(co, list) else [co]
    for code in code_objects:
        disassemble_code(version, code, outstream)
def disassemble_files(in_base, out_base, files, outfile=None):
    """
    Disassemble every file in 'files'.

    in_base   base directory for input files
    out_base  base directory for output files (ignored when 'outfile'
              names a single output file)
    files     list of filenames to be disassembled (relative to in_base)
    outfile   write output to this filename (overrides out_base)

    For redirecting output to
    - <filename>            outfile=<filename> (out_base is ignored)
    - files below out_base  out_base=...
    - stdout                out_base=None, outfile=None

    An 'outfile' that is an existing directory, or that is given together
    with more than one input file, is treated as out_base.
    """
    def _get_outstream(outfile):
        out_dir = os.path.dirname(outfile)
        failed_file = outfile + '_failed'
        # Drop a stale failure marker left by an earlier run.
        if os.path.exists(failed_file):
            os.remove(failed_file)
        try:
            os.makedirs(out_dir)
        except OSError:
            # Directory already exists, or outfile has no directory part.
            pass
        return open(outfile, 'w')

    if outfile == '-':
        outfile = None  # use stdout
    elif outfile and (os.path.isdir(outfile) or len(files) > 1):
        out_base, outfile = outfile, None

    # Remember whether one explicit output file was requested.  This must be
    # captured after the normalisation above, because 'outfile' is reused
    # below for per-input output names.
    of = outfile

    for filename in files:
        infile = os.path.join(in_base, filename)
        if of:  # outfile was given as parameter
            outstream = _get_outstream(outfile)
        elif out_base is None:
            outstream = sys.stdout
        else:
            outfile = os.path.join(out_base, filename) + '_dis'
            outstream = _get_outstream(outfile)

        # try to disassemble the input file
        try:
            disassemble_file(infile, outstream, showasm=True, showast=False)
        except KeyboardInterrupt:
            # Do not leave a half-written output file behind.
            if outfile:
                outstream.close()
                os.remove(outfile)
            raise
        except Exception:
            # Best effort: mark this file as failed and carry on with the
            # remaining inputs.
            if outfile:
                outstream.close()
                os.rename(outfile, outfile + '_failed')
            else:
                sys.stderr.write("\n# Can't disassemble %s\n" % infile)
                import traceback
                traceback.print_exc()
        else:  # disassembly successful
            if outfile:
                outstream.close()
            else:
                print('\n# okay decompyling', infile)
            sys.stdout.flush()

    if outfile:
        sys.stdout.write("\n")
        sys.stdout.flush()
    return
# Only Python 2.7 and 3.4 are accepted as the running interpreter.  Compare
# version_info rather than slicing sys.version, which misreads two-digit
# minor versions (for example "3.10" becomes "3.1").
if sys.version_info[:2] not in ((2, 7), (3, 4)):
    print('Error: pydisassemble requires Python 2.7 or 3.4.', file=sys.stderr)
    sys.exit(-1)

outfile = '-'
out_base = None

try:
    opts, files = getopt.getopt(sys.argv[1:], 'ho:', ['help'])
except getopt.GetoptError as e:
    print('%s: %s' % (os.path.basename(sys.argv[0]), e), file=sys.stderr)
    sys.exit(-1)

for opt, val in opts:
    if opt in ('-h', '--help'):
        print(__doc__)
        sys.exit(0)
    elif opt == '-o':
        outfile = val
    else:
        # Defensive: the getopt spec above should not yield anything else.
        print(opt)
        print(Usage_short)
        sys.exit(1)

# At least one input file is required; without this check the script would
# exit silently having done nothing.
if not files:
    print(Usage_short, file=sys.stderr)
    sys.exit(1)

# argl, commonprefix works on strings, not on path parts,
# thus we must handle the case with files in 'some/classes'
# and 'some/cmds'
src_base = os.path.commonprefix(files)
if src_base[-1:] != os.sep:
    src_base = os.path.dirname(src_base)
if src_base:
    # Strip the shared directory prefix (plus its separator) from each name.
    sb_len = len(os.path.join(src_base, ''))
    files = [f[sb_len:] for f in files]
    del sb_len

if outfile == '-':
    outfile = None  # use stdout
elif outfile and os.path.isdir(outfile):
    out_base = outfile
    outfile = None
elif outfile and len(files) > 1:
    # Several inputs cannot share one output file; treat -o as a directory.
    out_base = outfile
    outfile = None

disassemble_files(src_base, out_base, files, outfile)

View File

@@ -3,9 +3,7 @@
#
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
#
from __future__ import print_function
'''
"""
Usage: uncompyle6 [OPTIONS]... [ FILE | DIR]...
Examples:
@@ -18,11 +16,11 @@ Options:
if multiple input files are decompiled, the common prefix
is stripped from these names and the remainder appended to
<path>
uncompyle -o /tmp bla/fasel.pyc bla/foo.pyc
uncompyle6 -o /tmp bla/fasel.pyc bla/foo.pyc
-> /tmp/fasel.pyc_dis, /tmp/foo.pyc_dis
uncompyle -o /tmp bla/fasel.pyc bar/foo.pyc
uncompyle6 -o /tmp bla/fasel.pyc bar/foo.pyc
-> /tmp/bla/fasel.pyc_dis, /tmp/bar/foo.pyc_dis
uncompyle -o /tmp /usr/lib/python1.5
uncompyle6 -o /tmp /usr/lib/python1.5
-> /tmp/smtplib.pyc_dis ... /tmp/lib-tk/FixTk.pyc_dis
-c <file> attempts a disassembly after compiling <file>
-d do not print timestamps
@@ -40,7 +38,9 @@ Extensions of generated files:
'.pyc_dis' '.pyo_dis' successfully decompiled (and verified if --verify)
+ '_unverified' successfully decompile but --verify failed
+ '_failed' decompile failed (contact author for enhancement)
'''
"""
from __future__ import print_function
Usage_short = \
"uncompyle6 [--help] [--verify] [--showasm] [--showast] [-o <path>] FILE|DIR..."
@@ -54,7 +54,8 @@ if sys.version[:3] != '2.7' and sys.version[:3] != '3.4':
print('Error: uncompyle6 requires Python 2.7 or 3.4.', file=sys.stderr)
sys.exit(-1)
showasm = showast = do_verify = numproc = recurse_dirs = 0
showasm = showast = do_verify = recurse_dirs = False
numproc = 0
outfile = '-'
out_base = None
codes = []
@@ -73,13 +74,13 @@ for opt, val in opts:
print(__doc__)
sys.exit(0)
elif opt == '--verify':
do_verify = 1
do_verify = True
elif opt in ('--showasm', '-a'):
showasm = 1
do_verify = 0
showasm = True
do_verify = False
elif opt in ('--showast', '-t'):
showast = 1
do_verify = 0
showast = True
do_verify = False
elif opt == '-o':
outfile = val
elif opt == '-d':
@@ -89,7 +90,7 @@ for opt, val in opts:
elif opt == '-p':
numproc = int(val)
elif opt == '-r':
recurse_dirs = 1
recurse_dirs = True
else:
print(opt)
print(Usage_short)

View File

@@ -38,4 +38,4 @@ clean-unverified:
#: Clean temporary compile/decompile/verify directories in /tmp
clean-py-dis:
rm -fr /tmp/py-dis-* || true
rm -fvr /tmp/py-dis-* || true

View File

@@ -42,7 +42,7 @@ __all__ = ['uncompyle_file', 'main']
def _load_file(filename):
'''
load a Python source file and compile it to byte-code
_load_module(filename: string): code_object
_load_file(filename: string): code_object
filename: name of file containing Python source code
(normally a .py)
code_object: code_object compiled from this source code
@@ -58,10 +58,10 @@ def _load_file(filename):
fp.close()
return co
def _load_module(filename):
def load_module(filename):
'''
load a module without importing it
_load_module(filename: string): code_object
load_module(filename: string): code_object
filename: name of file containing Python byte-code object
(normally a .pyc)
code_object: code_object from this file
@@ -100,10 +100,10 @@ def _load_module(filename):
return version, co
def uncompyle(version, co, out=None, showasm=0, showast=0):
'''
diassembles a given code block 'co'
'''
def uncompyle(version, co, out=None, showasm=False, showast=False):
"""
diassembles and deparses a given code block 'co'
"""
assert isinstance(co, types.CodeType)
@@ -160,11 +160,11 @@ def uncompyle(version, co, out=None, showasm=0, showast=0):
if walk.ERROR:
raise walk.ERROR
def uncompyle_file(filename, outstream=None, showasm=0, showast=0):
def uncompyle_file(filename, outstream=None, showasm=False, showast=False):
"""
decompile Python byte-code file (.pyc)
"""
version, co = _load_module(filename)
version, co = load_module(filename)
if type(co) == list:
for con in co:
uncompyle(version, con, outstream, showasm, showast)
@@ -174,9 +174,10 @@ def uncompyle_file(filename, outstream=None, showasm=0, showast=0):
# ---- main ----
if sys.platform.startswith('linux') and os.uname()[2][:2] == '2.':
if sys.platform.startswith('linux') and os.uname()[2][:2] in ['2.', '3.', '4.']:
def __memUsage():
    """
    Return one field of this process's /proc/self/stat line, scaled down
    by 1000000.

    The value read is whitespace-separated field index 22 (0-based) of the
    first line.  NOTE(review): per proc(5) this is 'vsize', the virtual
    memory size in bytes -- confirm.
    """
    # No debugger hook here: a leftover debug() call would stop (or fail on
    # a missing trepan package) every time memory usage is queried.
    with open('/proc/self/stat', 'r') as mi:
        mu = mi.readline().split()[22]
    return int(mu) / 1000000
@@ -202,7 +203,7 @@ def status_msg(do_verify, tot_files, okay_files, failed_files,
def main(in_base, out_base, files, codes, outfile=None,
showasm=0, showast=0, do_verify=0):
showasm=False, showast=False, do_verify=False):
'''
in_base base directory for input files
out_base base directory for output files (ignored when
@@ -234,8 +235,8 @@ def main(in_base, out_base, files, codes, outfile=None,
# co = compile(f.read(), "", "exec")
# uncompyle(sys.version[:3], co, sys.stdout, showasm=showasm, showast=showast)
for file in files:
infile = os.path.join(in_base, file)
for filename in files:
infile = os.path.join(in_base, filename)
# print (infile, file=sys.stderr)
if of: # outfile was given as parameter
@@ -243,7 +244,7 @@ def main(in_base, out_base, files, codes, outfile=None,
elif out_base is None:
outstream = sys.stdout
else:
outfile = os.path.join(out_base, file) + '_dis'
outfile = os.path.join(out_base, filename) + '_dis'
outstream = _get_outstream(outfile)
# print(outfile, file=sys.stderr)
@@ -282,7 +283,10 @@ def main(in_base, out_base, files, codes, outfile=None,
print(e, file=sys.stderr)
else:
okay_files += 1
if not outfile: print('\n# okay decompyling', infile, __memUsage())
if not outfile:
mess = '\n# okay decompyling'
# mem_usage = __memUsage()
print(mess, infile)
if outfile:
sys.stdout.write("%s\r" %
status_msg(do_verify, tot_files, okay_files, failed_files, verify_failed_files))

View File

@@ -338,14 +338,14 @@ class Token(scanner.Token):
def compare_code_with_srcfile(pyc_filename, src_filename):
"""Compare a .pyc with a source code file."""
version, code_obj1 = uncompyle6._load_module(pyc_filename)
version, code_obj1 = uncompyle6.load_module(pyc_filename)
code_obj2 = uncompyle6._load_file(src_filename)
cmp_code_objects(version, code_obj1, code_obj2)
def compare_files(pyc_filename1, pyc_filename2):
"""Compare two .pyc files."""
version, code_obj1 = uncompyle6._load_module(pyc_filename1)
version, code_obj2 = uncompyle6._load_module(pyc_filename2)
version, code_obj1 = uncompyle6.load_module(pyc_filename1)
version, code_obj2 = uncompyle6.load_module(pyc_filename2)
cmp_code_objects(version, code_obj1, code_obj2)
if __name__ == '__main__':