Part of a much-needed cleanup. Move semantics routines into their own

directory. Move lots of stuff out of __init__ into separate files.
Add file-loading tests. Document AST handling a tad more completely.
This commit is contained in:
rocky
2015-12-20 23:03:35 -05:00
parent 6910e1b1b4
commit 9cdcdfd305
13 changed files with 456 additions and 445 deletions

View File

@@ -18,16 +18,8 @@ TEST_TYPES=check-long check-short check-2.7 check-3.4
#: Default target - same as "check"
all: check
all test check check_long:
@$(PYTHON) -V && PYTHON_VERSION=`$(PYTHON) -V 2>&1 | cut -d ' ' -f 2 | cut -d'.' -f1,2`; \
$(MAKE) check-$$PYTHON_VERSION
#: Run working tests from Python 2.7
check-2.7: pytest
$(MAKE) -C test $@
#: Run working tests from Python 3.4
check-3.4:
#: Run working tests
check check-3.4 check-2.7: pytest
$(MAKE) -C test $@
#: Run py.test tests

View File

@@ -2,7 +2,8 @@
# Mode: -*- python -*-
#
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
#
# Copyright (c) 2015 by Rocky Bernstein
"""
Usage: uncompyle6 [OPTIONS]... [ FILE | DIR]...
@@ -41,15 +42,19 @@ Extensions of generated files:
"""
from __future__ import print_function
import sys, os, getopt
import sys, os, getopt, time
program = os.path.basename(__file__)
Usage_short = \
"%s [--help] [--verify] [--showasm] [--showast] [-o <path>] FILE|DIR..." % program
from uncompyle6 import verify, check_python_version
from uncompyle6.main import main, status_msg
def usage():
    """Print the command-line synopsis for this program and exit with status 1."""
    synopsis = """usage:
%s [--help] [--verify] [--showasm] [--showast] [-o <path>] FILE|DIR...
""" % program
    print(synopsis)
    sys.exit(1)
from uncompyle6 import main, status_msg, verify, check_python_version
import time
check_python_version(program)
@@ -91,9 +96,8 @@ for opt, val in opts:
elif opt == '-r':
recurse_dirs = True
else:
print(opt)
print(Usage_short)
sys.exit(1)
print(opt, file=sys.stderr)
usage()
# expand directory if specified
if recurse_dirs:
@@ -117,6 +121,11 @@ if src_base:
files = [f[sb_len:] for f in files]
del sb_len
if not files:
print("No files given", file=sys.stderr)
usage()
if outfile == '-':
outfile = None # use stdout
elif outfile and os.path.isdir(outfile):

8
pytest/test_load.py Normal file
View File

@@ -0,0 +1,8 @@
from uncompyle6.load import load_file, check_object_path, load_module
def test_load():
    """Compiling this file from source and loading its bytecode must agree.

    Exercises load_file, check_object_path and load_module together.
    """
    source_code = load_file(__file__)
    bytecode_path = check_object_path(__file__)
    # load_module returns (version, magic_int, code_object)
    loaded = load_module(bytecode_path)
    assert source_code == loaded[2]

View File

@@ -24,13 +24,15 @@ check-2.7: check-bytecode check-2.7-ok
#: Run working tests from Python 3.4
check-3.4: check-bytecode
$(PYTHON) test_pythonlib.py --bytecode-3.4
#: Check deparsing only, but from a different Python version
check-disasm:
$(PYTHON) dis-compare.py
#: Check deparsing bytecode only
check-bytecode: check-bytecode-2.5 check-bytecode-2.5 check-bytecode-3.2
check-bytecode:
$(PYTHON) test_pythonlib.py --bytecode-2.5 --bytecode-2.7 --bytecode-3.2
#: Check deparsing Python 2.5
check-bytecode-2.5:

View File

@@ -31,7 +31,8 @@ from __future__ import print_function
import getopt, os, py_compile, sys, shutil, tempfile, time
from uncompyle6 import main, PYTHON_VERSION
from uncompyle6 import PYTHON_VERSION
from uncompyle6.main import main
from fnmatch import fnmatch
def get_srcdir():
@@ -212,9 +213,6 @@ if __name__ == '__main__':
print("Can't find directory %s. Skipping" % src_dir,
file=sys.stderr)
continue
if last_compile_version and last_compile_version != compiled_version:
print("Warning: mixed python version decompylation")
else:
last_compile_version = compiled_version
pass

View File

@@ -28,19 +28,10 @@
from __future__ import print_function
import imp, os, marshal, sys, types
import sys
# set before importing scanner
PYTHON3 = (sys.version_info >= (3, 0))
import uncompyle6
from uncompyle6.scanner import get_scanner
from uncompyle6.disas import check_object_path
import uncompyle6.marsh
from uncompyle6 import walker, verify, magics
sys.setrecursionlimit(5000)
# We do this crazy way to support Python 2.6 which
# doesn't support version_major, and has a bug in
# floating point so we can't divide 26 by 10 and get
@@ -48,6 +39,8 @@ sys.setrecursionlimit(5000)
PYTHON_VERSION = sys.version_info[0]+ (sys.version_info[1] / 10.0)
PYTHON_VERSION_STR = "%s.%s" % (sys.version_info[0], sys.version_info[1])
sys.setrecursionlimit(5000)
def check_python_version(program):
if not (sys.version_info[0:2] in ((2,6), (2,7), (3,4))):
print('Error: %s requires %s Python 2.6, 2.7 or 3.4' % program,
@@ -55,261 +48,12 @@ def check_python_version(program):
sys.exit(-1)
return
__all__ = ['uncompyle_file', 'main']
import uncompyle6.semantics.pysource
import uncompyle6.semantics.fragments
def _load_file(filename):
'''
load a Python source file and compile it to byte-code
_load_file(filename: string): code_object
filename: name of file containing Python source code
(normally a .py)
code_object: code_object compiled from this source code
This function does NOT write any file!
'''
fp = open(filename, 'rb')
source = fp.read().decode('utf-8') + '\n'
try:
co = compile(source, filename, 'exec', dont_inherit=True)
except SyntaxError:
print('>>Syntax error in %s\n' % filename, file= sys.stderr)
raise
fp.close()
return co
# Convenience functions so you can say:
# from uncompyle6 import deparse_code and
# from uncompyle6 import deparse_fragments
def load_module(filename):
"""
load a module without importing it.
load_module(filename: string): version, magic_int, code_object
filename: name of file containing Python byte-code object
(normally a .pyc)
code_object: code_object from this file
version: Python major/minor value e.g. 2.7. or 3.4
magic_int: more specific than version. The actual byte code version of the
code object
"""
with open(filename, 'rb') as fp:
magic = fp.read(4)
try:
version = float(magics.versions[magic])
except KeyError:
raise ImportError("Unknown magic number %s in %s" %
(ord(magic[0])+256*ord(magic[1]), filename))
if not (2.5 <= version <= 2.7) and not (3.2 <= version <= 3.4):
raise ImportError("This is a Python %s file! Only "
"Python 2.5 to 2.7 and 3.2 to 3.4 files are supported."
% version)
# print version
fp.read(4) # timestamp
magic_int = magics.magic2int(magic)
my_magic_int = magics.magic2int(imp.get_magic())
if my_magic_int == magic_int:
# Note: a higher magic number necessarily mean a later
# release. At Python 3.0 the magic number decreased
# significantly. Hence the range below. Also note
# inclusion of the size info, occurred within a
# Python magor/minor release. Hence the test on the
# magic value rather than PYTHON_VERSION
if 3200 <= magic_int < 20121:
fp.read(4) # size mod 2**32
bytecode = fp.read()
co = marshal.loads(bytecode)
else:
co = uncompyle6.marsh.load_code(fp, magic_int)
pass
return version, magic_int, co
def uncompyle(version, co, out=None, showasm=False, showast=False):
"""
disassembles and deparses a given code block 'co'
"""
assert isinstance(co, types.CodeType)
# store final output stream for case of error
real_out = out or sys.stdout
print('# Python %s' % version, file=real_out)
if co.co_filename:
print('# Embedded file name: %s' % co.co_filename,
file=real_out)
scanner = get_scanner(version)
tokens, customize = scanner.disassemble(co)
if showasm:
for t in tokens:
print(t, file=real_out)
print(file=out)
# Build AST from disassembly.
walk = walker.Walker(version, out, scanner, showast=showast)
try:
ast = walk.build_ast(tokens, customize)
except walker.ParserError as e : # parser failed, dump disassembly
print(e, file=real_out)
raise
del tokens # save memory
# convert leading '__doc__ = "..." into doc string
assert ast == 'stmts'
try:
if ast[0][0] == walker.ASSIGN_DOC_STRING(co.co_consts[0]):
walk.print_docstring('', co.co_consts[0])
del ast[0]
if ast[-1] == walker.RETURN_NONE:
ast.pop() # remove last node
# todo: if empty, add 'pass'
except:
pass
walk.mod_globs = walker.find_globals(ast, set())
walk.gen_source(ast, customize)
for g in walk.mod_globs:
walk.write('global %s ## Warning: Unused global' % g)
if walk.ERROR:
raise walk.ERROR
def uncompyle_file(filename, outstream=None, showasm=False, showast=False):
"""
decompile Python byte-code file (.pyc)
"""
check_object_path(filename)
version, magic_int, co = load_module(filename)
if type(co) == list:
for con in co:
uncompyle(version, con, outstream, showasm, showast)
else:
uncompyle(version, co, outstream, showasm, showast)
co = None
# ---- main ----
if sys.platform.startswith('linux') and os.uname()[2][:2] in ['2.', '3.', '4.']:
def __memUsage():
mi = open('/proc/self/stat', 'r')
mu = mi.readline().split()[22]
mi.close()
return int(mu) / 1000000
else:
def __memUsage():
return ''
def status_msg(do_verify, tot_files, okay_files, failed_files,
verify_failed_files):
if tot_files == 1:
if failed_files:
return "decompile failed"
elif verify_failed_files:
return "decompile verify failed"
else:
return "Successfully decompiled file"
pass
pass
mess = "decompiled %i files: %i okay, %i failed" % (tot_files, okay_files, failed_files)
if do_verify:
mess += (", %i verify failed" % verify_failed_files)
return mess
def main(in_base, out_base, files, codes, outfile=None,
showasm=False, showast=False, do_verify=False):
'''
in_base base directory for input files
out_base base directory for output files (ignored when
files list of filenames to be uncompyled (relative to src_base)
outfile write output to this filename (overwrites out_base)
For redirecting output to
- <filename> outfile=<filename> (out_base is ignored)
- files below out_base out_base=...
- stdout out_base=None, outfile=None
'''
def _get_outstream(outfile):
dir = os.path.dirname(outfile)
failed_file = outfile + '_failed'
if os.path.exists(failed_file):
os.remove(failed_file)
try:
os.makedirs(dir)
except OSError:
pass
return open(outfile, 'w')
of = outfile
tot_files = okay_files = failed_files = verify_failed_files = 0
# for code in codes:
# version = sys.version[:3] # "2.5"
# with open(code, "r") as f:
# co = compile(f.read(), "", "exec")
# uncompyle(sys.version[:3], co, sys.stdout, showasm=showasm, showast=showast)
for filename in files:
infile = os.path.join(in_base, filename)
# print (infile, file=sys.stderr)
if of: # outfile was given as parameter
outstream = _get_outstream(outfile)
elif out_base is None:
outstream = sys.stdout
else:
outfile = os.path.join(out_base, filename) + '_dis'
outstream = _get_outstream(outfile)
# print(outfile, file=sys.stderr)
# try to decomyple the input file
try:
uncompyle_file(infile, outstream, showasm, showast)
tot_files += 1
except ValueError as e:
sys.stderr.write("\n# %s" % e)
failed_files += 1
except KeyboardInterrupt:
if outfile:
outstream.close()
os.remove(outfile)
sys.stderr.write("\nLast file: %s " % (infile))
raise
except:
failed_files += 1
if outfile:
outstream.close()
os.rename(outfile, outfile + '_failed')
else:
sys.stderr.write("\n# Can't uncompyle %s\n" % infile)
else: # uncompyle successfull
if outfile:
outstream.close()
if do_verify:
try:
msg = verify.compare_code_with_srcfile(infile, outfile)
if not outfile:
if not msg:
print('\n# okay decompyling %s' % infile)
okay_files += 1
else:
print('\n# %s\n\t%s', infile, msg)
except verify.VerifyCmpError as e:
verify_failed_files += 1
os.rename(outfile, outfile + '_unverified')
if not outfile:
print("### Error Verifiying %s" % filename, file=sys.stderr)
print(e, file=sys.stderr)
else:
okay_files += 1
if not outfile:
mess = '\n# okay decompyling'
# mem_usage = __memUsage()
print(mess, infile)
if outfile:
sys.stdout.write("%s\r" %
status_msg(do_verify, tot_files, okay_files, failed_files, verify_failed_files))
sys.stdout.flush()
if outfile:
sys.stdout.write("\n")
sys.stdout.flush()
return (tot_files, okay_files, failed_files, verify_failed_files)
deparse_code = uncompyle6.semantics.pysource.deparse_code
deparse_fragments = uncompyle6.semantics.fragments.deparse_code

View File

@@ -18,35 +18,11 @@ want to run on Python 2.7.
from __future__ import print_function
import inspect, os, py_compile, sys, tempfile
import inspect, os, sys
import uncompyle6
from uncompyle6 import PYTHON3
from uncompyle6.scanner import get_scanner
def check_object_path(path):
if path.endswith(".py"):
try:
import importlib
return importlib.util.cache_from_source(path,
optimization='')
except:
try:
import imp
imp.cache_from_source(path, debug_override=False)
except:
pass
pass
basename = os.path.basename(path)[0:-3]
spath = path if PYTHON3 else path.decode('utf-8')
path = tempfile.mkstemp(prefix=basename + '-',
suffix='.pyc', text=False)[1]
py_compile.compile(spath, cfile=path)
if not path.endswith(".pyc") and not path.endswith(".pyo"):
raise ValueError("path %s must point to a .py or .pyc file" %
path)
return path
from uncompyle6.load import check_object_path, load_module
def disco(version, co, out=None):
"""
@@ -78,7 +54,7 @@ def disassemble_file(filename, outstream=None):
try to find the corresponding compiled object.
"""
filename = check_object_path(filename)
version, magic_int, co = uncompyle6.load_module(filename)
version, magic_int, co = load_module(filename)
if type(co) == list:
for con in co:
disco(version, con, outstream)

106
uncompyle6/load.py Normal file
View File

@@ -0,0 +1,106 @@
# Copyright (c) 2000 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 2015 by Rocky Bernstein
from __future__ import print_function
import imp, marshal, os, py_compile, sys, tempfile
import uncompyle6.marsh
from uncompyle6 import PYTHON3
from uncompyle6 import magics
def check_object_path(path):
    """
    Return the name of a bytecode file corresponding to 'path'.

    path: name of a Python source file (.py) or of a compiled
          bytecode file (.pyc or .pyo)

    For a .py file, the cache location computed by
    importlib.util.cache_from_source() is returned when that function
    is usable. Otherwise the source is byte-compiled into a newly
    created temporary .pyc file and the name of that file is returned.

    A .pyc or .pyo path is returned unchanged.

    Raises ValueError if the path ends in none of .py, .pyc or .pyo.
    """
    if path.endswith(".py"):
        try:
            # Import the submodule explicitly: a bare "import importlib"
            # does not guarantee that importlib.util has been loaded.
            import importlib.util
            return importlib.util.cache_from_source(path,
                                                    optimization='')
        except (ImportError, AttributeError, TypeError,
                ValueError, NotImplementedError):
            # cache_from_source() is missing or does not accept these
            # arguments on this interpreter; compile the source below.
            pass

        basename = os.path.basename(path)[0:-3]
        spath = path if PYTHON3 else path.decode('utf-8')
        # mkstemp() returns an open OS-level descriptor together with the
        # file name. Only the name is needed, so close the descriptor
        # rather than leaking it.
        fd, path = tempfile.mkstemp(prefix=basename + '-',
                                    suffix='.pyc', text=False)
        os.close(fd)
        py_compile.compile(spath, cfile=path)

    if not path.endswith((".pyc", ".pyo")):
        raise ValueError("path %s must point to a .py or .pyc file\n" %
                         path)
    return path
def load_file(filename):
    '''
    Load a Python source file and compile it to byte-code.

    load_file(filename: string): code_object

    filename:    name of file containing Python source code
                 (normally a .py)
    code_object: code_object compiled from this source code

    The file is read as bytes and decoded as UTF-8. If the source does
    not compile, a message is written to stderr and the SyntaxError is
    re-raised.

    This function does NOT write any file!
    '''
    # The context manager closes the file even when reading or decoding
    # fails, and before compile() gets a chance to raise.
    with open(filename, 'rb') as fp:
        source = fp.read().decode('utf-8') + '\n'
    try:
        return compile(source, filename, 'exec', dont_inherit=True)
    except SyntaxError:
        print('>>Syntax error in %s\n' % filename, file=sys.stderr)
        raise
def load_module(filename):
    """
    Load a module's code object without importing it.

    load_module(filename: string): version, magic_int, code_object

    filename:    name of file containing Python byte-code object
                 (normally a .pyc)
    code_object: code_object from this file
    version:     Python major/minor value e.g. 2.7. or 3.4
    magic_int:   more specific than version. The actual byte code version
                 of the code object

    Raises ImportError if the magic number is not known or if the
    bytecode is from a Python version outside 2.5-2.7 and 3.2-3.4.
    """
    with open(filename, 'rb') as fp:
        magic = fp.read(4)
        try:
            version = float(magics.versions[magic])
        except KeyError:
            # Indexing a bytearray gives integers on both Python 2 and
            # Python 3; ord(magic[0]) would raise TypeError on Python 3
            # and hide the ImportError we want to report.
            raw = bytearray(magic)
            if len(raw) >= 2:
                magic_shown = raw[0] + 256 * raw[1]
            else:
                # Fewer than two bytes were read; show them as they are.
                magic_shown = repr(magic)
            raise ImportError("Unknown magic number %s in %s" %
                              (magic_shown, filename))
        if not (2.5 <= version <= 2.7) and not (3.2 <= version <= 3.4):
            raise ImportError("This is a Python %s file! Only "
                              "Python 2.5 to 2.7 and 3.2 to 3.4 files are supported."
                              % version)

        fp.read(4)  # skip the timestamp
        magic_int = magics.magic2int(magic)
        my_magic_int = magics.magic2int(imp.get_magic())

        if my_magic_int == magic_int:
            # Same bytecode format as the running interpreter, so the
            # built-in marshal module can read it.
            #
            # Note: a higher magic number does not necessarily mean a
            # later release. At Python 3.0 the magic number decreased
            # significantly. Hence the range below. Also note that
            # inclusion of the size info occurred within a Python
            # major/minor release. Hence the test on the magic value
            # rather than PYTHON_VERSION.
            if 3200 <= magic_int < 20121:
                fp.read(4)  # skip the source size mod 2**32
            bytecode = fp.read()
            co = marshal.loads(bytecode)
        else:
            # Bytecode from another Python version: use our own unmarshaller.
            co = uncompyle6.marsh.load_code(fp, magic_int)
    return version, magic_int, co
if __name__ == '__main__':
    # Self-check: the code object compiled from this file's source must
    # equal the one loaded from its bytecode file.
    source_code = load_file(__file__)
    bytecode_path = check_object_path(__file__)
    loaded = load_module(bytecode_path)
    assert source_code == loaded[2]

200
uncompyle6/main.py Normal file
View File

@@ -0,0 +1,200 @@
from __future__ import print_function
import os, sys, types
from uncompyle6.disas import check_object_path
from uncompyle6 import verify
from uncompyle6.semantics import pysource
from uncompyle6.scanner import get_scanner
from uncompyle6.load import load_module
# FIXME: remove duplicate code from deparse_code
def uncompyle(version, co, out=None, showasm=False, showast=False):
    """
    Disassemble and deparse the code object 'co'.

    version: Python version of the bytecode; passed to get_scanner()
             and to the Walker
    co:      the code object; must be a types.CodeType
    out:     stream handed to the Walker for the generated source.
             The header lines and error reports use sys.stdout when
             this is None.
    showasm: if true, also print each disassembled token
    showast: passed through to the Walker

    Re-raises pysource.ParserError after printing it, and raises
    walk.ERROR if the Walker recorded one.
    """
    assert isinstance(co, types.CodeType)

    # Header and error output need a real stream even when 'out' is None.
    report = out or sys.stdout
    print('# Python %s' % version, file=report)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename,
              file=report)

    scanner = get_scanner(version)
    tokens, customize = scanner.disassemble(co)

    if showasm:
        for token in tokens:
            print(token, file=report)
        print(file=out)

    # Build the AST from the disassembly.
    deparser = pysource.Walker(version, out, scanner, showast=showast)
    try:
        tree = deparser.build_ast(tokens, customize)
    except pysource.ParserError as err:
        # The parser failed: report it, then let the caller handle it.
        print(err, file=report)
        raise

    del tokens  # save memory

    assert tree == 'stmts'
    # Turn a leading '__doc__ = "..."' assignment into a docstring and drop
    # a trailing "return None". Any failure while doing so is ignored.
    try:
        if tree[0][0] == pysource.ASSIGN_DOC_STRING(co.co_consts[0]):
            deparser.print_docstring('', co.co_consts[0])
            del tree[0]
        if tree[-1] == pysource.RETURN_NONE:
            tree.pop()  # remove last node
            # todo: if empty, add 'pass'
    except:
        pass

    deparser.mod_globs = pysource.find_globals(tree, set())
    deparser.gen_source(tree, customize)
    for name in deparser.mod_globs:
        deparser.write('global %s ## Warning: Unused global' % name)

    if deparser.ERROR:
        raise deparser.ERROR
def uncompyle_file(filename, outstream=None, showasm=False, showast=False):
    """
    Decompile a Python byte-code file (.pyc).

    filename:  path checked and possibly replaced by check_object_path()
               before the bytecode is loaded
    outstream: stream passed to uncompyle() for the generated source
    showasm:   passed through to uncompyle()
    showast:   passed through to uncompyle()
    """
    # check_object_path() returns the path to load; use that result rather
    # than the name we were given.
    filename = check_object_path(filename)
    version, magic_int, co = load_module(filename)
    if isinstance(co, list):
        # load_module() may yield a list of code objects; deparse each one.
        for con in co:
            uncompyle(version, con, outstream, showasm, showast)
    else:
        uncompyle(version, co, outstream, showasm, showast)
def main(in_base, out_base, files, codes, outfile=None,
         showasm=False, showast=False, do_verify=False):
    '''
    Decompile each file in 'files' and report how it went.

    in_base   base directory for input files
    out_base  base directory for output files (ignored when outfile
              is given)
    files     list of filenames to be uncompyled (relative to in_base)
    codes     not used at present
    outfile   write output to this filename (overrides out_base)
    showasm   passed through to uncompyle_file()
    showast   passed through to uncompyle_file()
    do_verify if true, compare each result using
              verify.compare_code_with_srcfile()

    For redirecting output to
    - <filename>            outfile=<filename> (out_base is ignored)
    - files below out_base  out_base=...
    - stdout                out_base=None, outfile=None

    Returns the tuple
    (tot_files, okay_files, failed_files, verify_failed_files).
    '''
    def _get_outstream(outfile):
        """Open 'outfile' for writing after trying to create its directory."""
        out_dir = os.path.dirname(outfile)
        failed_file = outfile + '_failed'
        # A failed decompile renames its output to <outfile>_failed;
        # remove any such file before writing a new result.
        if os.path.exists(failed_file):
            os.remove(failed_file)
        try:
            os.makedirs(out_dir)
        except OSError:
            pass
        return open(outfile, 'w')

    # Remember whether the caller fixed the output file name; 'outfile'
    # itself is reassigned per input file when writing below out_base.
    of = outfile
    tot_files = okay_files = failed_files = verify_failed_files = 0

    for filename in files:
        infile = os.path.join(in_base, filename)

        if of:  # outfile was given as parameter
            outstream = _get_outstream(outfile)
        elif out_base is None:
            outstream = sys.stdout
        else:
            outfile = os.path.join(out_base, filename) + '_dis'
            outstream = _get_outstream(outfile)

        # try to decompile the input file
        try:
            uncompyle_file(infile, outstream, showasm, showast)
            tot_files += 1
        except ValueError as e:
            sys.stderr.write("\n# %s" % e)
            failed_files += 1
        except KeyboardInterrupt:
            # Do not leave a partial output file behind.
            if outfile:
                outstream.close()
                os.remove(outfile)
            sys.stderr.write("\nLast file: %s " % (infile))
            raise
        except:
            # Any other failure: keep going with the next file.
            failed_files += 1
            if outfile:
                outstream.close()
                os.rename(outfile, outfile + '_failed')
            else:
                sys.stderr.write("\n# Can't uncompyle %s\n" % infile)
        else:  # uncompyle successful
            if outfile:
                outstream.close()
            if do_verify:
                try:
                    msg = verify.compare_code_with_srcfile(infile, outfile)
                    if not outfile:
                        if not msg:
                            print('\n# okay decompyling %s' % infile)
                            okay_files += 1
                        else:
                            # Apply the format; passing the values as
                            # extra print() arguments left the "%s"
                            # placeholders in the output.
                            print('\n# %s\n\t%s' % (infile, msg))
                except verify.VerifyCmpError as e:
                    verify_failed_files += 1
                    if outfile:
                        os.rename(outfile, outfile + '_unverified')
                    else:
                        # Nothing to rename when output went to stdout.
                        print("### Error Verifiying %s" % filename, file=sys.stderr)
                        print(e, file=sys.stderr)
            else:
                okay_files += 1
                if not outfile:
                    mess = '\n# okay decompyling'
                    print(mess, infile)

        if outfile:
            # Output goes to files, so stdout is free for a progress line
            # that is overwritten in place ("\r") after each file.
            sys.stdout.write("%s\r" %
                             status_msg(do_verify, tot_files, okay_files,
                                        failed_files, verify_failed_files))
            sys.stdout.flush()

    if outfile:
        sys.stdout.write("\n")
        sys.stdout.flush()
    return (tot_files, okay_files, failed_files, verify_failed_files)
# ---- main ----
# __memUsage() is defined according to the platform: on Linux with a kernel
# release string starting "2.", "3." or "4." it reads /proc/self/stat;
# everywhere else it returns an empty string.
if sys.platform.startswith('linux') and os.uname()[2][:2] in ['2.', '3.', '4.']:
    def __memUsage():
        """Return the 23rd field of /proc/self/stat divided by 1000000."""
        # The context manager closes the file even if reading or
        # splitting the line raises.
        with open('/proc/self/stat', 'r') as stat_file:
            # NOTE(review): index 22 looks like the "vsize" field of
            # proc(5) -- confirm.
            field = stat_file.readline().split()[22]
        return int(field) / 1000000
else:
    def __memUsage():
        """Return '' because no memory figure is read on this platform."""
        return ''
def status_msg(do_verify, tot_files, okay_files, failed_files,
               verify_failed_files):
    """
    Build the one-line progress/summary text for a decompile run.

    When exactly one file was processed, a short sentence describes its
    outcome. Otherwise the counts are listed, with the number of
    verification failures appended when do_verify is true.
    """
    if tot_files != 1:
        summary = ("decompiled %i files: %i okay, %i failed"
                   % (tot_files, okay_files, failed_files))
        suffix = (", %i verify failed" % verify_failed_files) if do_verify else ""
        return summary + suffix

    # Single-file run
    if failed_files:
        return "decompile failed"
    if verify_failed_files:
        return "decompile verify failed"
    return "Successfully decompiled file"

View File

View File

@@ -2,61 +2,28 @@
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2015 by Rocky Bernstein
# See LICENSE for license
"""
Deparsing saving text fragment information indexed by offset
Creates Python source code from an uncompyle6 abstract syntax tree,
and indexes fragments which can be accessed by instruction offset
address.
Decompilation (walking AST)
All table-driven. (rocky: well, mostly. I need to add more format
specifiers for say duplicating info from one node to another.)
Step 1 determines a table (T) and a path to a
table key (K) from the node type (N) (other nodes are shown as O):
N N N&K
/ | ... \ / | ... \ / | ... \
O O O O O K O O O
|
K
MAP_R0 (TABLE_R0) MAP_R (TABLE_R) MAP_DIRECT (TABLE_DIRECT)
The default is a direct mapping. The key K is then extracted from the
subtree and used to find a table entry T[K], if any. The result is a
format string and arguments (a la printf()) for the formatting engine.
Escapes in the format string are:
%c evaluate N[A] recursively*
%C evaluate N[A[0]]..N[A[1]-1] recursively, separate by A[2]*
%P same as %C but sets operator precedence
%, print ',' if last %C only printed one item (for tuples--unused)
%| tab to current indentation level
%+ increase current indentation level
%- decrease current indentation level
%{...} evaluate ... in context of N
%% literal '%'
%p evaluate N setting precedence
* indicates an argument (A) required.
The '%' may optionally be followed by a number (C) in square brackets, which
makes the engine walk down to N[C] before evaluating the escape code.
See the comments in pysource for information on the abstract syntax tree
and how semantic actions are written.
"""
# FIXME: DRY code with pysource
from __future__ import print_function
import inspect, re, sys
from uncompyle6 import PYTHON3
from uncompyle6 import walker
from uncompyle6.semantics import pysource
from uncompyle6.parser import get_python_parser
from uncompyle6.walker import escape, PRECEDENCE, minint
from uncompyle6.walker import AST, NONE, find_all_globals
from uncompyle6.walker import find_globals, find_none, INDENT_PER_LEVEL
from uncompyle6.walker import ParserError
from uncompyle6.semantics.pysource import escape, PRECEDENCE, minint
from uncompyle6.semantics.pysource import AST, NONE, find_all_globals
from uncompyle6.semantics.pysource import find_globals, find_none, INDENT_PER_LEVEL
from uncompyle6.semantics.pysource import ParserError
from uncompyle6 import parser
from uncompyle6.scanner import Token, Code, get_scanner
@@ -67,8 +34,7 @@ else:
from itertools import izip_longest as zip_longest
from StringIO import StringIO
# FIXME: remove uncompyle dups
# from uncompyle6.walker import find_all_globals, find_globals, find_none
from uncompyle6.parsers.spark import GenericASTTraversal, GenericASTTraversalPruningException
from types import CodeType
@@ -77,7 +43,7 @@ NodeInfo = namedtuple("NodeInfo", "node start finish")
ExtractInfo = namedtuple("ExtractInfo",
"lineNo lineStartOffset markerLine selectedLine selectedText")
class Traverser(walker.Walker, object):
class Traverser(pysource.Walker, object):
stacked_params = ('f', 'indent', 'isLambda', '_globals')
def __init__(self, version, scanner, showast=False):
@@ -1186,7 +1152,7 @@ class Traverser(walker.Walker, object):
pass
def deparse(version, co, out=StringIO(), showasm=False, showast=False):
def deparse_code(version, co, out=StringIO(), showasm=False, showast=False):
assert inspect.iscode(co)
# store final output stream for case of error
__real_out = out or sys.stdout
@@ -1199,7 +1165,7 @@ def deparse(version, co, out=StringIO(), showasm=False, showast=False):
try:
walk.ast = walk.build_ast_d(tokens, customize)
except walker.ParserError as e : # parser failed, dump disassembly
except pysource.ParserError as e : # parser failed, dump disassembly
print(e, file=__real_out)
raise
@@ -1207,7 +1173,7 @@ def deparse(version, co, out=StringIO(), showasm=False, showast=False):
# convert leading '__doc__ = "..." into doc string
assert walk.ast == 'stmts'
walk.mod_globs = walker.find_globals(walk.ast, set())
walk.mod_globs = pysource.find_globals(walk.ast, set())
walk.gen_source_d(walk.ast, co.co_name, customize)
walk.set_pos_info(walk.ast, 0, len(walk.text))
walk.fixup_parents(walk.ast, None)
@@ -1219,70 +1185,52 @@ def deparse(version, co, out=StringIO(), showasm=False, showast=False):
return walk
if __name__ == '__main__':
# if __name__ == '__main__':
def deparse_test(co):
sys_version = sys.version_info.major + (sys.version_info.minor / 10.0)
walk = deparse(sys_version, co, showasm=True, showast=True)
print("deparsed source")
print(walk.text, "\n")
print('------------------------')
for name, offset in sorted(walk.offsets.keys()):
print("name %s, offset %s" % (name, offset))
nodeInfo = walk.offsets[name, offset]
node = nodeInfo.node
extractInfo = walk.extract_node_info(node)
print("code: %s" % node.type)
# print extractInfo
print(extractInfo.selectedText)
print(extractInfo.selectedLine)
print(extractInfo.markerLine)
extractInfo, p = walk.extract_parent_info(node)
if extractInfo:
print("Contained in...")
print(extractInfo.selectedLine)
print(extractInfo.markerLine)
print("code: %s" % p.type)
print('=' * 40)
pass
pass
return
# def deparse_test(co):
# sys_version = sys.version_info.major + (sys.version_info.minor / 10.0)
# walk = deparse_code(sys_version, co, showasm=False, showast=False)
# print("deparsed source")
# print(walk.text, "\n")
# print('------------------------')
# for name, offset in sorted(walk.offsets.keys(),
# key=lambda x: str(x[0])):
# print("name %s, offset %s" % (name, offset))
# nodeInfo = walk.offsets[name, offset]
# node = nodeInfo.node
# extractInfo = walk.extract_node_info(node)
# print("code: %s" % node.type)
# # print extractInfo
# print(extractInfo.selectedText)
# print(extractInfo.selectedLine)
# print(extractInfo.markerLine)
# extractInfo, p = walk.extract_parent_info(node)
# if extractInfo:
# print("Contained in...")
# print(extractInfo.selectedLine)
# print(extractInfo.markerLine)
# print("code: %s" % p.type)
# print('=' * 40)
# pass
# pass
# return
def get_code_for_fn(fn):
return fn.__code__
# def get_code_for_fn(fn):
# return fn.__code__
def foo(a, **options):
def bar(a, b=1, c=2):
print("a, b, c= ", a, int(b), c)
bar(a, **options)
options = {'c': 5, 'b': 10}
bar(a, **options)
return None
# def gcd(a, b):
# if a > b:
# (a, b) = (b, a)
# pass
def check_args(args):
deparse_test(inspect.currentframe().f_code)
for i in range(2):
try:
i = int(args[i])
except ValueError:
print("** Expecting an integer, got: %s" % repr(args[i]))
sys.exit(2)
pass
pass
# if a <= 0:
# return None
# if a == 1 or a == b:
# return a
# return gcd(b-a, a)
def gcd(a, b):
if a > b:
(a, b) = (b, a)
pass
if a <= 0:
return None
if a == 1 or a == b:
return a
return gcd(b-a, a)
# check_args(['3', '5'])
deparse_test(get_code_for_fn(gcd))
# deparse_test(get_code_for_fn(gcd))
# deparse_test(get_code_for_fn(Traverser.fixup_offsets))
# deparse_test(inspect.currentframe().f_code)
# # check_args(['3', '5'])
# deparse_test(get_code_for_fn(gcd))
# # deparse_test(get_code_for_fn(gcd))
# # deparse_test(get_code_for_fn(Traverser.fixup_offsets))
# # deparse_test(inspect.currentframe().f_code)

View File

@@ -1,11 +1,31 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2015 by Rocky Bernstein
"""
Decompilation (walking AST)
Creates Python source code from an uncompyle6 abstract syntax tree.
All table-driven. Step 1 determines a table (T) and a path to a
The terminal symbols are CPython bytecode instructions. (See the
python documentation under module "dis" for a list of instructions
and what they mean).
The upper levels of the grammar are a more-or-less conventional grammar for
Python.
Semantic action rules for nonterminal symbols can be table-driven.
This mechanism uses a printf-like syntax to direct substitution from
attributes of the nonterminal and its children.
The other way to specify a semantic rule is to create a method
prefaced with "n_" for that nonterminal. For example, "n_exec_stmt"
handles the semantic actions for the "exec_stmt" nonterminal symbol.
The rest of the below describes how table-driven semantic actions work
and gives a list of the format specifiers. The default() and engine()
methods implement most of the below.
Step 1 determines a table (T) and a path to a
table key (K) from the node type (N) (other nodes are shown as O):
N N N&K
@@ -32,10 +52,12 @@
%% literal '%'
%p evaluate N setting precedence
* indicates an argument (A) required.
The '%' may optionally be followed by a number (C) in square brackets, which
makes the engine walk down to N[C] before evaluating the escape code.
"""
from __future__ import print_function
@@ -552,8 +574,8 @@ class Walker(GenericASTTraversal, object):
def print_docstring(self, indent, docstring):
quote = '"""'
self.write(indent)
# FIXME for Python3
if type(docstring) == unicode:
if not PYTHON3 and not isinstance(docstring, str):
# Must be unicode in Python2
self.write('u')
docstring = repr(docstring.expandtabs())[2:-1]
else:
@@ -915,7 +937,6 @@ class Walker(GenericASTTraversal, object):
p = self.prec
self.prec = 27
n = node[-1]
assert n == 'list_iter'
# find innerst node
while n == 'list_iter':
@@ -943,6 +964,7 @@ class Walker(GenericASTTraversal, object):
ast = self.build_ast(code._tokens, code._customize)
self.customize(code._customize)
ast = ast[0][0][0]
n = ast[iter_index]
assert n == 'comp_iter'
# find innerst node
@@ -1464,7 +1486,8 @@ class Walker(GenericASTTraversal, object):
if isLambda:
self.write(self.traverse(ast, isLambda=isLambda))
else:
self.print_(self.traverse(ast, isLambda=isLambda))
self.text = self.traverse(ast, isLambda=isLambda)
self.print_(self.text)
self.return_none = rn
def build_ast(self, tokens, customize, isLambda=0, noneInNames=False):
@@ -1505,7 +1528,11 @@ class Walker(GenericASTTraversal, object):
return ast
def walker(version, co, out=sys.stdout, showasm=False, showast=False):
def deparse_code(version, co, out=sys.stdout, showasm=False, showast=False):
"""
disassembles and deparses a given code block 'co'
"""
assert inspect.iscode(co)
# store final output stream for case of error
__real_out = out or sys.stdout
@@ -1540,9 +1567,9 @@ def walker(version, co, out=sys.stdout, showasm=False, showast=False):
return walk
if __name__ == '__main__':
def walk_test(co):
def deparse_test(co):
sys_version = sys.version_info.major + (sys.version_info.minor / 10.0)
walker(sys_version, co, showasm=True, showast=True)
print()
deparsed = deparse_code(sys_version, co, showasm=False, showast=False)
print(deparsed.text)
return
walk_test(walk_test.__code__)
deparse_test(deparse_test.__code__)

View File

@@ -14,6 +14,7 @@ import uncompyle6
import uncompyle6.scanner as scanner
from uncompyle6 import PYTHON3
from uncompyle6.magics import PYTHON_MAGIC_INT
from uncompyle6.load import load_file, load_module
# FIXME: DRY
if PYTHON3:
@@ -348,12 +349,12 @@ class Token(scanner.Token):
def compare_code_with_srcfile(pyc_filename, src_filename):
"""Compare a .pyc with a source code file."""
version, magic_int, code_obj1 = uncompyle6.load_module(pyc_filename)
version, magic_int, code_obj1 = load_module(pyc_filename)
if magic_int != PYTHON_MAGIC_INT:
msg = ("Can't compare code - Python is running with magic %s, but code is magic %s "
% (PYTHON_MAGIC_INT, magic_int))
return msg
code_obj2 = uncompyle6._load_file(src_filename)
code_obj2 = load_file(src_filename)
cmp_code_objects(version, code_obj1, code_obj2)
return None