Python3 positional arguments. Clean up code more along the lines of uncompyle3.

This commit is contained in:
rocky
2015-12-18 17:07:35 -05:00
parent e5f58e8ecb
commit 347219a009
11 changed files with 143 additions and 116 deletions

View File

@@ -1,17 +1,14 @@
 uncompyle6
 ==========
-A Python 2.x and possibly 3.x byte-code decompiler.
-This is written in Python 2.7 but is Python3 compatible.
+A Python 2.x and 3.x byte-code decompiler.
 Introduction
 ------------
 *uncompyle6* converts Python byte-code back into equivalent Python
 source code. It accepts byte-codes from Python version 2.5 to 2.7.
-It runs on Python 2.7 and with a little more work Python 3.
+It runs on Python 2.6 and 2.7 and Python 3.4.
 The generated source is fairly readable: docstrings, lists, tuples and
 hashes are somewhat pretty-printed.
@@ -51,17 +48,6 @@ Features
 - output may be written to file, a directory or to stdout
 - option for including byte-code disassembly into generated source
-Requirements
-------------
-The code runs on Python 2.7. It is compatable with Python3,
-and I've run some tests there, but more work is needed to make that
-more solid.
-Work to support decompyling Python 3 bytecodes and magics is
-still needed.
 Installation
 ------------
@@ -79,6 +65,9 @@ sudo) will do the steps above.
 Testing
 -------
+make check-2.7 # if running on Python 2.7
+make check-3.4 # if running on Python 3.4
 Testing right now is largely via utility `test/test_pythonlib.py`. A
 GNU makefile has been added to smooth over setting up and running the
 right command. If you have remake_ installed, you can see the list of all

Binary file not shown.

Binary file not shown.

View File

@@ -1 +1,5 @@
+# Tests custom added grammar rule:
+#   expr ::= expr {expr}^n CALL_FUNCTION_n
+# which in the specific case below is:
+#   expr ::= expr expr expr CALL_FUNCTION_2
 a(b, c)
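
The comment above describes the rule the parser generates for this call. As a minimal standalone sketch (the helper name call_function_rule is hypothetical, not uncompyle6 API), the argument count is recovered from the opcode name's suffix and one 'expr ' is emitted per positional argument, plus one for the callable itself:

    def call_function_rule(opname):
        # Hypothetical helper: "CALL_FUNCTION_2" -> n = 2
        n = int(opname[opname.rfind('_') + 1:])
        # one 'expr' for the callable, then one per positional argument
        return 'call_function ::= expr ' + 'expr ' * n + opname

    print(call_function_rule('CALL_FUNCTION_2'))
    # call_function ::= expr expr expr CALL_FUNCTION_2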

View File

@@ -38,7 +38,7 @@ def check_object_path(path):
             pass
         pass
     basename = os.path.basename(path)[0:-3]
-    spath = path if PYTHON3 else path.decude('utf-8')
+    spath = path if PYTHON3 else path.decode('utf-8')
     path = tempfile.mkstemp(prefix=basename + '-',
                             suffix='.pyc', text=False)[1]
     py_compile.compile(spath, cfile=path)
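
For context, the surrounding function compiles a source file to a uniquely named temporary .pyc before decompiling it. A minimal standalone sketch of that behavior, assuming a readable example.py exists in the current directory:

    import os
    import py_compile
    import tempfile

    def compile_to_temp_pyc(spath):
        basename = os.path.basename(spath)[0:-3]    # strip trailing ".py"
        # mkstemp returns (fd, path); keep only the unique path
        path = tempfile.mkstemp(prefix=basename + '-',
                                suffix='.pyc', text=False)[1]
        py_compile.compile(spath, cfile=path)       # write bytecode there
        return path

    # compile_to_temp_pyc('example.py')  # e.g. /tmp/example-XXXXXX.pyc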

View File

@@ -8,18 +8,10 @@ Common spark parser routines for Python.
 from __future__ import print_function
-from uncompyle6 import PYTHON3
 import sys
 from uncompyle6.parsers.spark import GenericASTBuilder
-if PYTHON3:
-    intern = sys.intern
-    from collections import UserList
-else:
-    from UserList import UserList
 class ParserError(Exception):
     def __init__(self, token, offset):
         self.token = token
@@ -29,27 +21,7 @@ class ParserError(Exception):
         return "Syntax error at or near `%r' token at offset %s\n" % \
                (self.token, self.offset)
-class AST(UserList):
-    def __init__(self, type, kids=[]):
-        self.type = intern(type)
-        UserList.__init__(self, kids)
-    def __getslice__(self, low, high):  return self.data[low:high]
-    def __eq__(self, o):
-        if isinstance(o, AST):
-            return self.type == o.type \
-                   and UserList.__eq__(self, o)
-        else:
-            return self.type == o
-    def __hash__(self):  return hash(self.type)
-    def __repr__(self, indent=''):
-        rv = str(self.type)
-        for k in self:
-            rv = rv + '\n' + str(k).replace('\n', '\n   ')
-        return rv
+nop_func = lambda self, args: None
 class PythonParser(GenericASTBuilder):
@@ -97,71 +69,9 @@ class PythonParser(GenericASTBuilder):
         return GenericASTBuilder.resolve(self, list)
 def parse(p, tokens, customize):
-    '''
-    Special handling for opcodes that take a variable number
-    of arguments -- we add a new rule for each:
-        expr ::= {expr}^n BUILD_LIST_n
-        expr ::= {expr}^n BUILD_TUPLE_n
-        unpack_list ::= UNPACK_LIST {expr}^n
-        unpack ::= UNPACK_TUPLE {expr}^n
-        unpack ::= UNPACK_SEQEUENE {expr}^n
-        mkfunc ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n
-        mkfunc ::= {expr}^n load_closure LOAD_CONST MAKE_FUNCTION_n
-        expr ::= expr {expr}^n CALL_FUNCTION_n
-        expr ::= expr {expr}^n CALL_FUNCTION_VAR_n POP_TOP
-        expr ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n POP_TOP
-        expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP
-    '''
-    nop = lambda self, args: None
-    for k, v in list(customize.items()):
-        # avoid adding the same rule twice to this parser
-        if k in p.customized:
-            continue
-        p.customized[k] = None
-        # nop = lambda self, args: None
-        op = k[:k.rfind('_')]
-        if op in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET'):
-            rule = 'build_list ::= ' + 'expr '*v + k
-        elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'):
-            rule = 'unpack ::= ' + k + ' designator'*v
-        elif op == 'UNPACK_LIST':
-            rule = 'unpack_list ::= ' + k + ' designator'*v
-        elif op in ('DUP_TOPX', 'RAISE_VARARGS'):
-            # no need to add a rule
-            continue
-            # rule = 'dup_topx ::= ' + 'expr '*v + k
-        elif op == 'MAKE_FUNCTION':
-            p.addRule('mklambda ::= %s LOAD_LAMBDA %s' %
-                      ('expr '*v, k), nop)
-            rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*v, k)
-        elif op == 'MAKE_CLOSURE':
-            p.addRule('mklambda ::= %s load_closure LOAD_LAMBDA %s' %
-                      ('expr '*v, k), nop)
-            p.addRule('genexpr ::= %s load_closure LOAD_GENEXPR %s expr GET_ITER CALL_FUNCTION_1' %
-                      ('expr '*v, k), nop)
-            p.addRule('setcomp ::= %s load_closure LOAD_SETCOMP %s expr GET_ITER CALL_FUNCTION_1' %
-                      ('expr '*v, k), nop)
-            p.addRule('dictcomp ::= %s load_closure LOAD_DICTCOMP %s expr GET_ITER CALL_FUNCTION_1' %
-                      ('expr '*v, k), nop)
-            rule = 'mkfunc ::= %s load_closure LOAD_CONST %s' % ('expr '*v, k)
-            # rule = 'mkfunc ::= %s closure_list LOAD_CONST %s' % ('expr '*v, k)
-        elif op in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
-                    'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
-            na = (v & 0xff)           # positional parameters
-            nk = (v >> 8) & 0xff      # keyword parameters
-            # number of apply equiv arguments:
-            nak = ( len(op)-len('CALL_FUNCTION') ) // 3
-            rule = 'call_function ::= expr ' + 'expr '*na + 'kwarg '*nk \
-                   + 'expr ' * nak + k
-        else:
-            raise Exception('unknown customize token %s' % k)
-        p.addRule(rule, nop)
+    p.add_custom_rules(tokens, customize)
     ast = p.parse(tokens)
     # p.cleanup()
     return ast

View File

@@ -0,0 +1,31 @@
+import sys
+
+from uncompyle6 import PYTHON3
+
+if PYTHON3:
+    intern = sys.intern
+    from collections import UserList
+else:
+    from UserList import UserList
+
+class AST(UserList):
+    def __init__(self, type, kids=[]):
+        self.type = intern(type)
+        UserList.__init__(self, kids)
+
+    def __getslice__(self, low, high):  return self.data[low:high]
+
+    def __eq__(self, o):
+        if isinstance(o, AST):
+            return self.type == o.type \
+                   and UserList.__eq__(self, o)
+        else:
+            return self.type == o
+
+    def __hash__(self):  return hash(self.type)
+
+    def __repr__(self, indent=''):
+        rv = str(self.type)
+        for k in self:
+            rv = rv + '\n' + str(k).replace('\n', '\n   ')
+        return rv
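
A short usage sketch for the class above, assuming the module is importable: children are stored via UserList, and equality compares the interned node type (and, for two AST nodes, the child lists), so a node can be compared directly against a bare type name:

    call = AST('call_function', [AST('expr'), AST('expr')])
    print(call.type)                 # call_function
    print(len(call))                 # 2 -- children come from UserList
    print(call == 'call_function')   # True: falls back to comparing types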

View File

@@ -17,7 +17,8 @@ that a later phase can turn into a sequence of ASCII text.
 from __future__ import print_function
-from uncompyle6.parser import PythonParser, AST
+from uncompyle6.parser import PythonParser, nop_func
+from uncompyle6.parsers.astnode import AST
 from uncompyle6.parsers.spark import GenericASTBuilder
 class Python2Parser(PythonParser):
@@ -643,3 +644,64 @@ class Python2Parser(PythonParser):
         nullexprlist ::=
         '''
+
+    def add_custom_rules(self, tokens, customize):
+        """
+        Special handling for opcodes that take a variable number
+        of arguments -- we add a new rule for each:
+            expr ::= {expr}^n BUILD_LIST_n
+            expr ::= {expr}^n BUILD_TUPLE_n
+            unpack_list ::= UNPACK_LIST {expr}^n
+            unpack ::= UNPACK_TUPLE {expr}^n
+            unpack ::= UNPACK_SEQUENCE {expr}^n
+            mkfunc ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n
+            mkfunc ::= {expr}^n load_closure LOAD_CONST MAKE_FUNCTION_n
+            expr ::= expr {expr}^n CALL_FUNCTION_n
+            expr ::= expr {expr}^n CALL_FUNCTION_VAR_n POP_TOP
+            expr ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n POP_TOP
+            expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP
+        """
+        for k, v in list(customize.items()):
+            # avoid adding the same rule twice to this parser
+            if k in self.customized:
+                continue
+            self.customized[k] = None
+            op = k[:k.rfind('_')]
+            if op in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET'):
+                rule = 'build_list ::= ' + 'expr '*v + k
+            elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'):
+                rule = 'unpack ::= ' + k + ' designator'*v
+            elif op == 'UNPACK_LIST':
+                rule = 'unpack_list ::= ' + k + ' designator'*v
+            elif op in ('DUP_TOPX', 'RAISE_VARARGS'):
+                # no need to add a rule
+                continue
+                # rule = 'dup_topx ::= ' + 'expr '*v + k
+            elif op == 'MAKE_FUNCTION':
+                self.addRule('mklambda ::= %s LOAD_LAMBDA %s' %
+                             ('expr '*v, k), nop_func)
+                rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*v, k)
+            elif op == 'MAKE_CLOSURE':
+                self.addRule('mklambda ::= %s load_closure LOAD_LAMBDA %s' %
+                             ('expr '*v, k), nop_func)
+                self.addRule('genexpr ::= %s load_closure LOAD_GENEXPR %s expr GET_ITER CALL_FUNCTION_1' %
+                             ('expr '*v, k), nop_func)
+                self.addRule('setcomp ::= %s load_closure LOAD_SETCOMP %s expr GET_ITER CALL_FUNCTION_1' %
+                             ('expr '*v, k), nop_func)
+                self.addRule('dictcomp ::= %s load_closure LOAD_DICTCOMP %s expr GET_ITER CALL_FUNCTION_1' %
+                             ('expr '*v, k), nop_func)
+                rule = 'mkfunc ::= %s load_closure LOAD_CONST %s' % ('expr '*v, k)
+                # rule = 'mkfunc ::= %s closure_list LOAD_CONST %s' % ('expr '*v, k)
+            elif op in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
+                        'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
+                na = (v & 0xff)           # positional parameters
+                nk = (v >> 8) & 0xff      # keyword parameters
+                # number of apply equiv arguments:
+                nak = ( len(op)-len('CALL_FUNCTION') ) // 3
+                rule = 'call_function ::= expr ' + 'expr '*na + 'kwarg '*nk \
+                       + 'expr ' * nak + k
+            else:
+                raise Exception('unknown customize token %s' % k)
+            self.addRule(rule, nop_func)
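
The CALL_FUNCTION branch above packs two counts into one operand and infers a third from the opcode name. A standalone sketch of that decoding (decode_call is a hypothetical name, not uncompyle6 API):

    def decode_call(op, v):
        na = v & 0xff                 # low byte: positional parameters
        nk = (v >> 8) & 0xff          # high byte: keyword parameters
        # each '_VAR'/'_KW' suffix adds one stack entry (*args, **kwargs)
        nak = (len(op) - len('CALL_FUNCTION')) // 3
        return na, nk, nak

    print(decode_call('CALL_FUNCTION_VAR_KW', 0x0102))   # (2, 1, 2)
    print(decode_call('CALL_FUNCTION', 3))               # (3, 0, 0)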

View File

@@ -17,12 +17,14 @@ that a later phase can turn into a sequence of ASCII text.
 from __future__ import print_function
-from uncompyle6.parser import PythonParser, AST
+from uncompyle6.parser import PythonParser, nop_func
+from uncompyle6.parsers.astnode import AST
 from uncompyle6.parsers.spark import GenericASTBuilder
 class Python3Parser(PythonParser):
     def __init__(self):
+        self.added_rules = set()
         GenericASTBuilder.__init__(self, AST, 'stmts')
         self.customized = {}
@@ -643,3 +645,31 @@ class Python3Parser(PythonParser):
         nullexprlist ::=
         '''
+
+    def add_custom_rules(self, tokens, customize):
+        new_rules = set()
+        for token in tokens:
+            if token.type != 'CALL_FUNCTION':
+                continue
+            # Low byte indicates number of positional parameters,
+            # high byte number of keyword parameters
+            args_pos = token.attr & 0xff
+            args_kw = (token.attr >> 8) & 0xff
+            pos_args_line = '' if args_pos == 0 else ' {}'.format(' '.join('expr' for _ in range(args_pos)))
+            kw_args_line = '' if args_kw == 0 else ' {}'.format(' '.join('kwarg' for _ in range(args_kw)))
+            if args_kw == 0:
+                token.type = 'CALL_FUNCTION_%i' % (args_pos)
+                rule = ('call_function ::= expr%s%s %s' %
+                        (pos_args_line, kw_args_line, token.type))
+                # Make sure we do not add the same rule twice
+                if rule not in new_rules:
+                    new_rules.add(rule)
+                    self.addRule(rule, nop_func)
+                    customize[token.type] = args_pos
+                    pass
+            else:
+                assert False, "Can't handle kw args yet"
+        new_rules.difference_update(self.added_rules)
+        for rule in new_rules:
+            self.addRule(rule, nop_func)
+        self.added_rules.update(new_rules)
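
Since the Python 3 rules are derived per token rather than from the customize table, the shape of one generated rule can be checked in isolation. A sketch under the same assumptions as the method above (a CALL_FUNCTION token whose attr is 2, i.e. two positional and no keyword arguments, as in a(b, c)):

    attr = 2                                  # token.attr for a(b, c)
    args_pos = attr & 0xff                    # 2 positional arguments
    pos_args_line = '' if args_pos == 0 else \
        ' {}'.format(' '.join('expr' for _ in range(args_pos)))
    token_type = 'CALL_FUNCTION_%i' % args_pos
    print('call_function ::= expr%s %s' % (pos_args_line, token_type))
    # call_function ::= expr expr expr CALL_FUNCTION_2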

View File

@@ -209,7 +209,6 @@ class Scanner27(scan.Scanner):
                 rv.append(Token(op_name, oparg, pattr, offset, linestart))
             else:
                 rv.append(Token(replace[offset], oparg, pattr, offset, linestart))
-
         return rv, customize

     def op_size(self, op):

View File

@@ -43,7 +43,8 @@ from __future__ import print_function
 import inspect, sys, re
 from uncompyle6 import PYTHON3
-from uncompyle6.parser import AST, get_python_parser
+from uncompyle6.parser import get_python_parser
+from uncompyle6.parsers.astnode import AST
 from uncompyle6.parsers.spark import GenericASTTraversal
 import uncompyle6.parser as python_parser
 from uncompyle6.scanner import Token, Code, get_scanner
@@ -115,7 +116,7 @@ TABLE_R = {
 TABLE_R0 = {
 #   'BUILD_LIST':     ( '[%C]',      (0,-1,', ') ),
 #   'BUILD_TUPLE':    ( '(%C)',      (0,-1,', ') ),
-#   'CALL_FUNCTION':  ( '%c(%C)', 0, (1,-1,', ') ),
+#   'CALL_FUNCTION':  ( '%c(%P)', 0, (1,-1,', ') ),
 }
 TABLE_DIRECT = {
     'BINARY_ADD':     ( '+' ,),
@@ -1186,7 +1187,8 @@ class Walker(GenericASTTraversal, object):
             if k in TABLE_R:
                 continue
             op = k[ :k.rfind('_') ]
-            if op == 'CALL_FUNCTION': TABLE_R[k] = ('%c(%P)', 0, (1, -1, ', ', 100))
+            if op == 'CALL_FUNCTION':
+                TABLE_R[k] = ('%c(%P)', 0, (1, -1, ', ', 100))
             elif op in ('CALL_FUNCTION_VAR',
                         'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
                 if v == 0:
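
With the custom grammar rules in place, this loop gives every customized CALL_FUNCTION_n key the same output template. A small sketch of that table update, under the assumption (based on the walker's own conventions) that '%c' expands the callable child and '%P' joins the argument children with ', ':

    TABLE_R = {}
    customize = {'CALL_FUNCTION_2': 2, 'CALL_FUNCTION_3': 3}
    for k, v in customize.items():
        if k in TABLE_R:
            continue
        op = k[:k.rfind('_')]
        if op == 'CALL_FUNCTION':
            # one shared template per arity: callable, then joined args
            TABLE_R[k] = ('%c(%P)', 0, (1, -1, ', ', 100))

    print(sorted(TABLE_R))   # ['CALL_FUNCTION_2', 'CALL_FUNCTION_3']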