From 347219a00916ea927e15f5217d3885a5b2eb3331 Mon Sep 17 00:00:00 2001 From: rocky Date: Fri, 18 Dec 2015 17:07:35 -0500 Subject: [PATCH] Python3 positional arguments. Clean up code more along the lines of uncompyle3. --- README.rst | 21 +--- test/bytecode_2.7/positional.pyc | Bin 0 -> 160 bytes test/bytecode_3.4/positional.pyc | Bin 0 -> 149 bytes .../call_arguments/positional.py | 4 + uncompyle6/disas.py | 2 +- uncompyle6/parser.py | 96 +----------------- uncompyle6/parsers/astnode.py | 31 ++++++ uncompyle6/parsers/parse2.py | 64 +++++++++++- uncompyle6/parsers/parse3.py | 32 +++++- uncompyle6/scanners/scanner27.py | 1 - uncompyle6/walker.py | 8 +- 11 files changed, 143 insertions(+), 116 deletions(-) create mode 100644 test/bytecode_2.7/positional.pyc create mode 100644 test/bytecode_3.4/positional.pyc create mode 100644 uncompyle6/parsers/astnode.py diff --git a/README.rst b/README.rst index 1c16df61..085f54b4 100644 --- a/README.rst +++ b/README.rst @@ -1,17 +1,14 @@ uncompyle6 ========== -A Python 2.x and possibly 3.x byte-code decompiler. - -This is written in Python 2.7 but is Python3 compatible. - +A Python 2.x and 3.x byte-code decompiler. Introduction ------------ *uncompyle6* converts Python byte-code back into equivalent Python source code. It accepts byte-codes from Python version 2.5 to 2.7. -It runs on Python 2.7 and with a little more work Python 3. +It runs on Python 2.6 and 2.7 and Python 3.4 The generated source is fairly readable: docstrings, lists, tuples and hashes are somewhat pretty-printed. @@ -51,17 +48,6 @@ Features - output may be written to file, a directory or to stdout - option for including byte-code disassembly into generated source -Requirements ------------- - -The code runs on Python 2.7. It is compatable with Python3, -and I've run some tests there, but more work is needed to make that -more solid. - -Work to support decompyling Python 3 bytecodes and magics is -still needed. 
- - Installation ------------ @@ -79,6 +65,9 @@ sudo) will do the steps above. Testing ------- + make check-2.7 # if running on Python 2.7 + make check-3.4 # if running on Python 3.4 + Testing right now is largely via utility `test/test_pythonlib.py`. A GNU makefile has been added to smooth over setting running the right command. If you have remake_ installed, you can see the list of all diff --git a/test/bytecode_2.7/positional.pyc b/test/bytecode_2.7/positional.pyc new file mode 100644 index 0000000000000000000000000000000000000000..70a08e859177da45e4a945d5cb475242bd92826d GIT binary patch literal 160 zcmZSn%*%Bmu_P>+0ScIbv;zRUNi8nXFV4&@$Vt^L&Mz%WPSsCN%*lyQEJ`oUP0cGQ)-T8} e&Me8y&r8hFE2zB1VUwGmQks)$#|W|xh#3Gnj3sIS literal 0 HcmV?d00001 diff --git a/test/simple-source/call_arguments/positional.py b/test/simple-source/call_arguments/positional.py index c44bb93e..8cc6eaba 100644 --- a/test/simple-source/call_arguments/positional.py +++ b/test/simple-source/call_arguments/positional.py @@ -1 +1,5 @@ +# Tests custom added grammar rule: +# expr ::= expr {expr}^n CALL_FUNCTION_n +# which in the specific case below is: +# expr ::= expr expr expr CALL_FUNCTION_2 a(b, c) diff --git a/uncompyle6/disas.py b/uncompyle6/disas.py index c966d8e2..d71eb2e6 100644 --- a/uncompyle6/disas.py +++ b/uncompyle6/disas.py @@ -38,7 +38,7 @@ def check_object_path(path): pass pass basename = os.path.basename(path)[0:-3] - spath = path if PYTHON3 else path.decude('utf-8') + spath = path if PYTHON3 else path.decode('utf-8') path = tempfile.mkstemp(prefix=basename + '-', suffix='.pyc', text=False)[1] py_compile.compile(spath, cfile=path) diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index adb002a7..36c4bbe0 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -8,18 +8,10 @@ Common spark parser routines Python. 
from __future__ import print_function -from uncompyle6 import PYTHON3 - import sys from uncompyle6.parsers.spark import GenericASTBuilder -if PYTHON3: - intern = sys.intern - from collections import UserList -else: - from UserList import UserList - class ParserError(Exception): def __init__(self, token, offset): self.token = token @@ -29,27 +21,7 @@ class ParserError(Exception): return "Syntax error at or near `%r' token at offset %s\n" % \ (self.token, self.offset) -class AST(UserList): - def __init__(self, type, kids=[]): - self.type = intern(type) - UserList.__init__(self, kids) - - def __getslice__(self, low, high): return self.data[low:high] - - def __eq__(self, o): - if isinstance(o, AST): - return self.type == o.type \ - and UserList.__eq__(self, o) - else: - return self.type == o - - def __hash__(self): return hash(self.type) - - def __repr__(self, indent=''): - rv = str(self.type) - for k in self: - rv = rv + '\n' + str(k).replace('\n', '\n ') - return rv +nop_func = lambda self, args: None class PythonParser(GenericASTBuilder): @@ -97,71 +69,9 @@ class PythonParser(GenericASTBuilder): return GenericASTBuilder.resolve(self, list) def parse(p, tokens, customize): - ''' - Special handling for opcodes that take a variable number - of arguments -- we add a new rule for each: - - expr ::= {expr}^n BUILD_LIST_n - expr ::= {expr}^n BUILD_TUPLE_n - unpack_list ::= UNPACK_LIST {expr}^n - unpack ::= UNPACK_TUPLE {expr}^n - unpack ::= UNPACK_SEQEUENE {expr}^n - mkfunc ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n - mkfunc ::= {expr}^n load_closure LOAD_CONST MAKE_FUNCTION_n - expr ::= expr {expr}^n CALL_FUNCTION_n - expr ::= expr {expr}^n CALL_FUNCTION_VAR_n POP_TOP - expr ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n POP_TOP - expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP - ''' - nop = lambda self, args: None - - for k, v in list(customize.items()): - # avoid adding the same rule twice to this parser - if k in p.customized: - continue - p.customized[k] = None - - # nop = 
lambda self, args: None - - op = k[:k.rfind('_')] - if op in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET'): - rule = 'build_list ::= ' + 'expr '*v + k - elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'): - rule = 'unpack ::= ' + k + ' designator'*v - elif op == 'UNPACK_LIST': - rule = 'unpack_list ::= ' + k + ' designator'*v - elif op in ('DUP_TOPX', 'RAISE_VARARGS'): - # no need to add a rule - continue - # rule = 'dup_topx ::= ' + 'expr '*v + k - elif op == 'MAKE_FUNCTION': - p.addRule('mklambda ::= %s LOAD_LAMBDA %s' % - ('expr '*v, k), nop) - rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*v, k) - elif op == 'MAKE_CLOSURE': - p.addRule('mklambda ::= %s load_closure LOAD_LAMBDA %s' % - ('expr '*v, k), nop) - p.addRule('genexpr ::= %s load_closure LOAD_GENEXPR %s expr GET_ITER CALL_FUNCTION_1' % - ('expr '*v, k), nop) - p.addRule('setcomp ::= %s load_closure LOAD_SETCOMP %s expr GET_ITER CALL_FUNCTION_1' % - ('expr '*v, k), nop) - p.addRule('dictcomp ::= %s load_closure LOAD_DICTCOMP %s expr GET_ITER CALL_FUNCTION_1' % - ('expr '*v, k), nop) - rule = 'mkfunc ::= %s load_closure LOAD_CONST %s' % ('expr '*v, k) -# rule = 'mkfunc ::= %s closure_list LOAD_CONST %s' % ('expr '*v, k) - elif op in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR', - 'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'): - na = (v & 0xff) # positional parameters - nk = (v >> 8) & 0xff # keyword parameters - # number of apply equiv arguments: - nak = ( len(op)-len('CALL_FUNCTION') ) // 3 - rule = 'call_function ::= expr ' + 'expr '*na + 'kwarg '*nk \ - + 'expr ' * nak + k - else: - raise Exception('unknown customize token %s' % k) - p.addRule(rule, nop) + p.add_custom_rules(tokens, customize) ast = p.parse(tokens) -# p.cleanup() + # p.cleanup() return ast diff --git a/uncompyle6/parsers/astnode.py b/uncompyle6/parsers/astnode.py new file mode 100644 index 00000000..229aef10 --- /dev/null +++ b/uncompyle6/parsers/astnode.py @@ -0,0 +1,31 @@ +import sys +from uncompyle6 import PYTHON3 + +if PYTHON3: + intern = 
sys.intern + from collections import UserList +else: + from UserList import UserList + + +class AST(UserList): + def __init__(self, type, kids=[]): + self.type = intern(type) + UserList.__init__(self, kids) + + def __getslice__(self, low, high): return self.data[low:high] + + def __eq__(self, o): + if isinstance(o, AST): + return self.type == o.type \ + and UserList.__eq__(self, o) + else: + return self.type == o + + def __hash__(self): return hash(self.type) + + def __repr__(self, indent=''): + rv = str(self.type) + for k in self: + rv = rv + '\n' + str(k).replace('\n', '\n ') + return rv diff --git a/uncompyle6/parsers/parse2.py b/uncompyle6/parsers/parse2.py index 8922aa5e..0ee5cf8d 100644 --- a/uncompyle6/parsers/parse2.py +++ b/uncompyle6/parsers/parse2.py @@ -17,7 +17,8 @@ that a later phase can tern into a sequence of ASCII text. from __future__ import print_function -from uncompyle6.parser import PythonParser, AST +from uncompyle6.parser import PythonParser, nop_func +from uncompyle6.parsers.astnode import AST from uncompyle6.parsers.spark import GenericASTBuilder class Python2Parser(PythonParser): @@ -643,3 +644,64 @@ class Python2Parser(PythonParser): nullexprlist ::= ''' + + def add_custom_rules(self, tokens, customize): + """ + Special handling for opcodes that take a variable number + of arguments -- we add a new rule for each: + + expr ::= {expr}^n BUILD_LIST_n + expr ::= {expr}^n BUILD_TUPLE_n + unpack_list ::= UNPACK_LIST {expr}^n + unpack ::= UNPACK_TUPLE {expr}^n + unpack ::= UNPACK_SEQUENCE {expr}^n + mkfunc ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n + mkfunc ::= {expr}^n load_closure LOAD_CONST MAKE_FUNCTION_n + expr ::= expr {expr}^n CALL_FUNCTION_n + expr ::= expr {expr}^n CALL_FUNCTION_VAR_n POP_TOP + expr ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n POP_TOP + expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP + """ + for k, v in list(customize.items()): + # avoid adding the same rule twice to this parser + if k in self.customized: + continue + 
self.customized[k] = None + + op = k[:k.rfind('_')] + if op in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET'): + rule = 'build_list ::= ' + 'expr '*v + k + elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'): + rule = 'unpack ::= ' + k + ' designator'*v + elif op == 'UNPACK_LIST': + rule = 'unpack_list ::= ' + k + ' designator'*v + elif op in ('DUP_TOPX', 'RAISE_VARARGS'): + # no need to add a rule + continue + # rule = 'dup_topx ::= ' + 'expr '*v + k + elif op == 'MAKE_FUNCTION': + self.addRule('mklambda ::= %s LOAD_LAMBDA %s' % + ('expr '*v, k), nop_func) + rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*v, k) + elif op == 'MAKE_CLOSURE': + self.addRule('mklambda ::= %s load_closure LOAD_LAMBDA %s' % + ('expr '*v, k), nop_func) + self.addRule('genexpr ::= %s load_closure LOAD_GENEXPR %s expr GET_ITER CALL_FUNCTION_1' % + ('expr '*v, k), nop_func) + self.addRule('setcomp ::= %s load_closure LOAD_SETCOMP %s expr GET_ITER CALL_FUNCTION_1' % + ('expr '*v, k), nop_func) + self.addRule('dictcomp ::= %s load_closure LOAD_DICTCOMP %s expr GET_ITER CALL_FUNCTION_1' % + ('expr '*v, k), nop_func) + rule = 'mkfunc ::= %s load_closure LOAD_CONST %s' % ('expr '*v, k) + # rule = 'mkfunc ::= %s closure_list LOAD_CONST %s' % ('expr '*v, k) + elif op in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR', + 'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'): + na = (v & 0xff) # positional parameters + nk = (v >> 8) & 0xff # keyword parameters + # number of apply equiv arguments: + nak = ( len(op)-len('CALL_FUNCTION') ) // 3 + rule = 'call_function ::= expr ' + 'expr '*na + 'kwarg '*nk \ + + 'expr ' * nak + k + else: + raise Exception('unknown customize token %s' % k) + self.addRule(rule, nop_func) diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 9b26ab21..b3b61d0d 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -17,12 +17,14 @@ that a later phase can tern into a sequence of ASCII text. 
from __future__ import print_function -from uncompyle6.parser import PythonParser, AST +from uncompyle6.parser import PythonParser, nop_func +from uncompyle6.parsers.astnode import AST from uncompyle6.parsers.spark import GenericASTBuilder class Python3Parser(PythonParser): def __init__(self): + self.added_rules = set() GenericASTBuilder.__init__(self, AST, 'stmts') self.customized = {} @@ -643,3 +645,31 @@ class Python3Parser(PythonParser): nullexprlist ::= ''' + + def add_custom_rules(self, tokens, customize): + new_rules = set() + for token in tokens: + if token.type != 'CALL_FUNCTION': + continue + # Low byte indicates number of positional parameters, + # high byte number of keyword parameters + args_pos = token.attr & 0xff + args_kw = (token.attr >> 8) & 0xff + pos_args_line = '' if args_pos == 0 else ' {}'.format(' '.join('expr' for _ in range(args_pos))) + kw_args_line = '' if args_kw == 0 else ' {}'.format(' '.join('kwarg' for _ in range(args_kw))) + if args_kw == 0: + token.type = 'CALL_FUNCTION_%i' % (args_pos) + rule = ('call_function ::= expr%s%s %s' % + (pos_args_line, kw_args_line, token.type)) + # Make sure we do not add the same rule twice + if rule not in new_rules: + new_rules.add(rule) + self.addRule(rule, nop_func) + customize[token.type] = args_pos + pass + else: + assert False, "Can't handle kw args yet" + new_rules.difference_update(self.added_rules) + for rule in new_rules: + self.addRule(rule, nop_func) + self.added_rules.update(new_rules) diff --git a/uncompyle6/scanners/scanner27.py b/uncompyle6/scanners/scanner27.py index b0fde918..bcdab907 100755 --- a/uncompyle6/scanners/scanner27.py +++ b/uncompyle6/scanners/scanner27.py @@ -209,7 +209,6 @@ class Scanner27(scan.Scanner): rv.append(Token(op_name, oparg, pattr, offset, linestart)) else: rv.append(Token(replace[offset], oparg, pattr, offset, linestart)) - return rv, customize def op_size(self, op): diff --git a/uncompyle6/walker.py b/uncompyle6/walker.py index f5b194d3..90d39bb4 100644 
--- a/uncompyle6/walker.py +++ b/uncompyle6/walker.py @@ -43,7 +43,8 @@ from __future__ import print_function import inspect, sys, re from uncompyle6 import PYTHON3 -from uncompyle6.parser import AST, get_python_parser +from uncompyle6.parser import get_python_parser +from uncompyle6.parsers.astnode import AST from uncompyle6.parsers.spark import GenericASTTraversal import uncompyle6.parser as python_parser from uncompyle6.scanner import Token, Code, get_scanner @@ -115,7 +116,7 @@ TABLE_R = { TABLE_R0 = { # 'BUILD_LIST': ( '[%C]', (0,-1,', ') ), # 'BUILD_TUPLE': ( '(%C)', (0,-1,', ') ), -# 'CALL_FUNCTION': ( '%c(%C)', 0, (1,-1,', ') ), +# 'CALL_FUNCTION': ( '%c(%P)', 0, (1,-1,', ') ), } TABLE_DIRECT = { 'BINARY_ADD': ( '+' ,), @@ -1186,7 +1187,8 @@ class Walker(GenericASTTraversal, object): if k in TABLE_R: continue op = k[ :k.rfind('_') ] - if op == 'CALL_FUNCTION': TABLE_R[k] = ('%c(%P)', 0, (1, -1, ', ', 100)) + if op == 'CALL_FUNCTION': + TABLE_R[k] = ('%c(%P)', 0, (1, -1, ', ', 100)) elif op in ('CALL_FUNCTION_VAR', 'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'): if v == 0: