Python3 positional arguments. Clean up code more along the lines of uncompyle3.

This commit is contained in:
rocky
2015-12-18 17:07:35 -05:00
parent e5f58e8ecb
commit 347219a009
11 changed files with 143 additions and 116 deletions

View File

@@ -1,17 +1,14 @@
 uncompyle6
 ==========
-A Python 2.x and possibly 3.x byte-code decompiler.
-This is written in Python 2.7 but is Python3 compatible.
+A Python 2.x and 3.x byte-code decompiler.
 Introduction
 ------------
 *uncompyle6* converts Python byte-code back into equivalent Python
 source code. It accepts byte-codes from Python version 2.5 to 2.7.
-It runs on Python 2.7 and with a little more work Python 3.
+It runs on Python 2.6 and 2.7 and Python 3.4.
 The generated source is fairly readable: docstrings, lists, tuples and
 hashes are somewhat pretty-printed.
@@ -51,17 +48,6 @@ Features
 - output may be written to file, a directory or to stdout
 - option for including byte-code disassembly into generated source
-Requirements
-------------
-The code runs on Python 2.7. It is compatable with Python3,
-and I've run some tests there, but more work is needed to make that
-more solid.
-Work to support decompyling Python 3 bytecodes and magics is
-still needed.
 Installation
 ------------
@@ -79,6 +65,9 @@ sudo) will do the steps above.
 Testing
 -------
+make check-2.7 # if running on Python 2.7
+make check-3.4 # if running on Python 3.4
 Testing right now is largely via utility `test/test_pythonlib.py`. A
 GNU makefile has been added to smooth over setting up and running the
 right command. If you have remake_ installed, you can see the list of all

Binary file not shown.

Binary file not shown.

View File

@@ -1 +1,5 @@
+# Tests custom added grammar rule:
+#   expr ::= expr {expr}^n CALL_FUNCTION_n
+# which in the specific case below is:
+#   expr ::= expr expr expr CALL_FUNCTION_2
 a(b, c)
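
The comment above describes the rule the parser generates for this call. As a minimal standalone sketch (the helper name call_function_rule is hypothetical, not uncompyle6 API), the argument count is recovered from the opcode name's suffix and one 'expr ' is emitted per positional argument, plus one for the callable itself:

    def call_function_rule(opname):
        # Hypothetical helper: "CALL_FUNCTION_2" -> n = 2
        n = int(opname[opname.rfind('_') + 1:])
        # one 'expr' for the callable, then one per positional argument
        return 'call_function ::= expr ' + 'expr ' * n + opname

    print(call_function_rule('CALL_FUNCTION_2'))
    # call_function ::= expr expr expr CALL_FUNCTION_2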

View File

@@ -38,7 +38,7 @@ def check_object_path(path):
             pass
         pass
     basename = os.path.basename(path)[0:-3]
-    spath = path if PYTHON3 else path.decude('utf-8')
+    spath = path if PYTHON3 else path.decode('utf-8')
     path = tempfile.mkstemp(prefix=basename + '-',
                             suffix='.pyc', text=False)[1]
     py_compile.compile(spath, cfile=path)
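
For context, the surrounding function compiles a source file to a uniquely named temporary .pyc before decompiling it. A minimal standalone sketch of that behavior, assuming a readable example.py exists in the current directory:

    import os
    import py_compile
    import tempfile

    def compile_to_temp_pyc(spath):
        basename = os.path.basename(spath)[0:-3]    # strip trailing ".py"
        # mkstemp returns (fd, path); keep only the unique path
        path = tempfile.mkstemp(prefix=basename + '-',
                                suffix='.pyc', text=False)[1]
        py_compile.compile(spath, cfile=path)       # write bytecode there
        return path

    # compile_to_temp_pyc('example.py')  # e.g. /tmp/example-XXXXXX.pyc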

View File

@@ -8,18 +8,10 @@ Common spark parser routines for Python.
 from __future__ import print_function
-from uncompyle6 import PYTHON3
 import sys
 from uncompyle6.parsers.spark import GenericASTBuilder
-if PYTHON3:
-    intern = sys.intern
-    from collections import UserList
-else:
-    from UserList import UserList
 class ParserError(Exception):
     def __init__(self, token, offset):
         self.token = token
@@ -29,27 +21,7 @@ class ParserError(Exception):
         return "Syntax error at or near `%r' token at offset %s\n" % \
                (self.token, self.offset)
-class AST(UserList):
-    def __init__(self, type, kids=[]):
-        self.type = intern(type)
-        UserList.__init__(self, kids)
-    def __getslice__(self, low, high):  return self.data[low:high]
-    def __eq__(self, o):
-        if isinstance(o, AST):
-            return self.type == o.type \
-                   and UserList.__eq__(self, o)
-        else:
-            return self.type == o
-    def __hash__(self):  return hash(self.type)
-    def __repr__(self, indent=''):
-        rv = str(self.type)
-        for k in self:
-            rv = rv + '\n' + str(k).replace('\n', '\n   ')
-        return rv
+nop_func = lambda self, args: None
 class PythonParser(GenericASTBuilder):
@@ -97,71 +69,9 @@ class PythonParser(GenericASTBuilder):
         return GenericASTBuilder.resolve(self, list)
 def parse(p, tokens, customize):
-    '''
-    Special handling for opcodes that take a variable number
-    of arguments -- we add a new rule for each:
-        expr ::= {expr}^n BUILD_LIST_n
-        expr ::= {expr}^n BUILD_TUPLE_n
-        unpack_list ::= UNPACK_LIST {expr}^n
-        unpack ::= UNPACK_TUPLE {expr}^n
-        unpack ::= UNPACK_SEQEUENE {expr}^n
-        mkfunc ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n
-        mkfunc ::= {expr}^n load_closure LOAD_CONST MAKE_FUNCTION_n
-        expr ::= expr {expr}^n CALL_FUNCTION_n
-        expr ::= expr {expr}^n CALL_FUNCTION_VAR_n POP_TOP
-        expr ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n POP_TOP
-        expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP
-    '''
-    nop = lambda self, args: None
-    for k, v in list(customize.items()):
-        # avoid adding the same rule twice to this parser
-        if k in p.customized:
-            continue
-        p.customized[k] = None
-        # nop = lambda self, args: None
-        op = k[:k.rfind('_')]
-        if op in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET'):
-            rule = 'build_list ::= ' + 'expr '*v + k
-        elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'):
-            rule = 'unpack ::= ' + k + ' designator'*v
-        elif op == 'UNPACK_LIST':
-            rule = 'unpack_list ::= ' + k + ' designator'*v
-        elif op in ('DUP_TOPX', 'RAISE_VARARGS'):
-            # no need to add a rule
-            continue
-            # rule = 'dup_topx ::= ' + 'expr '*v + k
-        elif op == 'MAKE_FUNCTION':
-            p.addRule('mklambda ::= %s LOAD_LAMBDA %s' %
-                      ('expr '*v, k), nop)
-            rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*v, k)
-        elif op == 'MAKE_CLOSURE':
-            p.addRule('mklambda ::= %s load_closure LOAD_LAMBDA %s' %
-                      ('expr '*v, k), nop)
-            p.addRule('genexpr ::= %s load_closure LOAD_GENEXPR %s expr GET_ITER CALL_FUNCTION_1' %
-                      ('expr '*v, k), nop)
-            p.addRule('setcomp ::= %s load_closure LOAD_SETCOMP %s expr GET_ITER CALL_FUNCTION_1' %
-                      ('expr '*v, k), nop)
-            p.addRule('dictcomp ::= %s load_closure LOAD_DICTCOMP %s expr GET_ITER CALL_FUNCTION_1' %
-                      ('expr '*v, k), nop)
-            rule = 'mkfunc ::= %s load_closure LOAD_CONST %s' % ('expr '*v, k)
-            # rule = 'mkfunc ::= %s closure_list LOAD_CONST %s' % ('expr '*v, k)
-        elif op in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
-                    'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
-            na = (v & 0xff)           # positional parameters
-            nk = (v >> 8) & 0xff      # keyword parameters
-            # number of apply equiv arguments:
-            nak = ( len(op)-len('CALL_FUNCTION') ) // 3
-            rule = 'call_function ::= expr ' + 'expr '*na + 'kwarg '*nk \
-                   + 'expr ' * nak + k
-        else:
-            raise Exception('unknown customize token %s' % k)
-        p.addRule(rule, nop)
+    p.add_custom_rules(tokens, customize)
     ast = p.parse(tokens)
     # p.cleanup()
     return ast

View File

@@ -0,0 +1,31 @@
+import sys
+
+from uncompyle6 import PYTHON3
+
+if PYTHON3:
+    intern = sys.intern
+    from collections import UserList
+else:
+    from UserList import UserList
+
+class AST(UserList):
+    def __init__(self, type, kids=[]):
+        self.type = intern(type)
+        UserList.__init__(self, kids)
+
+    def __getslice__(self, low, high):  return self.data[low:high]
+
+    def __eq__(self, o):
+        if isinstance(o, AST):
+            return self.type == o.type \
+                   and UserList.__eq__(self, o)
+        else:
+            return self.type == o
+
+    def __hash__(self):  return hash(self.type)
+
+    def __repr__(self, indent=''):
+        rv = str(self.type)
+        for k in self:
+            rv = rv + '\n' + str(k).replace('\n', '\n   ')
+        return rv
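
A short usage sketch for the class above, assuming the module is importable: children are stored via UserList, and equality compares the interned node type (and, for two AST nodes, the child lists), so a node can be compared directly against a bare type name:

    call = AST('call_function', [AST('expr'), AST('expr')])
    print(call.type)                 # call_function
    print(len(call))                 # 2 -- children come from UserList
    print(call == 'call_function')   # True: falls back to comparing types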

View File

@@ -17,7 +17,8 @@ that a later phase can turn into a sequence of ASCII text.
 from __future__ import print_function
-from uncompyle6.parser import PythonParser, AST
+from uncompyle6.parser import PythonParser, nop_func
+from uncompyle6.parsers.astnode import AST
 from uncompyle6.parsers.spark import GenericASTBuilder
 class Python2Parser(PythonParser):
@@ -643,3 +644,64 @@ class Python2Parser(PythonParser):
         nullexprlist ::=
         '''
+
+    def add_custom_rules(self, tokens, customize):
+        """
+        Special handling for opcodes that take a variable number
+        of arguments -- we add a new rule for each:
+            expr ::= {expr}^n BUILD_LIST_n
+            expr ::= {expr}^n BUILD_TUPLE_n
+            unpack_list ::= UNPACK_LIST {expr}^n
+            unpack ::= UNPACK_TUPLE {expr}^n
+            unpack ::= UNPACK_SEQUENCE {expr}^n
+            mkfunc ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n
+            mkfunc ::= {expr}^n load_closure LOAD_CONST MAKE_FUNCTION_n
+            expr ::= expr {expr}^n CALL_FUNCTION_n
+            expr ::= expr {expr}^n CALL_FUNCTION_VAR_n POP_TOP
+            expr ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n POP_TOP
+            expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP
+        """
+        for k, v in list(customize.items()):
+            # avoid adding the same rule twice to this parser
+            if k in self.customized:
+                continue
+            self.customized[k] = None
+            op = k[:k.rfind('_')]
+            if op in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET'):
+                rule = 'build_list ::= ' + 'expr '*v + k
+            elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'):
+                rule = 'unpack ::= ' + k + ' designator'*v
+            elif op == 'UNPACK_LIST':
+                rule = 'unpack_list ::= ' + k + ' designator'*v
+            elif op in ('DUP_TOPX', 'RAISE_VARARGS'):
+                # no need to add a rule
+                continue
+                # rule = 'dup_topx ::= ' + 'expr '*v + k
+            elif op == 'MAKE_FUNCTION':
+                self.addRule('mklambda ::= %s LOAD_LAMBDA %s' %
+                             ('expr '*v, k), nop_func)
+                rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*v, k)
+            elif op == 'MAKE_CLOSURE':
+                self.addRule('mklambda ::= %s load_closure LOAD_LAMBDA %s' %
+                             ('expr '*v, k), nop_func)
+                self.addRule('genexpr ::= %s load_closure LOAD_GENEXPR %s expr GET_ITER CALL_FUNCTION_1' %
+                             ('expr '*v, k), nop_func)
+                self.addRule('setcomp ::= %s load_closure LOAD_SETCOMP %s expr GET_ITER CALL_FUNCTION_1' %
+                             ('expr '*v, k), nop_func)
+                self.addRule('dictcomp ::= %s load_closure LOAD_DICTCOMP %s expr GET_ITER CALL_FUNCTION_1' %
+                             ('expr '*v, k), nop_func)
+                rule = 'mkfunc ::= %s load_closure LOAD_CONST %s' % ('expr '*v, k)
+                # rule = 'mkfunc ::= %s closure_list LOAD_CONST %s' % ('expr '*v, k)
+            elif op in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
+                        'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
+                na = (v & 0xff)           # positional parameters
+                nk = (v >> 8) & 0xff      # keyword parameters
+                # number of apply equiv arguments:
+                nak = ( len(op)-len('CALL_FUNCTION') ) // 3
+                rule = 'call_function ::= expr ' + 'expr '*na + 'kwarg '*nk \
+                       + 'expr ' * nak + k
+            else:
+                raise Exception('unknown customize token %s' % k)
+            self.addRule(rule, nop_func)
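
The CALL_FUNCTION branch above packs two counts into one operand and infers a third from the opcode name. A standalone sketch of that decoding (decode_call is a hypothetical name, not uncompyle6 API):

    def decode_call(op, v):
        na = v & 0xff                 # low byte: positional parameters
        nk = (v >> 8) & 0xff          # high byte: keyword parameters
        # each '_VAR'/'_KW' suffix adds one stack entry (*args, **kwargs)
        nak = (len(op) - len('CALL_FUNCTION')) // 3
        return na, nk, nak

    print(decode_call('CALL_FUNCTION_VAR_KW', 0x0102))   # (2, 1, 2)
    print(decode_call('CALL_FUNCTION', 3))               # (3, 0, 0)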

View File

@@ -17,12 +17,14 @@ that a later phase can turn into a sequence of ASCII text.
 from __future__ import print_function
-from uncompyle6.parser import PythonParser, AST
+from uncompyle6.parser import PythonParser, nop_func
+from uncompyle6.parsers.astnode import AST
 from uncompyle6.parsers.spark import GenericASTBuilder
 class Python3Parser(PythonParser):
     def __init__(self):
+        self.added_rules = set()
         GenericASTBuilder.__init__(self, AST, 'stmts')
         self.customized = {}
@@ -643,3 +645,31 @@ class Python3Parser(PythonParser):
         nullexprlist ::=
         '''
+
+    def add_custom_rules(self, tokens, customize):
+        new_rules = set()
+        for token in tokens:
+            if token.type != 'CALL_FUNCTION':
+                continue
+            # Low byte indicates number of positional parameters,
+            # high byte number of keyword parameters
+            args_pos = token.attr & 0xff
+            args_kw = (token.attr >> 8) & 0xff
+            pos_args_line = '' if args_pos == 0 else ' {}'.format(' '.join('expr' for _ in range(args_pos)))
+            kw_args_line = '' if args_kw == 0 else ' {}'.format(' '.join('kwarg' for _ in range(args_kw)))
+            if args_kw == 0:
+                token.type = 'CALL_FUNCTION_%i' % (args_pos)
+                rule = ('call_function ::= expr%s%s %s' %
+                        (pos_args_line, kw_args_line, token.type))
+                # Make sure we do not add the same rule twice
+                if rule not in new_rules:
+                    new_rules.add(rule)
+                    self.addRule(rule, nop_func)
+                    customize[token.type] = args_pos
+                    pass
+            else:
+                assert False, "Can't handle kw args yet"
+        new_rules.difference_update(self.added_rules)
+        for rule in new_rules:
+            self.addRule(rule, nop_func)
+        self.added_rules.update(new_rules)
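
Since the Python 3 rules are derived per token rather than from the customize table, the shape of one generated rule can be checked in isolation. A sketch under the same assumptions as the method above (a CALL_FUNCTION token whose attr is 2, i.e. two positional and no keyword arguments, as in a(b, c)):

    attr = 2                                  # token.attr for a(b, c)
    args_pos = attr & 0xff                    # 2 positional arguments
    pos_args_line = '' if args_pos == 0 else \
        ' {}'.format(' '.join('expr' for _ in range(args_pos)))
    token_type = 'CALL_FUNCTION_%i' % args_pos
    print('call_function ::= expr%s %s' % (pos_args_line, token_type))
    # call_function ::= expr expr expr CALL_FUNCTION_2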

View File

@@ -209,7 +209,6 @@ class Scanner27(scan.Scanner):
                 rv.append(Token(op_name, oparg, pattr, offset, linestart))
             else:
                 rv.append(Token(replace[offset], oparg, pattr, offset, linestart))
-
         return rv, customize

     def op_size(self, op):

View File

@@ -43,7 +43,8 @@ from __future__ import print_function
 import inspect, sys, re
 from uncompyle6 import PYTHON3
-from uncompyle6.parser import AST, get_python_parser
+from uncompyle6.parser import get_python_parser
+from uncompyle6.parsers.astnode import AST
 from uncompyle6.parsers.spark import GenericASTTraversal
 import uncompyle6.parser as python_parser
 from uncompyle6.scanner import Token, Code, get_scanner
@@ -115,7 +116,7 @@ TABLE_R = {
 TABLE_R0 = {
 #   'BUILD_LIST':     ( '[%C]',      (0,-1,', ') ),
 #   'BUILD_TUPLE':    ( '(%C)',      (0,-1,', ') ),
-#   'CALL_FUNCTION':  ( '%c(%C)', 0, (1,-1,', ') ),
+#   'CALL_FUNCTION':  ( '%c(%P)', 0, (1,-1,', ') ),
 }
 TABLE_DIRECT = {
     'BINARY_ADD':     ( '+' ,),
@@ -1186,7 +1187,8 @@ class Walker(GenericASTTraversal, object):
             if k in TABLE_R:
                 continue
             op = k[ :k.rfind('_') ]
-            if op == 'CALL_FUNCTION': TABLE_R[k] = ('%c(%P)', 0, (1, -1, ', ', 100))
+            if op == 'CALL_FUNCTION':
+                TABLE_R[k] = ('%c(%P)', 0, (1, -1, ', ', 100))
             elif op in ('CALL_FUNCTION_VAR',
                         'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
                 if v == 0:
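
With the custom grammar rules in place, this loop gives every customized CALL_FUNCTION_n key the same output template. A small sketch of that table update, under the assumption (based on the walker's own conventions) that '%c' expands the callable child and '%P' joins the argument children with ', ':

    TABLE_R = {}
    customize = {'CALL_FUNCTION_2': 2, 'CALL_FUNCTION_3': 3}
    for k, v in customize.items():
        if k in TABLE_R:
            continue
        op = k[:k.rfind('_')]
        if op == 'CALL_FUNCTION':
            # one shared template per arity: callable, then joined args
            TABLE_R[k] = ('%c(%P)', 0, (1, -1, ', ', 100))

    print(sorted(TABLE_R))   # ['CALL_FUNCTION_2', 'CALL_FUNCTION_3']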