From a7455a3801cdfccdf2ccc1900251d2fab0e6168a Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 14 Dec 2015 16:18:56 -0500 Subject: [PATCH] Start to move deparser from python-deparse here. Start Python 3.2 tolerance --- uncompyle6/__init__.py | 7 +- uncompyle6/deparser.py | 1238 ++++++++++++++++++++++++++++++ uncompyle6/disasm/__init__.py | 0 uncompyle6/disasm/dis_25.py | 225 ++++++ uncompyle6/opcodes/opcode_32.py | 180 +++++ uncompyle6/scanner.py | 2 + uncompyle6/scanners/scanner32.py | 485 ++++++++++++ uncompyle6/verify.py | 14 +- 8 files changed, 2145 insertions(+), 6 deletions(-) create mode 100644 uncompyle6/deparser.py create mode 100644 uncompyle6/disasm/__init__.py create mode 100644 uncompyle6/disasm/dis_25.py create mode 100644 uncompyle6/opcodes/opcode_32.py create mode 100644 uncompyle6/scanners/scanner32.py diff --git a/uncompyle6/__init__.py b/uncompyle6/__init__.py index d0a8818c..64da7eaa 100644 --- a/uncompyle6/__init__.py +++ b/uncompyle6/__init__.py @@ -74,9 +74,9 @@ def _load_module(filename): except KeyError: raise ImportError("Unknown magic number %s in %s" % (ord(magic[0])+256*ord(magic[1]), filename)) - if not (2.5 <= version <= 2.7) and not (version == 3.4): + if not (2.5 <= version <= 2.7) and not (3.2 <= version <= 3.4): raise ImportError("This is a Python %s file! Only " - "Python 2.5 to 2.7 and 3.4 files are supported." + "Python 2.5 to 2.7 and 3.2 to 3.4 files are supported." 
% version) # print version @@ -124,6 +124,9 @@ def uncompyle(version, co, out=None, showasm=0, showast=0): elif version == 2.5: import uncompyle6.scanners.scanner25 as scan scanner = scan.Scanner25() + elif version == 3.2: + import uncompyle6.scanners.scanner32 as scan + scanner = scan.Scanner32() elif version == 3.4: import uncompyle6.scanners.scanner34 as scan scanner = scan.Scanner34() diff --git a/uncompyle6/deparser.py b/uncompyle6/deparser.py new file mode 100644 index 00000000..b37ff896 --- /dev/null +++ b/uncompyle6/deparser.py @@ -0,0 +1,1238 @@ +''' + Copyright (c) 1999 John Aycock + Copyright (c) 2000-2002 by hartmut Goebel + Copyright (c) 2005 by Dan Pascu + Copyright (c) 2015 by Rocky Bernstein + + See main module for license. + + + Decompilation (walking AST) + + All table-driven. (rocky: well, mostly. I need to add more format + specifiers for say duplicating info from one node to another.) + + Step 1 determines a table (T) and a path to a + table key (K) from the node type (N) (other nodes are shown as O): + + N N N&K + / | ... \ / | ... \ / | ... \ + O O O O O K O O O + | + K + + MAP_R0 (TABLE_R0) MAP_R (TABLE_R) MAP_DIRECT (TABLE_DIRECT) + + The default is a direct mapping. The key K is then extracted from the + subtree and used to find a table entry T[K], if any. The result is a + format string and arguments (a la printf()) for the formatting engine. + Escapes in the format string are: + + %c evaluate N[A] recursively* + %C evaluate N[A[0]]..N[A[1]-1] recursively, separate by A[2]* + %P same as %C but sets operator precedence + %, print ',' if last %C only printed one item (for tuples--unused) + %| tab to current indentation level + %+ increase current indentation level + %- decrease current indentation level + %{...} evaluate ... in context of N + %% literal '%' + %p evaluate N setting precedence + + + * indicates an argument (A) required. 
+ + The '%' may optionally be followed by a number (C) in square brackets, which + makes the engine walk down to N[C] before evaluating the escape code. +''' + +from __future__ import print_function + +from uncompyle6 import walker +from uncompyle6.walker import escape, PRECEDENCE, minint +from uncompyle6.walker import AST, NONE, find_all_globals +from uncompyle6.walker import find_globals, find_none, INDENT_PER_LEVEL +from uncompyle6.walker import ParserError +from uncompyle6 import parser + +try: + from StringIO import StringIO +except ImportError: + from io import StringIO + +import sys, inspect, types, re + + +# FIXME: remove uncompyle dups +# from uncompyle6.walker import find_all_globals, find_globals, find_none +from uncompyle6.spark import GenericASTTraversal +from uncompyle6.spark import GenericASTTraversalPruningException +from types import CodeType + +try: + from uncompyle6.Scanner import Token, Code + older_uncompyle = True +except ImportError: + from uncompyle6.scanner import Token, Code + older_uncompyle = False + +from collections import namedtuple +NodeInfo = namedtuple("NodeInfo", "node start finish") +ExtractInfo = namedtuple("ExtractInfo", + "lineNo lineStartOffset markerLine selectedLine selectedText") + +class Traverser(walker.Walker, object): + stacked_params = ('f', 'indent', 'isLambda', '_globals') + + def __init__(self, scanner, showast=0): + GenericASTTraversal.__init__(self, ast=None) + self.scanner = scanner + params = {'f': StringIO(), 'indent': '', } + self.showast = showast + self.__params = params + self.__param_stack = [] + self.ERROR = None + self.prec = 100 + self.return_none = False + self.mod_globs = set() + self.currentclass = None + self.pending_newlines = 0 + + self.offsets = {} + self.last_finish = -1 + + f = property(lambda s: s.__params['f'], + lambda s, x: s.__params.__setitem__('f', x), + lambda s: s.__params.__delitem__('f'), + None) + + indent = property(lambda s: s.__params['indent'], + lambda s, x: 
s.__params.__setitem__('indent', x), + lambda s: s.__params.__delitem__('indent'), + None) + + isLambda = property(lambda s: s.__params['isLambda'], + lambda s, x: s.__params.__setitem__('isLambda', x), + lambda s: s.__params.__delitem__('isLambda'), + None) + + _globals = property(lambda s: s.__params['_globals'], + lambda s, x: s.__params.__setitem__('_globals', x), + lambda s: s.__params.__delitem__('_globals'), + None) + + def set_pos_info(self, node, start, finish): + if hasattr(node, 'offset'): + self.offsets[self.name, node.offset] = \ + NodeInfo(node = node, start = start, finish = finish) + + if hasattr(node, 'parent'): + assert node.parent != node + + node.start = start + node.finish = finish + self.last_finish = finish + + def preorder(self, node=None): + + if node is None: + node = self.ast + + start = len(self.f.getvalue()) + + try: + name = 'n_' + self.typestring(node) + if hasattr(self, name): + func = getattr(self, name) + func(node) + else: + self.default(node) + except GenericASTTraversalPruningException: + # All leaf nodes, those with the offset method among others + # seems to fit under this exception. 
If this is not true + # we would need to duplicate the below code
Token('YIELD_VALUE')]): + self.write(' ') + node[0].parent = node + self.preorder(node[0]) + self.set_pos_info(node, start, len(self.f.getvalue())) + self.prune() # stop recursing + + def n_buildslice3(self, node): + start = len(self.f.getvalue()) + p = self.prec + self.prec = 100 + if node[0] != NONE: + self.preorder(node[0]) + self.write(':') + if node[1] != NONE: + self.preorder(node[1]) + self.write(':') + if node[2] != NONE: + self.preorder(node[2]) + self.prec = p + self.set_pos_info(node, start, len(self.f.getvalue())) + self.prune() # stop recursing + + def n_buildslice2(self, node): + start = len(self.f.getvalue()) + p = self.prec + self.prec = 100 + if node[0] != NONE: + node[0].parent = node + self.preorder(node[0]) + self.write(':') + if node[1] != NONE: + node[1].parent = node + self.preorder(node[1]) + self.prec = p + self.set_pos_info(node, start, len(self.f.getvalue())) + self.prune() # stop recursing + + def n_expr(self, node): + start = len(self.f.getvalue()) + p = self.prec + if node[0].type.startswith('binary_expr'): + n = node[0][-1][0] + else: + n = node[0] + self.prec = PRECEDENCE.get(n, -2) + if n == 'LOAD_CONST' and repr(n.pattr)[0] == '-': + n.parent = node + self.set_pos_info(n, start, len(self.f.getvalue())) + self.prec = 6 + if p < self.prec: + self.write('(') + node[0].parent = node + self.last_finish = len(self.f.getvalue()) + self.preorder(node[0]) + self.write(')') + self.last_finish = len(self.f.getvalue()) + else: + node[0].parent = node + self.preorder(node[0]) + self.prec = p + self.set_pos_info(node, start, len(self.f.getvalue())) + self.prune() + + def n_ret_expr(self, node): + start = len(self.f.getvalue()) + if len(node) == 1 and node[0] == 'expr': + node[0].parent = node + self.n_expr(node[0]) + else: + self.n_expr(node) + self.set_pos_info(node, start, len(self.f.getvalue())) + + def n_binary_expr(self, node): + start = len(self.f.getvalue()) + node[0].parent = node + self.last_finish = len(self.f.getvalue()) + 
self.preorder(node[0]) + self.write(' ') + node[-1].parent = node + self.preorder(node[-1]) + self.write(' ') + self.prec -= 1 + node[1].parent = node + self.preorder(node[1]) + self.prec += 1 + self.set_pos_info(node, start, len(self.f.getvalue())) + self.prune() + + def n_LOAD_CONST(self, node): + start = len(self.f.getvalue()) + data = node.pattr; datatype = type(data) + if isinstance(datatype, int) and data == minint: + # convert to hex, since decimal representation + # would result in 'LOAD_CONST; UNARY_NEGATIVE' + # change:hG/2002-02-07: this was done for all negative integers + # todo: check whether this is necessary in Python 2.1 + self.write( hex(data) ) + elif datatype is type(Ellipsis): + self.write('...') + elif data is None: + # LOAD_CONST 'None' only occurs, when None is + # implicit eg. in 'return' w/o params + # pass + self.write('None') + else: + self.write(repr(data)) + self.set_pos_info(node, start, len(self.f.getvalue())) + # LOAD_CONST is a terminal, so stop processing/recursing early + self.prune() + + def n_exec_stmt(self, node): + """ + exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT + exec_stmt ::= expr exprlist EXEC_STMT + """ + start = len(self.f.getvalue()) + len(self.indent) + self.write(self.indent, 'exec ') + self.preorder(node[0]) + if node[1][0] != NONE: + sep = ' in ' + for subnode in node[1]: + self.write(sep); sep = ", " + self.preorder(subnode) + self.set_pos_info(node, start, len(self.f.getvalue())) + self.print_() + self.prune() # stop recursing + + def n_ifelsestmtr(self, node): + if len(node[2]) != 2: + self.default(node) + + if not (node[2][0][0][0] == 'ifstmt' and node[2][0][0][0][1][0] == 'return_if_stmts') \ + and not (node[2][0][-1][0] == 'ifstmt' and node[2][0][-1][0][1][0] == 'return_if_stmts'): + self.default(node) + return + + start = len(self.f.getvalue()) + len(self.indent) + self.write(self.indent, 'if ') + self.preorder(node[0]) + self.print_(':') + self.indentMore() + node[1].parent = node + 
self.preorder(node[1]) + self.indentLess() + + if_ret_at_end = False + if len(node[2][0]) >= 3: + node[2][0].parent = node + if node[2][0][-1][0] == 'ifstmt' and node[2][0][-1][0][1][0] == 'return_if_stmts': + if_ret_at_end = True + + past_else = False + prev_stmt_is_if_ret = True + for n in node[2][0]: + if (n[0] == 'ifstmt' and n[0][1][0] == 'return_if_stmts'): + if prev_stmt_is_if_ret: + n[0].type = 'elifstmt' + prev_stmt_is_if_ret = True + else: + prev_stmt_is_if_ret = False + if not past_else and not if_ret_at_end: + self.print_(self.indent, 'else:') + self.indentMore() + past_else = True + n.parent = node + self.preorder(n) + if not past_else or if_ret_at_end: + self.print_(self.indent, 'else:') + self.indentMore() + node[2][1].parent = node + self.preorder(node[2][1]) + self.set_pos_info(node, start, len(self.f.getvalue())) + self.indentLess() + self.prune() + + def n_elifelsestmtr(self, node): + if len(node[2]) != 2: + self.default(node) + + for n in node[2][0]: + if not (n[0] == 'ifstmt' and n[0][1][0] == 'return_if_stmts'): + self.default(node) + return + + start = len(self.f.getvalue() + self.indent) + self.write(self.indent, 'elif ') + node[0].parent = node + self.preorder(node[0]) + self.print_(':') + self.indentMore() + node[1].parent = node + self.preorder(node[1]) + self.indentLess() + + for n in node[2][0]: + n[0].type = 'elifstmt' + n.parent = node + self.preorder(n) + self.print_(self.indent, 'else:') + self.indentMore() + node[2][1].parent = node + self.preorder(node[2][1]) + self.indentLess() + self.set_pos_info(node, start, len(self.f.getvalue())) + self.prune() + + def n_import_as(self, node): + start = len(self.f.getvalue()) + iname = node[0].pattr + assert node[-1][-1].type.startswith('STORE_') + sname = node[-1][-1].pattr # assume one of STORE_.... 
here + self.write(iname) + finish = len(self.f.getvalue()) + if iname == sname or iname.startswith(sname + '.'): + self.set_pos_info_recurse(node, start, finish) + else: + self.write(' as ') + sname_start = len(self.f.getvalue()) + self.write(sname) + finish = len(self.f.getvalue()) + for n in node[-1]: + self.set_pos_info_recurse(n, sname_start, finish) + self.set_pos_info(node, start, finish) + self.prune() # stop recursing + + def n_mkfunc(self, node): + start = len(self.f.getvalue()) + old_name = self.name + self.name = node[-2].attr.co_name # code.co_name + self.write(self.name) + self.indentMore() + self.make_function(node, isLambda=0) + self.name = old_name + self.set_pos_info(node, start, len(self.f.getvalue())) + if len(self.__param_stack) > 1: + self.write('\n\n') + else: + self.write('\n\n\n') + self.indentLess() + self.prune() # stop recursing + + def comprehension_walk(self, node, iter_index): + p = self.prec + self.prec = 27 + code = node[-5].attr + + assert isinstance(code, types.CodeType) + code = Code(code, self.scanner, self.currentclass) + # assert isinstance(code, Code) + + ast = self.build_ast_d(code._tokens, code._customize) + self.customize(code._customize) + ast = ast[0][0][0] + + n = ast[iter_index] + assert n == 'comp_iter' + # find innerst node + while n == 'comp_iter': + n = n[0] # recurse one step + if n == 'comp_for': n = n[3] + elif n == 'comp_if': n = n[2] + elif n == 'comp_ifnot': n = n[2] + assert n == 'comp_body', ast + + self.preorder(n[0]) + self.write(' for ') + start = len(self.f.getvalue()) + self.preorder(ast[iter_index-1]) + self.set_pos_info(node, start, len(self.f.getvalue())) + self.write(' in ') + start = len(self.f.getvalue()) + node[-3].parent = node + self.preorder(node[-3]) + self.set_pos_info(node, start, len(self.f.getvalue())) + self.preorder(ast[iter_index]) + self.prec = p + + def n_genexpr(self, node): + start = len(self.f.getvalue()) + self.write('(') + self.comprehension_walk(node, 3) + self.write(')') + 
self.set_pos_info(node, start, len(self.f.getvalue())) + self.prune() + + def n_setcomp(self, node): + start = len(self.f.getvalue()) + self.write('{') + self.comprehension_walk(node, 4) + self.write('}') + self.set_pos_info(node, start, len(self.f.getvalue())) + self.prune() + + def n_classdef(self, node): + # class definition ('class X(A,B,C):') + cclass = self.currentclass + self.currentclass = str(node[0].pattr) + + self.write('\n\n') + start = len(self.f.getvalue()) + self.write(self.indent, 'class ', self.currentclass) + self.print_super_classes(node) + self.print_(':') + + # class body + self.indentMore() + self.build_class(node[2][-2].attr) + self.indentLess() + + self.currentclass = cclass + self.set_pos_info(node, start, len(self.f.getvalue())) + if len(self.__param_stack) > 1: + self.write('\n\n') + else: + self.write('\n\n\n') + + self.prune() + + def gen_source_d(self, ast, name, customize, isLambda=0, returnNone=False): + """convert AST to source code""" + + rn = self.return_none + self.return_none = returnNone + self.name = name + # if code would be empty, append 'pass' + if len(ast) == 0: + self.print_(self.indent, 'pass') + else: + self.customize(customize) + self.text = self.traverse(ast, isLambda=isLambda) + self.return_none = rn + + def build_ast_d(self, tokens, customize, isLambda=0, noneInNames=False): + # assert type(tokens) == ListType + # assert isinstance(tokens[0], Token) + + if isLambda: + tokens.append(Token('LAMBDA_MARKER')) + try: + ast = parser.parse(tokens, customize) + except parser.ParserError as e: + raise ParserError(e, tokens) + if self.showast: + print(repr(ast)) + return ast + + if len(tokens) >= 2 and not noneInNames: + if tokens[-1] == Token('RETURN_VALUE'): + if tokens[-2] != Token('LOAD_CONST'): + tokens.append(Token('RETURN_LAST')) + if len(tokens) == 0: + return + + # Build AST from disassembly. 
+ try: + ast = parser.parse(tokens, customize) + except parser.ParserError as e: + raise ParserError(e, tokens) + + if self.showast: + print(repr(ast)) + + return ast + + # FIXME: we could provide another customized routine + # that fixes up parents along a particular path to a node that + # we are interested in. + def fixup_parents(self, node, parent): + """Make sure each node has a parent""" + start, finish = 0, self.last_finish + # We assume anything with a start has a finish. + needs_range = not hasattr(node, 'start') + + if not hasattr(node, 'parent'): + node.parent = parent + + for n in node: + if needs_range and hasattr(n, 'start'): + if n.start < start: start = n.start + if n.finish > finish: finish = n.finish + + if hasattr(n, 'offset') and not hasattr(n, 'parent'): + n.parent = node + else: + self.fixup_parents(n, node) + pass + pass + if needs_range: + node.start, node.finish = start, finish + + return + + # FIXME: revise to do *once* over the entire tree. + # So here we should just mark that the subtree + # needs offset adjustment. 
+ def fixup_offsets(self, new_start, node): + """Adjust all offsets under node""" + if hasattr(node, 'start'): + node.start += new_start + node.finish += new_start + for n in node: + if hasattr(n, 'offset'): + if hasattr(n, 'start'): + n.start += new_start + n.finish += new_start + else: + self.fixup_offsets(new_start, n) + return + + def set_pos_info_recurse(self, node, start, finish): + """Set positions under node""" + self.set_pos_info(node, start, finish) + for n in node: + if hasattr(n, 'offset'): + self.set_pos_info(n, start, finish) + else: + self.set_pos_info_recurse(n, start, finish) + return + + def node_append(self, before_str, node_text, node): + self.write(before_str) + self.last_finish = len(self.f.getvalue()) + self.fixup_offsets(self.last_finish, node) + self.write(node_text) + self.last_finish = len(self.f.getvalue()) + + # FIXME; below duplicated the code, since we don't find self.__params + def traverse(self, node, indent=None, isLambda=0): + '''Buulds up fragment which can be used inside a larger + block of code''' + + self.__param_stack.append(self.__params) + if indent is None: indent = self.indent + p = self.pending_newlines + self.pending_newlines = 0 + self.__params = { + '_globals': {}, + 'f': StringIO(), + 'indent': indent, + 'isLambda': isLambda, + } + self.preorder(node) + self.f.write('\n'*self.pending_newlines) + + text = self.f.getvalue() + self.last_finish = len(text) + + self.__params = self.__param_stack.pop() + self.pending_newlines = p + + return text + + def extract_node_info(self, nodeInfo): + # XXX debug + # print('-' * 30) + # node = nodeInfo.node + # print(node) + # if hasattr(node, 'parent'): + # print('~' * 30) + # print(node.parent) + # else: + # print("No parent") + # print('-' * 30) + + start, finish = (nodeInfo.start, nodeInfo.finish) + text = self.text + + # Ignore trailing blanks + match = re.search(r'\n+$', text[start:]) + if match: + text = text[:-len(match.group(0))] + + # Ignore leading blanks + match = 
re.search(r'\s*[^ \t\n]', text[start:]) + if match: + start += len(match.group(0))-1 + + at_end = False + if start >= finish: + at_end = True + selectedText = text + else: + selectedText = text[start:finish] + + # Compute offsets relative to the beginning of the + # line rather than the beinning of the text + try: + lineStart = text[:start].rindex("\n") + 1 + except ValueError: + lineStart = 0 + adjustedStart = start - lineStart + + # If selected text is greater than a single line + # just show the first line plus elipses. + lines = selectedText.split("\n") + if len(lines) > 1: + adjustedEnd = len(lines[0]) - adjustedStart + selectedText = lines[0] + " ...\n" + lines[-1] + else: + adjustedEnd = len(selectedText) + + if at_end: + markerLine = (' ' * len(lines[-1])) + '^' + else: + markerLine = ((' ' * adjustedStart) + + ('-' * adjustedEnd)) + + elided = False + if len(lines) > 1 and not at_end: + elided = True + markerLine += ' ...' + + # Get line that the selected text is in and + # get a line count for that. + try: + lineEnd = lineStart + text[lineStart+1:].index("\n") - 1 + except ValueError: + lineEnd = len(text) + + lines = text[:lineEnd].split("\n") + + selectedLine = text[lineStart:lineEnd+2] + + if elided: selectedLine += ' ...' 
+ + return ExtractInfo(lineNo = len(lines), lineStartOffset = lineStart, + markerLine = markerLine, + selectedLine = selectedLine, + selectedText = selectedText) + + def extract_line_info(self, name, offset): + if (name, offset) not in list(self.offsets.keys()): + return None + return self.extract_node_info(self.offsets[name, offset]) + + def extract_parent_info(self, node): + if not hasattr(node, 'parent'): + return None, None + p = node.parent + orig_parent = p + # If we can get different text, use that as the parent, + # otherwise we'll use the immeditate parent + while (p and (hasattr(p, 'parent') and + p.start == node.start and p.finish == node.finish)): + assert p != node + node = p + p = p.parent + if p is None: p = orig_parent + return self.extract_node_info(p), p + + def print_super_classes(self, node): + node[1][0].parent = node + node = node[1][0] + if not (node == 'build_list'): + return + + start = len(self.f.getvalue()) + self.write('(') + line_separator = ', ' + sep = '' + for elem in node[:-1]: + value = self.traverse(elem) + self.node_append(sep, value, elem) + # self.write(sep, value) + sep = line_separator + + self.write(')') + self.set_pos_info(node, start, len(self.f.getvalue())) + + def n_mapexpr(self, node): + """ + prettyprint a mapexpr + 'mapexpr' is something like k = {'a': 1, 'b': 42 }" + """ + p = self.prec + self.prec = 100 + assert node[-1] == 'kvlist' + kv_node = node[-1] # goto kvlist + + self.indentMore(INDENT_PER_LEVEL) + line_seperator = ',\n' + self.indent + sep = INDENT_PER_LEVEL[:-1] + start = len(self.f.getvalue()) + self.write('{') + for kv in kv_node: + assert kv in ('kv', 'kv2', 'kv3') + # kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR + # kv2 ::= DUP_TOP expr expr ROT_THREE STORE_SUBSCR + # kv3 ::= expr expr STORE_MAP + if kv == 'kv': + name = self.traverse(kv[-2], indent='') + kv[1].parent = kv_node + value = self.traverse(kv[1], indent=self.indent+(len(name)+2)*' ') + elif kv == 'kv2': + name = self.traverse(kv[1], 
indent='') + kv[-3].parent = kv_node + value = self.traverse(kv[-3], indent=self.indent+(len(name)+2)*' ') + elif kv == 'kv3': + name = self.traverse(kv[-2], indent='') + kv[0].parent = kv_node + value = self.traverse(kv[0], indent=self.indent+(len(name)+2)*' ') + self.write(sep, name, ': ', value) + sep = line_seperator + self.write('}') + finish = len(self.f.getvalue()) + for n in node: + n.parent = node + self.set_pos_info(n, start, finish) + self.set_pos_info(node, start, finish) + self.indentLess(INDENT_PER_LEVEL) + self.prec = p + self.prune() + + def n_build_list(self, node): + """ + prettyprint a list or tuple + """ + p = self.prec + self.prec = 100 + n = node.pop() + lastnode = n.type + start = len(self.f.getvalue()) + if lastnode.startswith('BUILD_LIST'): + self.write('['); endchar = ']' + elif lastnode.startswith('BUILD_TUPLE'): + self.write('('); endchar = ')' + elif lastnode.startswith('BUILD_SET'): + self.write('{'); endchar = '}' + elif lastnode.startswith('ROT_TWO'): + self.write('('); endchar = ')' + else: + raise RuntimeError('Internal Error: n_build_list expects list or tuple') + + self.indentMore(INDENT_PER_LEVEL) + if len(node) > 3: + line_separator = ',\n' + self.indent + else: + line_separator = ', ' + sep = INDENT_PER_LEVEL[:-1] + for elem in node: + if (elem == 'ROT_THREE'): + continue + + assert elem == 'expr' + value = self.traverse(elem) + self.node_append(sep, value, elem) + sep = line_separator + if len(node) == 1 and lastnode.startswith('BUILD_TUPLE'): + self.write(',') + self.write(endchar) + finish = len(self.f.getvalue()) + n.parent = node.parent + self.set_pos_info(n, start, finish) + self.set_pos_info(node, start, finish) + self.indentLess(INDENT_PER_LEVEL) + self.prec = p + self.prune() + + def engine(self, entry, startnode): + '''The format template interpetation engine. See the comment at the + beginning of this module for the how we interpret format specifications such as + %c, %C, and so on. 
+ ''' + + # print("-----") + # print(startnode) + # print(entry[0]) + # print('======') + + startnode_start = len(self.f.getvalue()) + + fmt = entry[0] + arg = 1 + i = 0 + lastC = -1 + + m = escape.search(fmt) + while m: + i = m.end() + self.write(m.group('prefix')) + + typ = m.group('type') or '{' + node = startnode + try: + if m.group('child'): + node = node[int(m.group('child'))] + node.parent = startnode + except: + print(node.__dict__) + raise + + if typ == '%': + start = len(self.f.getvalue()) + self.write('%') + self.set_pos_info(node, start, len(self.f.getvalue())) + + elif typ == '+': self.indentMore() + elif typ == '-': self.indentLess() + elif typ == '|': self.write(self.indent) + # no longer used, since BUILD_TUPLE_n is pretty printed: + elif typ == ',': + if lastC == 1: + self.write(',') + elif typ == 'c': + start = len(self.f.getvalue()) + self.preorder(node[entry[arg]]) + finish = len(self.f.getvalue()) + + # FIXME rocky: figure out how to get this to be table driven + # for loops have two positions that correspond to a single text + # location. In "for i in ..." 
there is the initialization "i" code as well + # as the iteration code with "i" + match = re.search(r'^for', startnode.type) + if match and entry[arg] == 3: + self.set_pos_info(node[0], start, finish) + for n in node[2]: + self.set_pos_info(n, start, finish) + + self.set_pos_info(node, start, finish) + arg += 1 + elif typ == 'p': + p = self.prec + (index, self.prec) = entry[arg] + node[index].parent = node + start = len(self.f.getvalue()) + self.preorder(node[index]) + self.set_pos_info(node, start, len(self.f.getvalue())) + self.prec = p + arg += 1 + elif typ == 'C': + low, high, sep = entry[arg] + lastC = remaining = len(node[low:high]) + start = len(self.f.getvalue()) + for subnode in node[low:high]: + self.preorder(subnode) + remaining -= 1 + if remaining > 0: + self.write(sep) + + self.set_pos_info(node, start, len(self.f.getvalue())) + arg += 1 + elif typ == 'P': + p = self.prec + low, high, sep, self.prec = entry[arg] + lastC = remaining = len(node[low:high]) + start = self.last_finish + for subnode in node[low:high]: + self.preorder(subnode) + remaining -= 1 + if remaining > 0: + self.write(sep) + self.prec = p + arg += 1 + + elif typ == '{': + d = node.__dict__ + expr = m.group('expr') + try: + start = len(self.f.getvalue()) + self.write(eval(expr, d, d)) + self.set_pos_info(node, start, len(self.f.getvalue())) + except: + print(node) + raise + m = escape.search(fmt, i) + if hasattr(node, 'offset') and (self.name, node.offset) not in self.offsets: + print("Type %s of node %s has an offset %d" % (typ, node, node.offset)) + pass + pass + + self.write(fmt[i:]) + self.set_pos_info(startnode, startnode_start, len(self.f.getvalue())) + + # FIXME rocky: figure out how to get these casess to be table driven. + # + # 1. for loops. For loops have two positions that correspond to a single text + # location. In "for i in ..." there is the initialization "i" code as well + # as the iteration code with "i". 
A "copy" spec like %X3,3 - copy parame + # 3 to param 2 would work + # + # 2. subroutine calls. It the last op is the call and for purposes of printing + # we don't need to print anything special there. However it encompases the + # entire string of the node fn(...) + match = re.search(r'^try', startnode.type) + if match: + self.set_pos_info(node[0], startnode_start, startnode_start+len("try:")) + self.set_pos_info(node[2], node[3].finish, node[3].finish) + else: + match = re.search(r'^call_function', startnode.type) + if match: + last_node = startnode[-1] + # import traceback; traceback.print_stack() + self.set_pos_info(last_node, startnode_start, self.last_finish) + return + + def make_function(self, node, isLambda, nested=1): + """Dump function defintion, doc string, and function body.""" + + def build_param(ast, name, default): + """build parameters: + - handle defaults + - handle format tuple parameters + """ + # if formal parameter is a tuple, the paramater name + # starts with a dot (eg. 
'.1', '.2') + if name.startswith('.'): + # replace the name with the tuple-string + name = self.get_tuple_parameter(ast, name) + + if default: + if self.showast: + print('--', name) + print(default) + print('--') + pass + result = '%s = ' % name + old_last_finish = self.last_finish + self.last_finish = len(result) + value = self.traverse(default, indent='') + self.last_finish = old_last_finish + result += value + if result[-2:] == '= ': # default was 'LOAD_CONST None' + result += 'None' + return result + else: + return name + + defparams = node[:node[-1].attr] # node[-1] == MAKE_xxx_n + code = node[-2].attr + + assert type(code) == CodeType + code = Code(code, self.scanner, self.currentclass) + # assert isinstance(code, Code) + + # add defaults values to parameter names + argc = code.co_argcount + paramnames = list(code.co_varnames[:argc]) + + # defaults are for last n parameters, thus reverse + paramnames.reverse(); defparams.reverse() + + try: + ast = self.build_ast_d(code._tokens, + code._customize, + isLambda = isLambda, + noneInNames = ('None' in code.co_names)) + except ParserError as p: + self.write( str(p)) + self.ERROR = p + return + + # build parameters + + # This would be a nicer piece of code, but I can't get this to work + # now, have to find a usable lambda constuct hG/2000-09-05 + # params = map(lambda name, default: build_param(ast, name, default), + # paramnames, defparams) + params = [] + for name, default in map(lambda a, b: (a, b), paramnames, defparams): + params.append( build_param(ast, name, default) ) + + params.reverse() # back to correct order + + if 4 & code.co_flags: # flag 2 -> variable number of args + params.append('*%s' % code.co_varnames[argc]) + argc += 1 + if 8 & code.co_flags: # flag 3 -> keyword args + params.append('**%s' % code.co_varnames[argc]) + argc += 1 + + # dump parameter list (with default values) + indent = self.indent + if isLambda: + self.write("lambda ", ", ".join(params), ": ") + else: + self.print_("(", ", 
".join(params), "):") + # self.print_(indent, '#flags:\t', int(code.co_flags)) + + if len(code.co_consts)>0 and code.co_consts[0] is not None and not isLambda: # ugly + # docstring exists, dump it + self.print_docstring(indent, code.co_consts[0]) + + code._tokens = None # save memory + assert ast == 'stmts' + + all_globals = find_all_globals(ast, set()) + for g in ((all_globals & self.mod_globs) | find_globals(ast, set())): + self.print_(self.indent, 'global ', g) + self.mod_globs -= all_globals + rn = ('None' in code.co_names) and not find_none(ast) + self.gen_source_d(ast, code.co_name, code._customize, isLambda=isLambda, + returnNone=rn) + code._tokens = None; code._customize = None # save memory + + pass + +def deparse(version, co, out=StringIO(), showasm=0, showast=0): + assert isinstance(co, types.CodeType) + # store final output stream for case of error + __real_out = out or sys.stdout + try: + import uncompyle6.Scanner as scan + scanner = scan.Scanner(version) + except ImportError: + if version == 2.5: + import uncompyle6.scanners.scanner25 as scan + scanner = scan.Scanner25() + elif version == 2.6: + import uncompyle6.scanners.scanner26 as scan + scanner = scan.Scanner26() + elif version == 2.7: + import uncompyle6.scanners.scanner27 as scan + scanner = scan.Scanner27() + elif version == 3.2: + import uncompyle6.scanners.scanner34 as scan + scanner = scan.Scanner32() + elif version == 3.4: + import uncompyle6.scanners.scanner34 as scan + scanner = scan.Scanner34() + + scanner.setShowAsm(showasm, out) + tokens, customize = scanner.disassemble(co) + + # Build AST from disassembly. 
+ # walk = walker.Walker(out, scanner, showast=showast) + walk = Traverser(scanner, showast=showast) + + try: + if older_uncompyle: + walk.ast = walk.build_ast_d(tokens, customize) + else: + walk.ast = walk.build_ast_d(tokens, customize) + except walker.ParserError as e : # parser failed, dump disassembly + print(e, file=__real_out) + raise + + del tokens # save memory + + # convert leading '__doc__ = "..." into doc string + assert walk.ast == 'stmts' + walk.mod_globs = walker.find_globals(walk.ast, set()) + walk.gen_source_d(walk.ast, co.co_name, customize) + walk.set_pos_info(walk.ast, 0, len(walk.text)) + walk.fixup_parents(walk.ast, None) + + for g in walk.mod_globs: + walk.write('global %s ## Warning: Unused global' % g) + if walk.ERROR: + raise walk.ERROR + + return walk + +if __name__ == '__main__': + + def deparse_test(co): + sys_version = sys.version_info.major + (sys.version_info.minor / 10.0) + walk = deparse(sys_version, co, showasm=1, showast=1) + print("deparsed source") + print(walk.text, "\n") + print('------------------------') + for name, offset in sorted(walk.offsets.keys()): + print("name %s, offset %s" % (name, offset)) + nodeInfo = walk.offsets[name, offset] + node = nodeInfo.node + extractInfo = walk.extract_node_info(node) + print("code: %s" % node.type) + # print extractInfo + print(extractInfo.selectedText) + print(extractInfo.selectedLine) + print(extractInfo.markerLine) + extractInfo, p = walk.extract_parent_info(node) + if extractInfo: + print("Contained in...") + print(extractInfo.selectedLine) + print(extractInfo.markerLine) + print("code: %s" % p.type) + print('=' * 40) + pass + pass + return + + def get_code_for_fn(fn): + return fn.__code__ + + def foo(a, **options): + def bar(a, b=1, c=2): + print("a, b, c= ", a, int(b), c) + bar(a, **options) + options = {'c': 5, 'b': 10} + bar(a, **options) + return None + + def check_args(args): + deparse_test(inspect.currentframe().f_code) + for i in range(2): + try: + i = int(args[i]) + except 
ValueError: + print("** Expecting an integer, got: %s" % repr(args[i])) + sys.exit(2) + pass + pass + + def gcd(a, b): + if a > b: + (a, b) = (b, a) + pass + + if a <= 0: + return None + if a == 1 or a == b: + return a + return gcd(b-a, a) + + # check_args(['3', '5']) + deparse_test(get_code_for_fn(gcd)) + # deparse_test(get_code_for_fn(gcd)) + # deparse_test(get_code_for_fn(Traverser.fixup_offsets)) + # deparse_test(inspect.currentframe().f_code) diff --git a/uncompyle6/disasm/__init__.py b/uncompyle6/disasm/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/uncompyle6/disasm/dis_25.py b/uncompyle6/disasm/dis_25.py new file mode 100644 index 00000000..7cce05e8 --- /dev/null +++ b/uncompyle6/disasm/dis_25.py @@ -0,0 +1,225 @@ +"""Disassembler of Python byte code into mnemonics.""" + +import sys +import types + +from uncompyle6.opcodes.opcode_25 import * +from uncompyle6.opcodes.opcode_25 import __all__ as _opcodes_all + +__all__ = ["dis", "disassemble", "distb", "disco"] + _opcodes_all +del _opcodes_all + +def dis(x=None): + """Disassemble classes, methods, functions, or code. + + With no argument, disassemble the last traceback. 
+ + """ + print "WOOT WOOT" + if x is None: + distb() + return + if isinstance(x, types.InstanceType): + x = x.__class__ + if hasattr(x, 'im_func'): + x = x.im_func + if hasattr(x, 'func_code'): + x = x.func_code + if hasattr(x, '__dict__'): + items = x.__dict__.items() + items.sort() + for name, x1 in items: + if type(x1) in (types.MethodType, + types.FunctionType, + types.CodeType, + types.ClassType): + print "Disassembly of %s:" % name + try: + dis(x1) + except TypeError, msg: + print "Sorry:", msg + print + elif hasattr(x, 'co_code'): + disassemble(x) + elif isinstance(x, str): + disassemble_string(x) + else: + raise TypeError( + "don't know how to disassemble %s objects" % + type(x).__name__) + +def distb(tb=None): + """Disassemble a traceback (default: last traceback).""" + if tb is None: + try: + tb = sys.last_traceback + except AttributeError: + raise RuntimeError("no last traceback to disassemble") + while tb.tb_next: tb = tb.tb_next + disassemble(tb.tb_frame.f_code, tb.tb_lasti) + +def disassemble(co, lasti=-1): + """Disassemble a code object.""" + code = co.co_code + labels = findlabels(code) + linestarts = dict(findlinestarts(co)) + n = len(code) + i = 0 + extended_arg = 0 + free = None + while i < n: + c = code[i] + op = ord(c) + if i in linestarts: + if i > 0: + print + print "%3d" % linestarts[i], + else: + print ' ', + + if i == lasti: print '-->', + else: print ' ', + if i in labels: print '>>', + else: print ' ', + print repr(i).rjust(4), + print opname[op].ljust(20), + i = i+1 + if op >= HAVE_ARGUMENT: + oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg + extended_arg = 0 + i = i+2 + if op == EXTENDED_ARG: + extended_arg = oparg*65536L + print repr(oparg).rjust(5), + if op in hasconst: + print '(' + repr(co.co_consts[oparg]) + ')', + elif op in hasname: + print '(' + co.co_names[oparg] + ')', + elif op in hasjrel: + print '(to ' + repr(i + oparg) + ')', + elif op in haslocal: + print '(' + co.co_varnames[oparg] + ')', + elif op in 
hascompare: + print '(' + cmp_op[oparg] + ')', + elif op in hasfree: + if free is None: + free = co.co_cellvars + co.co_freevars + print '(' + free[oparg] + ')', + print + +def disassemble_string(code, lasti=-1, varnames=None, names=None, + constants=None): + labels = findlabels(code) + n = len(code) + i = 0 + while i < n: + c = code[i] + op = ord(c) + if i == lasti: print '-->', + else: print ' ', + if i in labels: print '>>', + else: print ' ', + print repr(i).rjust(4), + print opname[op].ljust(15), + i = i+1 + if op >= HAVE_ARGUMENT: + oparg = ord(code[i]) + ord(code[i+1])*256 + i = i+2 + print repr(oparg).rjust(5), + if op in hasconst: + if constants: + print '(' + repr(constants[oparg]) + ')', + else: + print '(%d)' % oparg, + elif op in hasname: + if names is not None: + print '(' + names[oparg] + ')', + else: + print '(%d)' % oparg, + elif op in hasjrel: + print '(to ' + repr(i + oparg) + ')', + elif op in haslocal: + if varnames: + print '(' + varnames[oparg] + ')', + else: + print '(%d)' % oparg, + elif op in hascompare: + print '(' + cmp_op[oparg] + ')', + print + +disco = disassemble # XXX For backwards compatibility + +def findlabels(code): + """Detect all offsets in a byte code which are jump targets. + + Return the list of offsets. + + """ + labels = [] + n = len(code) + i = 0 + while i < n: + c = code[i] + op = ord(c) + i = i+1 + if op >= HAVE_ARGUMENT: + oparg = ord(code[i]) + ord(code[i+1])*256 + i = i+2 + label = -1 + if op in hasjrel: + label = i+oparg + elif op in hasjabs: + label = oparg + if label >= 0: + if label not in labels: + labels.append(label) + return labels + +def findlinestarts(code): + """Find the offsets in a byte code which are start of lines in the source. + + Generate pairs (offset, lineno) as described in Python/compile.c. 
+ + """ + print "WOOT" + byte_increments = [ord(c) for c in code.co_lnotab[0::2]] + line_increments = [ord(c) for c in code.co_lnotab[1::2]] + + lastlineno = None + lineno = code.co_firstlineno + addr = 0 + for byte_incr, line_incr in zip(byte_increments, line_increments): + if byte_incr: + if lineno != lastlineno: + yield (addr, lineno) + lastlineno = lineno + addr += byte_incr + lineno += line_incr + if lineno != lastlineno: + yield (addr, lineno) + +def _test(): + """Simple test program to disassemble a file.""" + if sys.argv[1:]: + if sys.argv[2:]: + sys.stderr.write("usage: python dis.py [-|file]\n") + sys.exit(2) + fn = sys.argv[1] + if not fn or fn == "-": + fn = None + else: + fn = None + if fn is None: + f = sys.stdin + else: + f = open(fn) + source = f.read() + if fn is not None: + f.close() + else: + fn = "" + code = compile(source, fn, "exec") + dis(code) + +if __name__ == "__main__": + _test() diff --git a/uncompyle6/opcodes/opcode_32.py b/uncompyle6/opcodes/opcode_32.py new file mode 100644 index 00000000..8e15d13e --- /dev/null +++ b/uncompyle6/opcodes/opcode_32.py @@ -0,0 +1,180 @@ + +""" +opcode module - potentially shared between dis and other modules which +operate on bytecodes (e.g. peephole optimizers). 
+""" + +__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs", + "haslocal", "hascompare", "hasfree", "opname", "opmap", + "HAVE_ARGUMENT", "EXTENDED_ARG"] + +cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is', + 'is not', 'exception match', 'BAD') + +hasconst = [] +hasname = [] +hasjrel = [] +hasjabs = [] +haslocal = [] +hascompare = [] +hasfree = [] + +opmap = {} +opname = [''] * 256 +for op in range(256): opname[op] = '<%r>' % (op,) +del op + +def def_op(name, op): + opname[op] = name + opmap[name] = op + +def name_op(name, op): + def_op(name, op) + hasname.append(op) + +def jrel_op(name, op): + def_op(name, op) + hasjrel.append(op) + +def jabs_op(name, op): + def_op(name, op) + hasjabs.append(op) + +# Instruction opcodes for compiled code +# Blank lines correspond to available opcodes + +def_op('STOP_CODE', 0) +def_op('POP_TOP', 1) +def_op('ROT_TWO', 2) +def_op('ROT_THREE', 3) +def_op('DUP_TOP', 4) +def_op('DUP_TOP_TWO', 5) + +def_op('NOP', 9) +def_op('UNARY_POSITIVE', 10) +def_op('UNARY_NEGATIVE', 11) +def_op('UNARY_NOT', 12) + +def_op('UNARY_INVERT', 15) + +def_op('BINARY_POWER', 19) +def_op('BINARY_MULTIPLY', 20) + +def_op('BINARY_MODULO', 22) +def_op('BINARY_ADD', 23) +def_op('BINARY_SUBTRACT', 24) +def_op('BINARY_SUBSCR', 25) +def_op('BINARY_FLOOR_DIVIDE', 26) +def_op('BINARY_TRUE_DIVIDE', 27) +def_op('INPLACE_FLOOR_DIVIDE', 28) +def_op('INPLACE_TRUE_DIVIDE', 29) + +def_op('STORE_MAP', 54) +def_op('INPLACE_ADD', 55) +def_op('INPLACE_SUBTRACT', 56) +def_op('INPLACE_MULTIPLY', 57) + +def_op('INPLACE_MODULO', 59) +def_op('STORE_SUBSCR', 60) +def_op('DELETE_SUBSCR', 61) +def_op('BINARY_LSHIFT', 62) +def_op('BINARY_RSHIFT', 63) +def_op('BINARY_AND', 64) +def_op('BINARY_XOR', 65) +def_op('BINARY_OR', 66) +def_op('INPLACE_POWER', 67) +def_op('GET_ITER', 68) +def_op('STORE_LOCALS', 69) + +def_op('PRINT_EXPR', 70) +def_op('LOAD_BUILD_CLASS', 71) + +def_op('INPLACE_LSHIFT', 75) +def_op('INPLACE_RSHIFT', 76) +def_op('INPLACE_AND', 77) 
+def_op('INPLACE_XOR', 78) +def_op('INPLACE_OR', 79) +def_op('BREAK_LOOP', 80) +def_op('WITH_CLEANUP', 81) + +def_op('RETURN_VALUE', 83) +def_op('IMPORT_STAR', 84) + +def_op('YIELD_VALUE', 86) +def_op('POP_BLOCK', 87) +def_op('END_FINALLY', 88) +def_op('POP_EXCEPT', 89) + +HAVE_ARGUMENT = 90 # Opcodes from here have an argument: + +name_op('STORE_NAME', 90) # Index in name list +name_op('DELETE_NAME', 91) # "" +def_op('UNPACK_SEQUENCE', 92) # Number of tuple items +jrel_op('FOR_ITER', 93) +def_op('UNPACK_EX', 94) +name_op('STORE_ATTR', 95) # Index in name list +name_op('DELETE_ATTR', 96) # "" +name_op('STORE_GLOBAL', 97) # "" +name_op('DELETE_GLOBAL', 98) # "" +def_op('LOAD_CONST', 100) # Index in const list +hasconst.append(100) +name_op('LOAD_NAME', 101) # Index in name list +def_op('BUILD_TUPLE', 102) # Number of tuple items +def_op('BUILD_LIST', 103) # Number of list items +def_op('BUILD_SET', 104) # Number of set items +def_op('BUILD_MAP', 105) # Number of dict entries (upto 255) +name_op('LOAD_ATTR', 106) # Index in name list +def_op('COMPARE_OP', 107) # Comparison operator +hascompare.append(107) +name_op('IMPORT_NAME', 108) # Index in name list +name_op('IMPORT_FROM', 109) # Index in name list + +jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip +jabs_op('JUMP_IF_FALSE_OR_POP', 111) # Target byte offset from beginning of code +jabs_op('JUMP_IF_TRUE_OR_POP', 112) # "" +jabs_op('JUMP_ABSOLUTE', 113) # "" +jabs_op('POP_JUMP_IF_FALSE', 114) # "" +jabs_op('POP_JUMP_IF_TRUE', 115) # "" + +name_op('LOAD_GLOBAL', 116) # Index in name list + +jabs_op('CONTINUE_LOOP', 119) # Target address +jrel_op('SETUP_LOOP', 120) # Distance to target address +jrel_op('SETUP_EXCEPT', 121) # "" +jrel_op('SETUP_FINALLY', 122) # "" + +def_op('LOAD_FAST', 124) # Local variable number +haslocal.append(124) +def_op('STORE_FAST', 125) # Local variable number +haslocal.append(125) +def_op('DELETE_FAST', 126) # Local variable number +haslocal.append(126) + +def_op('RAISE_VARARGS', 
130) # Number of raise arguments (1, 2, or 3) +def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8) +def_op('MAKE_FUNCTION', 132) # Number of args with default values +def_op('BUILD_SLICE', 133) # Number of items +def_op('MAKE_CLOSURE', 134) +def_op('LOAD_CLOSURE', 135) +hasfree.append(135) +def_op('LOAD_DEREF', 136) +hasfree.append(136) +def_op('STORE_DEREF', 137) +hasfree.append(137) +def_op('DELETE_DEREF', 138) +hasfree.append(138) + +def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8) +def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8) +def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8) + +jrel_op('SETUP_WITH', 143) + +def_op('LIST_APPEND', 145) +def_op('SET_ADD', 146) +def_op('MAP_ADD', 147) + +def_op('EXTENDED_ARG', 144) +EXTENDED_ARG = 144 + +del def_op, name_op, jrel_op, jabs_op diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 248ebc5c..265a6bd4 100755 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -84,6 +84,8 @@ class Scanner(object): self.opc = opcode_26 elif version == 2.5: self.opc = opcode_25 + elif version == 3.2: + self.opc = opcode_32 elif version == 3.4: self.opc = opcode_34 diff --git a/uncompyle6/scanners/scanner32.py b/uncompyle6/scanners/scanner32.py new file mode 100644 index 00000000..265ddebe --- /dev/null +++ b/uncompyle6/scanners/scanner32.py @@ -0,0 +1,485 @@ +from __future__ import print_function + +''' + Copyright (c) 1999 John Aycock + Copyright (c) 2000-2002 by hartmut Goebel + Copyright (c) 2005 by Dan Pascu + + See main module for license. 
+'''
+
+import dis, marshal
+from collections import namedtuple
+
+from uncompyle6.scanner import Token
+
+
+# Get all the opcodes into globals
+globals().update(dis.opmap)
+from uncompyle6.opcodes.opcode_32 import *
+import uncompyle6.scanner as scan
+
+
+class Scanner32(scan.Scanner):
+    def __init__(self):
+        self.Token = scan.Scanner.__init__(self, 3.2) # check
+
+    def run(self, bytecode):
+        code_object = marshal.loads(bytecode)
+        tokens = self.tokenize(code_object)
+        return tokens
+
+    def disassemble(self, co):
+        """
+        Convert code object into a sequence of tokens.
+
+        Based on dis.disassemble() function.
+        """
+        # Container for tokens
+        tokens = []
+        customize = {}
+        self.code = code = co.co_code
+        codelen = len(code)
+        self.build_lines_data(co)
+        self.build_prev_op()
+        # Get jump targets
+        # Format: {target offset: [jump offsets]}
+        jump_targets = self.find_jump_targets()
+        # Initialize extended arg at 0. When extended arg op is encountered,
+        # variable preserved for next cycle and added as arg for next op
+        extended_arg = 0
+        free = None
+        for offset in self.op_range(0, codelen):
+            # Add jump target tokens
+            if offset in jump_targets:
+                jump_idx = 0
+                for jump_offset in jump_targets[offset]:
+                    tokens.append(Token('COME_FROM', None, repr(jump_offset),
+                                        offset='{}_{}'.format(offset, jump_idx)))
+                    jump_idx += 1
+            op = code[offset]
+            # Create token and fill all the fields we can
+            # w/o touching arguments
+            current_token = Token(dis.opname[op])
+            current_token.offset = offset
+            current_token.linestart = True if offset in self.linestarts else False
+            if op >= dis.HAVE_ARGUMENT:
+                # Calculate op's argument value based on its argument and
+                # preceding extended argument, if any
+                oparg = code[offset+1] + code[offset+2]*256 + extended_arg
+                extended_arg = 0
+                if op == dis.EXTENDED_ARG:
+                    extended_arg = oparg*65536
+
+                # Fill token's attr/pattr fields
+                current_token.attr = oparg
+                if op in dis.hasconst:
+                    current_token.pattr = repr(co.co_consts[oparg])
+                elif op in
dis.hasname: + current_token.pattr = co.co_names[oparg] + elif op in dis.hasjrel: + current_token.pattr = repr(offset + 3 + oparg) + elif op in dis.haslocal: + current_token.pattr = co.co_varnames[oparg] + elif op in dis.hascompare: + current_token.pattr = dis.cmp_op[oparg] + elif op in dis.hasfree: + if free is None: + free = co.co_cellvars + co.co_freevars + current_token.pattr = free[oparg] + tokens.append(current_token) + return tokens, customize + + def build_lines_data(self, code_obj): + """ + Generate various line-related helper data. + """ + # Offset: lineno pairs, only for offsets which start line. + # Locally we use list for more convenient iteration using indices + linestarts = list(dis.findlinestarts(code_obj)) + self.linestarts = dict(linestarts) + # Plain set with offsets of first ops on line + self.linestart_offsets = {a for (a, _) in linestarts} + # 'List-map' which shows line number of current op and offset of + # first op on following line, given offset of op as index + self.lines = lines = [] + LineTuple = namedtuple('LineTuple', ['l_no', 'next']) + # Iterate through available linestarts, and fill + # the data for all code offsets encountered until + # last linestart offset + _, prev_line_no = linestarts[0] + offset = 0 + for start_offset, line_no in linestarts[1:]: + while offset < start_offset: + lines.append(LineTuple(prev_line_no, start_offset)) + offset += 1 + prev_line_no = line_no + # Fill remaining offsets with reference to last line number + # and code length as start offset of following non-existing line + codelen = len(self.code) + while offset < codelen: + lines.append(LineTuple(prev_line_no, codelen)) + offset += 1 + + def build_prev_op(self): + """ + Compose 'list-map' which allows to jump to previous + op, given offset of current op as index. 
+ """ + code = self.code + codelen = len(code) + self.prev_op = [0] + for offset in self.op_range(0, codelen): + op = code[offset] + for _ in range(self.op_size(op)): + self.prev_op.append(offset) + + def op_size(self, op): + """ + Return size of operator with its arguments + for given opcode . + """ + if op < dis.HAVE_ARGUMENT: + return 1 + else: + return 3 + + def find_jump_targets(self): + """ + Detect all offsets in a byte code which are jump targets. + + Return the list of offsets. + + This procedure is modelled after dis.findlables(), but here + for each target the number of jumps is counted. + """ + code = self.code + codelen = len(code) + self.structs = [{'type': 'root', + 'start': 0, + 'end': codelen-1}] + + # All loop entry points + # self.loops = [] + # Map fixed jumps to their real destination + self.fixed_jumps = {} + self.ignore_if = set() + self.build_statement_indices() + # Containers filled by detect_structure() + self.not_continue = set() + self.return_end_ifs = set() + + targets = {} + for offset in self.op_range(0, codelen): + op = code[offset] + + # Determine structures and fix jumps for 2.3+ + self.detect_structure(offset) + + if op >= dis.HAVE_ARGUMENT: + label = self.fixed_jumps.get(offset) + oparg = code[offset+1] + code[offset+2] * 256 + + if label is None: + if op in dis.hasjrel and op != FOR_ITER: + label = offset + 3 + oparg + elif op in dis.hasjabs: + if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): + if oparg > offset: + label = oparg + + if label is not None and label != -1: + targets[label] = targets.get(label, []) + [offset] + elif op == END_FINALLY and offset in self.fixed_jumps: + label = self.fixed_jumps[offset] + targets[label] = targets.get(label, []) + [offset] + return targets + + def build_statement_indices(self): + code = self.code + start = 0 + end = codelen = len(code) + + statement_opcodes = { + SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP, + SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH, + POP_BLOCK, STORE_FAST, 
DELETE_FAST, STORE_DEREF, + STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME, + STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR, + RETURN_VALUE, RAISE_VARARGS, POP_TOP, PRINT_EXPR, + JUMP_ABSOLUTE + } + + statement_opcode_sequences = [(POP_JUMP_IF_FALSE, JUMP_FORWARD), (POP_JUMP_IF_FALSE, JUMP_ABSOLUTE), + (POP_JUMP_IF_TRUE, JUMP_FORWARD), (POP_JUMP_IF_TRUE, JUMP_ABSOLUTE)] + + designator_ops = { + STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR, + STORE_SUBSCR, UNPACK_SEQUENCE, JUMP_ABSOLUTE + } + + # Compose preliminary list of indices with statements, + # using plain statement opcodes + prelim = self.all_instr(start, end, statement_opcodes) + + # Initialize final container with statements with + # preliminnary data + stmts = self.stmts = set(prelim) + + # Same for opcode sequences + pass_stmts = set() + for sequence in statement_opcode_sequences: + for i in self.op_range(start, end-(len(sequence)+1)): + match = True + for elem in sequence: + if elem != code[i]: + match = False + break + i += self.op_size(code[i]) + + if match is True: + i = self.prev_op[i] + stmts.add(i) + pass_stmts.add(i) + + # Initialize statement list with the full data we've gathered so far + if pass_stmts: + stmt_offset_list = list(stmts) + stmt_offset_list.sort() + else: + stmt_offset_list = prelim + # 'List-map' which contains offset of start of + # next statement, when op offset is passed as index + self.next_stmt = slist = [] + last_stmt_offset = -1 + i = 0 + # Go through all statement offsets + for stmt_offset in stmt_offset_list: + # Process absolute jumps, but do not remove 'pass' statements + # from the set + if code[stmt_offset] == JUMP_ABSOLUTE and stmt_offset not in pass_stmts: + # If absolute jump occurs in forward direction or it takes off from the + # same line as previous statement, this is not a statement + target = self.get_target(stmt_offset) + if target > stmt_offset or self.lines[last_stmt_offset].l_no == self.lines[stmt_offset].l_no: + 
stmts.remove(stmt_offset) + continue + # Rewing ops till we encounter non-JA one + j = self.prev_op[stmt_offset] + while code[j] == JUMP_ABSOLUTE: + j = self.prev_op[j] + # If we got here, then it's list comprehension which + # is not a statement too + if code[j] == LIST_APPEND: + stmts.remove(stmt_offset) + continue + # Exclude ROT_TWO + POP_TOP + elif code[stmt_offset] == POP_TOP and code[self.prev_op[stmt_offset]] == ROT_TWO: + stmts.remove(stmt_offset) + continue + # Exclude FOR_ITER + designators + elif code[stmt_offset] in designator_ops: + j = self.prev_op[stmt_offset] + while code[j] in designator_ops: + j = self.prev_op[j] + if code[j] == FOR_ITER: + stmts.remove(stmt_offset) + continue + # Add to list another list with offset of current statement, + # equal to length of previous statement + slist += [stmt_offset] * (stmt_offset-i) + last_stmt_offset = stmt_offset + i = stmt_offset + # Finish filling the list for last statement + slist += [codelen] * (codelen-len(slist)) + + def get_target(self, offset): + """ + Get target offset for op located at given . 
+ """ + op = self.code[offset] + target = self.code[offset+1] + self.code[offset+2] * 256 + if op in dis.hasjrel: + target += offset + 3 + return target + + def detect_structure(self, offset): + """ + Detect structures and their boundaries to fix optimizied jumps + in python2.3+ + """ + code = self.code + op = code[offset] + # Detect parent structure + parent = self.structs[0] + start = parent['start'] + end = parent['end'] + + # Pick inner-most parent for our offset + for struct in self.structs: + curent_start = struct['start'] + curent_end = struct['end'] + if (curent_start <= offset < curent_end) and (curent_start >= start and curent_end <= end): + start = curent_start + end = curent_end + parent = struct + + if op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE): + start = offset + self.op_size(op) + target = self.get_target(offset) + rtarget = self.restrict_to_parent(target, parent) + prev_op = self.prev_op + + # Do not let jump to go out of parent struct bounds + if target != rtarget and parent['type'] == 'and/or': + self.fixed_jumps[offset] = rtarget + return + + # Does this jump to right after another cond jump? 
+ # If so, it's part of a larger conditional + if (code[prev_op[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP, + POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE)) and (target > offset): + self.fixed_jumps[offset] = prev_op[target] + self.structs.append({'type': 'and/or', + 'start': start, + 'end': prev_op[target]}) + return + # Is it an and inside if block + if op == POP_JUMP_IF_FALSE: + # Search for other POP_JUMP_IF_FALSE targetting the same op, + # in current statement, starting from current offset, and filter + # everything inside inner 'or' jumps and midline ifs + match = self.rem_or(start, self.next_stmt[offset], POP_JUMP_IF_FALSE, target) + match = self.remove_mid_line_ifs(match) + # If we still have any offsets in set, start working on it + if match: + if (code[prev_op[rtarget]] in (JUMP_FORWARD, JUMP_ABSOLUTE) and prev_op[rtarget] not in self.stmts and + self.restrict_to_parent(self.get_target(prev_op[rtarget]), parent) == rtarget): + if (code[prev_op[prev_op[rtarget]]] == JUMP_ABSOLUTE and self.remove_mid_line_ifs([offset]) and + target == self.get_target(prev_op[prev_op[rtarget]]) and + (prev_op[prev_op[rtarget]] not in self.stmts or self.get_target(prev_op[prev_op[rtarget]]) > prev_op[prev_op[rtarget]]) and + 1 == len(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target)))): + pass + elif (code[prev_op[prev_op[rtarget]]] == RETURN_VALUE and self.remove_mid_line_ifs([offset]) and + 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], + (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target))) | + set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], + (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE, JUMP_ABSOLUTE), + prev_op[rtarget], True)))))): + pass + else: + fix = None + jump_ifs = self.all_instr(start, self.next_stmt[offset], POP_JUMP_IF_FALSE) + last_jump_good = True + for j in jump_ifs: + if target == self.get_target(j): + if self.lines[j].next == j + 3 
and last_jump_good: + fix = j + break + else: + last_jump_good = False + self.fixed_jumps[offset] = fix or match[-1] + return + else: + self.fixed_jumps[offset] = match[-1] + return + # op == POP_JUMP_IF_TRUE + else: + next = self.next_stmt[offset] + if prev_op[next] == offset: + pass + elif code[next] in (JUMP_FORWARD, JUMP_ABSOLUTE) and target == self.get_target(next): + if code[prev_op[next]] == POP_JUMP_IF_FALSE: + if code[next] == JUMP_FORWARD or target != rtarget or code[prev_op[prev_op[rtarget]]] not in (JUMP_ABSOLUTE, RETURN_VALUE): + self.fixed_jumps[offset] = prev_op[next] + return + elif (code[next] == JUMP_ABSOLUTE and code[target] in (JUMP_ABSOLUTE, JUMP_FORWARD) and + self.get_target(target) == self.get_target(next)): + self.fixed_jumps[offset] = prev_op[next] + return + + # Don't add a struct for a while test, it's already taken care of + if offset in self.ignore_if: + return + + if (code[prev_op[rtarget]] == JUMP_ABSOLUTE and prev_op[rtarget] in self.stmts and + prev_op[rtarget] != offset and prev_op[prev_op[rtarget]] != offset and + not (code[rtarget] == JUMP_ABSOLUTE and code[rtarget+3] == POP_BLOCK and code[prev_op[prev_op[rtarget]]] != JUMP_ABSOLUTE)): + rtarget = prev_op[rtarget] + + # Does the if jump just beyond a jump op, then this is probably an if statement + if code[prev_op[rtarget]] in (JUMP_ABSOLUTE, JUMP_FORWARD): + if_end = self.get_target(prev_op[rtarget]) + + # Is this a loop not an if? 
+ if (if_end < prev_op[rtarget]) and (code[prev_op[if_end]] == SETUP_LOOP): + if(if_end > start): + return + + end = self.restrict_to_parent(if_end, parent) + + self.structs.append({'type': 'if-then', + 'start': start, + 'end': prev_op[rtarget]}) + self.not_continue.add(prev_op[rtarget]) + + if rtarget < end: + self.structs.append({'type': 'if-else', + 'start': rtarget, + 'end': end}) + elif code[prev_op[rtarget]] == RETURN_VALUE: + self.structs.append({'type': 'if-then', + 'start': start, + 'end': rtarget}) + self.return_end_ifs.add(prev_op[rtarget]) + + elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): + target = self.get_target(offset) + if target > offset: + unop_target = self.last_instr(offset, target, JUMP_FORWARD, target) + if unop_target and code[unop_target+3] != ROT_TWO: + self.fixed_jumps[offset] = unop_target + else: + self.fixed_jumps[offset] = self.restrict_to_parent(target, parent) + + def restrict_to_parent(self, target, parent): + """Restrict target to parent structure boundaries.""" + if not (parent['start'] < target < parent['end']): + target = parent['end'] + return target + + def rem_or(self, start, end, instr, target=None, include_beyond_target=False): + """ + Find offsets of all requested between and , + optionally ing specified offset, and return list found + offsets which are not within any POP_JUMP_IF_TRUE jumps. 
+ """ + # Find all offsets of requested instructions + instr_offsets = self.all_instr(start, end, instr, target, include_beyond_target) + # Get all POP_JUMP_IF_TRUE (or) offsets + pjit_offsets = self.all_instr(start, end, POP_JUMP_IF_TRUE) + filtered = [] + for pjit_offset in pjit_offsets: + pjit_tgt = self.get_target(pjit_offset) - 3 + for instr_offset in instr_offsets: + if instr_offset <= pjit_offset or instr_offset >= pjit_tgt: + filtered.append(instr_offset) + instr_offsets = filtered + filtered = [] + return instr_offsets + + def remove_mid_line_ifs(self, ifs): + """ + Go through passed offsets, filtering ifs + located somewhere mid-line. + """ + filtered = [] + for if_ in ifs: + # For each offset, if line number of current and next op + # is the same + if self.lines[if_].l_no == self.lines[if_+3].l_no: + # Check if last op on line is PJIT or PJIF, and if it is - skip it + if self.code[self.prev_op[self.lines[if_].next]] in (POP_JUMP_IF_TRUE, POP_JUMP_IF_FALSE): + continue + filtered.append(if_) + return filtered diff --git a/uncompyle6/verify.py b/uncompyle6/verify.py index 1b4c640e..c6311f94 100755 --- a/uncompyle6/verify.py +++ b/uncompyle6/verify.py @@ -166,15 +166,21 @@ def cmp_code_objects(version, code_obj1, code_obj2, name=''): if member in __IGNORE_CODE_MEMBERS__: pass elif member == 'co_code': - if version == 2.7: - import uncompyle6.scanners.scanner27 as scan + if version == 2.5: + import uncompyle6.scanners.scanner25 as scan scanner = scan.Scanner27() elif version == 2.6: import uncompyle6.scanners.scanner26 as scan scanner = scan.Scanner26() - elif version == 2.5: - import uncompyle65.scanners.scanner25 as scan + elif version == 2.7: + import uncompyle6.scanners.scanner27 as scan scanner = scan.Scanner25() + elif version == 3.2: + import uncompyle6.scanners.scanner32 as scan + scanner = scan.Scanner34() + elif version == 3.4: + import uncompyle6.scanners.scanner34 as scan + scanner = scan.Scanner34() scanner.setShowAsm( showasm=0 ) global 
JUMP_OPs JUMP_OPs = scan.JUMP_OPs + ['JUMP_BACK']