diff --git a/HISTORY.md b/HISTORY.md index 3e3fc1da..41f8da82 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -69,7 +69,7 @@ supported. Next we get to ["uncompyle" and PyPI](https://pypi.python.org/pypi/uncompyle/1.1) and the era of public version control. (Dan's code although not public used -[darcs](http://darcs.net/) for version control. +[darcs](http://darcs.net/) for version control.) In contrast to _decompyle_, _uncompyle_ at least in its final versions, runs only on Python 2.7. However it accepts bytecode back to Python @@ -98,7 +98,7 @@ actively, if briefly, worked on. Also starting around 2012 is Dark Fenx's uncompyle3 which I used for inspiration for Python3. I started working on this late 2015, mostly to add fragment support. -In that decided to make this runnable on Python 3.2+ and Python 2.6+ +In that, I decided to make this runnable on Python 3.2+ and Python 2.6+ while, handling Python bytecodes from Python versions 2.5+ and 3.2+. (I think I could go back further, but I'd consider doing that only after code is better cleaned up and supports Python 3 better.) @@ -116,7 +116,7 @@ Hartmut a decade an a half ago: NB. This is not a masterpiece of software, but became more like a hack. Probably a complete rewrite would be sensefull. hG/2000-12-27 -This project deparses using a Early-algorithm parse with lots of +This project deparses using an Earley-algorithm parse with lots of massaging of tokens and the grammar in the scanner phase. Early-algorithm parsers are context free and tend to be linear if the grammar is LR or left recursive. diff --git a/Makefile b/Makefile index f7e41b18..a4838dc8 100644 --- a/Makefile +++ b/Makefile @@ -23,12 +23,12 @@ check: @PYTHON_VERSION=`$(PYTHON) -V 2>&1 | cut -d ' ' -f 2 | cut -d'.' 
-f1,2`; \ $(MAKE) check-$$PYTHON_VERSION -#: Tests for Python 2.7, 3.3 and 3.4 -check-2.7 check-3.3 check-3.4: pytest +#: Tests for Python 2.7, 3.3, 3.4 and 3.5 +check-2.7 check-3.3 check-3.4 check-3.5: pytest $(MAKE) -C test $@ -#: Tests for Python 3.5 - pytest doesn't work here -check-3.2 check-3.5: +#: Tests for Python 3.2 - pytest doesn't work here +check-3.2: $(MAKE) -C test $@ #:Tests for Python 2.6 (doesn't have pytest) diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 6ede22b0..6c52a7df 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -243,7 +243,7 @@ class Python3Parser(PythonParser): testtrue ::= expr jmp_true _ifstmts_jump ::= return_if_stmts - _ifstmts_jump ::= c_stmts_opt + _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE @@ -594,7 +594,6 @@ class Python32Parser(Python3Parser): """ # Store locals is only used in Python 3.2 designator ::= STORE_LOCALS - _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM """ class Python34Parser(Python3Parser): @@ -614,6 +613,11 @@ class Python35onParser(Python3Parser): withasstmt ::= expr SETUP_WITH designator suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + + # Python 3.5 has more loop optimization that removes + # JUMP_FORWARD in some cases, and hence we also don't + # see COME_FROM + _ifstmts_jump ::= c_stmts_opt """ class Python3ParserSingle(Python3Parser, PythonParserSingle): diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index cc6082df..5e443270 100755 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -1,6 +1,7 @@ -# Copyright (c) 1999 John Aycock -# Copyright (c) 2000-2002 by hartmut Goebel +# Copyright (c) 2016 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu +# Copyright (c) 2000-2002 by hartmut Goebel +# Copyright (c) 1999 John Aycock # # See LICENSE # diff --git a/uncompyle6/scanners/scanner27.py 
b/uncompyle6/scanners/scanner27.py index 0ccd471d..858d9f8b 100755 --- a/uncompyle6/scanners/scanner27.py +++ b/uncompyle6/scanners/scanner27.py @@ -44,43 +44,11 @@ class Scanner27(scan.Scanner): customize = {} Token = self.Token # shortcut - self.code = array('B', co.co_code) - for i in self.op_range(0, len(self.code)): - if self.code[i] in (RETURN_VALUE, END_FINALLY): - n = i + 1 - self.code = array('B', co.co_code[:n]) + n = self.setup_code(co) + self.build_lines_data(co, n) + self.build_prev_op(n) - self.prev = [0] - # mapping addresses of instruction & argument - for i in self.op_range(0, n): - op = self.code[i] - self.prev.append(i) - if op >= HAVE_ARGUMENT: - self.prev.append(i) - self.prev.append(i) - - self.lines = [] - linetuple = namedtuple('linetuple', ['l_no', 'next']) - - j = 0 - - # linestarts is a tuple of (offset, line number). - # Turn that in a has that we can index - linestarts = list(dis.findlinestarts(co)) - linestartoffsets = {} - for offset, lineno in linestarts: - linestartoffsets[offset] = lineno - - (prev_start_byte, prev_line_no) = linestarts[0] - for (start_byte, line_no) in linestarts[1:]: - while j < start_byte: - self.lines.append(linetuple(prev_line_no, start_byte)) - j += 1 - prev_line_no = start_byte - while j < n: - self.lines.append(linetuple(prev_line_no, n)) - j+=1 # self.lines contains (block,addrLastInstr) if classname: classname = '_' + classname.lstrip('_') + '__' @@ -104,7 +72,7 @@ class Scanner27(scan.Scanner): if names[self.get_argument(i+3)] == 'AssertionError': self.load_asserts.add(i+3) - cf = self.find_jump_targets(self.code) + cf = self.find_jump_targets() # contains (code, [addrRefToCode]) last_stmt = self.next_stmt[0] i = self.next_stmt[last_stmt] @@ -213,8 +181,8 @@ class Scanner27(scan.Scanner): if offset in self.return_end_ifs: op_name = 'RETURN_END_IF' - if offset in linestartoffsets: - linestart = linestartoffsets[offset] + if offset in self.linestartoffsets: + linestart = self.linestartoffsets[offset] else: 
linestart = None @@ -224,6 +192,63 @@ class Scanner27(scan.Scanner): tokens.append(Token(replace[offset], oparg, pattr, offset, linestart)) return tokens, customize + def setup_code(self, co): + """ + Creates Python-independent bytecode structure (byte array) in + self.code, truncated after the last RETURN_VALUE or END_FINALLY. + The size of self.code is returned + """ + self.code = array('B', co.co_code) + + n = -1 + for i in self.op_range(0, len(self.code)): + if self.code[i] in (RETURN_VALUE, END_FINALLY): + n = i + 1 + pass + pass + assert n > -1, "Didn't find RETURN_VALUE or END_FINALLY" + self.code = array('B', co.co_code[:n]) + + return n + + def build_prev_op(self, n): + self.prev = [0] + # mapping addresses of instruction & argument + for i in self.op_range(0, n): + op = self.code[i] + self.prev.append(i) + if op >= HAVE_ARGUMENT: + self.prev.append(i) + self.prev.append(i) + pass + pass + + def build_lines_data(self, co, n): + """ + Initializes self.lines and self.linestartoffsets + """ + self.lines = [] + linetuple = namedtuple('linetuple', ['l_no', 'next']) + + # linestarts is a list of (offset, line number) tuples. + # Turn that into a hash that we can index + linestarts = list(dis.findlinestarts(co)) + self.linestartoffsets = {} + for offset, lineno in linestarts: + self.linestartoffsets[offset] = lineno + + j = 0 + (prev_start_byte, prev_line_no) = linestarts[0] + for (start_byte, line_no) in linestarts[1:]: + while j < start_byte: + self.lines.append(linetuple(prev_line_no, start_byte)) + j += 1 + prev_line_no = line_no + while j < n: + self.lines.append(linetuple(prev_line_no, n)) + j+=1 + return + def build_stmt_indices(self): code = self.code start = 0 @@ -585,7 +610,7 @@ class Scanner27(scan.Scanner): target = self.get_target(pos, op) self.fixed_jumps[pos] = self.restrict_to_parent(target, parent) - def find_jump_targets(self, code): + def find_jump_targets(self): ''' Detect all offsets in a byte code which are jump targets. 
@@ -595,7 +620,7 @@ class Scanner27(scan.Scanner): for each target the number of jumps are counted. ''' - n = len(code) + n = len(self.code) self.structs = [{'type': 'root', 'start': 0, 'end': n-1}] @@ -603,12 +628,14 @@ class Scanner27(scan.Scanner): self.fixed_jumps = {} # Map fixed jumps to their real destination self.ignore_if = set() self.build_stmt_indices() + + # Containers filled by detect_structure() self.not_continue = set() self.return_end_ifs = set() targets = {} for i in self.op_range(0, n): - op = code[i] + op = self.code[i] # Determine structures and fix jumps in Python versions # since 2.3 @@ -616,7 +643,7 @@ class Scanner27(scan.Scanner): if op >= HAVE_ARGUMENT: label = self.fixed_jumps.get(i) - oparg = code[i+1] + code[i+2] * 256 + oparg = self.code[i+1] + self.code[i+2] * 256 if label is None: if op in hasjrel and op != FOR_ITER: label = i + 3 + oparg @@ -634,7 +661,8 @@ class Scanner27(scan.Scanner): if __name__ == "__main__": co = inspect.currentframe().f_code - tokens, customize = Scanner27().disassemble(co) + from uncompyle6 import PYTHON_VERSION + tokens, customize = Scanner27(PYTHON_VERSION).disassemble(co) for t in tokens: print(t) pass diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 31ec4b13..92e220ba 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -394,7 +394,7 @@ class Scanner3(scan.Scanner): # # We may however want to consider whether we do # this in 3.5 or not. - if oparg == 0 and self.version != 3.4: + if oparg == 0 and self.version >= 3.5: tokens.append(Token('NOP', oparg, pattr, offset, linestart)) continue elif op_name == 'LOAD_GLOBAL': @@ -481,23 +481,25 @@ class Scanner3(scan.Scanner): for each target the number of jumps is counted. 
""" code = self.code - codelen = len(code) + n = len(code) self.structs = [{'type': 'root', 'start': 0, - 'end': codelen-1}] + 'end': n-1}] # All loop entry points - # self.loops = [] + self.loops = [] + # Map fixed jumps to their real destination self.fixed_jumps = {} self.ignore_if = set() self.build_statement_indices() + # Containers filled by detect_structure() self.not_continue = set() self.return_end_ifs = set() targets = {} - for offset in self.op_range(0, codelen): + for offset in self.op_range(0, n): op = code[offset] # Determine structures and fix jumps in Python versions @@ -656,7 +658,67 @@ class Scanner3(scan.Scanner): end = curent_end parent = struct - if op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE): + if op == SETUP_LOOP: + start = offset+3 + target = self.get_target(offset) + end = self.restrict_to_parent(target, parent) + + if target != end: + self.fixed_jumps[offset] = end + (line_no, next_line_byte) = self.lines[offset] + jump_back = self.last_instr(start, end, JUMP_ABSOLUTE, + next_line_byte, False) + + if jump_back and jump_back != self.prev_op[end] and code[jump_back+3] in (JUMP_ABSOLUTE, JUMP_FORWARD): + if code[self.prev_op[end]] == RETURN_VALUE or \ + (code[self.prev_op[end]] == POP_BLOCK and code[self.prev_op[self.prev_op[end]]] == RETURN_VALUE): + jump_back = None + if not jump_back: # loop suite ends in return. wtf right? 
+ jump_back = self.last_instr(start, end, RETURN_VALUE) + 1 + if not jump_back: + return + if code[self.prev_op[next_line_byte]] not in (PJIF, PJIT): + loop_type = 'for' + else: + loop_type = 'while' + self.ignore_if.add(self.prev_op[next_line_byte]) + target = next_line_byte + end = jump_back + 3 + else: + if self.get_target(jump_back) >= next_line_byte: + jump_back = self.last_instr(start, end, JUMP_ABSOLUTE, start, False) + if end > jump_back+4 and code[end] in (JUMP_FORWARD, JUMP_ABSOLUTE): + if code[jump_back+4] in (JUMP_ABSOLUTE, JUMP_FORWARD): + if self.get_target(jump_back+4) == self.get_target(end): + self.fixed_jumps[offset] = jump_back+4 + end = jump_back+4 + elif target < offset: + self.fixed_jumps[offset] = jump_back+4 + end = jump_back+4 + target = self.get_target(jump_back) + + if code[target] in (FOR_ITER, GET_ITER): + loop_type = 'for' + else: + loop_type = 'while' + test = self.prev_op[next_line_byte] + if test == offset: + loop_type = 'while 1' + elif self.code[test] in op3.hasjabs+op3.hasjrel: + self.ignore_if.add(test) + test_target = self.get_target(test) + if test_target > (jump_back+3): + jump_back = test_target + self.not_continue.add(jump_back) + self.loops.append(target) + self.structs.append({'type': loop_type + '-loop', + 'start': target, + 'end': jump_back}) + if jump_back+3 != end: + self.structs.append({'type': loop_type + '-else', + 'start': jump_back+3, + 'end': end}) + elif op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE): start = offset + self.op_size(op) target = self.get_target(offset) rtarget = self.restrict_to_parent(target, parent)