Python 2 loop scanner detection in Python 3

scanner*.py: Make scanner27 and scanner3 more aligned
Makefile: we can run py.test on Python 3.5
HISTORY.md: grammar changes
This commit is contained in:
rocky
2016-05-16 11:18:35 -04:00
parent 134b67d952
commit bdd7df6040
6 changed files with 155 additions and 60 deletions

View File

@@ -69,7 +69,7 @@ supported.
Next we get to ["uncompyle" and
PyPI](https://pypi.python.org/pypi/uncompyle/1.1) and the era of
public version control. (Dan's code although not public used
[darcs](http://darcs.net/) for version control.
[darcs](http://darcs.net/) for version control.)
In contrast to _decompyle_, _uncompyle_ at least in its final versions,
runs only on Python 2.7. However it accepts bytecode back to Python
@@ -98,7 +98,7 @@ actively, if briefly, worked on. Also starting around 2012 is Dark
Fenx's uncompyle3 which I used for inspiration for Python3.
I started working on this late 2015, mostly to add fragment support.
In that decided to make this runnable on Python 3.2+ and Python 2.6+
In that, I decided to make this runnable on Python 3.2+ and Python 2.6+
while handling Python bytecodes from Python versions 2.5+ and
3.2+. (I think I could go back further, but I'd consider doing that
only after code is better cleaned up and supports Python 3 better.)
@@ -116,7 +116,7 @@ Hartmut a decade an a half ago:
NB. This is not a masterpiece of software, but became more like a hack.
Probably a complete rewrite would be sensefull. hG/2000-12-27
This project deparses using a Early-algorithm parse with lots of
This project deparses using an Earley-algorithm parse with lots of
massaging of tokens and the grammar in the scanner
phase. Earley-algorithm parsers are context free and tend to be linear
if the grammar is LR or left recursive.

View File

@@ -23,12 +23,12 @@ check:
@PYTHON_VERSION=`$(PYTHON) -V 2>&1 | cut -d ' ' -f 2 | cut -d'.' -f1,2`; \
$(MAKE) check-$$PYTHON_VERSION
#: Tests for Python 2.7, 3.3 and 3.4
check-2.7 check-3.3 check-3.4: pytest
#: Tests for Python 2.7, 3.3, 3.4 and 3.5
check-2.7 check-3.3 check-3.4 check-3.5: pytest
$(MAKE) -C test $@
#: Tests for Python 3.5 - pytest doesn't work here
check-3.2 check-3.5:
#: Tests for Python 3.2 - pytest doesn't work here
check-3.2:
$(MAKE) -C test $@
#:Tests for Python 2.6 (doesn't have pytest)

View File

@@ -243,7 +243,7 @@ class Python3Parser(PythonParser):
testtrue ::= expr jmp_true
_ifstmts_jump ::= return_if_stmts
_ifstmts_jump ::= c_stmts_opt
_ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM
iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE
@@ -594,7 +594,6 @@ class Python32Parser(Python3Parser):
"""
# Store locals is only used in Python 3.2
designator ::= STORE_LOCALS
_ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM
"""
class Python34Parser(Python3Parser):
@@ -614,6 +613,11 @@ class Python35onParser(Python3Parser):
withasstmt ::= expr SETUP_WITH designator suite_stmts_opt
POP_BLOCK LOAD_CONST COME_FROM
WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY
# Python 3.5 has more loop optimization that removes
# JUMP_FORWARD in some cases, and hence we also don't
# see COME_FROM
_ifstmts_jump ::= c_stmts_opt
"""
class Python3ParserSingle(Python3Parser, PythonParserSingle):

View File

@@ -1,6 +1,7 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 2016 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 1999 John Aycock
#
# See LICENSE
#

View File

@@ -44,43 +44,11 @@ class Scanner27(scan.Scanner):
customize = {}
Token = self.Token # shortcut
self.code = array('B', co.co_code)
for i in self.op_range(0, len(self.code)):
if self.code[i] in (RETURN_VALUE, END_FINALLY):
n = i + 1
self.code = array('B', co.co_code[:n])
n = self.setup_code(co)
self.build_lines_data(co, n)
self.build_prev_op(n)
self.prev = [0]
# mapping addresses of instruction & argument
for i in self.op_range(0, n):
op = self.code[i]
self.prev.append(i)
if op >= HAVE_ARGUMENT:
self.prev.append(i)
self.prev.append(i)
self.lines = []
linetuple = namedtuple('linetuple', ['l_no', 'next'])
j = 0
# linestarts is a tuple of (offset, line number).
# Turn that in a has that we can index
linestarts = list(dis.findlinestarts(co))
linestartoffsets = {}
for offset, lineno in linestarts:
linestartoffsets[offset] = lineno
(prev_start_byte, prev_line_no) = linestarts[0]
for (start_byte, line_no) in linestarts[1:]:
while j < start_byte:
self.lines.append(linetuple(prev_line_no, start_byte))
j += 1
prev_line_no = start_byte
while j < n:
self.lines.append(linetuple(prev_line_no, n))
j+=1
# self.lines contains (block,addrLastInstr)
if classname:
classname = '_' + classname.lstrip('_') + '__'
@@ -104,7 +72,7 @@ class Scanner27(scan.Scanner):
if names[self.get_argument(i+3)] == 'AssertionError':
self.load_asserts.add(i+3)
cf = self.find_jump_targets(self.code)
cf = self.find_jump_targets()
# contains (code, [addrRefToCode])
last_stmt = self.next_stmt[0]
i = self.next_stmt[last_stmt]
@@ -213,8 +181,8 @@ class Scanner27(scan.Scanner):
if offset in self.return_end_ifs:
op_name = 'RETURN_END_IF'
if offset in linestartoffsets:
linestart = linestartoffsets[offset]
if offset in self.linestartoffsets:
linestart = self.linestartoffsets[offset]
else:
linestart = None
@@ -224,6 +192,63 @@ class Scanner27(scan.Scanner):
tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
return tokens, customize
def setup_code(self, co):
    """
    Copy the bytecode of code object `co` into self.code as an array
    of unsigned bytes, truncated just past the last RETURN_VALUE or
    END_FINALLY opcode found in it.

    Returns the size (in bytes) of the truncated self.code.

    Raises AssertionError if `co` contains neither a RETURN_VALUE nor
    an END_FINALLY opcode.
    """
    # The untruncated bytecode is installed before scanning, as in the
    # original statement order; self.op_range() presumably reads
    # self.code -- TODO confirm against the Scanner base class.
    self.code = array('B', co.co_code)

    # Remember the offset just past the *last* terminating opcode seen.
    n = -1
    for i in self.op_range(0, len(self.code)):
        if self.code[i] in (RETURN_VALUE, END_FINALLY):
            n = i + 1

    # Raise explicitly rather than via a bare `assert`: under `python -O`
    # an assert is stripped and n == -1 would silently chop the final
    # byte off the bytecode in the slice below.
    if n == -1:
        raise AssertionError("Didn't find RETURN_VALUE or END_FINALLY")

    self.code = array('B', co.co_code[:n])
    return n
def build_prev_op(self, n):
    """
    Build self.prev, a list indexed by bytecode offset.

    The list starts with a single 0.  Then, for every instruction
    offset produced by self.op_range(0, n), that offset is appended
    once, plus twice more when the opcode is >= HAVE_ARGUMENT (i.e.
    once per byte of the instruction, assuming a two-byte argument).
    """
    # Bind the same list to the attribute up front so that self.prev
    # exists while op_range() is being iterated.
    self.prev = offsets = [0]
    for instr_start in self.op_range(0, n):
        has_arg = self.code[instr_start] >= HAVE_ARGUMENT
        # one slot for the opcode byte, two more for its argument bytes
        offsets.extend([instr_start] * (3 if has_arg else 1))
def build_lines_data(self, co, n):
    """
    Initializes self.lines and self.linestartoffsets from the line
    number table of code object `co`.

    self.linestartoffsets maps each bytecode offset that begins a
    source line to that line's number.

    self.lines is a list with one entry per bytecode offset in
    0..n-1.  Each entry is a namedtuple (l_no, next): `l_no` is the
    source line number covering that offset and `next` is the offset
    at which the following source line begins (or `n` for offsets on
    the last line).

    `n` is the length of the bytecode to cover.
    """
    linetuple = namedtuple('linetuple', ['l_no', 'next'])

    # dis.findlinestarts() yields (offset, line number) pairs.
    # Turn that into a hash that we can index by offset.
    linestarts = list(dis.findlinestarts(co))
    self.linestartoffsets = dict(linestarts)

    self.lines = []
    j = 0
    prev_line_no = linestarts[0][1]
    for (start_byte, line_no) in linestarts[1:]:
        # Every offset before the start of this line belongs to the
        # previous line.
        while j < start_byte:
            self.lines.append(linetuple(prev_line_no, start_byte))
            j += 1
        # Carry the *line number* forward.  (This used to store
        # start_byte, which put a byte offset in the l_no field.)
        prev_line_no = line_no

    # Offsets on the last source line run to the end of the bytecode.
    while j < n:
        self.lines.append(linetuple(prev_line_no, n))
        j += 1
def build_stmt_indices(self):
code = self.code
start = 0
@@ -585,7 +610,7 @@ class Scanner27(scan.Scanner):
target = self.get_target(pos, op)
self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
def find_jump_targets(self, code):
def find_jump_targets(self):
'''
Detect all offsets in a byte code which are jump targets.
@@ -595,7 +620,7 @@ class Scanner27(scan.Scanner):
for each target the number of jumps are counted.
'''
n = len(code)
n = len(self.code)
self.structs = [{'type': 'root',
'start': 0,
'end': n-1}]
@@ -603,12 +628,14 @@ class Scanner27(scan.Scanner):
self.fixed_jumps = {} # Map fixed jumps to their real destination
self.ignore_if = set()
self.build_stmt_indices()
# Containers filled by detect_structure()
self.not_continue = set()
self.return_end_ifs = set()
targets = {}
for i in self.op_range(0, n):
op = code[i]
op = self.code[i]
# Determine structures and fix jumps in Python versions
# since 2.3
@@ -616,7 +643,7 @@ class Scanner27(scan.Scanner):
if op >= HAVE_ARGUMENT:
label = self.fixed_jumps.get(i)
oparg = code[i+1] + code[i+2] * 256
oparg = self.code[i+1] + self.code[i+2] * 256
if label is None:
if op in hasjrel and op != FOR_ITER:
label = i + 3 + oparg
@@ -634,7 +661,8 @@ class Scanner27(scan.Scanner):
if __name__ == "__main__":
co = inspect.currentframe().f_code
tokens, customize = Scanner27().disassemble(co)
from uncompyle6 import PYTHON_VERSION
tokens, customize = Scanner27(PYTHON_VERSION).disassemble(co)
for t in tokens:
print(t)
pass

View File

@@ -394,7 +394,7 @@ class Scanner3(scan.Scanner):
#
# We may however want to consider whether we do
# this in 3.5 or not.
if oparg == 0 and self.version != 3.4:
if oparg == 0 and self.version >= 3.5:
tokens.append(Token('NOP', oparg, pattr, offset, linestart))
continue
elif op_name == 'LOAD_GLOBAL':
@@ -481,23 +481,25 @@ class Scanner3(scan.Scanner):
for each target the number of jumps is counted.
"""
code = self.code
codelen = len(code)
n = len(code)
self.structs = [{'type': 'root',
'start': 0,
'end': codelen-1}]
'end': n-1}]
# All loop entry points
# self.loops = []
self.loops = []
# Map fixed jumps to their real destination
self.fixed_jumps = {}
self.ignore_if = set()
self.build_statement_indices()
# Containers filled by detect_structure()
self.not_continue = set()
self.return_end_ifs = set()
targets = {}
for offset in self.op_range(0, codelen):
for offset in self.op_range(0, n):
op = code[offset]
# Determine structures and fix jumps in Python versions
@@ -656,7 +658,67 @@ class Scanner3(scan.Scanner):
end = curent_end
parent = struct
if op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE):
if op == SETUP_LOOP:
start = offset+3
target = self.get_target(offset)
end = self.restrict_to_parent(target, parent)
if target != end:
self.fixed_jumps[offset] = end
(line_no, next_line_byte) = self.lines[offset]
jump_back = self.last_instr(start, end, JUMP_ABSOLUTE,
next_line_byte, False)
if jump_back and jump_back != self.prev_op[end] and code[jump_back+3] in (JUMP_ABSOLUTE, JUMP_FORWARD):
if code[self.prev_op[end]] == RETURN_VALUE or \
(code[self.prev_op[end]] == POP_BLOCK and code[self.prev_op[self.prev_op[end]]] == RETURN_VALUE):
jump_back = None
if not jump_back: # loop suite ends in return. wtf right?
jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
if not jump_back:
return
if code[self.prev_op[next_line_byte]] not in (PJIF, PJIT):
loop_type = 'for'
else:
loop_type = 'while'
self.ignore_if.add(self.prev_op[next_line_byte])
target = next_line_byte
end = jump_back + 3
else:
if self.get_target(jump_back) >= next_line_byte:
jump_back = self.last_instr(start, end, JUMP_ABSOLUTE, start, False)
if end > jump_back+4 and code[end] in (JUMP_FORWARD, JUMP_ABSOLUTE):
if code[jump_back+4] in (JUMP_ABSOLUTE, JUMP_FORWARD):
if self.get_target(jump_back+4) == self.get_target(end):
self.fixed_jumps[offset] = jump_back+4
end = jump_back+4
elif target < offset:
self.fixed_jumps[offset] = jump_back+4
end = jump_back+4
target = self.get_target(jump_back)
if code[target] in (FOR_ITER, GET_ITER):
loop_type = 'for'
else:
loop_type = 'while'
test = self.prev_op[next_line_byte]
if test == offset:
loop_type = 'while 1'
elif self.code[test] in op3.hasjabs+op3.hasjrel:
self.ignore_if.add(test)
test_target = self.get_target(test)
if test_target > (jump_back+3):
jump_back = test_target
self.not_continue.add(jump_back)
self.loops.append(target)
self.structs.append({'type': loop_type + '-loop',
'start': target,
'end': jump_back})
if jump_back+3 != end:
self.structs.append({'type': loop_type + '-else',
'start': jump_back+3,
'end': end})
elif op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE):
start = offset + self.op_size(op)
target = self.get_target(offset)
rtarget = self.restrict_to_parent(target, parent)