Python 2 loop scanner detection in Python 3

scanner*.py: Make scanner27 and scanner3 more aligned
Makefile: we can run py.test on Python 3.5
HISTORY.md: grammar changes
This commit is contained in:
rocky
2016-05-16 11:18:35 -04:00
parent 134b67d952
commit bdd7df6040
6 changed files with 155 additions and 60 deletions

View File

@@ -69,7 +69,7 @@ supported.
Next we get to ["uncompyle" and Next we get to ["uncompyle" and
PyPI](https://pypi.python.org/pypi/uncompyle/1.1) and the era of PyPI](https://pypi.python.org/pypi/uncompyle/1.1) and the era of
public version control. (Dan's code although not public used public version control. (Dan's code although not public used
[darcs](http://darcs.net/) for version control. [darcs](http://darcs.net/) for version control.)
In contrast to _decompyle_, _uncompyle_ at least in its final versions, In contrast to _decompyle_, _uncompyle_ at least in its final versions,
runs only on Python 2.7. However it accepts bytecode back to Python runs only on Python 2.7. However it accepts bytecode back to Python
@@ -98,7 +98,7 @@ actively, if briefly, worked on. Also starting around 2012 is Dark
Fenx's uncompyle3 which I used for inspiration for Python3. Fenx's uncompyle3 which I used for inspiration for Python3.
I started working on this late 2015, mostly to add fragment support. I started working on this late 2015, mostly to add fragment support.
In that decided to make this runnable on Python 3.2+ and Python 2.6+ In that, I decided to make this runnable on Python 3.2+ and Python 2.6+
while, handling Python bytecodes from Python versions 2.5+ and while, handling Python bytecodes from Python versions 2.5+ and
3.2+. (I think I could go back further, but I'd consider doing that 3.2+. (I think I could go back further, but I'd consider doing that
only after code is better cleaned up and supports Python 3 better.) only after code is better cleaned up and supports Python 3 better.)
@@ -116,7 +116,7 @@ Hartmut a decade an a half ago:
NB. This is not a masterpiece of software, but became more like a hack. NB. This is not a masterpiece of software, but became more like a hack.
Probably a complete rewrite would be sensefull. hG/2000-12-27 Probably a complete rewrite would be sensefull. hG/2000-12-27
This project deparses using a Earley-algorithm parse with lots of This project deparses using an Earley-algorithm parse with lots of
massaging of tokens and the grammar in the scanner massaging of tokens and the grammar in the scanner
phase. Earley-algorithm parsers are context free and tend to be linear phase. Earley-algorithm parsers are context free and tend to be linear
if the grammar is LR or left recursive. if the grammar is LR or left recursive.

View File

@@ -23,12 +23,12 @@ check:
@PYTHON_VERSION=`$(PYTHON) -V 2>&1 | cut -d ' ' -f 2 | cut -d'.' -f1,2`; \ @PYTHON_VERSION=`$(PYTHON) -V 2>&1 | cut -d ' ' -f 2 | cut -d'.' -f1,2`; \
$(MAKE) check-$$PYTHON_VERSION $(MAKE) check-$$PYTHON_VERSION
#: Tests for Python 2.7, 3.3 and 3.4 #: Tests for Python 2.7, 3.3, 3.4 and 3.5
check-2.7 check-3.3 check-3.4: pytest check-2.7 check-3.3 check-3.4 check-3.5: pytest
$(MAKE) -C test $@ $(MAKE) -C test $@
#: Tests for Python 3.5 - pytest doesn't work here #: Tests for Python 3.2 - pytest doesn't work here
check-3.2 check-3.5: check-3.2:
$(MAKE) -C test $@ $(MAKE) -C test $@
#:Tests for Python 2.6 (doesn't have pytest) #:Tests for Python 2.6 (doesn't have pytest)

View File

@@ -243,7 +243,7 @@ class Python3Parser(PythonParser):
testtrue ::= expr jmp_true testtrue ::= expr jmp_true
_ifstmts_jump ::= return_if_stmts _ifstmts_jump ::= return_if_stmts
_ifstmts_jump ::= c_stmts_opt _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM
iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE
@@ -594,7 +594,6 @@ class Python32Parser(Python3Parser):
""" """
# Store locals is only used in Python 3.2 # Store locals is only used in Python 3.2
designator ::= STORE_LOCALS designator ::= STORE_LOCALS
_ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM
""" """
class Python34Parser(Python3Parser): class Python34Parser(Python3Parser):
@@ -614,6 +613,11 @@ class Python35onParser(Python3Parser):
withasstmt ::= expr SETUP_WITH designator suite_stmts_opt withasstmt ::= expr SETUP_WITH designator suite_stmts_opt
POP_BLOCK LOAD_CONST COME_FROM POP_BLOCK LOAD_CONST COME_FROM
WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY
# Python 3.5 has more loop optimization that removes
# JUMP_FORWARD in some cases, and hence we also don't
# see COME_FROM
_ifstmts_jump ::= c_stmts_opt
""" """
class Python3ParserSingle(Python3Parser, PythonParserSingle): class Python3ParserSingle(Python3Parser, PythonParserSingle):

View File

@@ -1,6 +1,7 @@
# Copyright (c) 1999 John Aycock # Copyright (c) 2016 by Rocky Bernstein
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org> # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 1999 John Aycock
# #
# See LICENSE # See LICENSE
# #

View File

@@ -44,43 +44,11 @@ class Scanner27(scan.Scanner):
customize = {} customize = {}
Token = self.Token # shortcut Token = self.Token # shortcut
self.code = array('B', co.co_code)
for i in self.op_range(0, len(self.code)): n = self.setup_code(co)
if self.code[i] in (RETURN_VALUE, END_FINALLY): self.build_lines_data(co, n)
n = i + 1 self.build_prev_op(n)
self.code = array('B', co.co_code[:n])
self.prev = [0]
# mapping addresses of instruction & argument
for i in self.op_range(0, n):
op = self.code[i]
self.prev.append(i)
if op >= HAVE_ARGUMENT:
self.prev.append(i)
self.prev.append(i)
self.lines = []
linetuple = namedtuple('linetuple', ['l_no', 'next'])
j = 0
# linestarts is a tuple of (offset, line number).
# Turn that in a has that we can index
linestarts = list(dis.findlinestarts(co))
linestartoffsets = {}
for offset, lineno in linestarts:
linestartoffsets[offset] = lineno
(prev_start_byte, prev_line_no) = linestarts[0]
for (start_byte, line_no) in linestarts[1:]:
while j < start_byte:
self.lines.append(linetuple(prev_line_no, start_byte))
j += 1
prev_line_no = start_byte
while j < n:
self.lines.append(linetuple(prev_line_no, n))
j+=1
# self.lines contains (block,addrLastInstr) # self.lines contains (block,addrLastInstr)
if classname: if classname:
classname = '_' + classname.lstrip('_') + '__' classname = '_' + classname.lstrip('_') + '__'
@@ -104,7 +72,7 @@ class Scanner27(scan.Scanner):
if names[self.get_argument(i+3)] == 'AssertionError': if names[self.get_argument(i+3)] == 'AssertionError':
self.load_asserts.add(i+3) self.load_asserts.add(i+3)
cf = self.find_jump_targets(self.code) cf = self.find_jump_targets()
# contains (code, [addrRefToCode]) # contains (code, [addrRefToCode])
last_stmt = self.next_stmt[0] last_stmt = self.next_stmt[0]
i = self.next_stmt[last_stmt] i = self.next_stmt[last_stmt]
@@ -213,8 +181,8 @@ class Scanner27(scan.Scanner):
if offset in self.return_end_ifs: if offset in self.return_end_ifs:
op_name = 'RETURN_END_IF' op_name = 'RETURN_END_IF'
if offset in linestartoffsets: if offset in self.linestartoffsets:
linestart = linestartoffsets[offset] linestart = self.linestartoffsets[offset]
else: else:
linestart = None linestart = None
@@ -224,6 +192,63 @@ class Scanner27(scan.Scanner):
tokens.append(Token(replace[offset], oparg, pattr, offset, linestart)) tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
return tokens, customize return tokens, customize
def setup_code(self, co):
    """
    Load the bytecode of code object *co* into self.code.

    self.code becomes an unsigned-byte array of co.co_code, cut off one
    byte past the last RETURN_VALUE or END_FINALLY opcode that
    self.op_range() visits; anything after that point is dropped.

    self.prev is NOT set here; build_prev_op() is the method that
    fills it in.

    Returns the length of the (truncated) self.code.
    Raises AssertionError when neither opcode is found.
    """
    self.code = array('B', co.co_code)

    # Keep overwriting n so that it ends up describing the *last*
    # terminating instruction rather than the first one.
    n = -1
    for i in self.op_range(0, len(self.code)):
        if self.code[i] in (RETURN_VALUE, END_FINALLY):
            n = i + 1
    assert n > -1, "Didn't find RETURN_VALUE or END_FINALLY"

    self.code = array('B', co.co_code[:n])
    return n
def build_prev_op(self, n):
    """
    Build self.prev for the first *n* bytes of self.code.

    The list starts with a single 0. After that, every offset produced
    by self.op_range(0, n) is appended once, or three times when the
    opcode at that offset is >= HAVE_ARGUMENT.
    """
    # NOTE(review): presumably op_range yields instruction start
    # offsets and argument-carrying opcodes occupy three bytes, giving
    # one entry per bytecode byte -- confirm against op_range.
    prev = self.prev = [0]
    for offset in self.op_range(0, n):
        width = 3 if self.code[offset] >= HAVE_ARGUMENT else 1
        prev.extend([offset] * width)
def build_lines_data(self, co, n):
    """
    Initialize self.lines and self.linestartoffsets from code object *co*.

    self.linestartoffsets maps each offset reported by
    dis.findlinestarts(co) to its line number.

    self.lines gets one linetuple(l_no, next) per offset in 0..n-1:
    ``l_no`` is the line number in effect at that offset and ``next``
    is the offset at which the following line starts (or *n* once the
    last line start has been passed).

    Raises IndexError if dis.findlinestarts(co) yields nothing.
    """
    linetuple = namedtuple('linetuple', ['l_no', 'next'])

    # findlinestarts yields (offset, line number) pairs. Keep the
    # ordered list for the walk below and a dict we can index by offset.
    linestarts = list(dis.findlinestarts(co))
    self.linestartoffsets = dict(linestarts)

    self.lines = []
    j = 0
    (_, prev_line_no) = linestarts[0]
    for (start_byte, line_no) in linestarts[1:]:
        while j < start_byte:
            self.lines.append(linetuple(prev_line_no, start_byte))
            j += 1
        # Carry the line *number* forward. Storing start_byte here would
        # put a bytecode offset into the l_no field.
        prev_line_no = line_no

    # Offsets after the final line start all belong to the last line.
    while j < n:
        self.lines.append(linetuple(prev_line_no, n))
        j += 1
    return
def build_stmt_indices(self): def build_stmt_indices(self):
code = self.code code = self.code
start = 0 start = 0
@@ -585,7 +610,7 @@ class Scanner27(scan.Scanner):
target = self.get_target(pos, op) target = self.get_target(pos, op)
self.fixed_jumps[pos] = self.restrict_to_parent(target, parent) self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
def find_jump_targets(self, code): def find_jump_targets(self):
''' '''
Detect all offsets in a byte code which are jump targets. Detect all offsets in a byte code which are jump targets.
@@ -595,7 +620,7 @@ class Scanner27(scan.Scanner):
for each target the number of jumps are counted. for each target the number of jumps are counted.
''' '''
n = len(code) n = len(self.code)
self.structs = [{'type': 'root', self.structs = [{'type': 'root',
'start': 0, 'start': 0,
'end': n-1}] 'end': n-1}]
@@ -603,12 +628,14 @@ class Scanner27(scan.Scanner):
self.fixed_jumps = {} # Map fixed jumps to their real destination self.fixed_jumps = {} # Map fixed jumps to their real destination
self.ignore_if = set() self.ignore_if = set()
self.build_stmt_indices() self.build_stmt_indices()
# Containers filled by detect_structure()
self.not_continue = set() self.not_continue = set()
self.return_end_ifs = set() self.return_end_ifs = set()
targets = {} targets = {}
for i in self.op_range(0, n): for i in self.op_range(0, n):
op = code[i] op = self.code[i]
# Determine structures and fix jumps in Python versions # Determine structures and fix jumps in Python versions
# since 2.3 # since 2.3
@@ -616,7 +643,7 @@ class Scanner27(scan.Scanner):
if op >= HAVE_ARGUMENT: if op >= HAVE_ARGUMENT:
label = self.fixed_jumps.get(i) label = self.fixed_jumps.get(i)
oparg = code[i+1] + code[i+2] * 256 oparg = self.code[i+1] + self.code[i+2] * 256
if label is None: if label is None:
if op in hasjrel and op != FOR_ITER: if op in hasjrel and op != FOR_ITER:
label = i + 3 + oparg label = i + 3 + oparg
@@ -634,7 +661,8 @@ class Scanner27(scan.Scanner):
if __name__ == "__main__": if __name__ == "__main__":
co = inspect.currentframe().f_code co = inspect.currentframe().f_code
tokens, customize = Scanner27().disassemble(co) from uncompyle6 import PYTHON_VERSION
tokens, customize = Scanner27(PYTHON_VERSION).disassemble(co)
for t in tokens: for t in tokens:
print(t) print(t)
pass pass

View File

@@ -394,7 +394,7 @@ class Scanner3(scan.Scanner):
# #
# We may however want to consider whether we do # We may however want to consider whether we do
# this in 3.5 or not. # this in 3.5 or not.
if oparg == 0 and self.version != 3.4: if oparg == 0 and self.version >= 3.5:
tokens.append(Token('NOP', oparg, pattr, offset, linestart)) tokens.append(Token('NOP', oparg, pattr, offset, linestart))
continue continue
elif op_name == 'LOAD_GLOBAL': elif op_name == 'LOAD_GLOBAL':
@@ -481,23 +481,25 @@ class Scanner3(scan.Scanner):
for each target the number of jumps is counted. for each target the number of jumps is counted.
""" """
code = self.code code = self.code
codelen = len(code) n = len(code)
self.structs = [{'type': 'root', self.structs = [{'type': 'root',
'start': 0, 'start': 0,
'end': codelen-1}] 'end': n-1}]
# All loop entry points # All loop entry points
# self.loops = [] self.loops = []
# Map fixed jumps to their real destination # Map fixed jumps to their real destination
self.fixed_jumps = {} self.fixed_jumps = {}
self.ignore_if = set() self.ignore_if = set()
self.build_statement_indices() self.build_statement_indices()
# Containers filled by detect_structure() # Containers filled by detect_structure()
self.not_continue = set() self.not_continue = set()
self.return_end_ifs = set() self.return_end_ifs = set()
targets = {} targets = {}
for offset in self.op_range(0, codelen): for offset in self.op_range(0, n):
op = code[offset] op = code[offset]
# Determine structures and fix jumps in Python versions # Determine structures and fix jumps in Python versions
@@ -656,7 +658,67 @@ class Scanner3(scan.Scanner):
end = curent_end end = curent_end
parent = struct parent = struct
if op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE): if op == SETUP_LOOP:
start = offset+3
target = self.get_target(offset)
end = self.restrict_to_parent(target, parent)
if target != end:
self.fixed_jumps[offset] = end
(line_no, next_line_byte) = self.lines[offset]
jump_back = self.last_instr(start, end, JUMP_ABSOLUTE,
next_line_byte, False)
if jump_back and jump_back != self.prev_op[end] and code[jump_back+3] in (JUMP_ABSOLUTE, JUMP_FORWARD):
if code[self.prev_op[end]] == RETURN_VALUE or \
(code[self.prev_op[end]] == POP_BLOCK and code[self.prev_op[self.prev_op[end]]] == RETURN_VALUE):
jump_back = None
if not jump_back: # loop suite ends in return. wtf right?
jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
if not jump_back:
return
if code[self.prev_op[next_line_byte]] not in (PJIF, PJIT):
loop_type = 'for'
else:
loop_type = 'while'
self.ignore_if.add(self.prev_op[next_line_byte])
target = next_line_byte
end = jump_back + 3
else:
if self.get_target(jump_back) >= next_line_byte:
jump_back = self.last_instr(start, end, JUMP_ABSOLUTE, start, False)
if end > jump_back+4 and code[end] in (JUMP_FORWARD, JUMP_ABSOLUTE):
if code[jump_back+4] in (JUMP_ABSOLUTE, JUMP_FORWARD):
if self.get_target(jump_back+4) == self.get_target(end):
self.fixed_jumps[offset] = jump_back+4
end = jump_back+4
elif target < offset:
self.fixed_jumps[offset] = jump_back+4
end = jump_back+4
target = self.get_target(jump_back)
if code[target] in (FOR_ITER, GET_ITER):
loop_type = 'for'
else:
loop_type = 'while'
test = self.prev_op[next_line_byte]
if test == offset:
loop_type = 'while 1'
elif self.code[test] in op3.hasjabs+op3.hasjrel:
self.ignore_if.add(test)
test_target = self.get_target(test)
if test_target > (jump_back+3):
jump_back = test_target
self.not_continue.add(jump_back)
self.loops.append(target)
self.structs.append({'type': loop_type + '-loop',
'start': target,
'end': jump_back})
if jump_back+3 != end:
self.structs.append({'type': loop_type + '-else',
'start': jump_back+3,
'end': end})
elif op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE):
start = offset + self.op_size(op) start = offset + self.op_size(op)
target = self.get_target(offset) target = self.get_target(offset)
rtarget = self.restrict_to_parent(target, parent) rtarget = self.restrict_to_parent(target, parent)