You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-02 16:44:46 +08:00
Python 2 loop scanner detection in Python 3
scanner*.py: Make scanner27 and scanner3 more aligned. Makefile: we can run py.test on Python 3.5. HISTORY.md: grammar changes.
This commit is contained in:
@@ -69,7 +69,7 @@ supported.
|
||||
Next we get to ["uncompyle" and
|
||||
PyPI](https://pypi.python.org/pypi/uncompyle/1.1) and the era of
|
||||
public version control. (Dan's code, although not public, used
|
||||
[darcs](http://darcs.net/) for version control.
|
||||
[darcs](http://darcs.net/) for version control.)
|
||||
|
||||
In contrast to _decompyle_, _uncompyle_ at least in its final versions,
|
||||
runs only on Python 2.7. However it accepts bytecode back to Python
|
||||
@@ -98,7 +98,7 @@ actively, if briefly, worked on. Also starting around 2012 is Dark
|
||||
Fenx's uncompyle3 which I used for inspiration for Python3.
|
||||
|
||||
I started working on this late 2015, mostly to add fragment support.
|
||||
In that decided to make this runnable on Python 3.2+ and Python 2.6+
|
||||
In that, I decided to make this runnable on Python 3.2+ and Python 2.6+
|
||||
while handling Python bytecodes from Python versions 2.5+ and
|
||||
3.2+. (I think I could go back further, but I'd consider doing that
|
||||
only after code is better cleaned up and supports Python 3 better.)
|
||||
@@ -116,7 +116,7 @@ Hartmut a decade an a half ago:
|
||||
NB. This is not a masterpiece of software, but became more like a hack.
|
||||
Probably a complete rewrite would be sensefull. hG/2000-12-27
|
||||
|
||||
This project deparses using a Early-algorithm parse with lots of
|
||||
This project deparses using an Earley-algorithm parser with lots of
|
||||
massaging of tokens and the grammar in the scanner
|
||||
phase. Earley-algorithm parsers are context free and tend to be linear
|
||||
if the grammar is LR or left recursive.
|
||||
|
8
Makefile
8
Makefile
@@ -23,12 +23,12 @@ check:
|
||||
@PYTHON_VERSION=`$(PYTHON) -V 2>&1 | cut -d ' ' -f 2 | cut -d'.' -f1,2`; \
|
||||
$(MAKE) check-$$PYTHON_VERSION
|
||||
|
||||
#: Tests for Python 2.7, 3.3 and 3.4
|
||||
check-2.7 check-3.3 check-3.4: pytest
|
||||
#: Tests for Python 2.7, 3.3, 3.4 and 3.5
|
||||
check-2.7 check-3.3 check-3.4 check-3.5: pytest
|
||||
$(MAKE) -C test $@
|
||||
|
||||
#: Tests for Python 3.5 - pytest doesn't work here
|
||||
check-3.2 check-3.5:
|
||||
#: Tests for Python 3.2 - pytest doesn't work here
|
||||
check-3.2:
|
||||
$(MAKE) -C test $@
|
||||
|
||||
#:Tests for Python 2.6 (doesn't have pytest)
|
||||
|
@@ -243,7 +243,7 @@ class Python3Parser(PythonParser):
|
||||
testtrue ::= expr jmp_true
|
||||
|
||||
_ifstmts_jump ::= return_if_stmts
|
||||
_ifstmts_jump ::= c_stmts_opt
|
||||
_ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM
|
||||
|
||||
iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE
|
||||
|
||||
@@ -594,7 +594,6 @@ class Python32Parser(Python3Parser):
|
||||
"""
|
||||
# Store locals is only used in Python 3.2
|
||||
designator ::= STORE_LOCALS
|
||||
_ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM
|
||||
"""
|
||||
|
||||
class Python34Parser(Python3Parser):
|
||||
@@ -614,6 +613,11 @@ class Python35onParser(Python3Parser):
|
||||
withasstmt ::= expr SETUP_WITH designator suite_stmts_opt
|
||||
POP_BLOCK LOAD_CONST COME_FROM
|
||||
WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY
|
||||
|
||||
# Python 3.5 has more loop optimization that removes
|
||||
# JUMP_FORWARD in some cases, and hence we also don't
|
||||
# see COME_FROM
|
||||
_ifstmts_jump ::= c_stmts_opt
|
||||
"""
|
||||
|
||||
class Python3ParserSingle(Python3Parser, PythonParserSingle):
|
||||
|
@@ -1,6 +1,7 @@
|
||||
# Copyright (c) 1999 John Aycock
|
||||
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
|
||||
# Copyright (c) 2016 by Rocky Bernstein
|
||||
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
|
||||
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
|
||||
# Copyright (c) 1999 John Aycock
|
||||
#
|
||||
# See LICENSE
|
||||
#
|
||||
|
@@ -44,43 +44,11 @@ class Scanner27(scan.Scanner):
|
||||
|
||||
customize = {}
|
||||
Token = self.Token # shortcut
|
||||
self.code = array('B', co.co_code)
|
||||
|
||||
for i in self.op_range(0, len(self.code)):
|
||||
if self.code[i] in (RETURN_VALUE, END_FINALLY):
|
||||
n = i + 1
|
||||
self.code = array('B', co.co_code[:n])
|
||||
n = self.setup_code(co)
|
||||
self.build_lines_data(co, n)
|
||||
self.build_prev_op(n)
|
||||
|
||||
self.prev = [0]
|
||||
# mapping addresses of instruction & argument
|
||||
for i in self.op_range(0, n):
|
||||
op = self.code[i]
|
||||
self.prev.append(i)
|
||||
if op >= HAVE_ARGUMENT:
|
||||
self.prev.append(i)
|
||||
self.prev.append(i)
|
||||
|
||||
self.lines = []
|
||||
linetuple = namedtuple('linetuple', ['l_no', 'next'])
|
||||
|
||||
j = 0
|
||||
|
||||
# linestarts is a tuple of (offset, line number).
|
||||
# Turn that in a has that we can index
|
||||
linestarts = list(dis.findlinestarts(co))
|
||||
linestartoffsets = {}
|
||||
for offset, lineno in linestarts:
|
||||
linestartoffsets[offset] = lineno
|
||||
|
||||
(prev_start_byte, prev_line_no) = linestarts[0]
|
||||
for (start_byte, line_no) in linestarts[1:]:
|
||||
while j < start_byte:
|
||||
self.lines.append(linetuple(prev_line_no, start_byte))
|
||||
j += 1
|
||||
prev_line_no = start_byte
|
||||
while j < n:
|
||||
self.lines.append(linetuple(prev_line_no, n))
|
||||
j+=1
|
||||
# self.lines contains (block,addrLastInstr)
|
||||
if classname:
|
||||
classname = '_' + classname.lstrip('_') + '__'
|
||||
@@ -104,7 +72,7 @@ class Scanner27(scan.Scanner):
|
||||
if names[self.get_argument(i+3)] == 'AssertionError':
|
||||
self.load_asserts.add(i+3)
|
||||
|
||||
cf = self.find_jump_targets(self.code)
|
||||
cf = self.find_jump_targets()
|
||||
# contains (code, [addrRefToCode])
|
||||
last_stmt = self.next_stmt[0]
|
||||
i = self.next_stmt[last_stmt]
|
||||
@@ -213,8 +181,8 @@ class Scanner27(scan.Scanner):
|
||||
if offset in self.return_end_ifs:
|
||||
op_name = 'RETURN_END_IF'
|
||||
|
||||
if offset in linestartoffsets:
|
||||
linestart = linestartoffsets[offset]
|
||||
if offset in self.linestartoffsets:
|
||||
linestart = self.linestartoffsets[offset]
|
||||
else:
|
||||
linestart = None
|
||||
|
||||
@@ -224,6 +192,63 @@ class Scanner27(scan.Scanner):
|
||||
tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
|
||||
return tokens, customize
|
||||
|
||||
def setup_code(self, co):
    """
    Creates Python-independent bytecode structure (byte array) in
    self.code, truncated just past the last RETURN_VALUE or
    END_FINALLY instruction found by self.op_range().

    The size of self.code is returned.

    AssertionError is raised when neither opcode occurs in co.co_code.
    """
    self.code = array('B', co.co_code)

    n = -1
    for i in self.op_range(0, len(self.code)):
        if self.code[i] in (RETURN_VALUE, END_FINALLY):
            # Keep overwriting: we want the position after the *last* match.
            n = i + 1

    # Raised explicitly rather than via "assert" so the check survives
    # "python -O"; otherwise n == -1 would silently chop the final byte
    # off the code below and be returned as the size.
    if n < 0:
        raise AssertionError("Didn't find RETURN_VALUE or END_FINALLY")

    self.code = array('B', co.co_code[:n])
    return n
|
||||
|
||||
def build_prev_op(self, n):
    """
    Builds self.prev, a list indexed by byte offset that maps addresses
    of an instruction and its argument bytes back to an instruction
    offset.

    The list starts with a single 0.  Then, for every instruction
    offset produced by self.op_range(0, n), that offset is appended
    once, or three times when the opcode is >= HAVE_ARGUMENT
    (presumably one slot for the opcode byte plus two argument bytes).
    """
    self.prev = slots = [0]
    for offset in self.op_range(0, n):
        width = 3 if self.code[offset] >= HAVE_ARGUMENT else 1
        slots.extend([offset] * width)
|
||||
|
||||
def build_lines_data(self, co, n):
    """
    Initializes self.lines and self.linestartoffsets.

    co is the code object whose line table is read with
    dis.findlinestarts(); n is the number of bytecode bytes to cover.

    self.linestartoffsets maps each offset that starts a source line
    to that line's number.

    self.lines gets one linetuple(l_no, next) per byte offset: l_no is
    the source line number the offset belongs to and next is the
    offset at which the following source line starts (n for offsets on
    the last line).
    """
    linetuple = namedtuple('linetuple', ['l_no', 'next'])

    # linestarts is a list of (offset, line number) pairs.
    # Turn that into a hash that we can index by offset.
    linestarts = list(dis.findlinestarts(co))
    self.linestartoffsets = dict(linestarts)

    self.lines = []
    j = 0
    prev_line_no = linestarts[0][1]
    for start_byte, line_no in linestarts[1:]:
        while j < start_byte:
            self.lines.append(linetuple(prev_line_no, start_byte))
            j += 1
        # Carry the line *number* forward.  Storing start_byte here
        # would put byte offsets into the l_no field for every line
        # after the first.
        prev_line_no = line_no
    while j < n:
        self.lines.append(linetuple(prev_line_no, n))
        j += 1
    return
|
||||
|
||||
def build_stmt_indices(self):
|
||||
code = self.code
|
||||
start = 0
|
||||
@@ -585,7 +610,7 @@ class Scanner27(scan.Scanner):
|
||||
target = self.get_target(pos, op)
|
||||
self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
|
||||
|
||||
def find_jump_targets(self, code):
|
||||
def find_jump_targets(self):
|
||||
'''
|
||||
Detect all offsets in a byte code which are jump targets.
|
||||
|
||||
@@ -595,7 +620,7 @@ class Scanner27(scan.Scanner):
|
||||
for each target the number of jumps are counted.
|
||||
'''
|
||||
|
||||
n = len(code)
|
||||
n = len(self.code)
|
||||
self.structs = [{'type': 'root',
|
||||
'start': 0,
|
||||
'end': n-1}]
|
||||
@@ -603,12 +628,14 @@ class Scanner27(scan.Scanner):
|
||||
self.fixed_jumps = {} # Map fixed jumps to their real destination
|
||||
self.ignore_if = set()
|
||||
self.build_stmt_indices()
|
||||
|
||||
# Containers filled by detect_structure()
|
||||
self.not_continue = set()
|
||||
self.return_end_ifs = set()
|
||||
|
||||
targets = {}
|
||||
for i in self.op_range(0, n):
|
||||
op = code[i]
|
||||
op = self.code[i]
|
||||
|
||||
# Determine structures and fix jumps in Python versions
|
||||
# since 2.3
|
||||
@@ -616,7 +643,7 @@ class Scanner27(scan.Scanner):
|
||||
|
||||
if op >= HAVE_ARGUMENT:
|
||||
label = self.fixed_jumps.get(i)
|
||||
oparg = code[i+1] + code[i+2] * 256
|
||||
oparg = self.code[i+1] + self.code[i+2] * 256
|
||||
if label is None:
|
||||
if op in hasjrel and op != FOR_ITER:
|
||||
label = i + 3 + oparg
|
||||
@@ -634,7 +661,8 @@ class Scanner27(scan.Scanner):
|
||||
|
||||
if __name__ == "__main__":
|
||||
co = inspect.currentframe().f_code
|
||||
tokens, customize = Scanner27().disassemble(co)
|
||||
from uncompyle6 import PYTHON_VERSION
|
||||
tokens, customize = Scanner27(PYTHON_VERSION).disassemble(co)
|
||||
for t in tokens:
|
||||
print(t)
|
||||
pass
|
||||
|
@@ -394,7 +394,7 @@ class Scanner3(scan.Scanner):
|
||||
#
|
||||
# We may however want to consider whether we do
|
||||
# this in 3.5 or not.
|
||||
if oparg == 0 and self.version != 3.4:
|
||||
if oparg == 0 and self.version >= 3.5:
|
||||
tokens.append(Token('NOP', oparg, pattr, offset, linestart))
|
||||
continue
|
||||
elif op_name == 'LOAD_GLOBAL':
|
||||
@@ -481,23 +481,25 @@ class Scanner3(scan.Scanner):
|
||||
for each target the number of jumps is counted.
|
||||
"""
|
||||
code = self.code
|
||||
codelen = len(code)
|
||||
n = len(code)
|
||||
self.structs = [{'type': 'root',
|
||||
'start': 0,
|
||||
'end': codelen-1}]
|
||||
'end': n-1}]
|
||||
|
||||
# All loop entry points
|
||||
# self.loops = []
|
||||
self.loops = []
|
||||
|
||||
# Map fixed jumps to their real destination
|
||||
self.fixed_jumps = {}
|
||||
self.ignore_if = set()
|
||||
self.build_statement_indices()
|
||||
|
||||
# Containers filled by detect_structure()
|
||||
self.not_continue = set()
|
||||
self.return_end_ifs = set()
|
||||
|
||||
targets = {}
|
||||
for offset in self.op_range(0, codelen):
|
||||
for offset in self.op_range(0, n):
|
||||
op = code[offset]
|
||||
|
||||
# Determine structures and fix jumps in Python versions
|
||||
@@ -656,7 +658,67 @@ class Scanner3(scan.Scanner):
|
||||
end = curent_end
|
||||
parent = struct
|
||||
|
||||
if op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE):
|
||||
if op == SETUP_LOOP:
|
||||
start = offset+3
|
||||
target = self.get_target(offset)
|
||||
end = self.restrict_to_parent(target, parent)
|
||||
|
||||
if target != end:
|
||||
self.fixed_jumps[offset] = end
|
||||
(line_no, next_line_byte) = self.lines[offset]
|
||||
jump_back = self.last_instr(start, end, JUMP_ABSOLUTE,
|
||||
next_line_byte, False)
|
||||
|
||||
if jump_back and jump_back != self.prev_op[end] and code[jump_back+3] in (JUMP_ABSOLUTE, JUMP_FORWARD):
|
||||
if code[self.prev_op[end]] == RETURN_VALUE or \
|
||||
(code[self.prev_op[end]] == POP_BLOCK and code[self.prev_op[self.prev_op[end]]] == RETURN_VALUE):
|
||||
jump_back = None
|
||||
if not jump_back: # loop suite ends in return. wtf right?
|
||||
jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
|
||||
if not jump_back:
|
||||
return
|
||||
if code[self.prev_op[next_line_byte]] not in (PJIF, PJIT):
|
||||
loop_type = 'for'
|
||||
else:
|
||||
loop_type = 'while'
|
||||
self.ignore_if.add(self.prev_op[next_line_byte])
|
||||
target = next_line_byte
|
||||
end = jump_back + 3
|
||||
else:
|
||||
if self.get_target(jump_back) >= next_line_byte:
|
||||
jump_back = self.last_instr(start, end, JUMP_ABSOLUTE, start, False)
|
||||
if end > jump_back+4 and code[end] in (JUMP_FORWARD, JUMP_ABSOLUTE):
|
||||
if code[jump_back+4] in (JUMP_ABSOLUTE, JUMP_FORWARD):
|
||||
if self.get_target(jump_back+4) == self.get_target(end):
|
||||
self.fixed_jumps[offset] = jump_back+4
|
||||
end = jump_back+4
|
||||
elif target < offset:
|
||||
self.fixed_jumps[offset] = jump_back+4
|
||||
end = jump_back+4
|
||||
target = self.get_target(jump_back)
|
||||
|
||||
if code[target] in (FOR_ITER, GET_ITER):
|
||||
loop_type = 'for'
|
||||
else:
|
||||
loop_type = 'while'
|
||||
test = self.prev_op[next_line_byte]
|
||||
if test == offset:
|
||||
loop_type = 'while 1'
|
||||
elif self.code[test] in op3.hasjabs+op3.hasjrel:
|
||||
self.ignore_if.add(test)
|
||||
test_target = self.get_target(test)
|
||||
if test_target > (jump_back+3):
|
||||
jump_back = test_target
|
||||
self.not_continue.add(jump_back)
|
||||
self.loops.append(target)
|
||||
self.structs.append({'type': loop_type + '-loop',
|
||||
'start': target,
|
||||
'end': jump_back})
|
||||
if jump_back+3 != end:
|
||||
self.structs.append({'type': loop_type + '-else',
|
||||
'start': jump_back+3,
|
||||
'end': end})
|
||||
elif op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE):
|
||||
start = offset + self.op_size(op)
|
||||
target = self.get_target(offset)
|
||||
rtarget = self.restrict_to_parent(target, parent)
|
||||
|
Reference in New Issue
Block a user