You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-03 00:45:53 +08:00
Python 2 loop scanner detection in Python 3
scanner*.py: Make scanner27 and scanner3 more aligned Makefile: we can run py.test on Python 3.5 HISTORY.md: grammar changes
This commit is contained in:
@@ -69,7 +69,7 @@ supported.
|
|||||||
Next we get to ["uncompyle" and
|
Next we get to ["uncompyle" and
|
||||||
PyPI](https://pypi.python.org/pypi/uncompyle/1.1) and the era of
|
PyPI](https://pypi.python.org/pypi/uncompyle/1.1) and the era of
|
||||||
public version control. (Dan's code although not public used
|
public version control. (Dan's code although not public used
|
||||||
[darcs](http://darcs.net/) for version control.
|
[darcs](http://darcs.net/) for version control.)
|
||||||
|
|
||||||
In contrast to _decompyle_, _uncompyle_ at least in its final versions,
|
In contrast to _decompyle_, _uncompyle_ at least in its final versions,
|
||||||
runs only on Python 2.7. However it accepts bytecode back to Python
|
runs only on Python 2.7. However it accepts bytecode back to Python
|
||||||
@@ -98,7 +98,7 @@ actively, if briefly, worked on. Also starting around 2012 is Dark
|
|||||||
Fenx's uncompyle3 which I used for inspiration for Python3.
|
Fenx's uncompyle3 which I used for inspiration for Python3.
|
||||||
|
|
||||||
I started working on this late 2015, mostly to add fragment support.
|
I started working on this late 2015, mostly to add fragment support.
|
||||||
In that decided to make this runnable on Python 3.2+ and Python 2.6+
|
In that, I decided to make this runnable on Python 3.2+ and Python 2.6+
|
||||||
while handling Python bytecodes from Python versions 2.5+ and
|
while handling Python bytecodes from Python versions 2.5+ and
|
||||||
3.2+. (I think I could go back further, but I'd consider doing that
|
3.2+. (I think I could go back further, but I'd consider doing that
|
||||||
only after code is better cleaned up and supports Python 3 better.)
|
only after code is better cleaned up and supports Python 3 better.)
|
||||||
@@ -116,7 +116,7 @@ Hartmut a decade an a half ago:
|
|||||||
NB. This is not a masterpiece of software, but became more like a hack.
|
NB. This is not a masterpiece of software, but became more like a hack.
|
||||||
Probably a complete rewrite would be sensefull. hG/2000-12-27
|
Probably a complete rewrite would be sensefull. hG/2000-12-27
|
||||||
|
|
||||||
This project deparses using a Early-algorithm parse with lots of
|
This project deparses using an Earley-algorithm parse with lots of
|
||||||
massaging of tokens and the grammar in the scanner
|
massaging of tokens and the grammar in the scanner
|
||||||
phase. Earley-algorithm parsers are context free and tend to be linear
|
phase. Earley-algorithm parsers are context free and tend to be linear
|
||||||
if the grammar is LR or left recursive.
|
if the grammar is LR or left recursive.
|
||||||
|
8
Makefile
8
Makefile
@@ -23,12 +23,12 @@ check:
|
|||||||
@PYTHON_VERSION=`$(PYTHON) -V 2>&1 | cut -d ' ' -f 2 | cut -d'.' -f1,2`; \
|
@PYTHON_VERSION=`$(PYTHON) -V 2>&1 | cut -d ' ' -f 2 | cut -d'.' -f1,2`; \
|
||||||
$(MAKE) check-$$PYTHON_VERSION
|
$(MAKE) check-$$PYTHON_VERSION
|
||||||
|
|
||||||
#: Tests for Python 2.7, 3.3 and 3.4
|
#: Tests for Python 2.7, 3.3, 3.4 and 3.5
|
||||||
check-2.7 check-3.3 check-3.4: pytest
|
check-2.7 check-3.3 check-3.4 check-3.5: pytest
|
||||||
$(MAKE) -C test $@
|
$(MAKE) -C test $@
|
||||||
|
|
||||||
#: Tests for Python 3.5 - pytest doesn't work here
|
#: Tests for Python 3.2 - pytest doesn't work here
|
||||||
check-3.2 check-3.5:
|
check-3.2:
|
||||||
$(MAKE) -C test $@
|
$(MAKE) -C test $@
|
||||||
|
|
||||||
#:Tests for Python 2.6 (doesn't have pytest)
|
#:Tests for Python 2.6 (doesn't have pytest)
|
||||||
|
@@ -243,7 +243,7 @@ class Python3Parser(PythonParser):
|
|||||||
testtrue ::= expr jmp_true
|
testtrue ::= expr jmp_true
|
||||||
|
|
||||||
_ifstmts_jump ::= return_if_stmts
|
_ifstmts_jump ::= return_if_stmts
|
||||||
_ifstmts_jump ::= c_stmts_opt
|
_ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM
|
||||||
|
|
||||||
iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE
|
iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE
|
||||||
|
|
||||||
@@ -594,7 +594,6 @@ class Python32Parser(Python3Parser):
|
|||||||
"""
|
"""
|
||||||
# Store locals is only used in Python 3.2
|
# Store locals is only used in Python 3.2
|
||||||
designator ::= STORE_LOCALS
|
designator ::= STORE_LOCALS
|
||||||
_ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
class Python34Parser(Python3Parser):
|
class Python34Parser(Python3Parser):
|
||||||
@@ -614,6 +613,11 @@ class Python35onParser(Python3Parser):
|
|||||||
withasstmt ::= expr SETUP_WITH designator suite_stmts_opt
|
withasstmt ::= expr SETUP_WITH designator suite_stmts_opt
|
||||||
POP_BLOCK LOAD_CONST COME_FROM
|
POP_BLOCK LOAD_CONST COME_FROM
|
||||||
WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY
|
WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY
|
||||||
|
|
||||||
|
# Python 3.5 has more loop optimization that removes
|
||||||
|
# JUMP_FORWARD in some cases, and hence we also don't
|
||||||
|
# see COME_FROM
|
||||||
|
_ifstmts_jump ::= c_stmts_opt
|
||||||
"""
|
"""
|
||||||
|
|
||||||
class Python3ParserSingle(Python3Parser, PythonParserSingle):
|
class Python3ParserSingle(Python3Parser, PythonParserSingle):
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
# Copyright (c) 1999 John Aycock
|
# Copyright (c) 2016 by Rocky Bernstein
|
||||||
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
|
|
||||||
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
|
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
|
||||||
|
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
|
||||||
|
# Copyright (c) 1999 John Aycock
|
||||||
#
|
#
|
||||||
# See LICENSE
|
# See LICENSE
|
||||||
#
|
#
|
||||||
|
@@ -44,43 +44,11 @@ class Scanner27(scan.Scanner):
|
|||||||
|
|
||||||
customize = {}
|
customize = {}
|
||||||
Token = self.Token # shortcut
|
Token = self.Token # shortcut
|
||||||
self.code = array('B', co.co_code)
|
|
||||||
|
|
||||||
for i in self.op_range(0, len(self.code)):
|
n = self.setup_code(co)
|
||||||
if self.code[i] in (RETURN_VALUE, END_FINALLY):
|
self.build_lines_data(co, n)
|
||||||
n = i + 1
|
self.build_prev_op(n)
|
||||||
self.code = array('B', co.co_code[:n])
|
|
||||||
|
|
||||||
self.prev = [0]
|
|
||||||
# mapping addresses of instruction & argument
|
|
||||||
for i in self.op_range(0, n):
|
|
||||||
op = self.code[i]
|
|
||||||
self.prev.append(i)
|
|
||||||
if op >= HAVE_ARGUMENT:
|
|
||||||
self.prev.append(i)
|
|
||||||
self.prev.append(i)
|
|
||||||
|
|
||||||
self.lines = []
|
|
||||||
linetuple = namedtuple('linetuple', ['l_no', 'next'])
|
|
||||||
|
|
||||||
j = 0
|
|
||||||
|
|
||||||
# linestarts is a tuple of (offset, line number).
|
|
||||||
# Turn that in a has that we can index
|
|
||||||
linestarts = list(dis.findlinestarts(co))
|
|
||||||
linestartoffsets = {}
|
|
||||||
for offset, lineno in linestarts:
|
|
||||||
linestartoffsets[offset] = lineno
|
|
||||||
|
|
||||||
(prev_start_byte, prev_line_no) = linestarts[0]
|
|
||||||
for (start_byte, line_no) in linestarts[1:]:
|
|
||||||
while j < start_byte:
|
|
||||||
self.lines.append(linetuple(prev_line_no, start_byte))
|
|
||||||
j += 1
|
|
||||||
prev_line_no = start_byte
|
|
||||||
while j < n:
|
|
||||||
self.lines.append(linetuple(prev_line_no, n))
|
|
||||||
j+=1
|
|
||||||
# self.lines contains (block,addrLastInstr)
|
# self.lines contains (block,addrLastInstr)
|
||||||
if classname:
|
if classname:
|
||||||
classname = '_' + classname.lstrip('_') + '__'
|
classname = '_' + classname.lstrip('_') + '__'
|
||||||
@@ -104,7 +72,7 @@ class Scanner27(scan.Scanner):
|
|||||||
if names[self.get_argument(i+3)] == 'AssertionError':
|
if names[self.get_argument(i+3)] == 'AssertionError':
|
||||||
self.load_asserts.add(i+3)
|
self.load_asserts.add(i+3)
|
||||||
|
|
||||||
cf = self.find_jump_targets(self.code)
|
cf = self.find_jump_targets()
|
||||||
# contains (code, [addrRefToCode])
|
# contains (code, [addrRefToCode])
|
||||||
last_stmt = self.next_stmt[0]
|
last_stmt = self.next_stmt[0]
|
||||||
i = self.next_stmt[last_stmt]
|
i = self.next_stmt[last_stmt]
|
||||||
@@ -213,8 +181,8 @@ class Scanner27(scan.Scanner):
|
|||||||
if offset in self.return_end_ifs:
|
if offset in self.return_end_ifs:
|
||||||
op_name = 'RETURN_END_IF'
|
op_name = 'RETURN_END_IF'
|
||||||
|
|
||||||
if offset in linestartoffsets:
|
if offset in self.linestartoffsets:
|
||||||
linestart = linestartoffsets[offset]
|
linestart = self.linestartoffsets[offset]
|
||||||
else:
|
else:
|
||||||
linestart = None
|
linestart = None
|
||||||
|
|
||||||
@@ -224,6 +192,63 @@ class Scanner27(scan.Scanner):
|
|||||||
tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
|
tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
|
||||||
return tokens, customize
|
return tokens, customize
|
||||||
|
|
||||||
|
def setup_code(self, co):
    """
    Creates Python-independent bytecode structure (byte array) in
    self.code, truncated just past the last RETURN_VALUE or
    END_FINALLY instruction found in co.co_code.

    The size of the truncated self.code is returned.

    Raises AssertionError if neither opcode occurs in the bytecode.
    """
    self.code = array('B', co.co_code)

    # Remember the end of the last RETURN_VALUE / END_FINALLY seen;
    # everything after that point is dropped below.
    n = -1
    for i in self.op_range(0, len(self.code)):
        if self.code[i] in (RETURN_VALUE, END_FINALLY):
            n = i + 1

    # Raised explicitly instead of via an `assert` statement so the check
    # survives `python -O`; otherwise n == -1 would make the slice below
    # silently drop the final byte.
    if n < 0:
        raise AssertionError("Didn't find RETURN_VALUE or END_FINALLY")

    self.code = array('B', co.co_code[:n])
    return n
|
||||||
|
|
||||||
|
def build_prev_op(self, n):
    """
    Builds self.prev, a list that starts as [0] and then receives, for
    every offset yielded by self.op_range(0, n), that offset once, or
    three times when the opcode at that offset is >= HAVE_ARGUMENT.

    NOTE(review): presumably op_range yields instruction start offsets
    and an opcode with an argument occupies three bytes, which would make
    self.prev[k] the start of the instruction preceding offset k --
    confirm against op_range.
    """
    # Bind the list to self.prev up front and grow it in place.
    self.prev = entries = [0]
    for offset in self.op_range(0, n):
        # mapping addresses of instruction & argument
        repeat = 3 if self.code[offset] >= HAVE_ARGUMENT else 1
        entries.extend([offset] * repeat)
|
||||||
|
|
||||||
|
def build_lines_data(self, co, n):
    """
    Initializes self.lines and self.linestartoffsets

    self.linestartoffsets maps each bytecode offset reported by
    dis.findlinestarts(co) to its source line number.

    self.lines receives one (l_no, next) tuple per bytecode offset in
    range(n): l_no is the line number of the line-start entry covering
    that offset and next is the offset at which the following line-start
    entry begins (n for the last one).
    """
    self.lines = []
    linetuple = namedtuple('linetuple', ['l_no', 'next'])

    # linestarts is a list of (offset, line number) pairs.
    # Turn that into a hash that we can index
    linestarts = list(dis.findlinestarts(co))
    self.linestartoffsets = dict(linestarts)

    j = 0
    # NOTE(review): assumes findlinestarts yields at least one pair;
    # an empty result raises IndexError here -- confirm that cannot happen.
    prev_line_no = linestarts[0][1]
    for start_byte, line_no in linestarts[1:]:
        # Every offset before this line start belongs to the previous line
        while j < start_byte:
            self.lines.append(linetuple(prev_line_no, start_byte))
            j += 1
        # Carry the line *number* forward; the earlier code stored
        # start_byte here, putting a byte offset in the l_no field.
        prev_line_no = line_no
    # Remaining offsets up to n belong to the last line
    while j < n:
        self.lines.append(linetuple(prev_line_no, n))
        j += 1
    return
|
||||||
|
|
||||||
def build_stmt_indices(self):
|
def build_stmt_indices(self):
|
||||||
code = self.code
|
code = self.code
|
||||||
start = 0
|
start = 0
|
||||||
@@ -585,7 +610,7 @@ class Scanner27(scan.Scanner):
|
|||||||
target = self.get_target(pos, op)
|
target = self.get_target(pos, op)
|
||||||
self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
|
self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
|
||||||
|
|
||||||
def find_jump_targets(self, code):
|
def find_jump_targets(self):
|
||||||
'''
|
'''
|
||||||
Detect all offsets in a byte code which are jump targets.
|
Detect all offsets in a byte code which are jump targets.
|
||||||
|
|
||||||
@@ -595,7 +620,7 @@ class Scanner27(scan.Scanner):
|
|||||||
for each target the number of jumps are counted.
|
for each target the number of jumps are counted.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
n = len(code)
|
n = len(self.code)
|
||||||
self.structs = [{'type': 'root',
|
self.structs = [{'type': 'root',
|
||||||
'start': 0,
|
'start': 0,
|
||||||
'end': n-1}]
|
'end': n-1}]
|
||||||
@@ -603,12 +628,14 @@ class Scanner27(scan.Scanner):
|
|||||||
self.fixed_jumps = {} # Map fixed jumps to their real destination
|
self.fixed_jumps = {} # Map fixed jumps to their real destination
|
||||||
self.ignore_if = set()
|
self.ignore_if = set()
|
||||||
self.build_stmt_indices()
|
self.build_stmt_indices()
|
||||||
|
|
||||||
|
# Containers filled by detect_structure()
|
||||||
self.not_continue = set()
|
self.not_continue = set()
|
||||||
self.return_end_ifs = set()
|
self.return_end_ifs = set()
|
||||||
|
|
||||||
targets = {}
|
targets = {}
|
||||||
for i in self.op_range(0, n):
|
for i in self.op_range(0, n):
|
||||||
op = code[i]
|
op = self.code[i]
|
||||||
|
|
||||||
# Determine structures and fix jumps in Python versions
|
# Determine structures and fix jumps in Python versions
|
||||||
# since 2.3
|
# since 2.3
|
||||||
@@ -616,7 +643,7 @@ class Scanner27(scan.Scanner):
|
|||||||
|
|
||||||
if op >= HAVE_ARGUMENT:
|
if op >= HAVE_ARGUMENT:
|
||||||
label = self.fixed_jumps.get(i)
|
label = self.fixed_jumps.get(i)
|
||||||
oparg = code[i+1] + code[i+2] * 256
|
oparg = self.code[i+1] + self.code[i+2] * 256
|
||||||
if label is None:
|
if label is None:
|
||||||
if op in hasjrel and op != FOR_ITER:
|
if op in hasjrel and op != FOR_ITER:
|
||||||
label = i + 3 + oparg
|
label = i + 3 + oparg
|
||||||
@@ -634,7 +661,8 @@ class Scanner27(scan.Scanner):
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
co = inspect.currentframe().f_code
|
co = inspect.currentframe().f_code
|
||||||
tokens, customize = Scanner27().disassemble(co)
|
from uncompyle6 import PYTHON_VERSION
|
||||||
|
tokens, customize = Scanner27(PYTHON_VERSION).disassemble(co)
|
||||||
for t in tokens:
|
for t in tokens:
|
||||||
print(t)
|
print(t)
|
||||||
pass
|
pass
|
||||||
|
@@ -394,7 +394,7 @@ class Scanner3(scan.Scanner):
|
|||||||
#
|
#
|
||||||
# We may however want to consider whether we do
|
# We may however want to consider whether we do
|
||||||
# this in 3.5 or not.
|
# this in 3.5 or not.
|
||||||
if oparg == 0 and self.version != 3.4:
|
if oparg == 0 and self.version >= 3.5:
|
||||||
tokens.append(Token('NOP', oparg, pattr, offset, linestart))
|
tokens.append(Token('NOP', oparg, pattr, offset, linestart))
|
||||||
continue
|
continue
|
||||||
elif op_name == 'LOAD_GLOBAL':
|
elif op_name == 'LOAD_GLOBAL':
|
||||||
@@ -481,23 +481,25 @@ class Scanner3(scan.Scanner):
|
|||||||
for each target the number of jumps is counted.
|
for each target the number of jumps is counted.
|
||||||
"""
|
"""
|
||||||
code = self.code
|
code = self.code
|
||||||
codelen = len(code)
|
n = len(code)
|
||||||
self.structs = [{'type': 'root',
|
self.structs = [{'type': 'root',
|
||||||
'start': 0,
|
'start': 0,
|
||||||
'end': codelen-1}]
|
'end': n-1}]
|
||||||
|
|
||||||
# All loop entry points
|
# All loop entry points
|
||||||
# self.loops = []
|
self.loops = []
|
||||||
|
|
||||||
# Map fixed jumps to their real destination
|
# Map fixed jumps to their real destination
|
||||||
self.fixed_jumps = {}
|
self.fixed_jumps = {}
|
||||||
self.ignore_if = set()
|
self.ignore_if = set()
|
||||||
self.build_statement_indices()
|
self.build_statement_indices()
|
||||||
|
|
||||||
# Containers filled by detect_structure()
|
# Containers filled by detect_structure()
|
||||||
self.not_continue = set()
|
self.not_continue = set()
|
||||||
self.return_end_ifs = set()
|
self.return_end_ifs = set()
|
||||||
|
|
||||||
targets = {}
|
targets = {}
|
||||||
for offset in self.op_range(0, codelen):
|
for offset in self.op_range(0, n):
|
||||||
op = code[offset]
|
op = code[offset]
|
||||||
|
|
||||||
# Determine structures and fix jumps in Python versions
|
# Determine structures and fix jumps in Python versions
|
||||||
@@ -656,7 +658,67 @@ class Scanner3(scan.Scanner):
|
|||||||
end = curent_end
|
end = curent_end
|
||||||
parent = struct
|
parent = struct
|
||||||
|
|
||||||
if op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE):
|
if op == SETUP_LOOP:
|
||||||
|
start = offset+3
|
||||||
|
target = self.get_target(offset)
|
||||||
|
end = self.restrict_to_parent(target, parent)
|
||||||
|
|
||||||
|
if target != end:
|
||||||
|
self.fixed_jumps[offset] = end
|
||||||
|
(line_no, next_line_byte) = self.lines[offset]
|
||||||
|
jump_back = self.last_instr(start, end, JUMP_ABSOLUTE,
|
||||||
|
next_line_byte, False)
|
||||||
|
|
||||||
|
if jump_back and jump_back != self.prev_op[end] and code[jump_back+3] in (JUMP_ABSOLUTE, JUMP_FORWARD):
|
||||||
|
if code[self.prev_op[end]] == RETURN_VALUE or \
|
||||||
|
(code[self.prev_op[end]] == POP_BLOCK and code[self.prev_op[self.prev_op[end]]] == RETURN_VALUE):
|
||||||
|
jump_back = None
|
||||||
|
if not jump_back: # loop suite ends in return. wtf right?
|
||||||
|
jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
|
||||||
|
if not jump_back:
|
||||||
|
return
|
||||||
|
if code[self.prev_op[next_line_byte]] not in (PJIF, PJIT):
|
||||||
|
loop_type = 'for'
|
||||||
|
else:
|
||||||
|
loop_type = 'while'
|
||||||
|
self.ignore_if.add(self.prev_op[next_line_byte])
|
||||||
|
target = next_line_byte
|
||||||
|
end = jump_back + 3
|
||||||
|
else:
|
||||||
|
if self.get_target(jump_back) >= next_line_byte:
|
||||||
|
jump_back = self.last_instr(start, end, JUMP_ABSOLUTE, start, False)
|
||||||
|
if end > jump_back+4 and code[end] in (JUMP_FORWARD, JUMP_ABSOLUTE):
|
||||||
|
if code[jump_back+4] in (JUMP_ABSOLUTE, JUMP_FORWARD):
|
||||||
|
if self.get_target(jump_back+4) == self.get_target(end):
|
||||||
|
self.fixed_jumps[offset] = jump_back+4
|
||||||
|
end = jump_back+4
|
||||||
|
elif target < offset:
|
||||||
|
self.fixed_jumps[offset] = jump_back+4
|
||||||
|
end = jump_back+4
|
||||||
|
target = self.get_target(jump_back)
|
||||||
|
|
||||||
|
if code[target] in (FOR_ITER, GET_ITER):
|
||||||
|
loop_type = 'for'
|
||||||
|
else:
|
||||||
|
loop_type = 'while'
|
||||||
|
test = self.prev_op[next_line_byte]
|
||||||
|
if test == offset:
|
||||||
|
loop_type = 'while 1'
|
||||||
|
elif self.code[test] in op3.hasjabs+op3.hasjrel:
|
||||||
|
self.ignore_if.add(test)
|
||||||
|
test_target = self.get_target(test)
|
||||||
|
if test_target > (jump_back+3):
|
||||||
|
jump_back = test_target
|
||||||
|
self.not_continue.add(jump_back)
|
||||||
|
self.loops.append(target)
|
||||||
|
self.structs.append({'type': loop_type + '-loop',
|
||||||
|
'start': target,
|
||||||
|
'end': jump_back})
|
||||||
|
if jump_back+3 != end:
|
||||||
|
self.structs.append({'type': loop_type + '-else',
|
||||||
|
'start': jump_back+3,
|
||||||
|
'end': end})
|
||||||
|
elif op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE):
|
||||||
start = offset + self.op_size(op)
|
start = offset + self.op_size(op)
|
||||||
target = self.get_target(offset)
|
target = self.get_target(offset)
|
||||||
rtarget = self.restrict_to_parent(target, parent)
|
rtarget = self.restrict_to_parent(target, parent)
|
||||||
|
Reference in New Issue
Block a user