You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-04 01:09:52 +08:00
DRY Python3 scanner code. Some cross version handling fixed.
Some Python 3.2 and 3.3 deparse fixes.
This commit is contained in:
117
ChangeLog
117
ChangeLog
@@ -1,3 +1,120 @@
|
||||
2015-12-27 rocky <rb@dustyfeet.com>
|
||||
|
||||
* README.rst, test/bytecompile-tests, uncompyle6/opcodes/Makefile,
|
||||
uncompyle6/opcodes/opcode_23.py, uncompyle6/opcodes/opcode_24.py,
|
||||
uncompyle6/opcodes/opcode_25.py, uncompyle6/opcodes/opcode_26.py,
|
||||
uncompyle6/opcodes/opcode_27.py, uncompyle6/opcodes/opcode_32.py,
|
||||
uncompyle6/opcodes/opcode_33.py, uncompyle6/opcodes/opcode_34.py,
|
||||
uncompyle6/parser.py, uncompyle6/parsers/parse3.py,
|
||||
uncompyle6/scanner.py, uncompyle6/scanners/scanner25.py,
|
||||
uncompyle6/scanners/scanner26.py, uncompyle6/scanners/scanner27.py,
|
||||
uncompyle6/scanners/scanner3.py, uncompyle6/scanners/scanner32.py,
|
||||
uncompyle6/scanners/scanner33.py, uncompyle6/scanners/scanner34.py,
|
||||
uncompyle6/semantics/fragments.py, uncompyle6/semantics/pysource.py:
|
||||
DRY Python3 scanner code. Some cross version handling fixed. Some
|
||||
Python 3.2 and 3.3 deparse fixes.
|
||||
|
||||
2015-12-26 rocky <rb@dustyfeet.com>
|
||||
|
||||
* .travis.yml, test/Makefile, uncompyle6/verify.py: Running native
|
||||
on Python 3.3 needs more work
|
||||
|
||||
2015-12-26 rocky <rb@dustyfeet.com>
|
||||
|
||||
* test/Makefile, test/test_pythonlib.py: Add ok-2.7 tests for 3.4
|
||||
full testing
|
||||
|
||||
2015-12-26 rocky <rb@dustyfeet.com>
|
||||
|
||||
* test/Makefile, test/bytecompile-tests, test/test_pythonlib.py: Add
|
||||
verify tests. Add Python 2.6 bytecode and use.
|
||||
|
||||
2015-12-26 rocky <rb@dustyfeet.com>
|
||||
|
||||
* uncompyle6/semantics/fragments.py,
|
||||
uncompyle6/semantics/pysource.py: Add node and template code to
|
||||
cleanup "for" handling
|
||||
|
||||
2015-12-26 rocky <rb@dustyfeet.com>
|
||||
|
||||
* .travis.yml: Try Python 2.6 testing on travis
|
||||
|
||||
2015-12-26 rocky <rb@dustyfeet.com>
|
||||
|
||||
* test/Makefile: For testing we can't 3.3 bytecodes on 2.7 yet, so
|
||||
use 3.2
|
||||
|
||||
2015-12-26 rocky <rb@dustyfeet.com>
|
||||
|
||||
* .travis.yml, Makefile, requirements-dev.txt, test/Makefile,
|
||||
test/bytecompile-tests, test/test_pythonlib.py,
|
||||
uncompyle6/__init__.py, uncompyle6/opcodes/opcode_32.py,
|
||||
uncompyle6/opcodes/opcode_33.py, uncompyle6/opcodes/opcode_34.py,
|
||||
uncompyle6/scanner.py, uncompyle6/scanners/scanner32.py,
|
||||
uncompyle6/scanners/scanner33.py, uncompyle6/scanners/scanner34.py,
|
||||
uncompyle6/semantics/pysource.py: Fix up Python 3.2, 3.3, and 3.4
|
||||
cross-version scanners Try travis 2.6 and 3.3
|
||||
|
||||
2015-12-26 rocky <rb@dustyfeet.com>
|
||||
|
||||
* .travis.yml: Travis: try checking 3.4
|
||||
|
||||
2015-12-26 rocky <rb@dustyfeet.com>
|
||||
|
||||
* test/simple_source/exception/05_try_except.py,
|
||||
test/simple_source/looping/10_while.py,
|
||||
test/simple_source/looping/while.py,
|
||||
test/simple_source/simple_stmts/00_assign.py,
|
||||
test/simple_source/simple_stmts/00_import.py,
|
||||
test/simple_source/simple_stmts/00_pass.py,
|
||||
test/simple_source/simple_stmts/15_assert.py,
|
||||
test/simple_source/stmts/00_assign.py,
|
||||
test/simple_source/stmts/00_import.py,
|
||||
test/simple_source/stmts/00_pass.py,
|
||||
test/simple_source/stmts/15_assert.py,
|
||||
test/simple_source/stmts/15_for_if.py,
|
||||
uncompyle6/parsers/parse2.py, uncompyle6/parsers/parse3.py,
|
||||
uncompyle6/scanners/scanner32.py, uncompyle6/scanners/scanner34.py:
|
||||
Fix up looping by reinstating JUMP_ABSOLUTE -> JUMP_BACK or CONTINUE
|
||||
get jump offsets into jump attributes. Fix up 3.2 scanner partially
|
||||
and use that in 3.4 for cross-version disassembly.
|
||||
|
||||
2015-12-26 rocky <rb@dustyfeet.com>
|
||||
|
||||
* test/simple_source/exception/01_try_except.py,
|
||||
test/simple_source/exception/05_try_except.py, uncompyle6/main.py,
|
||||
uncompyle6/opcodes/opcode_34.py, uncompyle6/parsers/parse3.py,
|
||||
uncompyle6/semantics/pysource.py: Python3 try/except handling
|
||||
improvements. Add Walker exception and use that: fixes erroneous
|
||||
uncompyle success message on parse error.
|
||||
|
||||
2015-12-25 rocky <rb@dustyfeet.com>
|
||||
|
||||
* test/simple_source/exception/01_try_except.py,
|
||||
uncompyle6/parsers/parse2.py, uncompyle6/parsers/parse3.py: WIP redo
|
||||
try/except for Python3
|
||||
|
||||
2015-12-25 rocky <rb@dustyfeet.com>
|
||||
|
||||
* uncompyle6/semantics/fragments.py,
|
||||
uncompyle6/semantics/pysource.py: Fix bugs in using pysource from
|
||||
fragments.
|
||||
|
||||
2015-12-25 rocky <rb@dustyfeet.com>
|
||||
|
||||
* uncompyle6/semantics/Makefile, uncompyle6/semantics/fragments.py,
|
||||
uncompyle6/semantics/pysource.py: Two modes of disassembly, one
|
||||
where we show hidden code and one where we don't.
|
||||
|
||||
2015-12-25 rocky <rb@dustyfeet.com>
|
||||
|
||||
* README.rst: README.rst typos
|
||||
|
||||
2015-12-25 rocky <rb@dustyfeet.com>
|
||||
|
||||
* .gitignore, ChangeLog, MANIFEST.in, NEWS, __pkginfo__.py,
|
||||
test/Makefile: Get ready for release 2.0.0
|
||||
|
||||
2015-12-25 rocky <rb@dustyfeet.com>
|
||||
|
||||
* pytest/test_deparse.py: Port deparse test from python-deparse to
|
||||
|
9
NEWS
9
NEWS
@@ -1,3 +1,12 @@
|
||||
uncompyle6 2.1.0 2015-12-27
|
||||
|
||||
- Python 3.x deparsing much more solid
|
||||
- Better cross-version deparsing
|
||||
|
||||
Some bugs squashed while others run rampant. Some code cleanup while
|
||||
much more is yet needed. More tests added, but many more are needed.
|
||||
|
||||
|
||||
uncompyle6 1.0.0 2015-12-11
|
||||
|
||||
Changes from uncompyle2
|
||||
|
15
README.rst
15
README.rst
@@ -3,7 +3,8 @@
|
||||
uncompyle6
|
||||
==========
|
||||
|
||||
A native Python Byte-code Disassembler, Decompiler, and byte-code library
|
||||
A native Python Byte-code Disassembler, Decompiler, Fragment Decompiler
|
||||
and byte-code library
|
||||
|
||||
|
||||
Introduction
|
||||
@@ -11,7 +12,8 @@ Introduction
|
||||
|
||||
*uncompyle6* translates Python byte-code back into equivalent Python
|
||||
source code. It accepts byte-codes from Python version 2.5 to 3.4 or
|
||||
so and has been tested on Python 2.6, 2.7 and Python 3.4.
|
||||
so and has been tested on Python running versions 2.6, 2.7, 3.3 and
|
||||
3.4.
|
||||
|
||||
Why this?
|
||||
---------
|
||||
@@ -83,9 +85,12 @@ for usage help
|
||||
Known Bugs/Restrictions
|
||||
-----------------------
|
||||
|
||||
Python 3 deparsing is getting there, but not solid. Using Python 2 to
|
||||
deparse Python 3 is problematic, especilly for versions 3.4 and
|
||||
greater.
|
||||
Python 2 deparsing is probably as solid as the various versions of
|
||||
uncompyle2. Python 3 deparsing is not as solid. Using Python 2 to
|
||||
deparse Python 3 has severe limitations, due to byte code format
|
||||
differences and the current inability to retrieve code object fields across
|
||||
different Python versions. (I envy the pycdc C++ code which doesn't have such
|
||||
problems because they live totally outside of Python.)
|
||||
|
||||
See Also
|
||||
--------
|
||||
|
@@ -40,7 +40,7 @@ def get_srcdir():
|
||||
return os.path.realpath(filename)
|
||||
|
||||
ns = {}
|
||||
version = '2.0.0'
|
||||
version = '2.1.0'
|
||||
web = 'https://github.com/rocky/python-uncompyle6/'
|
||||
|
||||
# tracebacks in zip files are funky and not debuggable
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -70,7 +70,7 @@ for root, dirs, basenames in os.walk('simple_source'):
|
||||
simple_source.append(os.path.join(root, basename)[0:-3])
|
||||
pass
|
||||
|
||||
tests['2.6'] = tests['2.7'] = tests['3.3'] = tests['3.4'] = simple_source
|
||||
tests['2.6'] = tests['2.7'] = tests['3.2'] = tests['3.3'] = tests['3.4'] = simple_source
|
||||
|
||||
total_tests = len(tests['2.7'])
|
||||
#tests['2.2'].sort(); print tests['2.2']
|
||||
|
7
uncompyle6/opcodes/Makefile
Normal file
7
uncompyle6/opcodes/Makefile
Normal file
@@ -0,0 +1,7 @@
|
||||
# Whatever it is you want to do, it should be forwarded to the
|
||||
# top-level directories
|
||||
PHONY=check all
|
||||
all: check
|
||||
|
||||
%:
|
||||
$(MAKE) -C ../.. $@
|
@@ -1,6 +1,10 @@
|
||||
"""
|
||||
opcode module - potentially shared between dis and other modules which
|
||||
operate on bytecodes (e.g. peephole optimizers).
|
||||
CPython 2.3 bytecode opcodes
|
||||
|
||||
This is used in scanner (bytecode disassembly) and parser (Python grammar).
|
||||
|
||||
This is a superset of Python 2.3's opcode.py with some opcodes that simplify
|
||||
parsing and semantic interpretation.
|
||||
"""
|
||||
|
||||
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
|
||||
|
@@ -1,6 +1,10 @@
|
||||
"""
|
||||
opcode module - potentially shared between dis and other modules which
|
||||
operate on bytecodes (e.g. peephole optimizers).
|
||||
CPython 2.4 bytecode opcodes
|
||||
|
||||
This is used in scanner (bytecode disassembly) and parser (Python grammar).
|
||||
|
||||
This is a superset of Python 2.4's opcode.py with some opcodes that simplify
|
||||
parsing and semantic interpretation.
|
||||
"""
|
||||
|
||||
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
|
||||
|
@@ -1,6 +1,10 @@
|
||||
"""
|
||||
opcode module - potentially shared between dis and other modules which
|
||||
operate on bytecodes (e.g. peephole optimizers).
|
||||
CPython 2.5 bytecode opcodes
|
||||
|
||||
This is used in scanner (bytecode disassembly) and parser (Python grammar).
|
||||
|
||||
This is a superset of Python 2.5's opcode.py with some opcodes that simplify
|
||||
parsing and semantic interpretation.
|
||||
"""
|
||||
|
||||
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
|
||||
|
@@ -1,6 +1,10 @@
|
||||
"""
|
||||
opcode module - potentially shared between dis and other modules which
|
||||
operate on bytecodes (e.g. peephole optimizers).
|
||||
CPython 2.6 bytecode opcodes
|
||||
|
||||
This is used in scanner (bytecode disassembly) and parser (Python grammar).
|
||||
|
||||
This is a superset of Python 2.6's opcode.py with some opcodes that simplify
|
||||
parsing and semantic interpretation.
|
||||
"""
|
||||
|
||||
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
|
||||
|
@@ -1,6 +1,10 @@
|
||||
"""
|
||||
opcode module - potentially shared between dis and other modules which
|
||||
operate on bytecodes (e.g. peephole optimizers).
|
||||
CPython 2.7 bytecode opcodes
|
||||
|
||||
This is used in scanner (bytecode disassembly) and parser (Python grammar).
|
||||
|
||||
This is a superset of Python 2.7's opcode.py with some opcodes that simplify
|
||||
parsing and semantic interpretation.
|
||||
"""
|
||||
|
||||
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
|
||||
@@ -198,3 +202,8 @@ def_op('MAP_ADD', 147)
|
||||
|
||||
updateGlobal()
|
||||
del def_op, name_op, jrel_op, jabs_op
|
||||
|
||||
from uncompyle6 import PYTHON_VERSION
|
||||
if PYTHON_VERSION == 2.7:
|
||||
import dis
|
||||
assert all(item in opmap.items() for item in dis.opmap.items())
|
||||
|
@@ -1,7 +1,10 @@
|
||||
|
||||
"""
|
||||
opcode module - potentially shared between dis and other modules which
|
||||
operate on bytecodes (e.g. peephole optimizers).
|
||||
CPython 3.2 bytecode opcodes
|
||||
|
||||
This is used in scanner (bytecode disassembly) and parser (Python grammar).
|
||||
|
||||
This is a superset of Python 3.2's opcode.py with some opcodes that simplify
|
||||
parsing and semantic interpretation.
|
||||
"""
|
||||
|
||||
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
|
||||
@@ -40,6 +43,16 @@ def jabs_op(name, op):
|
||||
def_op(name, op)
|
||||
hasjabs.append(op)
|
||||
|
||||
def updateGlobal():
    """
    Publish opcode shortcuts and the jump-op name list as module globals.

    Reads the module-level ``opmap``, ``opname``, ``hasjrel`` and
    ``hasjabs`` tables and adds to this module's globals:

    * ``PJIF``, ``PJIT``, ``JA``, ``JF`` - opcode numbers for
      POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE, JUMP_ABSOLUTE and JUMP_FORWARD;
    * one global per ``opmap`` entry, with '+' in the opcode name
      replaced by '_' so the name is a valid identifier;
    * ``JUMP_OPs`` - the names of all relative and absolute jump opcodes.
    """
    # JUMP_OPs are used in verification, are set in the scanner,
    # and are used in the parser grammar.
    globals().update({'PJIF': opmap['POP_JUMP_IF_FALSE']})
    globals().update({'PJIT': opmap['POP_JUMP_IF_TRUE']})
    globals().update({'JA': opmap['JUMP_ABSOLUTE']})
    globals().update({'JF': opmap['JUMP_FORWARD']})
    globals().update(dict([(k.replace('+', '_'), v) for (k, v) in opmap.items()]))
    # Build a real list: on Python 3 map() returns a one-shot iterator,
    # which would leave JUMP_OPs empty after its first traversal.
    globals().update({'JUMP_OPs': [opname[op] for op in hasjrel + hasjabs]})
|
||||
|
||||
# Instruction opcodes for compiled code
|
||||
# Blank lines correspond to available opcodes
|
||||
|
||||
@@ -191,4 +204,10 @@ def_op('MAP_ADD', 147)
|
||||
def_op('EXTENDED_ARG', 144)
|
||||
EXTENDED_ARG = 144
|
||||
|
||||
updateGlobal()
|
||||
del def_op, name_op, jrel_op, jabs_op
|
||||
|
||||
from uncompyle6 import PYTHON_VERSION
|
||||
if PYTHON_VERSION == 3.2:
|
||||
import dis
|
||||
assert all(item in opmap.items() for item in dis.opmap.items())
|
||||
|
@@ -1,8 +1,13 @@
|
||||
"""
|
||||
CPython 3.3 bytecode opcodes
|
||||
|
||||
This is used in scanner (bytecode disassembly) and parser (Python grammar).
|
||||
|
||||
This is a superset of Python 3.3's opcode.py with some opcodes that simplify
|
||||
parsing and semantic interpretation.
|
||||
"""
|
||||
opcode module - potentially shared between dis and other modules which
|
||||
operate on bytecodes (e.g. peephole optimizers).
|
||||
"""
|
||||
|
||||
# Note: this should look exactly like Python 3.4's opcode.py
|
||||
|
||||
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
|
||||
"haslocal", "hascompare", "hasfree", "opname", "opmap",
|
||||
@@ -40,6 +45,16 @@ def jabs_op(name, op):
|
||||
def_op(name, op)
|
||||
hasjabs.append(op)
|
||||
|
||||
def updateGlobal():
    """
    Publish opcode shortcuts and the jump-op name list as module globals.

    Reads the module-level ``opmap``, ``opname``, ``hasjrel`` and
    ``hasjabs`` tables and adds to this module's globals:

    * ``PJIF``, ``PJIT``, ``JA``, ``JF`` - opcode numbers for
      POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE, JUMP_ABSOLUTE and JUMP_FORWARD;
    * one global per ``opmap`` entry, with '+' in the opcode name
      replaced by '_' so the name is a valid identifier;
    * ``JUMP_OPs`` - the names of all relative and absolute jump opcodes.
    """
    # JUMP_OPs are used in verification, are set in the scanner,
    # and are used in the parser grammar.
    globals().update({'PJIF': opmap['POP_JUMP_IF_FALSE']})
    globals().update({'PJIT': opmap['POP_JUMP_IF_TRUE']})
    globals().update({'JA': opmap['JUMP_ABSOLUTE']})
    globals().update({'JF': opmap['JUMP_FORWARD']})
    globals().update(dict([(k.replace('+', '_'), v) for (k, v) in opmap.items()]))
    # Build a real list: on Python 3 map() returns a one-shot iterator,
    # which would leave JUMP_OPs empty after its first traversal.
    globals().update({'JUMP_OPs': [opname[op] for op in hasjrel + hasjabs]})
|
||||
|
||||
# Instruction opcodes for compiled code
|
||||
# Blank lines correspond to available opcodes
|
||||
|
||||
@@ -95,6 +110,7 @@ def_op('LOAD_BUILD_CLASS', 71)
|
||||
# Python3 drops/changes:
|
||||
# def_op('PRINT_ITEM', 71)
|
||||
# def_op('PRINT_NEWLINE', 72)
|
||||
def_op('YIELD_FROM', 72)
|
||||
# def_op('PRINT_ITEM_TO', 73)
|
||||
# def_op('PRINT_NEWLINE_TO', 74)
|
||||
|
||||
@@ -186,4 +202,13 @@ def_op('MAP_ADD', 147)
|
||||
def_op('EXTENDED_ARG', 144)
|
||||
EXTENDED_ARG = 144
|
||||
|
||||
updateGlobal()
|
||||
del def_op, name_op, jrel_op, jabs_op
|
||||
|
||||
from uncompyle6 import PYTHON_VERSION
|
||||
if PYTHON_VERSION == 3.3:
|
||||
import dis
|
||||
# for item in dis.opmap.items():
|
||||
# if item not in opmap.items():
|
||||
# print(item)
|
||||
assert all(item in opmap.items() for item in dis.opmap.items())
|
||||
|
@@ -1,9 +1,11 @@
|
||||
"""
|
||||
opcode module - potentially shared between dis and other modules which
|
||||
operate on bytecodes (e.g. peephole optimizers).
|
||||
"""
|
||||
CPython 3.4 bytecode opcodes
|
||||
|
||||
# Note: this should look exactly like Python 3.4's opcode.py
|
||||
This is used in scanner (bytecode disassembly) and parser (Python grammar).
|
||||
|
||||
This is a superset of Python 3.4's opcode.py with some opcodes that simplify
|
||||
parsing and semantic interpretation.
|
||||
"""
|
||||
|
||||
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
|
||||
"haslocal", "hascompare", "hasfree", "opname", "opmap",
|
||||
@@ -43,8 +45,8 @@ def jabs_op(name, op):
|
||||
hasjabs.append(op)
|
||||
|
||||
def updateGlobal():
|
||||
# JUMP_OPs are used in verification and in the scanner in resolving forward/backward
|
||||
# jumps
|
||||
# JUMP_OPs are used in verification are set in the scanner
|
||||
# and used in the parser grammar
|
||||
globals().update({'PJIF': opmap['POP_JUMP_IF_FALSE']})
|
||||
globals().update({'PJIT': opmap['POP_JUMP_IF_TRUE']})
|
||||
globals().update({'JA': opmap['JUMP_ABSOLUTE']})
|
||||
@@ -215,3 +217,8 @@ EXTENDED_ARG = 144
|
||||
|
||||
updateGlobal()
|
||||
del def_op, name_op, jrel_op, jabs_op
|
||||
|
||||
from uncompyle6 import PYTHON_VERSION
|
||||
if PYTHON_VERSION == 3.4:
|
||||
import dis
|
||||
assert all(item in opmap.items() for item in dis.opmap.items())
|
||||
|
@@ -82,10 +82,12 @@ def get_python_parser(version, debug_parser):
|
||||
"""
|
||||
if version < 3.0:
|
||||
import uncompyle6.parsers.parse2 as parse2
|
||||
return parse2.Python2Parser(debug_parser)
|
||||
p = parse2.Python2Parser(debug_parser)
|
||||
else:
|
||||
import uncompyle6.parsers.parse3 as parse3
|
||||
return parse3.Python3Parser(debug_parser)
|
||||
p = parse3.Python3Parser(debug_parser)
|
||||
p.version = version
|
||||
return p
|
||||
|
||||
def python_parser(version, co, out=sys.stdout, showasm=False,
|
||||
parser_debug=PARSER_DEFAULT_DEBUG):
|
||||
@@ -94,9 +96,9 @@ def python_parser(version, co, out=sys.stdout, showasm=False,
|
||||
from uncompyle6.scanner import get_scanner
|
||||
scanner = get_scanner(version)
|
||||
tokens, customize = scanner.disassemble(co)
|
||||
# if showasm:
|
||||
# for t in tokens:
|
||||
# print(t)
|
||||
if showasm:
|
||||
for t in tokens:
|
||||
print(t)
|
||||
|
||||
p = get_python_parser(version, parser_debug)
|
||||
return parse(p, tokens, customize)
|
||||
|
@@ -53,12 +53,11 @@ class Python3Parser(PythonParser):
|
||||
|
||||
def p_list_comprehension(self, args):
|
||||
'''
|
||||
# Python3 adds LOAD_LISTCOMP and does list comprehension like
|
||||
# Python3 scanner adds LOAD_LISTCOMP. Python3 does list comprehension like
|
||||
# other comprehensions (set, dictionary).
|
||||
|
||||
# listcomp is a custom rule
|
||||
expr ::= listcomp
|
||||
listcomp ::= LOAD_LISTCOMP LOAD_CONST MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1
|
||||
|
||||
|
||||
expr ::= list_compr
|
||||
list_compr ::= BUILD_LIST_0 list_iter
|
||||
@@ -673,6 +672,14 @@ class Python3Parser(PythonParser):
|
||||
'''
|
||||
|
||||
def custom_buildclass_rule(self, opname, i, token, tokens, customize):
|
||||
"""
|
||||
Python >= 3.3:
|
||||
buildclass ::= LOAD_BUILD_CLASS mkfunc LOAD_CONST LOAD_CLASSNAME CALL_FUNCTION_3
|
||||
Python < 3.3
|
||||
buildclass ::= LOAD_BUILD_CLASS LOAD_CONST MAKE_FUNCTION_0 LOAD_CONST
|
||||
CALL_FUNCTION_n
|
||||
|
||||
"""
|
||||
|
||||
# look for next MAKE_FUNCTION
|
||||
for i in range(i+1, len(tokens)):
|
||||
@@ -680,11 +687,15 @@ class Python3Parser(PythonParser):
|
||||
break
|
||||
pass
|
||||
assert i < len(tokens)
|
||||
assert tokens[i+1].type == 'LOAD_CONST'
|
||||
if self.version >= 3.3:
|
||||
assert tokens[i+1].type == 'LOAD_CONST'
|
||||
load_check = 'LOAD_NAME'
|
||||
else:
|
||||
load_check = 'LOAD_CONST'
|
||||
# find load names
|
||||
have_loadname = False
|
||||
for i in range(i+1, len(tokens)):
|
||||
if tokens[i].type == 'LOAD_NAME':
|
||||
if tokens[i].type == load_check:
|
||||
tokens[i].type = 'LOAD_CLASSNAME'
|
||||
have_loadname = True
|
||||
break
|
||||
@@ -706,9 +717,14 @@ class Python3Parser(PythonParser):
|
||||
j = 0
|
||||
load_names = ''
|
||||
# customize CALL_FUNCTION
|
||||
call_function = 'CALL_FUNCTION_%d' % (j + 2)
|
||||
rule = ("buildclass ::= LOAD_BUILD_CLASS mkfunc LOAD_CONST %s%s" %
|
||||
(load_names, call_function))
|
||||
if self.version >= 3.3:
|
||||
call_function = 'CALL_FUNCTION_%d' % (j + 2)
|
||||
rule = ("buildclass ::= LOAD_BUILD_CLASS mkfunc LOAD_CONST %s%s" %
|
||||
(load_names, call_function))
|
||||
else:
|
||||
call_function = 'CALL_FUNCTION_%d' % (j + 1)
|
||||
rule = ("buildclass ::= LOAD_BUILD_CLASS mkfunc %s%s" %
|
||||
(load_names, call_function))
|
||||
self.add_unique_rule(rule, opname, token.attr, customize)
|
||||
return
|
||||
|
||||
@@ -717,6 +733,15 @@ class Python3Parser(PythonParser):
|
||||
Special handling for opcodes that take a variable number
|
||||
of arguments -- we add a new rule for each:
|
||||
|
||||
Python 3.4:
|
||||
listcomp ::= LOAD_LISTCOMP LOAD_CONST MAKE_FUNCTION_0 expr
|
||||
GET_ITER CALL_FUNCTION_1
|
||||
Python < 3.4
|
||||
listcomp ::= LOAD_LISTCOMP MAKE_FUNCTION_0 expr
|
||||
GET_ITER CALL_FUNCTION_1
|
||||
|
||||
buildclass (see load_build_class)
|
||||
|
||||
build_list ::= {expr}^n BUILD_LIST_n
|
||||
build_list ::= {expr}^n BUILD_TUPLE_n
|
||||
unpack_list ::= UNPACK_LIST {expr}^n
|
||||
@@ -750,6 +775,14 @@ class Python3Parser(PythonParser):
|
||||
+ ('kwarg ' * args_kw)
|
||||
+ 'expr ' * nak + token.type)
|
||||
self.add_unique_rule(rule, token.type, args_pos, customize)
|
||||
elif opname == 'LOAD_LISTCOMP':
|
||||
if self.version >= 3.4:
|
||||
rule = ("listcomp ::= LOAD_LISTCOMP LOAD_CONST MAKE_FUNCTION_0 expr "
|
||||
"GET_ITER CALL_FUNCTION_1")
|
||||
else:
|
||||
rule = ("listcomp ::= LOAD_LISTCOMP MAKE_FUNCTION_0 expr "
|
||||
"GET_ITER CALL_FUNCTION_1")
|
||||
self.add_unique_rule(rule, opname, token.attr, customize)
|
||||
elif opname == 'LOAD_BUILD_CLASS':
|
||||
self.custom_buildclass_rule(opname, i, token, tokens, customize)
|
||||
elif opname_base in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET'):
|
||||
@@ -763,7 +796,10 @@ class Python3Parser(PythonParser):
|
||||
elif opname_base == ('MAKE_FUNCTION'):
|
||||
self.addRule('mklambda ::= %s LOAD_LAMBDA %s' %
|
||||
('expr ' * token.attr, opname), nop_func)
|
||||
rule = 'mkfunc ::= %s LOAD_CONST LOAD_CONST %s' % ('expr ' * token.attr, opname)
|
||||
if self.version >= 3.3:
|
||||
rule = 'mkfunc ::= %s LOAD_CONST LOAD_CONST %s' % ('expr ' * token.attr, opname)
|
||||
else:
|
||||
rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr ' * token.attr, opname)
|
||||
self.add_unique_rule(rule, opname, token.attr, customize)
|
||||
pass
|
||||
return
|
||||
|
@@ -32,6 +32,14 @@ else:
|
||||
from uncompyle6.opcodes import opcode_25, opcode_26, opcode_27, opcode_32, opcode_33, opcode_34
|
||||
|
||||
|
||||
class GenericPythonCode:
    """
    Bare container standing in for a code object when the interpreter
    running the decompiler is not the one that produced the bytecode.

    The constructor sets nothing; whoever creates an instance attaches
    the code-like attributes it needs.
    """

    def __init__(self):
        # Intentionally empty: attributes are added by the creator.
        pass
|
||||
|
||||
class Code:
|
||||
'''
|
||||
Class for representing code-objects.
|
||||
@@ -46,9 +54,9 @@ class Code:
|
||||
self._tokens, self._customize = scanner.disassemble(co, classname)
|
||||
|
||||
class Scanner(object):
|
||||
opc = None # opcode module
|
||||
|
||||
def __init__(self, version):
|
||||
# FIXME: DRY
|
||||
if version == 2.7:
|
||||
self.opc = opcode_27
|
||||
elif version == 2.6:
|
||||
@@ -61,14 +69,12 @@ class Scanner(object):
|
||||
self.opc = opcode_33
|
||||
elif version == 3.4:
|
||||
self.opc = opcode_34
|
||||
else:
|
||||
raise TypeError("%i is not a Python version I know about")
|
||||
|
||||
# FIXME: This weird Python2 behavior is not Python3
|
||||
self.resetTokenClass()
|
||||
|
||||
def setShowAsm(self, showasm, out=None):
|
||||
self.showasm = showasm
|
||||
self.out = out
|
||||
|
||||
def setTokenClass(self, tokenClass):
|
||||
# assert isinstance(tokenClass, types.ClassType)
|
||||
self.Token = tokenClass
|
||||
|
@@ -12,7 +12,6 @@ Python 3 and other versions of Python. Also, we save token
|
||||
information for later use in deparsing.
|
||||
"""
|
||||
|
||||
import inspect
|
||||
from collections import namedtuple
|
||||
from array import array
|
||||
|
||||
@@ -151,7 +150,11 @@ class Scanner25(scan.Scanner):
|
||||
continue
|
||||
if op in hasconst:
|
||||
const = co.co_consts[oparg]
|
||||
if inspect.iscode(const):
|
||||
# We can't use inspect.iscode() because we may be
|
||||
# using a different version of Python than the
|
||||
# one that this was byte-compiled on. So the code
|
||||
# types may mismatch.
|
||||
if hasattr(const, 'co_name'):
|
||||
oparg = const
|
||||
if const.co_name == '<lambda>':
|
||||
assert op_name == 'LOAD_CONST'
|
||||
@@ -912,6 +915,7 @@ class Scanner25(scan.Scanner):
|
||||
return targets
|
||||
|
||||
if __name__ == "__main__":
|
||||
import inspect
|
||||
co = inspect.currentframe().f_code
|
||||
tokens, customize = Scanner25().disassemble(co)
|
||||
for t in tokens:
|
||||
|
@@ -11,7 +11,6 @@ other versions of Python. Also, we save token information for later
|
||||
use in deparsing.
|
||||
"""
|
||||
|
||||
import inspect
|
||||
from collections import namedtuple
|
||||
from array import array
|
||||
|
||||
@@ -145,7 +144,11 @@ class Scanner26(scan.Scanner):
|
||||
continue
|
||||
if op in hasconst:
|
||||
const = co.co_consts[oparg]
|
||||
if inspect.iscode(const):
|
||||
# We can't use inspect.iscode() because we may be
|
||||
# using a different version of Python than the
|
||||
# one that this was byte-compiled on. So the code
|
||||
# types may mismatch.
|
||||
if hasattr(const, 'co_name'):
|
||||
oparg = const
|
||||
if const.co_name == '<lambda>':
|
||||
assert op_name == 'LOAD_CONST'
|
||||
@@ -901,6 +904,7 @@ class Scanner26(scan.Scanner):
|
||||
return targets
|
||||
|
||||
if __name__ == "__main__":
|
||||
import inspect
|
||||
co = inspect.currentframe().f_code
|
||||
tokens, customize = Scanner26().disassemble(co)
|
||||
for t in tokens:
|
||||
|
@@ -138,7 +138,11 @@ class Scanner27(scan.Scanner):
|
||||
continue
|
||||
if op in hasconst:
|
||||
const = co.co_consts[oparg]
|
||||
if inspect.iscode(const):
|
||||
# We can't use inspect.iscode() because we may be
|
||||
# using a different version of Python than the
|
||||
# one that this was byte-compiled on. So the code
|
||||
# types may mismatch.
|
||||
if hasattr(const, 'co_name'):
|
||||
oparg = const
|
||||
if const.co_name == '<lambda>':
|
||||
assert op_name == 'LOAD_CONST'
|
||||
|
607
uncompyle6/scanners/scanner3.py
Normal file
607
uncompyle6/scanners/scanner3.py
Normal file
@@ -0,0 +1,607 @@
|
||||
# Copyright (c) 2015 by Rocky Bernstein
|
||||
"""
|
||||
Python 3 Generic Bytecode scanner/deparser
|
||||
|
||||
This overlaps various Python3's dis module, but it can be run from
|
||||
Python 2 and other versions of Python. Also, we save token information
|
||||
for later use in deparsing.
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import dis, re
|
||||
from collections import namedtuple
|
||||
from array import array
|
||||
|
||||
from uncompyle6.scanner import Token
|
||||
from uncompyle6 import PYTHON_VERSION, PYTHON3
|
||||
|
||||
|
||||
# Get all the opcodes into globals
|
||||
globals().update(dis.opmap)
|
||||
from uncompyle6.opcodes.opcode_33 import *
|
||||
import uncompyle6.scanner as scan
|
||||
|
||||
|
||||
class Scanner3(scan.Scanner):
|
||||
|
||||
def __init__(self):
    """Initialise the base scanner for the Python version we are running on."""
    super(Scanner3, self).__init__(PYTHON_VERSION)
|
||||
|
||||
def disassemble_generic(self, co, classname=None):
    """
    Convert code object <co> into a sequence of tokens.

    The below is based on (an older version?) of Python dis.disassemble_bytes().

    <co> supplies the bytecode (co_code) and the constant/name tables.
    <classname>, when given, is used to strip the private-name mangling
    prefix from entries of co_names, co_varnames and the cell/free
    variable names.

    Returns a (tokens, customize) pair: the list of Token objects, and a
    dict mapping each specialised variable-argument opcode name (for
    example BUILD_LIST_2) to its argument.
    """
    # Container for tokens
    tokens = []
    customize = {}
    self.code = code = array('B', co.co_code)
    codelen = len(code)
    self.build_lines_data(co)
    self.build_prev_op()

    # self.lines contains (block,addrLastInstr)
    if classname:
        classname = '_' + classname.lstrip('_') + '__'

        def unmangle(name):
            # Drop the '_Class' part of a mangled '_Class__name';
            # dunder names are left alone.
            if name.startswith(classname) and name[-2:] != '__':
                return name[len(classname) - 2:]
            return name

        free = [unmangle(name) for name in (co.co_cellvars + co.co_freevars)]
        names = [unmangle(name) for name in co.co_names]
        varnames = [unmangle(name) for name in co.co_varnames]
    else:
        free = co.co_cellvars + co.co_freevars
        names = co.co_names
        varnames = co.co_varnames

    # Scan for assertions. Later we will
    # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
    # assertions
    self.load_asserts = set()
    for i in self.op_range(0, codelen):
        if self.code[i] == POP_JUMP_IF_TRUE and self.code[i+3] == LOAD_GLOBAL:
            if names[self.get_argument(i+3)] == 'AssertionError':
                self.load_asserts.add(i+3)

    # Get jump targets
    # Format: {target offset: [jump offsets]}
    jump_targets = self.find_jump_targets()

    # contains (code, [addrRefToCode])
    # NOTE(review): last_stmt and i are not used below; kept as in the
    # original in case the lookups are relied on elsewhere.
    last_stmt = self.next_stmt[0]
    i = self.next_stmt[last_stmt]
    replace = {}

    # An IMPORT_NAME that follows another IMPORT_NAME on the same source
    # line is emitted as IMPORT_NAME_CONT.
    imports = self.all_instr(0, codelen, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
    if len(imports) > 1:
        last_import = imports[0]
        for i in imports[1:]:
            if self.lines[last_import].next > i:
                if self.code[last_import] == IMPORT_NAME == self.code[i]:
                    replace[i] = 'IMPORT_NAME_CONT'
            last_import = i

    # Initialize extended arg at 0. When extended arg op is encountered,
    # variable preserved for next cycle and added as arg for next op
    extended_arg = 0

    for offset in self.op_range(0, codelen):
        # Add jump target tokens
        if offset in jump_targets:
            for jump_idx, jump_offset in enumerate(jump_targets[offset]):
                tokens.append(Token('COME_FROM', None, repr(jump_offset),
                                    offset='{}_{}'.format(offset, jump_idx)))

        op = code[offset]
        op_name = opname[op]

        oparg = None
        pattr = None

        if op >= HAVE_ARGUMENT:
            oparg = self.get_argument(offset) + extended_arg
            extended_arg = 0
            if op == EXTENDED_ARG:
                extended_arg = oparg * scan.L65536
                continue
            if op in hasconst:
                const = co.co_consts[oparg]
                if not PYTHON3 and isinstance(const, str):
                    # Under Python 2 a nested Python 3 code object may
                    # show up as the text of its repr(); rebuild a
                    # minimal code-like object from that text.
                    m = re.search('^<code object (.*) '
                                  'at 0x(.*), file "(.*)", line (.*)>', const)
                    if m:
                        const = scan.GenericPythonCode()
                        const.co_name = m.group(1)
                        const.co_filename = m.group(3)
                        # Kept as the matched text (a string).
                        const.co_firstlineno = m.group(4)
                # We can't use inspect.iscode() because we may be
                # using a different version of Python than the
                # one that this was byte-compiled on. So the code
                # types may mismatch.
                if hasattr(const, 'co_name'):
                    oparg = const
                    if const.co_name == '<lambda>':
                        assert op_name == 'LOAD_CONST'
                        op_name = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        op_name = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        op_name = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        op_name = 'LOAD_SETCOMP'
                    elif const.co_name == '<listcomp>':
                        op_name = 'LOAD_LISTCOMP'
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    # (id(const), const.co_filename, const.co_name)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
            elif op in hasname:
                pattr = names[oparg]
            elif op in hasjrel:
                pattr = repr(offset + 3 + oparg)
            elif op in hasjabs:
                pattr = repr(oparg)
            elif op in haslocal:
                pattr = varnames[oparg]
            elif op in hascompare:
                pattr = cmp_op[oparg]
            elif op in hasfree:
                pattr = free[oparg]

        if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE,
                  UNPACK_SEQUENCE,
                  MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
                  CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
                  CALL_FUNCTION_VAR_KW, RAISE_VARARGS
                  ):
            # As of Python 2.5, values loaded via LOAD_CLOSURE are packed into
            # a tuple before calling MAKE_CLOSURE.
            if (op == BUILD_TUPLE and
                    self.code[self.prev_op[offset]] == LOAD_CLOSURE):
                continue
            else:
                # CALL_FUNCTION OP renaming is done as a custom rule in parse3
                if op_name not in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
                                   'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
                    op_name = '%s_%d' % (op_name, oparg)
                    if op != BUILD_SLICE:
                        customize[op_name] = oparg
        elif op == JUMP_ABSOLUTE:
            target = self.get_target(offset)
            if target < offset:
                # A backward absolute jump is either a 'continue'
                # statement or the normal jump back to the loop head.
                if (offset in self.stmts
                        and self.code[offset+3] not in (END_FINALLY, POP_BLOCK)
                        and offset not in self.not_continue):
                    op_name = 'CONTINUE'
                else:
                    op_name = 'JUMP_BACK'
        elif op == LOAD_GLOBAL:
            if offset in self.load_asserts:
                op_name = 'LOAD_ASSERT'
        elif op == RETURN_VALUE:
            if offset in self.return_end_ifs:
                op_name = 'RETURN_END_IF'

        if offset in self.linestarts:
            linestart = self.linestarts[offset]
        else:
            linestart = None

        if offset not in replace:
            tokens.append(Token(op_name, oparg, pattr, offset, linestart))
        else:
            tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
    return tokens, customize
|
||||
|
||||
def build_lines_data(self, code_obj):
    """
    Generate various line-related helper data.

    Sets three attributes on the scanner:

    * ``self.linestarts``: dict mapping offset -> line number, only for
      offsets which start a line (as reported by ``dis.findlinestarts``).
    * ``self.linestart_offsets``: set of the offsets of the first op on
      each line.
    * ``self.lines``: 'list-map' indexed by code offset; each entry is a
      ``LineTuple(l_no, next)`` holding the line number of the op at that
      offset and the offset of the first op on the following line.

    ``self.code`` must already be set; its length bounds ``self.lines``.
    """
    # Offset: lineno pairs, only for offsets which start line.
    # Locally we use a list for more convenient iteration using indices.
    linestarts = list(dis.findlinestarts(code_obj))
    self.linestarts = dict(linestarts)
    # Plain set with offsets of first ops on line
    self.linestart_offsets = set(self.linestarts)

    self.lines = lines = []
    LineTuple = namedtuple('LineTuple', ['l_no', 'next'])
    codelen = len(self.code)

    # A code object may report no line starts at all; fall back to the
    # line the code object begins on instead of raising IndexError.
    if linestarts:
        prev_line_no = linestarts[0][1]
    else:
        prev_line_no = code_obj.co_firstlineno

    # Walk the remaining line starts, filling every offset up to (but not
    # including) each one with the previous line number.
    offset = 0
    for start_offset, line_no in linestarts[1:]:
        if offset < start_offset:
            lines.extend([LineTuple(prev_line_no, start_offset)]
                         * (start_offset - offset))
            offset = start_offset
        prev_line_no = line_no

    # Fill remaining offsets with reference to last line number
    # and code length as start offset of following non-existing line
    if offset < codelen:
        lines.extend([LineTuple(prev_line_no, codelen)] * (codelen - offset))
|
||||
|
||||
def build_prev_op(self):
    """
    Compose 'list-map' which allows to jump to previous
    op, given offset of current op as index.

    The result is stored in ``self.prev_op``.  The list starts with a
    single ``0`` and then, for every op found by ``self.op_range``, gets
    the op's own offset appended once per byte the op occupies.  Because
    of the leading entry, ``self.prev_op[i]`` for an ``i`` that is the
    start of an op is the offset of the op preceding it.
    """
    code = self.code
    prev_op = self.prev_op = [0]
    for offset in self.op_range(0, len(code)):
        # One entry per byte of the instruction (opcode plus argument bytes)
        prev_op.extend([offset] * self.op_size(code[offset]))
|
||||
|
||||
def op_size(self, op):
    """
    Return size of operator with its arguments
    for given opcode <op>.

    Opcodes below ``dis.HAVE_ARGUMENT`` take no argument and occupy one
    byte; all others are counted as three bytes (opcode plus a two-byte
    argument).
    """
    return 1 if op < dis.HAVE_ARGUMENT else 3
|
||||
|
||||
def find_jump_targets(self):
    """
    Detect all offsets in a byte code which are jump targets.

    Return a dict mapping each target offset to the list of offsets of
    the instructions that jump to it.

    This procedure is modelled after dis.findlabels(), but here
    for each target the jumping offsets are collected.

    As a side effect this (re)initialises ``self.structs``,
    ``self.fixed_jumps``, ``self.ignore_if``, ``self.not_continue`` and
    ``self.return_end_ifs``, calls ``self.build_statement_indices()``, and
    runs ``self.detect_structure()`` on every op.
    """
    code = self.code
    codelen = len(code)
    self.structs = [{'type': 'root',
                     'start': 0,
                     'end': codelen-1}]

    # All loop entry points
    # self.loops = []
    # Map fixed jumps to their real destination
    self.fixed_jumps = {}
    self.ignore_if = set()
    self.build_statement_indices()
    # Containers filled by detect_structure()
    self.not_continue = set()
    self.return_end_ifs = set()

    targets = {}
    for offset in self.op_range(0, codelen):
        op = code[offset]

        # Determine structures and fix jumps; this may add an entry for
        # this offset to self.fixed_jumps, which is consulted just below.
        self.detect_structure(offset)

        if op >= dis.HAVE_ARGUMENT:
            label = self.fixed_jumps.get(offset)
            oparg = code[offset+1] + code[offset+2] * 256

            if label is None:
                if op in dis.hasjrel and op != FOR_ITER:
                    label = offset + 3 + oparg
                elif op in dis.hasjabs:
                    # Of the absolute jumps, only forward
                    # JUMP_IF_*_OR_POP jumps are recorded here.
                    if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
                        if oparg > offset:
                            label = oparg

            if label is not None and label != -1:
                targets.setdefault(label, []).append(offset)
        elif op == END_FINALLY and offset in self.fixed_jumps:
            label = self.fixed_jumps[offset]
            targets.setdefault(label, []).append(offset)
    return targets
|
||||
|
||||
def build_statement_indices(self):
    """
    Work out which offsets are treated as statements.

    Sets ``self.stmts`` (set of statement offsets) and ``self.next_stmt``
    ('list-map' which contains offset of start of next statement, when op
    offset is passed as index).  Relies on ``self.code``, ``self.prev_op``
    and ``self.lines`` having been built already.
    """
    code = self.code
    start = 0
    end = codelen = len(code)

    statement_opcodes = {
        SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
        SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH,
        POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
        STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
        STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
        RETURN_VALUE, RAISE_VARARGS, POP_TOP, PRINT_EXPR,
        JUMP_ABSOLUTE
    }

    statement_opcode_sequences = [
        (POP_JUMP_IF_FALSE, JUMP_FORWARD), (POP_JUMP_IF_FALSE, JUMP_ABSOLUTE),
        (POP_JUMP_IF_TRUE, JUMP_FORWARD), (POP_JUMP_IF_TRUE, JUMP_ABSOLUTE)]

    designator_ops = {
        STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
        STORE_SUBSCR, UNPACK_SEQUENCE, JUMP_ABSOLUTE
    }

    # Compose preliminary list of indices with statements,
    # using plain statement opcodes
    prelim = self.all_instr(start, end, statement_opcodes)

    # Initialize final container with statements with
    # preliminary data
    stmts = self.stmts = set(prelim)

    # Same for opcode sequences
    pass_stmts = set()
    for sequence in statement_opcode_sequences:
        for i in self.op_range(start, end-(len(sequence)+1)):
            match = True
            for elem in sequence:
                if elem != code[i]:
                    match = False
                    break
                # Advance to the next op to compare with the next element
                i += self.op_size(code[i])

            if match:
                # i is now just past the sequence; step back onto its
                # last op, which is the offset recorded as a statement.
                i = self.prev_op[i]
                stmts.add(i)
                pass_stmts.add(i)

    # Initialize statement list with the full data we've gathered so far
    if pass_stmts:
        stmt_offset_list = sorted(stmts)
    else:
        stmt_offset_list = prelim

    # 'List-map' which contains offset of start of
    # next statement, when op offset is passed as index
    self.next_stmt = slist = []
    # NOTE(review): -1 makes the first same-line comparison below read
    # self.lines[-1] (the last entry) -- kept as in the original.
    last_stmt_offset = -1
    i = 0
    # Go through all statement offsets
    for stmt_offset in stmt_offset_list:
        # Process absolute jumps, but do not remove 'pass' statements
        # from the set
        if code[stmt_offset] == JUMP_ABSOLUTE and stmt_offset not in pass_stmts:
            # If absolute jump occurs in forward direction or it takes off from the
            # same line as previous statement, this is not a statement
            target = self.get_target(stmt_offset)
            if (target > stmt_offset
                    or self.lines[last_stmt_offset].l_no == self.lines[stmt_offset].l_no):
                stmts.remove(stmt_offset)
                continue
            # Rewind ops till we encounter non-JUMP_ABSOLUTE one
            j = self.prev_op[stmt_offset]
            while code[j] == JUMP_ABSOLUTE:
                j = self.prev_op[j]
            # If we got here, then it's list comprehension which
            # is not a statement too
            if code[j] == LIST_APPEND:
                stmts.remove(stmt_offset)
                continue
        # Exclude ROT_TWO + POP_TOP
        elif code[stmt_offset] == POP_TOP and code[self.prev_op[stmt_offset]] == ROT_TWO:
            stmts.remove(stmt_offset)
            continue
        # Exclude FOR_ITER + designators
        elif code[stmt_offset] in designator_ops:
            j = self.prev_op[stmt_offset]
            while code[j] in designator_ops:
                j = self.prev_op[j]
            if code[j] == FOR_ITER:
                stmts.remove(stmt_offset)
                continue
        # Add to list another list with offset of current statement,
        # equal to length of previous statement
        slist += [stmt_offset] * (stmt_offset-i)
        last_stmt_offset = stmt_offset
        i = stmt_offset
    # Finish filling the list for last statement
    slist += [codelen] * (codelen-len(slist))
|
||||
|
||||
def get_target(self, offset):
    """
    Get target offset for op located at given <offset>.

    The two bytes following the opcode are read as a little-endian
    16-bit argument.  For ops listed in ``dis.hasjrel`` the argument is
    relative to the end of the 3-byte instruction, so ``offset + 3`` is
    added; otherwise the argument is returned as is.
    """
    code = self.code
    target = code[offset+1] + code[offset+2] * 256
    if code[offset] in dis.hasjrel:
        target += offset + 3
    return target
|
||||
|
||||
def detect_structure(self, offset):
    """
    Detect structures and their boundaries to fix optimized jumps
    in python2.3+

    Looks at the op at <offset> and, for conditional jumps, may record a
    corrected destination in ``self.fixed_jumps[offset]``, append
    'and/or', 'if-then' or 'if-else' entries to ``self.structs``, and add
    offsets to ``self.not_continue`` / ``self.return_end_ifs``.
    Returns None.
    """
    code = self.code
    op = code[offset]
    # Detect parent structure
    parent = self.structs[0]
    start = parent['start']
    end = parent['end']

    # Pick inner-most parent for our offset
    for struct in self.structs:
        current_start = struct['start']
        current_end = struct['end']
        if ((current_start <= offset < current_end)
                and (current_start >= start and current_end <= end)):
            start = current_start
            end = current_end
            parent = struct

    if op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE):
        start = offset + self.op_size(op)
        target = self.get_target(offset)
        rtarget = self.restrict_to_parent(target, parent)
        prev_op = self.prev_op

        # Do not let jump to go out of parent struct bounds
        if target != rtarget and parent['type'] == 'and/or':
            self.fixed_jumps[offset] = rtarget
            return

        # Does this jump to right after another cond jump?
        # If so, it's part of a larger conditional
        if (code[prev_op[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP,
                                      POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE)) and (target > offset):
            self.fixed_jumps[offset] = prev_op[target]
            self.structs.append({'type': 'and/or',
                                 'start': start,
                                 'end': prev_op[target]})
            return
        # Is it an and inside if block
        if op == POP_JUMP_IF_FALSE:
            # Search for other POP_JUMP_IF_FALSE targeting the same op,
            # in current statement, starting from current offset, and filter
            # everything inside inner 'or' jumps and midline ifs
            match = self.rem_or(start, self.next_stmt[offset], POP_JUMP_IF_FALSE, target)
            match = self.remove_mid_line_ifs(match)
            # If we still have any offsets in set, start working on it
            if match:
                if (code[prev_op[rtarget]] in (JUMP_FORWARD, JUMP_ABSOLUTE) and prev_op[rtarget] not in self.stmts and
                        self.restrict_to_parent(self.get_target(prev_op[rtarget]), parent) == rtarget):
                    if (code[prev_op[prev_op[rtarget]]] == JUMP_ABSOLUTE and self.remove_mid_line_ifs([offset]) and
                            target == self.get_target(prev_op[prev_op[rtarget]]) and
                            (prev_op[prev_op[rtarget]] not in self.stmts or
                             self.get_target(prev_op[prev_op[rtarget]]) > prev_op[prev_op[rtarget]]) and
                            1 == len(self.remove_mid_line_ifs(
                                self.rem_or(start, prev_op[prev_op[rtarget]],
                                            (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target)))):
                        pass
                    elif (code[prev_op[prev_op[rtarget]]] == RETURN_VALUE and self.remove_mid_line_ifs([offset]) and
                          1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]],
                                                                             (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target))) |
                                    set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]],
                                                                             (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE, JUMP_ABSOLUTE),
                                                                             prev_op[rtarget], True)))))):
                        pass
                    else:
                        fix = None
                        jump_ifs = self.all_instr(start, self.next_stmt[offset], POP_JUMP_IF_FALSE)
                        last_jump_good = True
                        for j in jump_ifs:
                            if target == self.get_target(j):
                                if self.lines[j].next == j + 3 and last_jump_good:
                                    fix = j
                                    break
                            else:
                                last_jump_good = False
                        self.fixed_jumps[offset] = fix or match[-1]
                        return
                else:
                    self.fixed_jumps[offset] = match[-1]
                    return
        # op == POP_JUMP_IF_TRUE
        else:
            # Named next_stmt rather than `next` so the builtin is not shadowed
            next_stmt = self.next_stmt[offset]
            if prev_op[next_stmt] == offset:
                pass
            elif code[next_stmt] in (JUMP_FORWARD, JUMP_ABSOLUTE) and target == self.get_target(next_stmt):
                if code[prev_op[next_stmt]] == POP_JUMP_IF_FALSE:
                    if (code[next_stmt] == JUMP_FORWARD or target != rtarget or
                            code[prev_op[prev_op[rtarget]]] not in (JUMP_ABSOLUTE, RETURN_VALUE)):
                        self.fixed_jumps[offset] = prev_op[next_stmt]
                        return
            elif (code[next_stmt] == JUMP_ABSOLUTE and code[target] in (JUMP_ABSOLUTE, JUMP_FORWARD) and
                  self.get_target(target) == self.get_target(next_stmt)):
                self.fixed_jumps[offset] = prev_op[next_stmt]
                return

        # Don't add a struct for a while test, it's already taken care of
        if offset in self.ignore_if:
            return

        if (code[prev_op[rtarget]] == JUMP_ABSOLUTE and prev_op[rtarget] in self.stmts and
                prev_op[rtarget] != offset and prev_op[prev_op[rtarget]] != offset and
                not (code[rtarget] == JUMP_ABSOLUTE and code[rtarget+3] == POP_BLOCK and
                     code[prev_op[prev_op[rtarget]]] != JUMP_ABSOLUTE)):
            rtarget = prev_op[rtarget]

        # Does the if jump just beyond a jump op, then this is probably an if statement
        if code[prev_op[rtarget]] in (JUMP_ABSOLUTE, JUMP_FORWARD):
            if_end = self.get_target(prev_op[rtarget])

            # Is this a loop not an if?
            if (if_end < prev_op[rtarget]) and (code[prev_op[if_end]] == SETUP_LOOP):
                if if_end > start:
                    return

            end = self.restrict_to_parent(if_end, parent)

            self.structs.append({'type': 'if-then',
                                 'start': start,
                                 'end': prev_op[rtarget]})
            self.not_continue.add(prev_op[rtarget])

            if rtarget < end:
                self.structs.append({'type': 'if-else',
                                     'start': rtarget,
                                     'end': end})
        elif code[prev_op[rtarget]] == RETURN_VALUE:
            self.structs.append({'type': 'if-then',
                                 'start': start,
                                 'end': rtarget})
            self.return_end_ifs.add(prev_op[rtarget])

    elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
        target = self.get_target(offset)
        if target > offset:
            unop_target = self.last_instr(offset, target, JUMP_FORWARD, target)
            if unop_target and code[unop_target+3] != ROT_TWO:
                self.fixed_jumps[offset] = unop_target
            else:
                self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)
|
||||
|
||||
def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
    """
    Find offsets of all requested <instr> between <start> and <end>,
    optionally <target>ing specified offset, and return list found
    <instr> offsets which are not within any POP_JUMP_IF_TRUE jumps.

    An offset counts as "within" a POP_JUMP_IF_TRUE jump when it lies
    strictly between that jump's offset and its target minus 3.
    """
    # Find all offsets of requested instructions
    instr_offsets = self.all_instr(start, end, instr, target, include_beyond_target)
    # Get all POP_JUMP_IF_TRUE (or) offsets
    pjit_offsets = self.all_instr(start, end, POP_JUMP_IF_TRUE)
    for pjit_offset in pjit_offsets:
        pjit_tgt = self.get_target(pjit_offset) - 3
        # Keep only what lies outside this jump's span; the surviving list
        # is filtered again by the next POP_JUMP_IF_TRUE.
        instr_offsets = [instr_offset for instr_offset in instr_offsets
                         if instr_offset <= pjit_offset or instr_offset >= pjit_tgt]
    return instr_offsets
|
||||
|
||||
def remove_mid_line_ifs(self, ifs):
    """
    Go through passed offsets, filtering ifs
    located somewhere mid-line.

    An offset is dropped when the op 3 bytes after it is on the same line
    and the last op on that line is a POP_JUMP_IF_TRUE or
    POP_JUMP_IF_FALSE.  Returns a new list; <ifs> is not modified.
    """
    filtered = []
    for if_ in ifs:
        # For each offset, if line number of current and next op
        # is the same
        if self.lines[if_].l_no == self.lines[if_+3].l_no:
            # Check if last op on line is PJIT or PJIF, and if it is - skip it
            last_on_line = self.prev_op[self.lines[if_].next]
            if self.code[last_on_line] in (POP_JUMP_IF_TRUE, POP_JUMP_IF_FALSE):
                continue
        filtered.append(if_)
    return filtered
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test: tokenize the code object of this very module and dump
    # the resulting tokens.
    import inspect
    co = inspect.currentframe().f_code
    tokens, customize = Scanner3().disassemble_generic(co)
    for t in tokens:
        print(t)
|
@@ -9,19 +9,21 @@ for later use in deparsing.
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import uncompyle6.scanners.scanner33 as scan33
|
||||
import uncompyle6.scanner as scan
|
||||
import uncompyle6.scanners.scanner3 as scan3
|
||||
|
||||
class Scanner32(scan.Scanner):
|
||||
def __init__(self):
|
||||
scan.Scanner.__init__(self, 3.2) # check
|
||||
import uncompyle6.opcodes.opcode_34
|
||||
# verify uses JUMP_OPs from here
|
||||
JUMP_OPs = uncompyle6.opcodes.opcode_34.JUMP_OPs
|
||||
|
||||
class Scanner32(scan3.Scanner3):
|
||||
|
||||
def disassemble(self, co, classname=None):
|
||||
return scan33.Scanner33().disassemble(co, classname)
|
||||
return self.disassemble_generic(co, classname)
|
||||
|
||||
if __name__ == "__main__":
|
||||
import inspect
|
||||
co = inspect.currentframe().f_code
|
||||
tokens, customize = Scanner33().disassemble(co)
|
||||
tokens, customize = Scanner32().disassemble(co)
|
||||
for t in tokens:
|
||||
print(t)
|
||||
pass
|
||||
|
@@ -1,6 +1,6 @@
|
||||
# Copyright (c) 2015 by Rocky Bernstein
|
||||
"""
|
||||
Python 3.3 bytecode scanner/deparser
|
||||
Python 3 bytecode scanner/deparser
|
||||
|
||||
This overlaps Python's 3.3's dis module, but it can be run from
|
||||
Python 2 and other versions of Python. Also, we save token information
|
||||
@@ -9,598 +9,19 @@ for later use in deparsing.
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import dis, inspect, marshal
|
||||
from collections import namedtuple
|
||||
from array import array
|
||||
import uncompyle6.scanners.scanner3 as scan3
|
||||
|
||||
from uncompyle6.scanner import Token, L65536
|
||||
import uncompyle6.opcodes.opcode_33
|
||||
# verify uses JUMP_OPs from here
|
||||
JUMP_OPs = uncompyle6.opcodes.opcode_33.JUMP_OPs
|
||||
|
||||
|
||||
# Get all the opcodes into globals
|
||||
globals().update(dis.opmap)
|
||||
from uncompyle6.opcodes.opcode_27 import *
|
||||
import uncompyle6.scanner as scan
|
||||
|
||||
|
||||
class Scanner33(scan.Scanner):
|
||||
def __init__(self):
|
||||
scan.Scanner.__init__(self, 3.2) # check
|
||||
|
||||
def run(self, bytecode):
|
||||
code_object = marshal.loads(bytecode)
|
||||
tokens = self.tokenize(code_object)
|
||||
return tokens
|
||||
class Scanner33(scan3.Scanner3):
|
||||
|
||||
def disassemble(self, co, classname=None):
|
||||
"""
|
||||
Convert code object <co> into a sequence of tokens.
|
||||
|
||||
The below is based on (an older version?) of Python dis.disassemble_bytes().
|
||||
"""
|
||||
# Container for tokens
|
||||
tokens = []
|
||||
customize = {}
|
||||
self.code = code = array('B', co.co_code)
|
||||
codelen = len(code)
|
||||
self.build_lines_data(co)
|
||||
self.build_prev_op()
|
||||
|
||||
# self.lines contains (block,addrLastInstr)
|
||||
if classname:
|
||||
classname = '_' + classname.lstrip('_') + '__'
|
||||
|
||||
def unmangle(name):
|
||||
if name.startswith(classname) and name[-2:] != '__':
|
||||
return name[len(classname) - 2:]
|
||||
return name
|
||||
|
||||
free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
|
||||
names = [ unmangle(name) for name in co.co_names ]
|
||||
varnames = [ unmangle(name) for name in co.co_varnames ]
|
||||
else:
|
||||
free = co.co_cellvars + co.co_freevars
|
||||
names = co.co_names
|
||||
varnames = co.co_varnames
|
||||
pass
|
||||
|
||||
# Scan for assertions. Later we will
|
||||
# turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
|
||||
# assertions
|
||||
|
||||
self.load_asserts = set()
|
||||
for i in self.op_range(0, codelen):
|
||||
if self.code[i] == POP_JUMP_IF_TRUE and self.code[i+3] == LOAD_GLOBAL:
|
||||
if names[self.get_argument(i+3)] == 'AssertionError':
|
||||
self.load_asserts.add(i+3)
|
||||
|
||||
# Get jump targets
|
||||
# Format: {target offset: [jump offsets]}
|
||||
jump_targets = self.find_jump_targets()
|
||||
|
||||
# contains (code, [addrRefToCode])
|
||||
last_stmt = self.next_stmt[0]
|
||||
i = self.next_stmt[last_stmt]
|
||||
replace = {}
|
||||
while i < codelen-1:
|
||||
if self.lines[last_stmt].next > i:
|
||||
if self.code[last_stmt] == PRINT_ITEM:
|
||||
if self.code[i] == PRINT_ITEM:
|
||||
replace[i] = 'PRINT_ITEM_CONT'
|
||||
elif self.code[i] == PRINT_NEWLINE:
|
||||
replace[i] = 'PRINT_NEWLINE_CONT'
|
||||
last_stmt = i
|
||||
i = self.next_stmt[i]
|
||||
|
||||
imports = self.all_instr(0, codelen, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
|
||||
if len(imports) > 1:
|
||||
last_import = imports[0]
|
||||
for i in imports[1:]:
|
||||
if self.lines[last_import].next > i:
|
||||
if self.code[last_import] == IMPORT_NAME == self.code[i]:
|
||||
replace[i] = 'IMPORT_NAME_CONT'
|
||||
last_import = i
|
||||
|
||||
# Initialize extended arg at 0. When extended arg op is encountered,
|
||||
# variable preserved for next cycle and added as arg for next op
|
||||
extended_arg = 0
|
||||
|
||||
for offset in self.op_range(0, codelen):
|
||||
# Add jump target tokens
|
||||
if offset in jump_targets:
|
||||
jump_idx = 0
|
||||
for jump_offset in jump_targets[offset]:
|
||||
tokens.append(Token('COME_FROM', None, repr(jump_offset),
|
||||
offset='{}_{}'.format(offset, jump_idx)))
|
||||
jump_idx += 1
|
||||
pass
|
||||
pass
|
||||
|
||||
op = code[offset]
|
||||
op_name = opname[op]
|
||||
|
||||
oparg = None; pattr = None
|
||||
|
||||
if op >= HAVE_ARGUMENT:
|
||||
oparg = self.get_argument(offset) + extended_arg
|
||||
extended_arg = 0
|
||||
if op == EXTENDED_ARG:
|
||||
extended_arg = oparg * scan.L65536
|
||||
continue
|
||||
if op in hasconst:
|
||||
const = co.co_consts[oparg]
|
||||
if inspect.iscode(const):
|
||||
oparg = const
|
||||
if const.co_name == '<lambda>':
|
||||
assert op_name == 'LOAD_CONST'
|
||||
op_name = 'LOAD_LAMBDA'
|
||||
elif const.co_name == '<genexpr>':
|
||||
op_name = 'LOAD_GENEXPR'
|
||||
elif const.co_name == '<dictcomp>':
|
||||
op_name = 'LOAD_DICTCOMP'
|
||||
elif const.co_name == '<setcomp>':
|
||||
op_name = 'LOAD_SETCOMP'
|
||||
# verify() uses 'pattr' for comparison, since 'attr'
|
||||
# now holds Code(const) and thus can not be used
|
||||
# for comparison (todo: think about changing this)
|
||||
# pattr = 'code_object @ 0x%x %s->%s' %\
|
||||
# (id(const), const.co_filename, const.co_name)
|
||||
pattr = '<code_object ' + const.co_name + '>'
|
||||
else:
|
||||
pattr = const
|
||||
elif op in hasname:
|
||||
pattr = names[oparg]
|
||||
elif op in hasjrel:
|
||||
pattr = repr(offset + 3 + oparg)
|
||||
elif op in hasjabs:
|
||||
pattr = repr(oparg)
|
||||
elif op in haslocal:
|
||||
pattr = varnames[oparg]
|
||||
elif op in hascompare:
|
||||
pattr = cmp_op[oparg]
|
||||
elif op in hasfree:
|
||||
pattr = free[oparg]
|
||||
|
||||
if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE,
|
||||
UNPACK_SEQUENCE,
|
||||
MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
|
||||
CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
|
||||
CALL_FUNCTION_VAR_KW, RAISE_VARARGS
|
||||
):
|
||||
# As of Python 2.5, values loaded via LOAD_CLOSURE are packed into
|
||||
# a tuple before calling MAKE_CLOSURE.
|
||||
if (op == BUILD_TUPLE and
|
||||
self.code[self.prev_op[offset]] == LOAD_CLOSURE):
|
||||
continue
|
||||
else:
|
||||
# CALL_FUNCTION OP renaming is done as a custom rule in parse3
|
||||
if op_name not in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
|
||||
'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
|
||||
op_name = '%s_%d' % (op_name, oparg)
|
||||
if op != BUILD_SLICE:
|
||||
customize[op_name] = oparg
|
||||
elif op == JUMP_ABSOLUTE:
|
||||
target = self.get_target(offset)
|
||||
if target < offset:
|
||||
if (offset in self.stmts
|
||||
and self.code[offset+3] not in (END_FINALLY, POP_BLOCK)
|
||||
and offset not in self.not_continue):
|
||||
op_name = 'CONTINUE'
|
||||
else:
|
||||
op_name = 'JUMP_BACK'
|
||||
|
||||
elif op == LOAD_GLOBAL:
|
||||
if offset in self.load_asserts:
|
||||
op_name = 'LOAD_ASSERT'
|
||||
elif op == RETURN_VALUE:
|
||||
if offset in self.return_end_ifs:
|
||||
op_name = 'RETURN_END_IF'
|
||||
|
||||
if offset in self.linestarts:
|
||||
linestart = self.linestarts[offset]
|
||||
else:
|
||||
linestart = None
|
||||
|
||||
if offset not in replace:
|
||||
tokens.append(Token(op_name, oparg, pattr, offset, linestart))
|
||||
else:
|
||||
tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
|
||||
pass
|
||||
return tokens, customize
|
||||
|
||||
def build_lines_data(self, code_obj):
|
||||
"""
|
||||
Generate various line-related helper data.
|
||||
"""
|
||||
# Offset: lineno pairs, only for offsets which start line.
|
||||
# Locally we use list for more convenient iteration using indices
|
||||
linestarts = list(dis.findlinestarts(code_obj))
|
||||
self.linestarts = dict(linestarts)
|
||||
# Plain set with offsets of first ops on line
|
||||
self.linestart_offsets = {a for (a, _) in linestarts}
|
||||
# 'List-map' which shows line number of current op and offset of
|
||||
# first op on following line, given offset of op as index
|
||||
self.lines = lines = []
|
||||
LineTuple = namedtuple('LineTuple', ['l_no', 'next'])
|
||||
# Iterate through available linestarts, and fill
|
||||
# the data for all code offsets encountered until
|
||||
# last linestart offset
|
||||
_, prev_line_no = linestarts[0]
|
||||
offset = 0
|
||||
for start_offset, line_no in linestarts[1:]:
|
||||
while offset < start_offset:
|
||||
lines.append(LineTuple(prev_line_no, start_offset))
|
||||
offset += 1
|
||||
prev_line_no = line_no
|
||||
# Fill remaining offsets with reference to last line number
|
||||
# and code length as start offset of following non-existing line
|
||||
codelen = len(self.code)
|
||||
while offset < codelen:
|
||||
lines.append(LineTuple(prev_line_no, codelen))
|
||||
offset += 1
|
||||
|
||||
def build_prev_op(self):
|
||||
"""
|
||||
Compose 'list-map' which allows to jump to previous
|
||||
op, given offset of current op as index.
|
||||
"""
|
||||
code = self.code
|
||||
codelen = len(code)
|
||||
self.prev_op = [0]
|
||||
for offset in self.op_range(0, codelen):
|
||||
op = code[offset]
|
||||
for _ in range(self.op_size(op)):
|
||||
self.prev_op.append(offset)
|
||||
|
||||
def op_size(self, op):
|
||||
"""
|
||||
Return size of operator with its arguments
|
||||
for given opcode <op>.
|
||||
"""
|
||||
if op < dis.HAVE_ARGUMENT:
|
||||
return 1
|
||||
else:
|
||||
return 3
|
||||
|
||||
def find_jump_targets(self):
|
||||
"""
|
||||
Detect all offsets in a byte code which are jump targets.
|
||||
|
||||
Return the list of offsets.
|
||||
|
||||
This procedure is modelled after dis.findlables(), but here
|
||||
for each target the number of jumps is counted.
|
||||
"""
|
||||
code = self.code
|
||||
codelen = len(code)
|
||||
self.structs = [{'type': 'root',
|
||||
'start': 0,
|
||||
'end': codelen-1}]
|
||||
|
||||
# All loop entry points
|
||||
# self.loops = []
|
||||
# Map fixed jumps to their real destination
|
||||
self.fixed_jumps = {}
|
||||
self.ignore_if = set()
|
||||
self.build_statement_indices()
|
||||
# Containers filled by detect_structure()
|
||||
self.not_continue = set()
|
||||
self.return_end_ifs = set()
|
||||
|
||||
targets = {}
|
||||
for offset in self.op_range(0, codelen):
|
||||
op = code[offset]
|
||||
|
||||
# Determine structures and fix jumps for 2.3+
|
||||
self.detect_structure(offset)
|
||||
|
||||
if op >= dis.HAVE_ARGUMENT:
|
||||
label = self.fixed_jumps.get(offset)
|
||||
oparg = code[offset+1] + code[offset+2] * 256
|
||||
|
||||
if label is None:
|
||||
if op in dis.hasjrel and op != FOR_ITER:
|
||||
label = offset + 3 + oparg
|
||||
elif op in dis.hasjabs:
|
||||
if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
|
||||
if oparg > offset:
|
||||
label = oparg
|
||||
|
||||
if label is not None and label != -1:
|
||||
targets[label] = targets.get(label, []) + [offset]
|
||||
elif op == END_FINALLY and offset in self.fixed_jumps:
|
||||
label = self.fixed_jumps[offset]
|
||||
targets[label] = targets.get(label, []) + [offset]
|
||||
return targets
|
||||
|
||||
# FIXME Create and move to scanner3
|
||||
def build_statement_indices(self):
|
||||
code = self.code
|
||||
start = 0
|
||||
end = codelen = len(code)
|
||||
|
||||
statement_opcodes = {
|
||||
SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
|
||||
SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH,
|
||||
POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
|
||||
STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
|
||||
STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
|
||||
RETURN_VALUE, RAISE_VARARGS, POP_TOP, PRINT_EXPR,
|
||||
JUMP_ABSOLUTE
|
||||
}
|
||||
|
||||
statement_opcode_sequences = [(POP_JUMP_IF_FALSE, JUMP_FORWARD), (POP_JUMP_IF_FALSE, JUMP_ABSOLUTE),
|
||||
(POP_JUMP_IF_TRUE, JUMP_FORWARD), (POP_JUMP_IF_TRUE, JUMP_ABSOLUTE)]
|
||||
|
||||
designator_ops = {
|
||||
STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
|
||||
STORE_SUBSCR, UNPACK_SEQUENCE, JUMP_ABSOLUTE
|
||||
}
|
||||
|
||||
# Compose preliminary list of indices with statements,
|
||||
# using plain statement opcodes
|
||||
prelim = self.all_instr(start, end, statement_opcodes)
|
||||
|
||||
# Initialize final container with statements with
|
||||
# preliminnary data
|
||||
stmts = self.stmts = set(prelim)
|
||||
|
||||
# Same for opcode sequences
|
||||
pass_stmts = set()
|
||||
for sequence in statement_opcode_sequences:
|
||||
for i in self.op_range(start, end-(len(sequence)+1)):
|
||||
match = True
|
||||
for elem in sequence:
|
||||
if elem != code[i]:
|
||||
match = False
|
||||
break
|
||||
i += self.op_size(code[i])
|
||||
|
||||
if match is True:
|
||||
i = self.prev_op[i]
|
||||
stmts.add(i)
|
||||
pass_stmts.add(i)
|
||||
|
||||
# Initialize statement list with the full data we've gathered so far
|
||||
if pass_stmts:
|
||||
stmt_offset_list = list(stmts)
|
||||
stmt_offset_list.sort()
|
||||
else:
|
||||
stmt_offset_list = prelim
|
||||
# 'List-map' which contains offset of start of
|
||||
# next statement, when op offset is passed as index
|
||||
self.next_stmt = slist = []
|
||||
last_stmt_offset = -1
|
||||
i = 0
|
||||
# Go through all statement offsets
|
||||
for stmt_offset in stmt_offset_list:
|
||||
# Process absolute jumps, but do not remove 'pass' statements
|
||||
# from the set
|
||||
if code[stmt_offset] == JUMP_ABSOLUTE and stmt_offset not in pass_stmts:
|
||||
# If absolute jump occurs in forward direction or it takes off from the
|
||||
# same line as previous statement, this is not a statement
|
||||
target = self.get_target(stmt_offset)
|
||||
if target > stmt_offset or self.lines[last_stmt_offset].l_no == self.lines[stmt_offset].l_no:
|
||||
stmts.remove(stmt_offset)
|
||||
continue
|
||||
# Rewing ops till we encounter non-JA one
|
||||
j = self.prev_op[stmt_offset]
|
||||
while code[j] == JUMP_ABSOLUTE:
|
||||
j = self.prev_op[j]
|
||||
# If we got here, then it's list comprehension which
|
||||
# is not a statement too
|
||||
if code[j] == LIST_APPEND:
|
||||
stmts.remove(stmt_offset)
|
||||
continue
|
||||
# Exclude ROT_TWO + POP_TOP
|
||||
elif code[stmt_offset] == POP_TOP and code[self.prev_op[stmt_offset]] == ROT_TWO:
|
||||
stmts.remove(stmt_offset)
|
||||
continue
|
||||
# Exclude FOR_ITER + designators
|
||||
elif code[stmt_offset] in designator_ops:
|
||||
j = self.prev_op[stmt_offset]
|
||||
while code[j] in designator_ops:
|
||||
j = self.prev_op[j]
|
||||
if code[j] == FOR_ITER:
|
||||
stmts.remove(stmt_offset)
|
||||
continue
|
||||
# Add to list another list with offset of current statement,
|
||||
# equal to length of previous statement
|
||||
slist += [stmt_offset] * (stmt_offset-i)
|
||||
last_stmt_offset = stmt_offset
|
||||
i = stmt_offset
|
||||
# Finish filling the list for last statement
|
||||
slist += [codelen] * (codelen-len(slist))
|
||||
|
||||
# FIXME Create and move to scanner3
|
||||
def get_target(self, offset):
|
||||
"""
|
||||
Get target offset for op located at given <offset>.
|
||||
"""
|
||||
op = self.code[offset]
|
||||
target = self.code[offset+1] + self.code[offset+2] * 256
|
||||
if op in dis.hasjrel:
|
||||
target += offset + 3
|
||||
return target
|
||||
|
||||
# FIXME Create and move to scanner3
|
||||
def detect_structure(self, offset):
|
||||
"""
|
||||
Detect structures and their boundaries to fix optimizied jumps
|
||||
in python2.3+
|
||||
"""
|
||||
code = self.code
|
||||
op = code[offset]
|
||||
# Detect parent structure
|
||||
parent = self.structs[0]
|
||||
start = parent['start']
|
||||
end = parent['end']
|
||||
|
||||
# Pick inner-most parent for our offset
|
||||
for struct in self.structs:
|
||||
curent_start = struct['start']
|
||||
curent_end = struct['end']
|
||||
if (curent_start <= offset < curent_end) and (curent_start >= start and curent_end <= end):
|
||||
start = curent_start
|
||||
end = curent_end
|
||||
parent = struct
|
||||
|
||||
if op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE):
|
||||
start = offset + self.op_size(op)
|
||||
target = self.get_target(offset)
|
||||
rtarget = self.restrict_to_parent(target, parent)
|
||||
prev_op = self.prev_op
|
||||
|
||||
# Do not let jump to go out of parent struct bounds
|
||||
if target != rtarget and parent['type'] == 'and/or':
|
||||
self.fixed_jumps[offset] = rtarget
|
||||
return
|
||||
|
||||
# Does this jump to right after another cond jump?
|
||||
# If so, it's part of a larger conditional
|
||||
if (code[prev_op[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP,
|
||||
POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE)) and (target > offset):
|
||||
self.fixed_jumps[offset] = prev_op[target]
|
||||
self.structs.append({'type': 'and/or',
|
||||
'start': start,
|
||||
'end': prev_op[target]})
|
||||
return
|
||||
# Is it an and inside if block
|
||||
if op == POP_JUMP_IF_FALSE:
|
||||
# Search for other POP_JUMP_IF_FALSE targetting the same op,
|
||||
# in current statement, starting from current offset, and filter
|
||||
# everything inside inner 'or' jumps and midline ifs
|
||||
match = self.rem_or(start, self.next_stmt[offset], POP_JUMP_IF_FALSE, target)
|
||||
match = self.remove_mid_line_ifs(match)
|
||||
# If we still have any offsets in set, start working on it
|
||||
if match:
|
||||
if (code[prev_op[rtarget]] in (JUMP_FORWARD, JUMP_ABSOLUTE) and prev_op[rtarget] not in self.stmts and
|
||||
self.restrict_to_parent(self.get_target(prev_op[rtarget]), parent) == rtarget):
|
||||
if (code[prev_op[prev_op[rtarget]]] == JUMP_ABSOLUTE and self.remove_mid_line_ifs([offset]) and
|
||||
target == self.get_target(prev_op[prev_op[rtarget]]) and
|
||||
(prev_op[prev_op[rtarget]] not in self.stmts or self.get_target(prev_op[prev_op[rtarget]]) > prev_op[prev_op[rtarget]]) and
|
||||
1 == len(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target)))):
|
||||
pass
|
||||
elif (code[prev_op[prev_op[rtarget]]] == RETURN_VALUE and self.remove_mid_line_ifs([offset]) and
|
||||
1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]],
|
||||
(POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target))) |
|
||||
set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]],
|
||||
(POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE, JUMP_ABSOLUTE),
|
||||
prev_op[rtarget], True)))))):
|
||||
pass
|
||||
else:
|
||||
fix = None
|
||||
jump_ifs = self.all_instr(start, self.next_stmt[offset], POP_JUMP_IF_FALSE)
|
||||
last_jump_good = True
|
||||
for j in jump_ifs:
|
||||
if target == self.get_target(j):
|
||||
if self.lines[j].next == j + 3 and last_jump_good:
|
||||
fix = j
|
||||
break
|
||||
else:
|
||||
last_jump_good = False
|
||||
self.fixed_jumps[offset] = fix or match[-1]
|
||||
return
|
||||
else:
|
||||
self.fixed_jumps[offset] = match[-1]
|
||||
return
|
||||
# op == POP_JUMP_IF_TRUE
|
||||
else:
|
||||
next = self.next_stmt[offset]
|
||||
if prev_op[next] == offset:
|
||||
pass
|
||||
elif code[next] in (JUMP_FORWARD, JUMP_ABSOLUTE) and target == self.get_target(next):
|
||||
if code[prev_op[next]] == POP_JUMP_IF_FALSE:
|
||||
if code[next] == JUMP_FORWARD or target != rtarget or code[prev_op[prev_op[rtarget]]] not in (JUMP_ABSOLUTE, RETURN_VALUE):
|
||||
self.fixed_jumps[offset] = prev_op[next]
|
||||
return
|
||||
elif (code[next] == JUMP_ABSOLUTE and code[target] in (JUMP_ABSOLUTE, JUMP_FORWARD) and
|
||||
self.get_target(target) == self.get_target(next)):
|
||||
self.fixed_jumps[offset] = prev_op[next]
|
||||
return
|
||||
|
||||
# Don't add a struct for a while test, it's already taken care of
|
||||
if offset in self.ignore_if:
|
||||
return
|
||||
|
||||
if (code[prev_op[rtarget]] == JUMP_ABSOLUTE and prev_op[rtarget] in self.stmts and
|
||||
prev_op[rtarget] != offset and prev_op[prev_op[rtarget]] != offset and
|
||||
not (code[rtarget] == JUMP_ABSOLUTE and code[rtarget+3] == POP_BLOCK and code[prev_op[prev_op[rtarget]]] != JUMP_ABSOLUTE)):
|
||||
rtarget = prev_op[rtarget]
|
||||
|
||||
# Does the if jump just beyond a jump op, then this is probably an if statement
|
||||
if code[prev_op[rtarget]] in (JUMP_ABSOLUTE, JUMP_FORWARD):
|
||||
if_end = self.get_target(prev_op[rtarget])
|
||||
|
||||
# Is this a loop not an if?
|
||||
if (if_end < prev_op[rtarget]) and (code[prev_op[if_end]] == SETUP_LOOP):
|
||||
if(if_end > start):
|
||||
return
|
||||
|
||||
end = self.restrict_to_parent(if_end, parent)
|
||||
|
||||
self.structs.append({'type': 'if-then',
|
||||
'start': start,
|
||||
'end': prev_op[rtarget]})
|
||||
self.not_continue.add(prev_op[rtarget])
|
||||
|
||||
if rtarget < end:
|
||||
self.structs.append({'type': 'if-else',
|
||||
'start': rtarget,
|
||||
'end': end})
|
||||
elif code[prev_op[rtarget]] == RETURN_VALUE:
|
||||
self.structs.append({'type': 'if-then',
|
||||
'start': start,
|
||||
'end': rtarget})
|
||||
self.return_end_ifs.add(prev_op[rtarget])
|
||||
|
||||
elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
|
||||
target = self.get_target(offset)
|
||||
if target > offset:
|
||||
unop_target = self.last_instr(offset, target, JUMP_FORWARD, target)
|
||||
if unop_target and code[unop_target+3] != ROT_TWO:
|
||||
self.fixed_jumps[offset] = unop_target
|
||||
else:
|
||||
self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)
|
||||
|
||||
# FIXME Create and move to scanner3
|
||||
def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
|
||||
"""
|
||||
Find offsets of all requested <instr> between <start> and <end>,
|
||||
optionally <target>ing specified offset, and return list found
|
||||
<instr> offsets which are not within any POP_JUMP_IF_TRUE jumps.
|
||||
"""
|
||||
# Find all offsets of requested instructions
|
||||
instr_offsets = self.all_instr(start, end, instr, target, include_beyond_target)
|
||||
# Get all POP_JUMP_IF_TRUE (or) offsets
|
||||
pjit_offsets = self.all_instr(start, end, POP_JUMP_IF_TRUE)
|
||||
filtered = []
|
||||
for pjit_offset in pjit_offsets:
|
||||
pjit_tgt = self.get_target(pjit_offset) - 3
|
||||
for instr_offset in instr_offsets:
|
||||
if instr_offset <= pjit_offset or instr_offset >= pjit_tgt:
|
||||
filtered.append(instr_offset)
|
||||
instr_offsets = filtered
|
||||
filtered = []
|
||||
return instr_offsets
|
||||
|
||||
# FIXME Create and move to scanner3
|
||||
def remove_mid_line_ifs(self, ifs):
|
||||
"""
|
||||
Go through passed offsets, filtering ifs
|
||||
located somewhere mid-line.
|
||||
"""
|
||||
filtered = []
|
||||
for if_ in ifs:
|
||||
# For each offset, if line number of current and next op
|
||||
# is the same
|
||||
if self.lines[if_].l_no == self.lines[if_+3].l_no:
|
||||
# Check if last op on line is PJIT or PJIF, and if it is - skip it
|
||||
if self.code[self.prev_op[self.lines[if_].next]] in (POP_JUMP_IF_TRUE, POP_JUMP_IF_FALSE):
|
||||
continue
|
||||
filtered.append(if_)
|
||||
return filtered
|
||||
return self.disassemble_generic(co, classname)
|
||||
|
||||
if __name__ == "__main__":
|
||||
import inspect
|
||||
co = inspect.currentframe().f_code
|
||||
tokens, customize = Scanner33().disassemble(co)
|
||||
for t in tokens:
|
||||
|
@@ -11,34 +11,26 @@ for later use in deparsing.
|
||||
from __future__ import print_function
|
||||
|
||||
import dis, inspect
|
||||
from collections import namedtuple
|
||||
from array import array
|
||||
import uncompyle6.scanners.scanner3 as scan3
|
||||
|
||||
from uncompyle6 import PYTHON_VERSION
|
||||
from uncompyle6.scanner import Token
|
||||
|
||||
import uncompyle6.opcodes.opcode_34
|
||||
# Get all the opcodes into globals
|
||||
JUMP_OPs = uncompyle6.opcodes.opcode_34.JUMP_OPs
|
||||
globals().update(dis.opmap)
|
||||
|
||||
import uncompyle6.opcodes.opcode_34
|
||||
# verify uses JUMP_OPs from here
|
||||
JUMP_OPs = uncompyle6.opcodes.opcode_34.JUMP_OPs
|
||||
|
||||
from uncompyle6.opcodes.opcode_34 import *
|
||||
|
||||
import uncompyle6.scanner as scan
|
||||
import uncompyle6.scanners.scanner33 as scan33
|
||||
|
||||
|
||||
class Scanner34(scan.Scanner):
|
||||
def __init__(self):
|
||||
scan.Scanner.__init__(self, 3.4) # check
|
||||
|
||||
def get_argument(self, bytecode, pos):
|
||||
arg = bytecode[pos+1] + bytecode[pos+2] * 256
|
||||
return arg
|
||||
class Scanner34(scan3.Scanner3):
|
||||
|
||||
def disassemble(self, co, classname=None):
|
||||
fn = self.disassemble_built_in if PYTHON_VERSION == 3.4 \
|
||||
else self.disassemble_cross_version
|
||||
else self.disassemble_generic
|
||||
return fn(co, classname)
|
||||
|
||||
def disassemble_built_in(self, co, classname=None):
|
||||
@@ -167,255 +159,7 @@ class Scanner34(scan.Scanner):
|
||||
pass
|
||||
return tokens, {}
|
||||
|
||||
# FIXME Create and move to scanner3
|
||||
def disassemble_cross_version(self, co, classname=None):
|
||||
return scan33.Scanner33().disassemble(co, classname)
|
||||
|
||||
# FIXME Create and move to scanner3
|
||||
def build_lines_data(self, code_obj):
|
||||
"""
|
||||
Generate various line-related helper data.
|
||||
"""
|
||||
# Offset: lineno pairs, only for offsets which start line.
|
||||
# Locally we use list for more convenient iteration using indices
|
||||
linestarts = list(dis.findlinestarts(code_obj))
|
||||
self.linestarts = dict(linestarts)
|
||||
# Plain set with offsets of first ops on line
|
||||
self.linestart_offsets = {a for (a, _) in linestarts}
|
||||
# 'List-map' which shows line number of current op and offset of
|
||||
# first op on following line, given offset of op as index
|
||||
self.lines = lines = []
|
||||
LineTuple = namedtuple('LineTuple', ['l_no', 'next'])
|
||||
# Iterate through available linestarts, and fill
|
||||
# the data for all code offsets encountered until
|
||||
# last linestart offset
|
||||
_, prev_line_no = linestarts[0]
|
||||
offset = 0
|
||||
for start_offset, line_no in linestarts[1:]:
|
||||
while offset < start_offset:
|
||||
lines.append(LineTuple(prev_line_no, start_offset))
|
||||
offset += 1
|
||||
prev_line_no = line_no
|
||||
# Fill remaining offsets with reference to last line number
|
||||
# and code length as start offset of following non-existing line
|
||||
codelen = len(self.code)
|
||||
while offset < codelen:
|
||||
lines.append(LineTuple(prev_line_no, codelen))
|
||||
offset += 1
|
||||
|
||||
# FIXME Create and move to scanner3
|
||||
def build_prev_op(self):
|
||||
"""
|
||||
Compose 'list-map' which allows to jump to previous
|
||||
op, given offset of current op as index.
|
||||
"""
|
||||
code = self.code
|
||||
codelen = len(code)
|
||||
self.prev_op = [0]
|
||||
for offset in self.op_range(0, codelen):
|
||||
op = code[offset]
|
||||
for _ in range(self.op_size(op)):
|
||||
self.prev_op.append(offset)
|
||||
|
||||
# FIXME Create and move to scanner3
|
||||
def op_size(self, op):
|
||||
"""
|
||||
Return size of operator with its arguments
|
||||
for given opcode <op>.
|
||||
"""
|
||||
if op < dis.HAVE_ARGUMENT:
|
||||
return 1
|
||||
else:
|
||||
return 3
|
||||
|
||||
def find_jump_targets(self):
|
||||
"""
|
||||
Detect all offsets in a byte code which are jump targets.
|
||||
|
||||
Return the list of offsets.
|
||||
|
||||
This procedure is modelled after dis.findlables(), but here
|
||||
for each target the number of jumps is counted.
|
||||
"""
|
||||
code = self.code
|
||||
codelen = len(code)
|
||||
self.structs = [{'type': 'root',
|
||||
'start': 0,
|
||||
'end': codelen-1}]
|
||||
|
||||
# All loop entry points
|
||||
# self.loops = []
|
||||
# Map fixed jumps to their real destination
|
||||
self.fixed_jumps = {}
|
||||
self.ignore_if = set()
|
||||
self.build_statement_indices()
|
||||
# Containers filled by detect_structure()
|
||||
self.not_continue = set()
|
||||
self.return_end_ifs = set()
|
||||
|
||||
targets = {}
|
||||
for offset in self.op_range(0, codelen):
|
||||
op = code[offset]
|
||||
|
||||
# Determine structures and fix jumps for 2.3+
|
||||
self.detect_structure(offset)
|
||||
|
||||
if op >= dis.HAVE_ARGUMENT:
|
||||
label = self.fixed_jumps.get(offset)
|
||||
oparg = code[offset+1] + code[offset+2] * 256
|
||||
|
||||
if label is None:
|
||||
if op in dis.hasjrel and op != FOR_ITER:
|
||||
label = offset + 3 + oparg
|
||||
elif op in dis.hasjabs:
|
||||
if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
|
||||
if oparg > offset:
|
||||
label = oparg
|
||||
|
||||
if label is not None and label != -1:
|
||||
targets[label] = targets.get(label, []) + [offset]
|
||||
elif op == END_FINALLY and offset in self.fixed_jumps:
|
||||
label = self.fixed_jumps[offset]
|
||||
targets[label] = targets.get(label, []) + [offset]
|
||||
return targets
|
||||
|
||||
def build_statement_indices(self):
|
||||
code = self.code
|
||||
start = 0
|
||||
end = codelen = len(code)
|
||||
|
||||
statement_opcodes = {
|
||||
SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
|
||||
SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH,
|
||||
POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
|
||||
STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
|
||||
STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
|
||||
RETURN_VALUE, RAISE_VARARGS, POP_TOP, PRINT_EXPR,
|
||||
JUMP_ABSOLUTE
|
||||
}
|
||||
|
||||
statement_opcode_sequences = [(POP_JUMP_IF_FALSE, JUMP_FORWARD), (POP_JUMP_IF_FALSE, JUMP_ABSOLUTE),
|
||||
(POP_JUMP_IF_TRUE, JUMP_FORWARD), (POP_JUMP_IF_TRUE, JUMP_ABSOLUTE)]
|
||||
|
||||
designator_ops = {
|
||||
STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
|
||||
STORE_SUBSCR, UNPACK_SEQUENCE, JUMP_ABSOLUTE
|
||||
}
|
||||
|
||||
# Compose preliminary list of indices with statements,
|
||||
# using plain statement opcodes
|
||||
prelim = self.all_instr(start, end, statement_opcodes)
|
||||
|
||||
# Initialize final container with statements with
|
||||
# preliminnary data
|
||||
stmts = self.stmts = set(prelim)
|
||||
|
||||
# Same for opcode sequences
|
||||
pass_stmts = set()
|
||||
for sequence in statement_opcode_sequences:
|
||||
for i in self.op_range(start, end-(len(sequence)+1)):
|
||||
match = True
|
||||
for elem in sequence:
|
||||
if elem != code[i]:
|
||||
match = False
|
||||
break
|
||||
i += self.op_size(code[i])
|
||||
|
||||
if match is True:
|
||||
i = self.prev_op[i]
|
||||
stmts.add(i)
|
||||
pass_stmts.add(i)
|
||||
|
||||
# Initialize statement list with the full data we've gathered so far
|
||||
if pass_stmts:
|
||||
stmt_offset_list = list(stmts)
|
||||
stmt_offset_list.sort()
|
||||
else:
|
||||
stmt_offset_list = prelim
|
||||
# 'List-map' which contains offset of start of
|
||||
# next statement, when op offset is passed as index
|
||||
self.next_stmt = slist = []
|
||||
last_stmt_offset = -1
|
||||
i = 0
|
||||
# Go through all statement offsets
|
||||
for stmt_offset in stmt_offset_list:
|
||||
# Process absolute jumps, but do not remove 'pass' statements
|
||||
# from the set
|
||||
if code[stmt_offset] == JUMP_ABSOLUTE and stmt_offset not in pass_stmts:
|
||||
# If absolute jump occurs in forward direction or it takes off from the
|
||||
# same line as previous statement, this is not a statement
|
||||
target = self.get_target(stmt_offset)
|
||||
if target > stmt_offset or self.lines[last_stmt_offset].l_no == self.lines[stmt_offset].l_no:
|
||||
stmts.remove(stmt_offset)
|
||||
continue
|
||||
# Rewing ops till we encounter non-JA one
|
||||
j = self.prev_op[stmt_offset]
|
||||
while code[j] == JUMP_ABSOLUTE:
|
||||
j = self.prev_op[j]
|
||||
# If we got here, then it's list comprehension which
|
||||
# is not a statement too
|
||||
if code[j] == LIST_APPEND:
|
||||
stmts.remove(stmt_offset)
|
||||
continue
|
||||
# Exclude ROT_TWO + POP_TOP
|
||||
elif code[stmt_offset] == POP_TOP and code[self.prev_op[stmt_offset]] == ROT_TWO:
|
||||
stmts.remove(stmt_offset)
|
||||
continue
|
||||
# Exclude FOR_ITER + designators
|
||||
elif code[stmt_offset] in designator_ops:
|
||||
j = self.prev_op[stmt_offset]
|
||||
while code[j] in designator_ops:
|
||||
j = self.prev_op[j]
|
||||
if code[j] == FOR_ITER:
|
||||
stmts.remove(stmt_offset)
|
||||
continue
|
||||
# Add to list another list with offset of current statement,
|
||||
# equal to length of previous statement
|
||||
slist += [stmt_offset] * (stmt_offset-i)
|
||||
last_stmt_offset = stmt_offset
|
||||
i = stmt_offset
|
||||
# Finish filling the list for last statement
|
||||
slist += [codelen] * (codelen-len(slist))
|
||||
|
||||
# FIXME Create and move to scanner3
|
||||
def get_target(self, offset):
|
||||
"""
|
||||
Get target offset for op located at given <offset>.
|
||||
"""
|
||||
op = self.code[offset]
|
||||
target = self.code[offset+1] + self.code[offset+2] * 256
|
||||
if op in dis.hasjrel:
|
||||
target += offset + 3
|
||||
return target
|
||||
|
||||
def next_except_jump(self, start):
|
||||
"""
|
||||
Return the next jump that was generated by an except SomeException:
|
||||
construct in a try...except...else clause or None if not found.
|
||||
"""
|
||||
|
||||
if self.code[start] == DUP_TOP:
|
||||
except_match = self.first_instr(start, len(self.code), POP_JUMP_IF_FALSE)
|
||||
if except_match:
|
||||
jmp = self.prev_op[self.get_target(except_match)]
|
||||
self.ignore_if.add(except_match)
|
||||
self.not_continue.add(jmp)
|
||||
return jmp
|
||||
|
||||
count_END_FINALLY = 0
|
||||
count_SETUP_ = 0
|
||||
for i in self.op_range(start, len(self.code)):
|
||||
op = self.code[i]
|
||||
if op == END_FINALLY:
|
||||
if count_END_FINALLY == count_SETUP_:
|
||||
assert self.code[self.prev_op[i]] in (JUMP_ABSOLUTE, JUMP_FORWARD, RETURN_VALUE)
|
||||
self.not_continue.add(self.prev_op[i])
|
||||
return self.prev_op[i]
|
||||
count_END_FINALLY += 1
|
||||
elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY):
|
||||
count_SETUP_ += 1
|
||||
|
||||
# FIXME Create and move to scanner3
|
||||
# FIXME: merge with scanner3 code
|
||||
def detect_structure(self, offset):
|
||||
"""
|
||||
Detect structures and their boundaries to fix optimizied jumps
|
||||
@@ -598,41 +342,32 @@ class Scanner34(scan.Scanner):
|
||||
else:
|
||||
self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)
|
||||
|
||||
def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
|
||||
def next_except_jump(self, start):
|
||||
"""
|
||||
Find offsets of all requested <instr> between <start> and <end>,
|
||||
optionally <target>ing specified offset, and return list found
|
||||
<instr> offsets which are not within any POP_JUMP_IF_TRUE jumps.
|
||||
Return the next jump that was generated by an except SomeException:
|
||||
construct in a try...except...else clause or None if not found.
|
||||
"""
|
||||
# Find all offsets of requested instructions
|
||||
instr_offsets = self.all_instr(start, end, instr, target, include_beyond_target)
|
||||
# Get all POP_JUMP_IF_TRUE (or) offsets
|
||||
pjit_offsets = self.all_instr(start, end, POP_JUMP_IF_TRUE)
|
||||
filtered = []
|
||||
for pjit_offset in pjit_offsets:
|
||||
pjit_tgt = self.get_target(pjit_offset) - 3
|
||||
for instr_offset in instr_offsets:
|
||||
if instr_offset <= pjit_offset or instr_offset >= pjit_tgt:
|
||||
filtered.append(instr_offset)
|
||||
instr_offsets = filtered
|
||||
filtered = []
|
||||
return instr_offsets
|
||||
|
||||
def remove_mid_line_ifs(self, ifs):
|
||||
"""
|
||||
Go through passed offsets, filtering ifs
|
||||
located somewhere mid-line.
|
||||
"""
|
||||
filtered = []
|
||||
for if_ in ifs:
|
||||
# For each offset, if line number of current and next op
|
||||
# is the same
|
||||
if self.lines[if_].l_no == self.lines[if_+3].l_no:
|
||||
# Check if last op on line is PJIT or PJIF, and if it is - skip it
|
||||
if self.code[self.prev_op[self.lines[if_].next]] in (POP_JUMP_IF_TRUE, POP_JUMP_IF_FALSE):
|
||||
continue
|
||||
filtered.append(if_)
|
||||
return filtered
|
||||
if self.code[start] == DUP_TOP:
|
||||
except_match = self.first_instr(start, len(self.code), POP_JUMP_IF_FALSE)
|
||||
if except_match:
|
||||
jmp = self.prev_op[self.get_target(except_match)]
|
||||
self.ignore_if.add(except_match)
|
||||
self.not_continue.add(jmp)
|
||||
return jmp
|
||||
|
||||
count_END_FINALLY = 0
|
||||
count_SETUP_ = 0
|
||||
for i in self.op_range(start, len(self.code)):
|
||||
op = self.code[i]
|
||||
if op == END_FINALLY:
|
||||
if count_END_FINALLY == count_SETUP_:
|
||||
assert self.code[self.prev_op[i]] in (JUMP_ABSOLUTE, JUMP_FORWARD, RETURN_VALUE)
|
||||
self.not_continue.add(self.prev_op[i])
|
||||
return self.prev_op[i]
|
||||
count_END_FINALLY += 1
|
||||
elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY):
|
||||
count_SETUP_ += 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
co = inspect.currentframe().f_code
|
||||
|
@@ -445,18 +445,26 @@ class Traverser(pysource.Walker, object):
|
||||
def n_mkfunc(self, node):
|
||||
start = len(self.f.getvalue())
|
||||
old_name = self.name
|
||||
if PYTHON3:
|
||||
if self.version >= 3.0:
|
||||
# LOAD_CONST code object ..
|
||||
# LOAD_CONST 'x0'
|
||||
# LOAD_CONST 'x0' if >= 3.3
|
||||
# MAKE_FUNCTION ..
|
||||
self.name = node[-2].attr
|
||||
code_index = -3
|
||||
if self.version >= 3.4:
|
||||
func_name = node[-2].attr
|
||||
code_index = -3
|
||||
elif self.version == 3.3:
|
||||
func_name = node[-2].pattr
|
||||
code_index = -3
|
||||
else:
|
||||
func_name = node[-2].attr.co_name
|
||||
code_index = -2
|
||||
pass
|
||||
else:
|
||||
# LOAD_CONST code object ..
|
||||
# MAKE_FUNCTION ..
|
||||
self.name = node[-2].attr.co_name
|
||||
func_name = node[-2].attr.co_name
|
||||
code_index = -2
|
||||
self.write(self.name)
|
||||
self.write(func_name)
|
||||
self.indentMore()
|
||||
self.make_function(node, isLambda=False, code_index=code_index)
|
||||
self.name = old_name
|
||||
|
@@ -67,7 +67,7 @@ from uncompyle6 import PYTHON3
|
||||
from uncompyle6.parser import get_python_parser
|
||||
from uncompyle6.parsers.astnode import AST
|
||||
from uncompyle6.parsers.spark import GenericASTTraversal, DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
|
||||
from uncompyle6.scanner import Code, get_scanner
|
||||
from uncompyle6.scanner import Code, GenericPythonCode, get_scanner
|
||||
from uncompyle6.scanners.tok import Token, NoneToken
|
||||
import uncompyle6.parser as python_parser
|
||||
|
||||
@@ -920,12 +920,21 @@ class Walker(GenericASTTraversal, object):
|
||||
n_importstar = n_importfrom
|
||||
|
||||
def n_mkfunc(self, node):
|
||||
|
||||
if self.version >= 3.0:
|
||||
# LOAD_CONST code object ..
|
||||
# LOAD_CONST 'x0'
|
||||
# MAKE_FUNCTION ..
|
||||
func_name = node[-2].attr
|
||||
code_index = -3
|
||||
if self.version >= 3.4:
|
||||
func_name = node[-2].attr
|
||||
code_index = -3
|
||||
elif self.version == 3.3:
|
||||
func_name = node[-2].pattr
|
||||
code_index = -3
|
||||
else:
|
||||
func_name = node[-2].attr.co_name
|
||||
code_index = -2
|
||||
pass
|
||||
else:
|
||||
# LOAD_CONST code object ..
|
||||
# MAKE_FUNCTION ..
|
||||
@@ -972,9 +981,12 @@ class Walker(GenericASTTraversal, object):
|
||||
self.prec = 27
|
||||
code = node[code_index].attr
|
||||
|
||||
if isinstance(code, GenericPythonCode):
|
||||
self.write(' for i_am in ["Python 2-3 deparsing limitation"]')
|
||||
return
|
||||
|
||||
assert inspect.iscode(code)
|
||||
code = Code(code, self.scanner, self.currentclass)
|
||||
# assert isinstance(code, Code)
|
||||
|
||||
ast = self.build_ast(code._tokens, code._customize)
|
||||
self.customize(code._customize)
|
||||
@@ -1019,6 +1031,10 @@ class Walker(GenericASTTraversal, object):
|
||||
self.prec = 27
|
||||
code = node[code_index].attr
|
||||
|
||||
if isinstance(code, GenericPythonCode):
|
||||
self.write(' for i_am in ["Python 2-3 deparsing limitation"]')
|
||||
return
|
||||
|
||||
assert inspect.iscode(code)
|
||||
code = Code(code, self.scanner, self.currentclass)
|
||||
# assert isinstance(code, Code)
|
||||
@@ -1438,6 +1454,10 @@ class Walker(GenericASTTraversal, object):
|
||||
defparams = node[:node[-1].attr]
|
||||
code = node[code_index].attr
|
||||
|
||||
if isinstance(code, GenericPythonCode):
|
||||
self.write('(limitation="Cross Python 2/3 deparsing")')
|
||||
return
|
||||
|
||||
assert inspect.iscode(code)
|
||||
code = Code(code, self.scanner, self.currentclass)
|
||||
# assert isinstance(code, Code)
|
||||
|
Reference in New Issue
Block a user