Merge branch 'master' into python-2.4

This commit is contained in:
rocky
2017-04-10 00:48:04 -04:00
20 changed files with 310 additions and 45 deletions

2
.gitignore vendored
View File

@@ -16,3 +16,5 @@
/unpyc
__pycache__
build
/.venv*
/.idea

View File

@@ -115,7 +115,7 @@ I started working on this late 2015, mostly to add fragment support.
In that, I decided to make this runnable on Python 3.2+ and Python 2.6+
while handling Python bytecodes from Python versions 2.5+ and
3.2+. In doing so, it has been expedient to separate this into three
projects: load loading and disassembly (xdis), parsing and tree
projects: bytecode loading and disassembly (xdis), parsing and tree
building (spark_parser), and grammar and semantic actions for
decompiling (uncompyle6).

View File

@@ -11,8 +11,8 @@ Introduction
------------
*uncompyle6* translates Python bytecode back into equivalent Python
source code. It accepts bytecodes from Python version 2.1 to 3.6 or
so, including PyPy bytecode and Dropbox's Python 2.5 bytecode.
source code. It accepts bytecodes from Python version 1.5, and 2.1 to
3.6 or so, including PyPy bytecode and Dropbox's Python 2.5 bytecode.
Why this?
---------
@@ -46,7 +46,7 @@ Requirements
This project requires Python 2.6 or later, PyPy 3-2.4, or PyPy-5.0.1.
Python versions 2.4-2.7 are supported in the python-2.4 branch.
The bytecode files it can read have been tested on Python bytecodes from
versions 1.5, 2.1-2.7, and 3.2-3.6 and the above-mentioned PyPy versions.
versions 1.5, 2.1-2.7, and 3.0-3.6 and the above-mentioned PyPy versions.
Installation
------------
@@ -140,11 +140,10 @@ and 2.0.
In the Python 3 series, Python support is strongest around 3.4 or
3.3 and drops off as you move further away from those versions. Python
3.5 largely works, but still has some bugs in it and is missing some
opcodes. Python 3.6 changes things drastically by using word codes
rather than byte codes. That has been addressed, but then it also
changes function call opcodes and its semantics and has more problems
with control flow than 3.5 has.
3.6 changes things drastically by using word codes rather than byte
codes. That has been addressed, but then it also changes function call
opcodes and its semantics and has more problems with control flow than
3.5 has.
Currently not all Python magic numbers are supported. Specifically in
some versions of Python, notably Python 3.6, the magic number has
@@ -158,6 +157,9 @@ We also don't handle PJOrion_ obfuscated code. For that try: PJOrion
Deobfuscator_ to unscramble the bytecode to get valid bytecode before
trying this tool.
Handling pathologically long lists of expressions or statements is
slow.
There is lots to do, so please dig in and help.

View File

@@ -40,7 +40,7 @@ entry_points={
]}
ftp_url = None
install_requires = ['spark-parser >= 1.6.0, < 1.7.0',
'xdis >= 3.2.4, < 3.3.0']
'xdis >= 3.3.0, < 3.4.0']
license = 'MIT'
mailing_list = 'python-debugger@googlegroups.com'
modname = 'uncompyle6'

View File

@@ -0,0 +1,128 @@
# std
import string
# 3rd party
from hypothesis import given, assume, strategies as st
import pytest
# uncompyle
from validate import validate_uncompyle
alpha = st.sampled_from(string.ascii_lowercase)
numbers = st.sampled_from(string.digits)
alphanum = st.sampled_from(string.ascii_lowercase + string.digits)
expressions = st.sampled_from([x for x in string.ascii_lowercase + string.digits] + ['x+1'])
@st.composite
def function_calls(draw):
    """
    Strategy factory for generating function calls.

    Builds the text of a call to ``fn`` from up to three positional
    arguments, up to three ``name=0`` keyword arguments, at most one
    ``*name`` argument and at most one ``**name`` argument, shuffled into a
    random order.  Orderings that the running interpreter refuses to
    compile are discarded via ``assume(False)``.

    :param draw: Callable which draws examples from other strategies.
    :return: The function call text.
    """
    list1 = st.lists(alpha, min_size=0, max_size=1)
    list3 = st.lists(alpha, min_size=0, max_size=3)

    positional_args = draw(list3)
    named_args = [x + '=0' for x in draw(list3)]
    star_args = ['*' + x for x in draw(list1)]
    double_star_args = ['**' + x for x in draw(list1)]

    arguments = positional_args + named_args + star_args + double_star_args
    # Shuffle with a hypothesis-provided RNG so failing examples stay reproducible.
    draw(st.randoms()).shuffle(arguments)
    arguments = ','.join(arguments)

    function_call = 'fn({arguments})'.format(arguments=arguments)

    try:
        # TODO: Figure out the exact rules for ordering of positional, named,
        # star args, double star args and in which versions the various
        # types of arguments are supported so we don't need to check that the
        # expression compiles like this.
        compile(function_call, '<string>', 'single')
    except SyntaxError:
        # Only an invalid argument ordering should reject the example; a bare
        # ``except`` would also swallow KeyboardInterrupt and SystemExit.
        assume(False)

    return function_call
# Hand-picked regression cases for call expressions.  Each name follows the
# scheme printed by test_generate_examples(): "test_" plus the sorted opcode
# names joined by underscores.  All are marked xfail, i.e. they are currently
# expected to fail.
@pytest.mark.xfail()
def test_CALL_FUNCTION():
    # Positional arguments only.
    validate_uncompyle("fn(w,m,f)")


@pytest.mark.xfail()
def test_BUILD_CONST_KEY_MAP_BUILD_MAP_UNPACK_WITH_CALL_BUILD_TUPLE_CALL_FUNCTION_EX():
    # Two keyword arguments followed by a ** unpacking.
    validate_uncompyle("fn(w=0,m=0,**v)")


@pytest.mark.xfail()
def test_BUILD_MAP_BUILD_MAP_UNPACK_WITH_CALL_BUILD_TUPLE_CALL_FUNCTION_EX():
    # One keyword argument followed by a ** unpacking.
    validate_uncompyle("fn(a=0,**g)")


@pytest.mark.xfail()
def test_CALL_FUNCTION_KW():
    # A single keyword argument.
    validate_uncompyle("fn(j=0)")


@pytest.mark.xfail()
def test_CALL_FUNCTION_EX():
    # A * unpacking together with a ** unpacking.
    validate_uncompyle("fn(*g,**j)")


@pytest.mark.xfail()
def test_BUILD_MAP_CALL_FUNCTION_EX():
    # A * unpacking followed by a keyword argument.
    validate_uncompyle("fn(*z,u=0)")


@pytest.mark.xfail()
def test_BUILD_TUPLE_CALL_FUNCTION_EX():
    # A ** unpacking only.
    validate_uncompyle("fn(**a)")


@pytest.mark.xfail()
def test_BUILD_MAP_BUILD_TUPLE_BUILD_TUPLE_UNPACK_WITH_CALL_CALL_FUNCTION_EX():
    # Two positional arguments, one keyword argument, then a * unpacking.
    validate_uncompyle("fn(b,b,b=0,*a)")


@pytest.mark.xfail()
def test_BUILD_TUPLE_BUILD_TUPLE_UNPACK_WITH_CALL_CALL_FUNCTION_EX():
    # A * unpacking followed by a positional argument.
    validate_uncompyle("fn(*c,v)")


@pytest.mark.xfail()
def test_BUILD_CONST_KEY_MAP_CALL_FUNCTION_EX():
    # Two keyword arguments followed by a * unpacking.
    validate_uncompyle("fn(i=0,y=0,*p)")
# Property-based check over generated call expressions.  Unconditionally
# skipped for now; the skip reason below states when it should be enabled.
@pytest.mark.skip(reason='skipping property based test until all individual tests are passing')
@given(function_calls())
def test_function_call(function_call):
    # function_call is the call text drawn from the function_calls() strategy.
    validate_uncompyle(function_call)
# Call texts collected by test_generate_hypothesis().
examples = set()
# Tests guarded by this flag are skipped while it is False; set it to True to
# run them.
generate_examples = False


@pytest.mark.skipif(not generate_examples, reason='not generating examples')
@given(function_calls())
def test_generate_hypothesis(function_call):
    # Record every generated call text; the set drops repeats.
    examples.add(function_call)
@pytest.mark.skipif(not generate_examples, reason='not generating examples')
def test_generate_examples():
    """Print one test-function stub per distinct opcode combination.

    For every call text in ``examples`` the set of opcode names it compiles
    to is computed, ignoring LOAD_CONST, LOAD_NAME and RETURN_VALUE.  One
    example is kept per distinct sorted opcode tuple (a later example
    replaces an earlier one with the same tuple) and a stub named after
    that tuple is printed for it.
    """
    import dis

    ignored = ('LOAD_CONST', 'LOAD_NAME', 'RETURN_VALUE')
    by_opcodes = {}
    for source in examples:
        names = set()
        for instr in dis.Bytecode(source):
            if instr.opname not in ignored:
                names.add(instr.opname)
        by_opcodes[tuple(sorted(names))] = source

    for opcode_names, source in by_opcodes.items():
        print('def test_' + '_'.join(opcode_names) + '():\n validate_uncompyle("' + source + '")\n\n')

View File

@@ -40,7 +40,9 @@ def test_grammar():
ignore_set = set(
"""
JUMP_BACK CONTINUE RETURN_END_IF
COME_FROM COME_FROM_EXCEPT COME_FROM_LOOP COME_FROM_WITH
COME_FROM COME_FROM_EXCEPT
COME_FROM_EXCEPT_CLAUSE
COME_FROM_LOOP COME_FROM_WITH
COME_FROM_FINALLY ELSE
LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP
LAMBDA_MARKER RETURN_LAST

View File

@@ -2,18 +2,23 @@
from __future__ import print_function
# std
import os
import dis
import difflib
import subprocess
import tempfile
import functools
# compatibility
import six
# uncompyle6 / xdis
from uncompyle6 import PYTHON_VERSION, deparse_code
from uncompyle6 import PYTHON_VERSION, IS_PYPY, deparse_code
# TODO : I think we can get xdis to support the dis api (python 3 version) by doing something like this there
from xdis.bytecode import Bytecode
from xdis.main import get_opcode
opc = get_opcode(PYTHON_VERSION, IS_PYPY)
Bytecode = functools.partial(Bytecode, opc=opc)
def _dis_to_text(co):
return dis.Bytecode(co).dis()
return Bytecode(co).dis()
def print_diff(original, uncompyled):
@@ -99,9 +104,8 @@ def are_code_objects_equal(co1, co2):
:return: True if the two code objects are approximately equal, otherwise False.
"""
# TODO : Use xdis for python2 compatibility
instructions1 = dis.Bytecode(co1)
instructions2 = dis.Bytecode(co2)
instructions1 = Bytecode(co1)
instructions2 = Bytecode(co2)
for opcode1, opcode2 in zip(instructions1, instructions2):
if not are_instructions_equal(opcode1, opcode2):
return False

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -18,3 +18,12 @@ def __init__(self, defaults=None, dict_type=_default_dict,
default_section=DEFAULTSECT,
interpolation=_UNSET):
pass
# Bug found by hypothesis in creating function calls
# thanks to moagstar
def fn(a, b, d):
    return (a, b, d)

# The call below mixes an explicit keyword argument with a ** unpacking of a
# dict that supplies the remaining parameters.
b = {'b': 1,
     'd': 2}
fn(a=0, **b)

View File

@@ -0,0 +1,16 @@
# From 3.3.5 _osx_support.py
def _get_system_version():
    # Control-flow shape exercised here: an "if" wrapping try/except/else,
    # where the else clause holds a try/finally followed by another "if".
    # NOTE(review): nesting restored from the _osx_support.py layout named in
    # the header comment -- confirm against the checked-in file.
    if __file__ is None:
        try:
            m = 5
        except IOError:
            pass
        else:
            try:
                m = 10
            finally:
                m = 15
            if m is not None:
                m = 20
    return m

View File

@@ -0,0 +1,16 @@
# From 3.3.5 _osx_support.py
def _get_system_version():
    # Control-flow shape exercised here: an "if" wrapping try/except/else,
    # where the else clause holds a try/finally followed by another "if".
    # NOTE(review): nesting restored from the _osx_support.py layout named in
    # the header comment -- confirm against the checked-in file.
    if __file__ is None:
        try:
            m = 5
        except IOError:
            pass
        else:
            try:
                m = 10
            finally:
                m = 15
            if m is not None:
                m = 20
    return m

View File

@@ -8,3 +8,20 @@ def __init__(self, defaults=None, dict_type=_default_dict,
default_section=DEFAULTSECT,
interpolation=_UNSET):
pass
# From 3.5 sqlalchemy/orm/__init__.py
# Python 3.5 changes the stack position of where * args are (furthest down the stack)
# Python 3.6+ replaces CALL_FUNCTION_VAR_KW with CALL_FUNCTION_EX
def deferred(*columns, **kw):
    # The call writes a keyword argument ahead of the * and ** unpackings.
    return ColumnProperty(deferred=True, *columns, **kw)
# From sqlalchemy/sql/selectable.py
class GenerativeSelect():
    def __init__(self,
                 ClauseList,
                 util,
                 order_by=None):
        # The call passes a * unpacking of a call result followed by an
        # explicit keyword argument.
        self._order_by_clause = ClauseList(
            *util.to_list(order_by),
            _literal_as_text=5)

View File

@@ -29,7 +29,7 @@ class PythonParser(GenericASTBuilder):
def __init__(self, AST, start, debug):
super(PythonParser, self).__init__(AST, start, debug)
self.collect = [
'stmts', 'except_stmts', '_stmts',
'stmts', 'except_stmts', '_stmts', 'load_attrs',
'exprlist', 'kvlist', 'kwargs', 'come_froms', '_come_from',
# Python < 3
'print_items',
@@ -405,8 +405,7 @@ class PythonParser(GenericASTBuilder):
import_cont ::= LOAD_CONST LOAD_CONST import_as_cont
import_as_cont ::= IMPORT_FROM designator
load_attrs ::= LOAD_ATTR
load_attrs ::= load_attrs LOAD_ATTR
load_attrs ::= LOAD_ATTR+
"""
def p_list_comprehension(self, args):

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2015, 2016 Rocky Bernstein
# Copyright (c) 2015-2017 Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 1999 John Aycock
@@ -178,14 +178,17 @@ class Python3Parser(PythonParser):
POP_BLOCK LOAD_CONST
come_from_or_finally suite_stmts_opt END_FINALLY
tryelsestmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
try_middle else_suite come_from_except_clauses
tryelsestmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
try_middle else_suite come_froms
tryelsestmtc ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
try_middle else_suitec COME_FROM
try_middle else_suitec come_from_except_clauses
tryelsestmtl ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
try_middle else_suitel COME_FROM
try_middle else_suitel come_from_except_clauses
try_middle ::= jmp_abs COME_FROM except_stmts
END_FINALLY
@@ -252,7 +255,10 @@ class Python3Parser(PythonParser):
def p_misc3(self, args):
"""
try_middle ::= JUMP_FORWARD COME_FROM_EXCEPT except_stmts END_FINALLY COME_FROM
try_middle ::= JUMP_FORWARD COME_FROM_EXCEPT except_stmts
END_FINALLY COME_FROM
try_middle ::= JUMP_FORWARD COME_FROM_EXCEPT except_stmts
END_FINALLY COME_FROM_EXCEPT_CLAUSE
for_block ::= l_stmts_opt opt_come_from_loop JUMP_BACK
for_block ::= l_stmts
@@ -283,10 +289,13 @@ class Python3Parser(PythonParser):
"""
opt_come_from_except ::= COME_FROM_EXCEPT
opt_come_from_except ::= come_froms
opt_come_from_except ::= come_from_except_clauses
come_froms ::= come_froms COME_FROM
come_froms ::=
come_froms ::= COME_FROM*
come_from_except_clauses ::= COME_FROM_EXCEPT_CLAUSE+
opt_come_from_loop ::= opt_come_from_loop COME_FROM_LOOP
opt_come_from_loop ::= opt_come_from_loop COME_FROM_LOOP
opt_come_from_loop ::=
@@ -451,9 +460,9 @@ class Python3Parser(PythonParser):
def custom_classfunc_rule(self, opname, token, customize):
"""
call_function ::= expr {expr}^n CALL_FUNCTION_n
call_function ::= expr {expr}^n CALL_FUNCTION_VAR_n POP_TOP
call_function ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n POP_TOP
call_function ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP
call_function ::= expr {expr}^n CALL_FUNCTION_VAR_n
call_function ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n
call_function ::= expr {expr}^n CALL_FUNCTION_KW_n
classdefdeco2 ::= LOAD_BUILD_CLASS mkfunc {expr}^n-1 CALL_FUNCTION_n
"""
@@ -461,25 +470,47 @@ class Python3Parser(PythonParser):
# high byte number of positional parameters
args_pos = token.attr & 0xff
args_kw = (token.attr >> 8) & 0xff
# Additional exprs for * and ** args:
# 0 if neither
# 1 for CALL_FUNCTION_VAR or CALL_FUNCTION_KW
# 2 for * and ** args (CALL_FUNCTION_VAR_KW).
# Yes, this computation based on instruction name is a little bit hokey.
nak = ( len(opname)-len('CALL_FUNCTION') ) // 3
token.type = self.call_fn_name(token)
uniq_param = args_kw + args_pos
if self.version == 3.5 and opname.startswith('CALL_FUNCTION_VAR'):
# Python 3.5 changes the stack position of where * args, the
# first LOAD_FAST, below are located.
# Python 3.6+ replaces CALL_FUNCTION_VAR_KW with CALL_FUNCTION_EX
if opname.endswith('KW'):
kw = 'LOAD_FAST '
else:
kw = ''
rule = ('call_function ::= expr expr ' +
('pos_arg ' * args_pos) +
('kwarg ' * args_kw) + kw + token.type)
self.add_unique_rule(rule, token.type, uniq_param, customize)
rule = ('call_function ::= expr ' +
('pos_arg ' * args_pos) +
('kwarg ' * args_kw) +
'expr ' * nak + token.type)
self.add_unique_rule(rule, token.type, args_pos, customize)
self.add_unique_rule(rule, token.type, uniq_param, customize)
if self.version >= 3.5:
rule = ('async_call_function ::= expr ' +
('pos_arg ' * args_pos) +
('kwarg ' * args_kw) +
'expr ' * nak + token.type +
' GET_AWAITABLE LOAD_CONST YIELD_FROM')
self.add_unique_rule(rule, token.type, args_pos, customize)
self.add_unique_rule('expr ::= async_call_function', token.type, args_pos, customize)
self.add_unique_rule(rule, token.type, uniq_param, customize)
self.add_unique_rule('expr ::= async_call_function', token.type, uniq_param, customize)
rule = ('classdefdeco2 ::= LOAD_BUILD_CLASS mkfunc %s%s_%d'
% (('expr ' * (args_pos-1)), opname, args_pos))
self.add_unique_rule(rule, token.type, args_pos, customize)
self.add_unique_rule(rule, token.type, uniq_param, customize)
def add_make_function_rule(self, rule, opname, attr, customize):
"""Python 3.3 added an additional LOAD_CONST before MAKE_FUNCTION and

View File

@@ -20,6 +20,19 @@ class Python33Parser(Python32Parser):
iflaststmt ::= testexpr c_stmts_opt33
c_stmts_opt33 ::= JUMP_BACK JUMP_ABSOLUTE c_stmts_opt
_ifstmts_jump ::= c_stmts_opt JUMP_FORWARD _come_from
# Python 3.5+ has jump optimization to remove the redundant
# jump_excepts. But in 3.3 we need them added
tryelsestmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
try_middle else_suite
jump_excepts come_from_except_clauses
trystmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
try_middle
jump_excepts come_from_except_clauses
jump_excepts ::= jump_except+
"""
class Python33ParserSingle(Python33Parser, PythonParserSingle):

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2015, 2016 by Rocky Bernstein
# Copyright (c) 2015-2017 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
"""
@@ -44,9 +44,6 @@ if PYTHON3:
globals().update(op3.opmap)
# POP_JUMP_IF is used by verify
POP_JUMP_TF = (POP_JUMP_IF_TRUE, POP_JUMP_IF_FALSE)
class Scanner3(Scanner):
def __init__(self, version, show_asm=None, is_pypy=False):
@@ -65,6 +62,11 @@ class Scanner3(Scanner):
setup_ops.append(self.opc.SETUP_WITH)
self.setup_ops = frozenset(setup_ops)
if self.version == 3.0:
self.pop_jump_tf = frozenset([self.opc.JUMP_IF_FALSE, self.opc.JUMP_IF_TRUE])
else:
self.pop_jump_tf = frozenset([self.opc.PJIF, self.opc.PJIT])
self.setup_ops_no_loop = frozenset(setup_ops) - frozenset([self.opc.SETUP_LOOP])
# Opcodes that can start a statement.
@@ -227,6 +229,9 @@ class Scanner3(Scanner):
come_from_type = opname[len('SETUP_'):]
come_from_name = 'COME_FROM_%s' % come_from_type
pass
elif inst.offset in self.except_targets:
come_from_name = 'COME_FROM_EXCEPT_CLAUSE'
pass
tokens.append(Token(come_from_name,
None, repr(jump_offset),
offset='%s_%s' % (inst.offset, jump_idx),
@@ -449,6 +454,7 @@ class Scanner3(Scanner):
# Map fixed jumps to their real destination
self.fixed_jumps = {}
self.except_targets = {}
self.ignore_if = set()
self.build_statement_indices()
self.else_start = {}
@@ -663,7 +669,7 @@ class Scanner3(Scanner):
jump_back += 2
if_offset = None
if code[self.prev_op[next_line_byte]] not in POP_JUMP_TF:
if code[self.prev_op[next_line_byte]] not in self.pop_jump_tf:
if_offset = self.prev[next_line_byte]
if if_offset:
loop_type = 'while'
@@ -708,7 +714,7 @@ class Scanner3(Scanner):
self.structs.append({'type': loop_type + '-else',
'start': jump_back+3,
'end': end})
elif op in POP_JUMP_TF:
elif op in self.pop_jump_tf:
start = offset + self.op_size(op)
target = self.get_target(offset)
rtarget = self.restrict_to_parent(target, parent)
@@ -755,12 +761,12 @@ class Scanner3(Scanner):
target == self.get_target(prev_op[pre_rtarget]) and
(prev_op[pre_rtarget] not in self.stmts or
self.get_target(prev_op[pre_rtarget]) > prev_op[pre_rtarget]) and
1 == len(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget], POP_JUMP_TF, target)))):
1 == len(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget], self.pop_jump_tf, target)))):
pass
elif (code[prev_op[pre_rtarget]] == self.opc.RETURN_VALUE
and self.remove_mid_line_ifs([offset]) and
1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget],
POP_JUMP_TF, target))) |
self.pop_jump_tf, target))) |
set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget],
(self.opc.POP_JUMP_IF_FALSE,
self.opc.POP_JUMP_IF_TRUE,
@@ -843,6 +849,9 @@ class Scanner3(Scanner):
self.structs.append({'type': 'if-then',
'start': start,
'end': pre_rtarget})
# FIXME: add this
# self.fixed_jumps[offset] = rtarget
self.not_continue.add(pre_rtarget)
if rtarget < end and (
@@ -913,6 +922,8 @@ class Scanner3(Scanner):
target = self.get_target(next_offset)
if target > next_offset:
self.fixed_jumps[next_offset] = target
self.except_targets[target] = next_offset
elif op == self.opc.SETUP_FINALLY:
target = self.get_target(offset)
end = self.restrict_to_parent(target, parent)

View File

@@ -1790,16 +1790,31 @@ class SourceWalker(GenericASTTraversal, object):
str = '%c(%C, '
p2 = (1, -2, ', ')
if op == 'CALL_FUNCTION_VAR':
str += '*%c)'
# Python 3.5 only puts optional args (the VAR part)
# lowest down the stack
if self.version == 3.5:
if str == '%c(%C, ':
str = '%c(*%C, %c)'
else:
str += '*%c)'
entry = (str, 0, p2, -2)
elif op == 'CALL_FUNCTION_KW':
str += '**%c)'
entry = (str, 0, p2, -2)
else:
elif op == 'CALL_FUNCTION_VAR_KW':
str += '*%c, **%c)'
if p2[2]: p2 = (1, -3, ', ')
entry = (str, 0, p2, -3, -2)
# Python 3.5 only puts optional args (the VAR part)
# lowest down the stack
if self.version == 3.5:
if p2[2]: p2 = (2, -2, ', ')
entry = (str, 0, p2, 1, -2)
else:
if p2[2]: p2 = (1, -3, ', ')
entry = (str, 0, p2, -3, -2)
pass
else:
assert False, "Unhandled CALL_FUNCTION %s" % op
TABLE_R[k] = entry
pass
# handled by n_mapexpr: