Python 3 bytecode handles opcodes with varargs (better). Decompiling

assert works. Add more of the simple tests and their compiled bytecode.
This commit is contained in:
rocky
2015-12-19 03:00:39 -05:00
parent f70641da5d
commit a75bd0bf97
100 changed files with 122 additions and 24 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
test/bytecode_3.2/or.pyc Normal file

Binary file not shown.

BIN
test/bytecode_3.2/power.pyc Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
test/bytecode_3.2/xor.pyc Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
test/bytecode_3.4/equal.pyc Normal file

Binary file not shown.

BIN
test/bytecode_3.4/for.pyc Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
test/bytecode_3.4/iter.pyc Normal file

Binary file not shown.

BIN
test/bytecode_3.4/left.pyc Normal file

Binary file not shown.

BIN
test/bytecode_3.4/less.pyc Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
test/bytecode_3.4/not.pyc Normal file

Binary file not shown.

Binary file not shown.

BIN
test/bytecode_3.4/or.pyc Normal file

Binary file not shown.

Binary file not shown.

BIN
test/bytecode_3.4/power.pyc Normal file

Binary file not shown.

BIN
test/bytecode_3.4/right.pyc Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
test/bytecode_3.4/while.pyc Normal file

Binary file not shown.

BIN
test/bytecode_3.4/xor.pyc Normal file

Binary file not shown.

View File

@@ -0,0 +1 @@
a = b * c

View File

@@ -0,0 +1 @@
a = b | c

View File

@@ -0,0 +1 @@
a = b ** c

View File

@@ -0,0 +1 @@
a = b << c

View File

@@ -0,0 +1 @@
a = b >> c

View File

@@ -0,0 +1 @@
a = b[c]

View File

@@ -0,0 +1 @@
a = b - c

View File

@@ -0,0 +1 @@
a = b ^ c

View File

@@ -0,0 +1 @@
a == b

View File

@@ -0,0 +1 @@
a > b

View File

@@ -0,0 +1 @@
a >= b

View File

@@ -0,0 +1 @@
a < b

View File

@@ -0,0 +1 @@
a <= b

View File

@@ -0,0 +1 @@
a != b

View File

@@ -0,0 +1 @@
a += b

View File

@@ -0,0 +1 @@
a &= b

View File

@@ -0,0 +1 @@
a //= b

View File

@@ -0,0 +1 @@
a /= b

View File

@@ -0,0 +1 @@
a %= b

View File

@@ -0,0 +1 @@
a *= b

View File

@@ -0,0 +1 @@
a |= b

View File

@@ -0,0 +1 @@
a **= b

View File

@@ -0,0 +1 @@
a <<= b

View File

@@ -0,0 +1 @@
a >>= b

View File

@@ -0,0 +1 @@
a -= b

View File

@@ -0,0 +1 @@
a ^= b

View File

@@ -0,0 +1 @@
a = b and c

View File

@@ -0,0 +1 @@
a = b and c or d

View File

@@ -0,0 +1 @@
a = (b or c) and d

View File

@@ -0,0 +1 @@
a = b or c or d

View File

@@ -0,0 +1,2 @@
a = (b + c) * d
a = b + c * d

View File

@@ -0,0 +1,2 @@
a = b * (c + d)
a = b * c + d

View File

@@ -0,0 +1,2 @@
a = b + c + d + e
a = b + c + (d + e)

View File

@@ -665,24 +665,23 @@ class Python3Parser(PythonParser):
"""
new_rules = set()
for token in tokens:
if token.type not in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
continue
# Low byte indicates number of positional parameters,
# high byte number of keyword parameters
args_pos = token.attr & 0xff
args_kw = (token.attr >> 8) & 0xff
nak = ( len(token.type)-len('CALL_FUNCTION') ) // 3
token.type = 'CALL_FUNCTION_%i' % token.attr
rule = ('call_function ::= expr '
+ ('expr ' * args_pos)
+ ('kwarg ' * args_kw)
+ 'expr ' * nak + token.type)
# Make sure we do not add the same rule twice
if rule not in new_rules:
new_rules.add(rule)
self.addRule(rule, nop_func)
customize[token.type] = args_pos
if token.type in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
# Low byte indicates number of positional parameters,
# high byte number of keyword parameters
args_pos = token.attr & 0xff
args_kw = (token.attr >> 8) & 0xff
nak = ( len(token.type)-len('CALL_FUNCTION') ) // 3
token.type = 'CALL_FUNCTION_%i' % token.attr
rule = ('call_function ::= expr '
+ ('expr ' * args_pos)
+ ('kwarg ' * args_kw)
+ 'expr ' * nak + token.type)
# Make sure we do not add the same rule twice
if rule not in new_rules:
new_rules.add(rule)
self.addRule(rule, nop_func)
customize[token.type] = args_pos
pass
pass
pass
return

View File

@@ -17,6 +17,7 @@ from __future__ import print_function
import dis
from collections import namedtuple
from array import array
from uncompyle6 import PYTHON_VERSION
from uncompyle6.scanner import Token, L65536
@@ -36,21 +37,59 @@ class Scanner34(scan.Scanner):
def __init__(self):
scan.Scanner.__init__(self, 3.4) # check
def get_argument(self, bytecode, pos):
arg = bytecode[pos+1] + bytecode[pos+2] * 256
return arg
def disassemble(self, co):
fn = self.disassemble_built_in if PYTHON_VERSION == 3.4 \
else self.disassemble_cross_version
return fn(co)
def disassemble_built_in(self, co):
def disassemble_built_in(self, co, classname=None):
# Container for tokens
tokens = []
self.code = co.co_code
customize = {}
self.code = array('B', co.co_code)
self.build_lines_data(co)
self.build_prev_op()
# Get jump targets
# Format: {target offset: [jump offsets]}
jump_targets = self.find_jump_targets()
bytecode = dis.Bytecode(co)
# self.lines contains (block,addrLastInstr)
# if classname:
# classname = '_' + classname.lstrip('_') + '__'
# def unmangle(name):
# if name.startswith(classname) and name[-2:] != '__':
# return name[len(classname) - 2:]
# return name
# free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
# names = [ unmangle(name) for name in co.co_names ]
# varnames = [ unmangle(name) for name in co.co_varnames ]
# else:
# free = co.co_cellvars + co.co_freevars
# names = co.co_names
# varnames = co.co_varnames
# Scan for assertions. Later we will
# turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
# assertions
self.load_asserts = set()
bs = list(bytecode)
n = len(bs)
for i in range(n):
inst = bs[i]
if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n:
next_inst = bs[i+1]
if (next_inst.opname == 'LOAD_GLOBAL' and
next_inst.argval == 'AssertionError'):
self.load_asserts.add(next_inst.offset)
for inst in bytecode:
if inst.offset in jump_targets:
jump_idx = 0
@@ -60,14 +99,38 @@ class Scanner34(scan.Scanner):
jump_idx += 1
pass
pass
pattr = inst.argrepr
opname = inst.opname
# For constants, the pattr is the same as attr (inst.argval). Using argrepr adds
# an extra level of quotes which messes other things up, like getting
# keyword attribute names in a call. I suspect there will be things
# other than LOAD_CONST, but we'll start out with just this for now.
pattr = inst.argval if inst.opname in ['LOAD_CONST'] else inst.argrepr
if opname in ['LOAD_CONST']:
pattr = inst.argval
elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE',
'UNPACK_SEQUENCE',
'MAKE_FUNCTION', 'MAKE_CLOSURE',
'DUP_TOPX', 'RAISE_VARARGS'
):
# if opname == 'BUILD_TUPLE' and \
# self.code[self.prev[offset]] == LOAD_CLOSURE:
# continue
# else:
# op_name = '%s_%d' % (op_name, oparg)
# if opname != BUILD_SLICE:
# customize[op_name] = oparg
opname = '%s_%d' % (opname, inst.argval)
if inst.opname != 'BUILD_SLICE':
customize[opname] = inst.argval
elif inst.offset in self.load_asserts:
opname = 'LOAD_ASSERT'
tokens.append(
Token(
type_ = inst.opname,
type_ = opname,
attr = inst.argval,
pattr = pattr,
offset = inst.offset,
@@ -85,7 +148,7 @@ class Scanner34(scan.Scanner):
"""
# Container for tokens
tokens = []
self.code = code = co.co_code
self.code = code = array('B', co.co_code)
codelen = len(code)
self.build_lines_data(co)
self.build_prev_op()