DRY scanner34 and scanner35

handle 3.0..3.4 build maps as key/value pairs
This commit is contained in:
rocky
2016-05-15 03:00:13 -04:00
parent b16a166d84
commit b9692c9b1f
6 changed files with 225 additions and 343 deletions

View File

@@ -36,6 +36,7 @@ class Python3Parser(PythonParser):
"""Add rule to grammar, but only if it hasn't been added previously
"""
if rule not in self.new_rules:
# print("XXX ", rule) # debug
self.new_rules.add(rule)
self.addRule(rule, nop_func)
customize[opname] = count
@@ -506,11 +507,16 @@ class Python3Parser(PythonParser):
rule = ('load_closure ::= %s%s' % (('LOAD_CLOSURE ' * v), opname))
self.add_unique_rule(rule, opname, token.attr, customize)
elif self.version >= 3.5 and opname_base == 'BUILD_MAP':
elif opname_base == 'BUILD_MAP':
kvlist_n = "kvlist_%s" % token.attr
if self.version >= 3.5:
rule = kvlist_n + ' ::= ' + 'expr ' * (token.attr*2)
self.add_unique_rule(rule, opname, token.attr, customize)
rule = "mapexpr ::= %s %s" % (kvlist_n, opname)
else:
rule = kvlist_n + ' ::= ' + 'expr expr STORE_MAP ' * token.attr
self.add_unique_rule(rule, opname, token.attr, customize)
rule = "mapexpr ::= %s %s" % (opname, kvlist_n)
self.add_unique_rule(rule, opname, token.attr, customize)
elif opname_base in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'):
rule = 'unpack ::= ' + opname + ' designator' * token.attr

View File

@@ -21,6 +21,8 @@ Finally we save token information.
from __future__ import print_function
import dis
import uncompyle6.scanners.dis3 as dis3
from collections import namedtuple
from array import array
@@ -42,6 +44,142 @@ class Scanner3(scan.Scanner):
self.version = version
scan.Scanner.__init__(self, version)
def disassemble3(self, co, opnames, classname=None, code_objects={}):
    """Convert code object `co` into a list of tokens for Python 3 bytecode.

    This is the routine shared by the version-specific Python 3 scanners;
    they pass in their own opcode-name table via `opnames`.

    Parameters:
      co           -- the code object to disassemble
      opnames      -- opcode-name table handed to dis3.Bytecode()
      classname    -- optional enclosing class name; only used here to
                      build the private-name prefix for unmangle()
      code_objects -- accepted for interface compatibility; not used here

    Returns a 2-tuple (tokens, {}). Note that the second element is always
    a fresh empty dict: the `customize` dict filled in below is not what
    gets returned.
    """

    # import dis; dis.disassemble(co) # DEBUG

    # Container for tokens
    tokens = []
    customize = {}

    self.code = array('B', co.co_code)
    self.build_lines_data(co)
    self.build_prev_op()

    bytecode = dis3.Bytecode(co, opnames)

    # self.lines contains (block,addrLastInstr)
    if classname:
        classname = '_' + classname.lstrip('_') + '__'

        # NOTE(review): unmangle() is defined but never called in this method.
        def unmangle(name):
            if name.startswith(classname) and name[-2:] != '__':
                return name[len(classname) - 2:]
            return name
    else:
        pass

    # Scan for assertions. Later we will
    # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
    # assertions
    self.load_asserts = set()
    bs = list(bytecode)
    n = len(bs)
    for i in range(n):
        inst = bs[i]
        if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n:
            next_inst = bs[i+1]
            if (next_inst.opname == 'LOAD_GLOBAL' and
                    next_inst.argval == 'AssertionError'):
                self.load_asserts.add(next_inst.offset)

    # Get jump targets
    # Format: {target offset: [jump offsets]}
    jump_targets = self.find_jump_targets()

    for inst in bytecode:
        # Emit one COME_FROM pseudo-token per jump that lands on this
        # instruction, ahead of the instruction's own token.
        if inst.offset in jump_targets:
            jump_idx = 0
            for jump_offset in jump_targets[inst.offset]:
                tokens.append(Token('COME_FROM', None, repr(jump_offset),
                                    offset='%s_%s' % (inst.offset, jump_idx)))
                jump_idx += 1
                pass
            pass

        pattr = inst.argrepr
        opname = inst.opname

        if opname in ['LOAD_CONST']:
            const = inst.argval
            if iscode(const):
                # Specialize the opcode name by the kind of code object loaded
                if const.co_name == '<lambda>':
                    opname = 'LOAD_LAMBDA'
                elif const.co_name == '<genexpr>':
                    opname = 'LOAD_GENEXPR'
                elif const.co_name == '<dictcomp>':
                    opname = 'LOAD_DICTCOMP'
                elif const.co_name == '<setcomp>':
                    opname = 'LOAD_SETCOMP'
                elif const.co_name == '<listcomp>':
                    opname = 'LOAD_LISTCOMP'
                # verify() uses 'pattr' for comparison, since 'attr'
                # now holds Code(const) and thus can not be used
                # for comparison (todo: think about changing this)
                # pattr = 'code_object @ 0x%x %s->%s' %\
                # (id(const), const.co_filename, const.co_name)
                pattr = '<code_object ' + const.co_name + '>'
            else:
                pattr = const
                pass
        elif opname == 'MAKE_FUNCTION':
            # Split the operand into three bit fields: low byte, next byte,
            # and the following 15 bits.
            argc = inst.argval
            attr = ((argc & 0xFF), (argc >> 8) & 0xFF, (argc >> 16) & 0x7FFF)
            pos_args, name_pair_args, annotate_args = attr
            if name_pair_args > 0:
                opname = 'MAKE_FUNCTION_N%d' % name_pair_args
                pass
            if annotate_args > 0:
                # Fixed: this used the undefined name 'op_name' and a list as
                # the right-hand side of '%'; both made this branch raise.
                opname = '%s_A_%d' % (opname, annotate_args)
                pass
            opname = '%s_%d' % (opname, pos_args)
            pattr = ("%d positional, %d keyword pair, %d annotated" %
                     (pos_args, name_pair_args, annotate_args))
            tokens.append(
                Token(
                    type_ = opname,
                    attr = (pos_args, name_pair_args, annotate_args),
                    pattr = pattr,
                    offset = inst.offset,
                    linestart = inst.starts_line)
                )
            # This token is already appended; skip the generic append below.
            continue
        elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE',
                        'BUILD_MAP', 'UNPACK_SEQUENCE', 'MAKE_CLOSURE',
                        'RAISE_VARARGS'
                        ):
            # Variable-arity opcodes get their operand count appended to the name
            pos_args = inst.argval
            if inst.opname != 'BUILD_SLICE':
                customize[opname] = pos_args
                pass
            opname = '%s_%d' % (opname, pos_args)
        elif opname == 'JUMP_ABSOLUTE':
            pattr = inst.argval
            target = self.get_target(inst.offset)
            # A backward absolute jump is renamed to CONTINUE or JUMP_BACK
            if target < inst.offset:
                if (inst.offset in self.stmts and
                        self.code[inst.offset+3] not in (END_FINALLY, POP_BLOCK)
                        and inst.offset not in self.not_continue):
                    opname = 'CONTINUE'
                else:
                    opname = 'JUMP_BACK'

        elif inst.offset in self.load_asserts:
            opname = 'LOAD_ASSERT'

        tokens.append(
            Token(
                type_ = opname,
                attr = inst.argval,
                pattr = pattr,
                offset = inst.offset,
                linestart = inst.starts_line,
                )
            )
        pass
    return tokens, {}
def disassemble_generic(self, co, classname=None, code_objects={}):
"""
Convert code object <co> into a sequence of tokens.

View File

@@ -2,180 +2,24 @@
"""
Python 3.4 bytecode scanner/deparser
This overlaps Python's 3.4's dis module, and in fact in some cases
we just fall back to that. But the intent is that it can be run from
Python 2 and other versions of Python. Also, we save token information
for later use in deparsing.
This sets up opcodes for Python 3.4 and calls a generalized
scanner routine for Python 3.
"""
from __future__ import print_function
import dis, inspect
from array import array
import uncompyle6.scanners.dis3 as dis3
import uncompyle6.scanners.scanner3 as scan3
from uncompyle6.opcodes.opcode_34 import opname as opnames
from uncompyle6 import PYTHON_VERSION
from uncompyle6.code import iscode
from uncompyle6.scanner import Token
# Get all the opcodes into globals
globals().update(dis.opmap)
import uncompyle6.opcodes.opcode_34
# verify uses JUMP_OPs from here
JUMP_OPs = uncompyle6.opcodes.opcode_34.JUMP_OPs
from uncompyle6.opcodes.opcode_34 import *
# bytecode verification, verify(), uses JUMP_OPs from here
from uncompyle6.opcodes.opcode_34 import JUMP_OPs
class Scanner34(scan3.Scanner3):
## FIXME: DRY with scanner35.py
# Note: we can't use built-in disassembly routines, unless
# we do post-processing like we do here.
def disassemble(self, co, classname=None, code_objects={}):
# import dis; dis.disassemble(co) # DEBUG
# Container for tokens
tokens = []
customize = {}
self.code = array('B', co.co_code)
self.build_lines_data(co)
self.build_prev_op()
bytecode = dis3.Bytecode(co, opnames)
# self.lines contains (block,addrLastInstr)
if classname:
classname = '_' + classname.lstrip('_') + '__'
def unmangle(name):
if name.startswith(classname) and name[-2:] != '__':
return name[len(classname) - 2:]
return name
else:
pass
# Scan for assertions. Later we will
# turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
# assertions
self.load_asserts = set()
bs = list(bytecode)
n = len(bs)
for i in range(n):
inst = bs[i]
if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n:
next_inst = bs[i+1]
if (next_inst.opname == 'LOAD_GLOBAL' and
next_inst.argval == 'AssertionError'):
self.load_asserts.add(next_inst.offset)
# Get jump targets
# Format: {target offset: [jump offsets]}
jump_targets = self.find_jump_targets()
for inst in bytecode:
if inst.offset in jump_targets:
jump_idx = 0
for jump_offset in jump_targets[inst.offset]:
tokens.append(Token('COME_FROM', None, repr(jump_offset),
offset='%s_%s' % (inst.offset, jump_idx)))
jump_idx += 1
pass
pass
pattr = inst.argrepr
opname = inst.opname
if opname in ['LOAD_CONST']:
const = inst.argval
if iscode(const):
if const.co_name == '<lambda>':
opname = 'LOAD_LAMBDA'
elif const.co_name == '<genexpr>':
opname = 'LOAD_GENEXPR'
elif const.co_name == '<dictcomp>':
opname = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>':
opname = 'LOAD_SETCOMP'
elif const.co_name == '<listcomp>':
opname = 'LOAD_LISTCOMP'
# verify() uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used
# for comparison (todo: think about changing this)
# pattr = 'code_object @ 0x%x %s->%s' %\
# (id(const), const.co_filename, const.co_name)
pattr = '<code_object ' + const.co_name + '>'
else:
pattr = const
pass
elif opname == 'MAKE_FUNCTION':
argc = inst.argval
attr = ((argc & 0xFF), (argc >> 8) & 0xFF, (argc >> 16) & 0x7FFF)
pos_args, name_pair_args, annotate_args = attr
if name_pair_args > 0:
opname = 'MAKE_FUNCTION_N%d' % name_pair_args
pass
if annotate_args > 0:
opname = '%s_A_%d' % [op_name, annotate_args]
pass
opname = '%s_%d' % (opname, pos_args)
pattr = ("%d positional, %d keyword pair, %d annotated" %
(pos_args, name_pair_args, annotate_args))
tokens.append(
Token(
type_ = opname,
attr = (pos_args, name_pair_args, annotate_args),
pattr = pattr,
offset = inst.offset,
linestart = inst.starts_line)
)
continue
# Note: care is needed in merging this with python3.5
# and BUILD_MAP and parse3 custom rules.
# BUILD_MAP in 3.4 comes at the beginning and each tuple has STORE_MAP
# in 3.5 it comes at the end and STORE_MAP
# see parse3.py
elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE',
'UNPACK_SEQUENCE', 'MAKE_CLOSURE',
'RAISE_VARARGS'
):
pos_args = inst.argval
if inst.opname != 'BUILD_SLICE':
customize[opname] = pos_args
pass
opname = '%s_%d' % (opname, pos_args)
elif opname == 'JUMP_ABSOLUTE':
pattr = inst.argval
target = self.get_target(inst.offset)
if target < inst.offset:
if (inst.offset in self.stmts and
self.code[inst.offset+3] not in (END_FINALLY, POP_BLOCK)
and inst.offset not in self.not_continue):
opname = 'CONTINUE'
else:
opname = 'JUMP_BACK'
elif inst.offset in self.load_asserts:
opname = 'LOAD_ASSERT'
tokens.append(
Token(
type_ = opname,
attr = inst.argval,
pattr = pattr,
offset = inst.offset,
linestart = inst.starts_line,
)
)
pass
return tokens, {}
return self.disassemble3(co, opnames, classname, code_objects)
if __name__ == "__main__":
import inspect
co = inspect.currentframe().f_code
tokens, customize = Scanner34(3.4).disassemble(co)
for t in tokens:

View File

@@ -2,171 +2,24 @@
"""
Python 3.5 bytecode scanner/deparser
This overlaps Python's 3.5's dis module, and in fact in some cases
we just fall back to that. But the intent is that it can be run from
Python 2 and other versions of Python. Also, we save token information
for later use in deparsing.
This sets up opcodes for Python 3.5 and calls a generalized
scanner routine for Python 3.
"""
from __future__ import print_function
import inspect
from array import array
import uncompyle6.scanners.dis3 as dis3
import uncompyle6.scanners.scanner3 as scan3
from uncompyle6.opcodes.opcode_35 import opname as opnames
from uncompyle6.code import iscode
from uncompyle6.scanner import Token
import uncompyle6.opcodes.opcode_35
# verify uses JUMP_OPs from here
JUMP_OPs = uncompyle6.opcodes.opcode_35.JUMP_OPs
from uncompyle6.opcodes.opcode_35 import *
# bytecode verification, verify(), uses JUMP_OPs from here
from uncompyle6.opcodes.opcode_35 import JUMP_OPs
class Scanner35(scan3.Scanner3):
## FIXME: DRY with scanner34.py
# Note: we can't use built-in disassembly routines, unless
# we do post-processing like we do here.
def disassemble(self, co, classname=None, code_objects={}):
# import dis; dis.disassemble(co) # DEBUG
# Container for tokens
tokens = []
customize = {}
self.code = array('B', co.co_code)
self.build_lines_data(co)
self.build_prev_op()
bytecode = dis3.Bytecode(co, opnames)
# self.lines contains (block,addrLastInstr)
if classname:
classname = '_' + classname.lstrip('_') + '__'
def unmangle(name):
if name.startswith(classname) and name[-2:] != '__':
return name[len(classname) - 2:]
return name
else:
pass
# Scan for assertions. Later we will
# turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
# assertions
self.load_asserts = set()
bs = list(bytecode)
n = len(bs)
for i in range(n):
inst = bs[i]
if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n:
next_inst = bs[i+1]
if (next_inst.opname == 'LOAD_GLOBAL' and
next_inst.argval == 'AssertionError'):
self.load_asserts.add(next_inst.offset)
# Get jump targets
# Format: {target offset: [jump offsets]}
jump_targets = self.find_jump_targets()
for inst in bytecode:
if inst.offset in jump_targets:
jump_idx = 0
for jump_offset in jump_targets[inst.offset]:
tokens.append(Token('COME_FROM', None, repr(jump_offset),
offset='%s_%s' % (inst.offset, jump_idx)))
jump_idx += 1
pass
pass
pattr = inst.argrepr
opname = inst.opname
if opname in ['LOAD_CONST']:
const = inst.argval
if iscode(const):
if const.co_name == '<lambda>':
opname = 'LOAD_LAMBDA'
elif const.co_name == '<genexpr>':
opname = 'LOAD_GENEXPR'
elif const.co_name == '<dictcomp>':
opname = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>':
opname = 'LOAD_SETCOMP'
elif const.co_name == '<listcomp>':
opname = 'LOAD_LISTCOMP'
# verify() uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used
# for comparison (todo: think about changing this)
# pattr = 'code_object @ 0x%x %s->%s' %\
# (id(const), const.co_filename, const.co_name)
pattr = '<code_object ' + const.co_name + '>'
else:
pattr = const
pass
elif opname == 'MAKE_FUNCTION':
argc = inst.argval
attr = ((argc & 0xFF), (argc >> 8) & 0xFF, (argc >> 16) & 0x7FFF)
pos_args, name_pair_args, annotate_args = attr
if name_pair_args > 0:
opname = 'MAKE_FUNCTION_N%d' % name_pair_args
pass
if annotate_args > 0:
opname = '%s_A_%d' % [op_name, annotate_args]
pass
opname = '%s_%d' % (opname, pos_args)
pattr = ("%d positional, %d keyword pair, %d annotated" %
(pos_args, name_pair_args, annotate_args))
tokens.append(
Token(
type_ = opname,
attr = (pos_args, name_pair_args, annotate_args),
pattr = pattr,
offset = inst.offset,
linestart = inst.starts_line)
)
continue
elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE',
'BUILD_MAP', 'UNPACK_SEQUENCE', 'MAKE_CLOSURE',
'RAISE_VARARGS'
):
pos_args = inst.argval
if inst.opname != 'BUILD_SLICE':
customize[opname] = pos_args
pass
opname = '%s_%d' % (opname, pos_args)
elif opname == 'JUMP_ABSOLUTE':
pattr = inst.argval
target = self.get_target(inst.offset)
if target < inst.offset:
if (inst.offset in self.stmts and
self.code[inst.offset+3] not in (END_FINALLY, POP_BLOCK)
and inst.offset not in self.not_continue):
opname = 'CONTINUE'
else:
opname = 'JUMP_BACK'
elif inst.offset in self.load_asserts:
opname = 'LOAD_ASSERT'
tokens.append(
Token(
type_ = opname,
attr = inst.argval,
pattr = pattr,
offset = inst.offset,
linestart = inst.starts_line,
)
)
pass
return tokens, {}
return self.disassemble3(co, opnames, classname, code_objects)
if __name__ == "__main__":
import inspect
co = inspect.currentframe().f_code
tokens, customize = Scanner35(3.5).disassemble(co)
for t in tokens:

View File

@@ -992,14 +992,44 @@ class FragmentsWalker(pysource.SourceWalker, object):
"""
p = self.prec
self.prec = 100
assert node[-1] == 'kvlist'
kv_node = node[-1] # goto kvlist
self.indentMore(INDENT_PER_LEVEL)
line_seperator = ',\n' + self.indent
sep = INDENT_PER_LEVEL[:-1]
start = len(self.f.getvalue())
self.write('{')
if node[0].type.startswith('kvlist'):
# Python 3.5+ style key/value list in mapexpr
kv_node = node[0]
l = list(kv_node)
i = 0
while i < len(l):
l[1].parent = kv_node
l[i+1].parent = kv_node
name = self.traverse(l[i], indent='')
value = self.traverse(l[i+1], indent=self.indent+(len(name)+2)*' ')
self.write(sep, name, ': ', value)
sep = line_seperator
i += 2
elif node[1].type.startswith('kvlist'):
# Python 3.0..3.4 style key/value list in mapexpr
kv_node = node[1]
l = list(kv_node)
i = 0
while i < len(l):
l[1].parent = kv_node
l[i+1].parent = kv_node
name = self.traverse(l[i+1], indent='')
value = self.traverse(l[i], indent=self.indent+(len(name)+2)*' ')
self.write(sep, name, ': ', value)
sep = line_seperator
i += 3
else:
# Python 2 style kvlist
assert node[-1] == 'kvlist'
kv_node = node[-1] # goto kvlist
for kv in kv_node:
assert kv in ('kv', 'kv2', 'kv3')
# kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR

View File

@@ -1238,7 +1238,7 @@ class SourceWalker(GenericASTTraversal, object):
self.write('{')
if node[0].type.startswith('kvlist'):
# Python 3.5 style key/value list in mapexpr
# Python 3.5+ style key/value list in mapexpr
l = list(node[0])
i = 0
while i < len(l):
@@ -1247,11 +1247,22 @@ class SourceWalker(GenericASTTraversal, object):
self.write(sep, name, ': ', value)
sep = line_seperator
i += 2
elif node[1].type.startswith('kvlist'):
# Python 3.0..3.4 style key/value list in mapexpr
l = list(node[1])
i = 0
while i < len(l):
name = self.traverse(l[i+1], indent='')
value = self.traverse(l[i], indent=self.indent+(len(name)+2)*' ')
self.write(sep, name, ': ', value)
sep = line_seperator
i += 3
else:
# Python 2 style kvlist
assert node[-1] == 'kvlist'
node = node[-1] # goto kvlist
kv_node = node[-1] # goto kvlist
for kv in node:
for kv in kv_node:
assert kv in ('kv', 'kv2', 'kv3')
# kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR
# kv2 ::= DUP_TOP expr expr ROT_THREE STORE_SUBSCR