disassemble -> ingest where appropriate

As part of tokenization for (de)parsing, we need to do something like a
disassembly, but it is really a little different.

Disassembly, strictly speaking, is done by the xdis module now.
What "ingestion" does is massage the instruction tokens into a form that is
more amenable to parsing.

In sum, ingestion is different from disassembly, although disassembly is
generally the first part of ingestion.
This commit is contained in:
rocky
2016-09-04 11:43:02 -04:00
parent 979bca4fe0
commit c7788e4545
19 changed files with 37 additions and 43 deletions

View File

@@ -18,7 +18,7 @@ want to run on Python 2.7.
from __future__ import print_function
import os, sys
import sys
from collections import deque
import uncompyle6
@@ -45,7 +45,7 @@ def disco(version, co, out=None, is_pypy=False):
scanner = get_scanner(version, is_pypy=is_pypy)
queue = deque([co])
disco_loop(scanner.disassemble, queue, real_out)
disco_loop(scanner.ingest, queue, real_out)
def disco_loop(disasm, queue, real_out):

View File

@@ -14,7 +14,7 @@ def uncompyle(
timestamp=None, showgrammar=False, code_objects={},
is_pypy=False, magic_int=None):
"""
disassembles and deparses a given code block 'co'
ingests and deparses a given code block 'co'
"""
assert iscode(co)

View File

@@ -645,10 +645,8 @@ def python_parser(version, co, out=sys.stdout, showasm=False,
example 2.6, 2.7, 3.2, 3.3, 3.4, 3.5 etc.
:param co: The code object to parse.
:param out: File like object to write the output to.
:param showasm: Flag which determines whether the disassembled code
is written to sys.stdout or not. (It is also to
pass a file like object, into which the asm will be
written).
:param showasm: Flag which determines whether the disassembled and
ingested code is written to sys.stdout or not.
:param parser_debug: dict containing debug flags for the spark parser.
:return: Abstract syntax tree representation of the code object.
@@ -657,7 +655,7 @@ def python_parser(version, co, out=sys.stdout, showasm=False,
assert iscode(co)
from uncompyle6.scanner import get_scanner
scanner = get_scanner(version, is_pypy)
tokens, customize = scanner.disassemble(co)
tokens, customize = scanner.ingest(co)
maybe_show_asm(showasm, tokens)
# For heavy grammar debugging

View File

@@ -6,12 +6,8 @@
# See LICENSE
#
"""
scanner/disassembler module. From here we call various version-specific
scanner/ingestion module. From here we call various version-specific
scanners, e.g. for Python 2.7 or 3.4.
This overlaps Python's dis module, but it can be run from Python 2 or
Python 3 and other versions of Python. Also, we save token information
for later use in deparsing.
"""
from __future__ import print_function
@@ -45,7 +41,7 @@ class Code(object):
for i in dir(co):
if i.startswith('co_'):
setattr(self, i, getattr(co, i))
self._tokens, self._customize = scanner.disassemble(co, classname)
self._tokens, self._customize = scanner.ingest(co, classname)
class Scanner(object):
@@ -286,4 +282,4 @@ if __name__ == "__main__":
import inspect, uncompyle6
co = inspect.currentframe().f_code
scanner = get_scanner(uncompyle6.PYTHON_VERSION, IS_PYPY, True)
tokens, customize = scanner.disassemble(co, {})
tokens, customize = scanner.ingest(co, {})

View File

@@ -39,7 +39,7 @@ class Scanner2(scan.Scanner):
# For <2.5 it is <generator expression>
self.genexpr_name = '<genexpr>';
def disassemble(self, co, classname=None, code_objects={}, show_asm=None):
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
"""
Pick out tokens from an uncompyle6 code object, and transform them,
returning a list of uncompyle6 'Token's.
@@ -906,7 +906,7 @@ if __name__ == "__main__":
if PYTHON_VERSION >= 2.3:
co = inspect.currentframe().f_code
from uncompyle6 import PYTHON_VERSION
tokens, customize = Scanner2(PYTHON_VERSION).disassemble(co)
tokens, customize = Scanner2(PYTHON_VERSION).ingest(co)
for t in tokens:
print(t)
else:

View File

@@ -8,7 +8,7 @@ information for later use in deparsing.
"""
import uncompyle6.scanners.scanner23 as scan
# from uncompyle6.scanners.scanner26 import disassemble as disassemble26
# from uncompyle6.scanners.scanner26 import ingest as ingest26
# bytecode verification, verify(), uses JUMP_OPs from here
from xdis.opcodes import opcode_22
@@ -25,11 +25,11 @@ class Scanner22(scan.Scanner23):
self.opname = opcode_22.opname
self.version = 2.2
self.genexpr_name = '<generator expression>';
self.parent_injest = self.disassemble
self.disassemble = self.disassemble22
self.parent_ingest = self.ingest
self.ingest = self.ingest22
return
def disassemble22(self, co, classname=None, code_objects={}, show_asm=None):
tokens, customize = self.parent_injest(co, classname, code_objects, show_asm)
def ingest22(self, co, classname=None, code_objects={}, show_asm=None):
tokens, customize = self.parent_ingest(co, classname, code_objects, show_asm)
tokens = [t for t in tokens if t.type != 'SET_LINENO']
return tokens, customize

View File

@@ -70,7 +70,7 @@ class Scanner26(scan.Scanner2):
self.pop_jump_if_or_pop = frozenset([])
return
def disassemble(self, co, classname=None, code_objects={}, show_asm=None):
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
"""
Pick out tokens from an uncompyle6 code object, and transform them,
returning a list of uncompyle6 'Token's.
@@ -311,7 +311,7 @@ if __name__ == "__main__":
if PYTHON_VERSION == 2.6:
import inspect
co = inspect.currentframe().f_code
tokens, customize = Scanner26(show_asm=True).disassemble(co)
tokens, customize = Scanner26(show_asm=True).ingest(co)
else:
print("Need to be Python 2.6 to demo; I am %s." %
PYTHON_VERSION)

View File

@@ -106,7 +106,7 @@ if __name__ == "__main__":
if PYTHON_VERSION == 2.7:
import inspect
co = inspect.currentframe().f_code
tokens, customize = Scanner27().disassemble(co)
tokens, customize = Scanner27().ingest(co)
for t in tokens:
print(t)
pass

View File

@@ -110,7 +110,7 @@ class Scanner3(Scanner):
(self.opc.POP_JUMP_IF_TRUE, self.opc.JUMP_ABSOLUTE)]
def disassemble(self, co, classname=None, code_objects={}, show_asm=None):
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
"""
Pick out tokens from an uncompyle6 code object, and transform them,
returning a list of uncompyle6 'Token's.
@@ -837,7 +837,7 @@ if __name__ == "__main__":
import inspect
co = inspect.currentframe().f_code
from uncompyle6 import PYTHON_VERSION
tokens, customize = Scanner3(PYTHON_VERSION).disassemble(co)
tokens, customize = Scanner3(PYTHON_VERSION).ingest(co)
for t in tokens:
print(t)
else:

View File

@@ -25,7 +25,7 @@ if __name__ == "__main__":
if PYTHON_VERSION == 3.2:
import inspect
co = inspect.currentframe().f_code
tokens, customize = Scanner32().disassemble(co)
tokens, customize = Scanner32().ingest(co)
for t in tokens:
print(t)
pass

View File

@@ -25,7 +25,7 @@ if __name__ == "__main__":
if PYTHON_VERSION == 3.3:
import inspect
co = inspect.currentframe().f_code
tokens, customize = Scanner33().disassemble(co)
tokens, customize = Scanner33().ingest(co)
for t in tokens:
print(t)
pass

View File

@@ -27,7 +27,7 @@ if __name__ == "__main__":
if PYTHON_VERSION == 3.4:
import inspect
co = inspect.currentframe().f_code
tokens, customize = Scanner34().disassemble(co)
tokens, customize = Scanner34().ingest(co)
for t in tokens:
print(t)
pass

View File

@@ -26,7 +26,7 @@ if __name__ == "__main__":
if PYTHON_VERSION == 3.5:
import inspect
co = inspect.currentframe().f_code
tokens, customize = Scanner35().disassemble(co)
tokens, customize = Scanner35().ingest(co)
for t in tokens:
print(t)
pass

View File

@@ -26,7 +26,7 @@ if __name__ == "__main__":
if PYTHON_VERSION == 3.6:
import inspect
co = inspect.currentframe().f_code
tokens, customize = Scanner36().disassemble(co)
tokens, customize = Scanner36().ingest(co)
for t in tokens:
print(t.format())
pass

View File

@@ -85,14 +85,14 @@ from uncompyle6.show import (
def align_deparse_code(version, co, out=sys.stderr, showasm=False, showast=False,
showgrammar=False, code_objects={}, compile_mode='exec', is_pypy=False):
"""
disassembles and deparses a given code block 'co'
ingests and deparses a given code block 'co'
"""
assert iscode(co)
# store final output stream for case of error
scanner = get_scanner(version, is_pypy=is_pypy)
tokens, customize = scanner.disassemble(co, code_objects=code_objects)
tokens, customize = scanner.ingest(co, code_objects=code_objects)
maybe_show_asm(showasm, tokens)
debug_parser = dict(PARSER_DEFAULT_DEBUG)

View File

@@ -1738,7 +1738,7 @@ def deparse_code(version, co, out=StringIO(), showasm=False, showast=False,
example 2.6, 2.7, 3.2, 3.3, 3.4, 3.5 etc.
:param co: The code object to parse.
:param out: File like object to write the output to.
:param showasm: Flag which determines whether the disassembled code
:param showasm: Flag which determines whether the ingested code
is written to sys.stdout or not. (It is also to
pass a file like object, into which the asm will be
written).
@@ -1758,9 +1758,9 @@ def deparse_code(version, co, out=StringIO(), showasm=False, showast=False,
# store final output stream for case of error
scanner = get_scanner(version, is_pypy=is_pypy)
tokens, customize = scanner.disassemble(co)
tokens, customize = scanner.ingest(co)
tokens, customize = scanner.disassemble(co)
tokens, customize = scanner.ingest(co)
maybe_show_asm(showasm, tokens)
debug_parser = dict(PARSER_DEFAULT_DEBUG)

View File

@@ -2279,14 +2279,14 @@ class SourceWalker(GenericASTTraversal, object):
def deparse_code(version, co, out=sys.stdout, showasm=False, showast=False,
showgrammar=False, code_objects={}, compile_mode='exec', is_pypy=False):
"""
disassembles and deparses a given code block 'co'
ingests and deparses a given code block 'co'
"""
assert iscode(co)
# store final output stream for case of error
scanner = get_scanner(version, is_pypy=is_pypy)
tokens, customize = scanner.disassemble(co, code_objects=code_objects)
tokens, customize = scanner.ingest(co, code_objects=code_objects)
maybe_show_asm(showasm, tokens)
debug_parser = dict(PARSER_DEFAULT_DEBUG)

View File

@@ -6,7 +6,7 @@ def maybe_show_asm(showasm, tokens):
Show the asm based on the showasm flag (or file object), writing to the
appropriate stream depending on the type of the flag.
:param showasm: Flag which determines whether the disassembled code is
:param showasm: Flag which determines whether the ingested code is
written to sys.stdout or not. (It is also to pass a file
like object, into which the asm will be written).
:param tokens: The asm tokens to show.

View File

@@ -229,10 +229,10 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2,
# which would get confusing.
scanner.setTokenClass(Token)
try:
# disassemble both code-objects
tokens1, customize = scanner.disassemble(code_obj1)
# ingest both code-objects
tokens1, customize = scanner.ingest(code_obj1)
del customize # save memory
tokens2, customize = scanner.disassemble(code_obj2)
tokens2, customize = scanner.ingest(code_obj2)
del customize # save memory
finally:
scanner.resetTokenClass() # restore Token class