disassemble -> ingest where appropriate

As part of tokenization for (de)parsing, we need to do something like a
disassembly, but it is really a little different.

Disassembly, strictly speaking, is done by the xdis module now.
What "ingestion" does is massage the instruction tokens into a form that is
more amenable to parsing.

In sum, ingestion is different from disassembly, although disassembly is
generally the first part of ingestion.
This commit is contained in:
rocky
2016-09-04 11:43:02 -04:00
parent 979bca4fe0
commit c7788e4545
19 changed files with 37 additions and 43 deletions

View File

@@ -18,7 +18,7 @@ want to run on Python 2.7.
from __future__ import print_function from __future__ import print_function
import os, sys import sys
from collections import deque from collections import deque
import uncompyle6 import uncompyle6
@@ -45,7 +45,7 @@ def disco(version, co, out=None, is_pypy=False):
scanner = get_scanner(version, is_pypy=is_pypy) scanner = get_scanner(version, is_pypy=is_pypy)
queue = deque([co]) queue = deque([co])
disco_loop(scanner.disassemble, queue, real_out) disco_loop(scanner.ingest, queue, real_out)
def disco_loop(disasm, queue, real_out): def disco_loop(disasm, queue, real_out):

View File

@@ -14,7 +14,7 @@ def uncompyle(
timestamp=None, showgrammar=False, code_objects={}, timestamp=None, showgrammar=False, code_objects={},
is_pypy=False, magic_int=None): is_pypy=False, magic_int=None):
""" """
disassembles and deparses a given code block 'co' ingests and deparses a given code block 'co'
""" """
assert iscode(co) assert iscode(co)

View File

@@ -645,10 +645,8 @@ def python_parser(version, co, out=sys.stdout, showasm=False,
example 2.6, 2.7, 3.2, 3.3, 3.4, 3.5 etc. example 2.6, 2.7, 3.2, 3.3, 3.4, 3.5 etc.
:param co: The code object to parse. :param co: The code object to parse.
:param out: File like object to write the output to. :param out: File like object to write the output to.
:param showasm: Flag which determines whether the disassembled code :param showasm: Flag which determines whether the disassembled and
is written to sys.stdout or not. (It is also to ingested code is written to sys.stdout or not.
pass a file like object, into which the asm will be
written).
:param parser_debug: dict containing debug flags for the spark parser. :param parser_debug: dict containing debug flags for the spark parser.
:return: Abstract syntax tree representation of the code object. :return: Abstract syntax tree representation of the code object.
@@ -657,7 +655,7 @@ def python_parser(version, co, out=sys.stdout, showasm=False,
assert iscode(co) assert iscode(co)
from uncompyle6.scanner import get_scanner from uncompyle6.scanner import get_scanner
scanner = get_scanner(version, is_pypy) scanner = get_scanner(version, is_pypy)
tokens, customize = scanner.disassemble(co) tokens, customize = scanner.ingest(co)
maybe_show_asm(showasm, tokens) maybe_show_asm(showasm, tokens)
# For heavy grammar debugging # For heavy grammar debugging

View File

@@ -6,12 +6,8 @@
# See LICENSE # See LICENSE
# #
""" """
scanner/disassembler module. From here we call various version-specific scanner/ingestion module. From here we call various version-specific
scanners, e.g. for Python 2.7 or 3.4. scanners, e.g. for Python 2.7 or 3.4.
This overlaps Python's dis module, but it can be run from Python 2 or
Python 3 and other versions of Python. Also, we save token information
for later use in deparsing.
""" """
from __future__ import print_function from __future__ import print_function
@@ -45,7 +41,7 @@ class Code(object):
for i in dir(co): for i in dir(co):
if i.startswith('co_'): if i.startswith('co_'):
setattr(self, i, getattr(co, i)) setattr(self, i, getattr(co, i))
self._tokens, self._customize = scanner.disassemble(co, classname) self._tokens, self._customize = scanner.ingest(co, classname)
class Scanner(object): class Scanner(object):
@@ -286,4 +282,4 @@ if __name__ == "__main__":
import inspect, uncompyle6 import inspect, uncompyle6
co = inspect.currentframe().f_code co = inspect.currentframe().f_code
scanner = get_scanner(uncompyle6.PYTHON_VERSION, IS_PYPY, True) scanner = get_scanner(uncompyle6.PYTHON_VERSION, IS_PYPY, True)
tokens, customize = scanner.disassemble(co, {}) tokens, customize = scanner.ingest(co, {})

View File

@@ -39,7 +39,7 @@ class Scanner2(scan.Scanner):
# For <2.5 it is <generator expression> # For <2.5 it is <generator expression>
self.genexpr_name = '<genexpr>'; self.genexpr_name = '<genexpr>';
def disassemble(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
""" """
Pick out tokens from an uncompyle6 code object, and transform them, Pick out tokens from an uncompyle6 code object, and transform them,
returning a list of uncompyle6 'Token's. returning a list of uncompyle6 'Token's.
@@ -906,7 +906,7 @@ if __name__ == "__main__":
if PYTHON_VERSION >= 2.3: if PYTHON_VERSION >= 2.3:
co = inspect.currentframe().f_code co = inspect.currentframe().f_code
from uncompyle6 import PYTHON_VERSION from uncompyle6 import PYTHON_VERSION
tokens, customize = Scanner2(PYTHON_VERSION).disassemble(co) tokens, customize = Scanner2(PYTHON_VERSION).ingest(co)
for t in tokens: for t in tokens:
print(t) print(t)
else: else:

View File

@@ -8,7 +8,7 @@ information for later use in deparsing.
""" """
import uncompyle6.scanners.scanner23 as scan import uncompyle6.scanners.scanner23 as scan
# from uncompyle6.scanners.scanner26 import disassemble as disassemble26 # from uncompyle6.scanners.scanner26 import ingest as ingest26
# bytecode verification, verify(), uses JUMP_OPs from here # bytecode verification, verify(), uses JUMP_OPs from here
from xdis.opcodes import opcode_22 from xdis.opcodes import opcode_22
@@ -25,11 +25,11 @@ class Scanner22(scan.Scanner23):
self.opname = opcode_22.opname self.opname = opcode_22.opname
self.version = 2.2 self.version = 2.2
self.genexpr_name = '<generator expression>'; self.genexpr_name = '<generator expression>';
self.parent_injest = self.disassemble self.parent_ingest = self.ingest
self.disassemble = self.disassemble22 self.ingest = self.ingest22
return return
def disassemble22(self, co, classname=None, code_objects={}, show_asm=None): def ingest22(self, co, classname=None, code_objects={}, show_asm=None):
tokens, customize = self.parent_injest(co, classname, code_objects, show_asm) tokens, customize = self.parent_ingest(co, classname, code_objects, show_asm)
tokens = [t for t in tokens if t.type != 'SET_LINENO'] tokens = [t for t in tokens if t.type != 'SET_LINENO']
return tokens, customize return tokens, customize

View File

@@ -70,7 +70,7 @@ class Scanner26(scan.Scanner2):
self.pop_jump_if_or_pop = frozenset([]) self.pop_jump_if_or_pop = frozenset([])
return return
def disassemble(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
""" """
Pick out tokens from an uncompyle6 code object, and transform them, Pick out tokens from an uncompyle6 code object, and transform them,
returning a list of uncompyle6 'Token's. returning a list of uncompyle6 'Token's.
@@ -311,7 +311,7 @@ if __name__ == "__main__":
if PYTHON_VERSION == 2.6: if PYTHON_VERSION == 2.6:
import inspect import inspect
co = inspect.currentframe().f_code co = inspect.currentframe().f_code
tokens, customize = Scanner26(show_asm=True).disassemble(co) tokens, customize = Scanner26(show_asm=True).ingest(co)
else: else:
print("Need to be Python 2.6 to demo; I am %s." % print("Need to be Python 2.6 to demo; I am %s." %
PYTHON_VERSION) PYTHON_VERSION)

View File

@@ -106,7 +106,7 @@ if __name__ == "__main__":
if PYTHON_VERSION == 2.7: if PYTHON_VERSION == 2.7:
import inspect import inspect
co = inspect.currentframe().f_code co = inspect.currentframe().f_code
tokens, customize = Scanner27().disassemble(co) tokens, customize = Scanner27().ingest(co)
for t in tokens: for t in tokens:
print(t) print(t)
pass pass

View File

@@ -110,7 +110,7 @@ class Scanner3(Scanner):
(self.opc.POP_JUMP_IF_TRUE, self.opc.JUMP_ABSOLUTE)] (self.opc.POP_JUMP_IF_TRUE, self.opc.JUMP_ABSOLUTE)]
def disassemble(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
""" """
Pick out tokens from an uncompyle6 code object, and transform them, Pick out tokens from an uncompyle6 code object, and transform them,
returning a list of uncompyle6 'Token's. returning a list of uncompyle6 'Token's.
@@ -837,7 +837,7 @@ if __name__ == "__main__":
import inspect import inspect
co = inspect.currentframe().f_code co = inspect.currentframe().f_code
from uncompyle6 import PYTHON_VERSION from uncompyle6 import PYTHON_VERSION
tokens, customize = Scanner3(PYTHON_VERSION).disassemble(co) tokens, customize = Scanner3(PYTHON_VERSION).ingest(co)
for t in tokens: for t in tokens:
print(t) print(t)
else: else:

View File

@@ -25,7 +25,7 @@ if __name__ == "__main__":
if PYTHON_VERSION == 3.2: if PYTHON_VERSION == 3.2:
import inspect import inspect
co = inspect.currentframe().f_code co = inspect.currentframe().f_code
tokens, customize = Scanner32().disassemble(co) tokens, customize = Scanner32().ingest(co)
for t in tokens: for t in tokens:
print(t) print(t)
pass pass

View File

@@ -25,7 +25,7 @@ if __name__ == "__main__":
if PYTHON_VERSION == 3.3: if PYTHON_VERSION == 3.3:
import inspect import inspect
co = inspect.currentframe().f_code co = inspect.currentframe().f_code
tokens, customize = Scanner33().disassemble(co) tokens, customize = Scanner33().ingest(co)
for t in tokens: for t in tokens:
print(t) print(t)
pass pass

View File

@@ -27,7 +27,7 @@ if __name__ == "__main__":
if PYTHON_VERSION == 3.4: if PYTHON_VERSION == 3.4:
import inspect import inspect
co = inspect.currentframe().f_code co = inspect.currentframe().f_code
tokens, customize = Scanner34().disassemble(co) tokens, customize = Scanner34().ingest(co)
for t in tokens: for t in tokens:
print(t) print(t)
pass pass

View File

@@ -26,7 +26,7 @@ if __name__ == "__main__":
if PYTHON_VERSION == 3.5: if PYTHON_VERSION == 3.5:
import inspect import inspect
co = inspect.currentframe().f_code co = inspect.currentframe().f_code
tokens, customize = Scanner35().disassemble(co) tokens, customize = Scanner35().ingest(co)
for t in tokens: for t in tokens:
print(t) print(t)
pass pass

View File

@@ -26,7 +26,7 @@ if __name__ == "__main__":
if PYTHON_VERSION == 3.6: if PYTHON_VERSION == 3.6:
import inspect import inspect
co = inspect.currentframe().f_code co = inspect.currentframe().f_code
tokens, customize = Scanner36().disassemble(co) tokens, customize = Scanner36().ingest(co)
for t in tokens: for t in tokens:
print(t.format()) print(t.format())
pass pass

View File

@@ -85,14 +85,14 @@ from uncompyle6.show import (
def align_deparse_code(version, co, out=sys.stderr, showasm=False, showast=False, def align_deparse_code(version, co, out=sys.stderr, showasm=False, showast=False,
showgrammar=False, code_objects={}, compile_mode='exec', is_pypy=False): showgrammar=False, code_objects={}, compile_mode='exec', is_pypy=False):
""" """
disassembles and deparses a given code block 'co' ingests and deparses a given code block 'co'
""" """
assert iscode(co) assert iscode(co)
# store final output stream for case of error # store final output stream for case of error
scanner = get_scanner(version, is_pypy=is_pypy) scanner = get_scanner(version, is_pypy=is_pypy)
tokens, customize = scanner.disassemble(co, code_objects=code_objects) tokens, customize = scanner.ingest(co, code_objects=code_objects)
maybe_show_asm(showasm, tokens) maybe_show_asm(showasm, tokens)
debug_parser = dict(PARSER_DEFAULT_DEBUG) debug_parser = dict(PARSER_DEFAULT_DEBUG)

View File

@@ -1738,7 +1738,7 @@ def deparse_code(version, co, out=StringIO(), showasm=False, showast=False,
example 2.6, 2.7, 3.2, 3.3, 3.4, 3.5 etc. example 2.6, 2.7, 3.2, 3.3, 3.4, 3.5 etc.
:param co: The code object to parse. :param co: The code object to parse.
:param out: File like object to write the output to. :param out: File like object to write the output to.
:param showasm: Flag which determines whether the disassembled code :param showasm: Flag which determines whether the ingestd code
is written to sys.stdout or not. (It is also to is written to sys.stdout or not. (It is also to
pass a file like object, into which the asm will be pass a file like object, into which the asm will be
written). written).
@@ -1758,9 +1758,9 @@ def deparse_code(version, co, out=StringIO(), showasm=False, showast=False,
# store final output stream for case of error # store final output stream for case of error
scanner = get_scanner(version, is_pypy=is_pypy) scanner = get_scanner(version, is_pypy=is_pypy)
tokens, customize = scanner.disassemble(co) tokens, customize = scanner.ingest(co)
tokens, customize = scanner.disassemble(co) tokens, customize = scanner.ingest(co)
maybe_show_asm(showasm, tokens) maybe_show_asm(showasm, tokens)
debug_parser = dict(PARSER_DEFAULT_DEBUG) debug_parser = dict(PARSER_DEFAULT_DEBUG)

View File

@@ -2279,14 +2279,14 @@ class SourceWalker(GenericASTTraversal, object):
def deparse_code(version, co, out=sys.stdout, showasm=False, showast=False, def deparse_code(version, co, out=sys.stdout, showasm=False, showast=False,
showgrammar=False, code_objects={}, compile_mode='exec', is_pypy=False): showgrammar=False, code_objects={}, compile_mode='exec', is_pypy=False):
""" """
disassembles and deparses a given code block 'co' ingests and deparses a given code block 'co'
""" """
assert iscode(co) assert iscode(co)
# store final output stream for case of error # store final output stream for case of error
scanner = get_scanner(version, is_pypy=is_pypy) scanner = get_scanner(version, is_pypy=is_pypy)
tokens, customize = scanner.disassemble(co, code_objects=code_objects) tokens, customize = scanner.ingest(co, code_objects=code_objects)
maybe_show_asm(showasm, tokens) maybe_show_asm(showasm, tokens)
debug_parser = dict(PARSER_DEFAULT_DEBUG) debug_parser = dict(PARSER_DEFAULT_DEBUG)

View File

@@ -6,7 +6,7 @@ def maybe_show_asm(showasm, tokens):
Show the asm based on the showasm flag (or file object), writing to the Show the asm based on the showasm flag (or file object), writing to the
appropriate stream depending on the type of the flag. appropriate stream depending on the type of the flag.
:param showasm: Flag which determines whether the disassembled code is :param showasm: Flag which determines whether the ingested code is
written to sys.stdout or not. (It is also to pass a file written to sys.stdout or not. (It is also to pass a file
like object, into which the asm will be written). like object, into which the asm will be written).
:param tokens: The asm tokens to show. :param tokens: The asm tokens to show.

View File

@@ -229,10 +229,10 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2,
# which would get confusing. # which would get confusing.
scanner.setTokenClass(Token) scanner.setTokenClass(Token)
try: try:
# disassemble both code-objects # ingest both code-objects
tokens1, customize = scanner.disassemble(code_obj1) tokens1, customize = scanner.ingest(code_obj1)
del customize # save memory del customize # save memory
tokens2, customize = scanner.disassemble(code_obj2) tokens2, customize = scanner.ingest(code_obj2)
del customize # save memory del customize # save memory
finally: finally:
scanner.resetTokenClass() # restore Token class scanner.resetTokenClass() # restore Token class