Revise "ingest" docstring

This commit is contained in:
rocky
2022-04-25 07:42:56 -04:00
parent 13266d1b56
commit c6642f5899
10 changed files with 131 additions and 26 deletions

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2016-2018, 2021 by Rocky Bernstein # Copyright (c) 2016-2018, 2021-2022 by Rocky Bernstein
""" """
Python 1.5 bytecode decompiler massaging. Python 1.5 bytecode decompiler massaging.
@@ -28,10 +28,22 @@ class Scanner15(scan.Scanner21):
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
""" """
Pick out tokens from an uncompyle6 code object, and transform them, Create "tokens" from the bytecode of a Python code object. Largely these
are the opcode names, but in some cases a name has been modified to make parsing
easier.
returning a list of uncompyle6 Token's. returning a list of uncompyle6 Token's.
The transformations are made to assist the deparsing grammar. Some transformations are made to assist the deparsing grammar:
- various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures
- operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
* BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take.
""" """
tokens, customize = scan.Scanner21.ingest(self, co, classname, code_objects, show_asm) tokens, customize = scan.Scanner21.ingest(self, co, classname, code_objects, show_asm)
for t in tokens: for t in tokens:

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2019, 2021 by Rocky Bernstein # Copyright (c) 2019, 2021-2022 by Rocky Bernstein
""" """
Python 1.6 bytecode decompiler massaging. Python 1.6 bytecode decompiler massaging.
@@ -28,10 +28,22 @@ class Scanner16(scan.Scanner21):
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
""" """
Pick out tokens from an uncompyle6 code object, and transform them, Create "tokens" from the bytecode of a Python code object. Largely these
are the opcode names, but in some cases a name has been modified to make parsing
easier.
returning a list of uncompyle6 Token's. returning a list of uncompyle6 Token's.
The transformations are made to assist the deparsing grammar. Some transformations are made to assist the deparsing grammar:
- various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures
- operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
* BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take.
""" """
tokens, customize = scan.Scanner21.ingest(self, co, classname, code_objects, show_asm) tokens, customize = scan.Scanner21.ingest(self, co, classname, code_objects, show_asm)
for t in tokens: for t in tokens:

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2015-2021 by Rocky Bernstein # Copyright (c) 2015-2022 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org> # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com> # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# #
@@ -183,15 +183,18 @@ class Scanner2(Scanner):
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
""" """
Pick out tokens from an uncompyle6 code object, and transform them, Create "tokens" from the bytecode of a Python code object. Largely these
are the opcode names, but in some cases a name has been modified to make parsing
easier.
returning a list of uncompyle6 Token's. returning a list of uncompyle6 Token's.
The transformations are made to assist the deparsing grammar. Some transformations are made to assist the deparsing grammar:
Specificially:
- various types of LOAD_CONST's are categorized in terms of what they load - various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures - COME_FROM instructions are added to assist parsing control structures
- MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments - operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
- some EXTENDED_ARGS instructions are removed * BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2016-2018, 2021 by Rocky Bernstein # Copyright (c) 2016-2018, 2021-2022 by Rocky Bernstein
""" """
Python 2.2 bytecode massaging. Python 2.2 bytecode massaging.
@@ -29,6 +29,24 @@ class Scanner22(scan.Scanner23):
return return
def ingest22(self, co, classname=None, code_objects={}, show_asm=None): def ingest22(self, co, classname=None, code_objects={}, show_asm=None):
"""
Create "tokens" from the bytecode of a Python code object. Largely these
are the opcode names, but in some cases a name has been modified to make parsing
easier.
The result is a list of uncompyle6 Token's.
Some transformations are made to assist the deparsing grammar:
- various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures
- operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
* BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take.
"""
tokens, customize = self.parent_ingest(co, classname, code_objects, show_asm) tokens, customize = self.parent_ingest(co, classname, code_objects, show_asm)
tokens = [t for t in tokens if t.kind != 'SET_LINENO'] tokens = [t for t in tokens if t.kind != 'SET_LINENO']
return tokens, customize return tokens, customize

View File

@@ -49,14 +49,18 @@ class Scanner26(scan.Scanner2):
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
""" """
Pick out tokens from an uncompyle6 code object, and transform them, Create "tokens" from the bytecode of a Python code object. Largely these
returning a list of uncompyle6 'Token's. are the opcode names, but in some cases a name has been modified to make parsing
easier.
The result is a list of uncompyle6 Token's.
The transformations are made to assist the deparsing grammar. Some transformations are made to assist the deparsing grammar:
Specificially:
- various types of LOAD_CONST's are categorized in terms of what they load - various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures - COME_FROM instructions are added to assist parsing control structures
- MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments - operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
* BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2015-2019, 2021 by Rocky Bernstein # Copyright (c) 2015-2019, 2021-2022 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org> # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com> # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# #

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2016-2018, 2021 by Rocky Bernstein # Copyright (c) 2016-2018, 2021-2022 by Rocky Bernstein
""" """
Python 3.6 bytecode decompiler scanner Python 3.6 bytecode decompiler scanner
@@ -24,6 +24,24 @@ class Scanner36(Scanner3):
return return
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
"""
Create "tokens" from the bytecode of a Python code object. Largely these
are the opcode names, but in some cases a name has been modified to make parsing
easier.
The result is a list of uncompyle6 Token's.
Some transformations are made to assist the deparsing grammar:
- various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures
- operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
* BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take.
"""
tokens, customize = Scanner3.ingest(self, co, classname, code_objects, show_asm) tokens, customize = Scanner3.ingest(self, co, classname, code_objects, show_asm)
not_pypy36 = not (self.version[:2] == (3, 6) and self.is_pypy) not_pypy36 = not (self.version[:2] == (3, 6) and self.is_pypy)
for t in tokens: for t in tokens:

View File

@@ -45,6 +45,24 @@ class Scanner37(Scanner37Base):
def ingest( def ingest(
self, co, classname=None, code_objects={}, show_asm=None self, co, classname=None, code_objects={}, show_asm=None
) -> Tuple[list, dict]: ) -> Tuple[list, dict]:
"""
Create "tokens" from the bytecode of a Python code object. Largely these
are the opcode names, but in some cases a name has been modified to make parsing
easier.
The result is a list of uncompyle6 Token's.
Some transformations are made to assist the deparsing grammar:
- various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures
- operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
* BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take.
"""
tokens, customize = Scanner37Base.ingest(self, co, classname, code_objects, show_asm) tokens, customize = Scanner37Base.ingest(self, co, classname, code_objects, show_asm)
new_tokens = [] new_tokens = []
for i, t in enumerate(tokens): for i, t in enumerate(tokens):

View File

@@ -183,20 +183,22 @@ class Scanner37Base(Scanner):
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
""" """
Pick out tokens from an uncompyle6 code object, and transform them, Create "tokens" from the bytecode of a Python code object. Largely these
are the opcode names, but in some cases a name has been modified to make parsing
easier.
returning a list of uncompyle6 Token's. returning a list of uncompyle6 Token's.
The transformations are made to assist the deparsing grammar. Some transformations are made to assist the deparsing grammar:
Specificially:
- various types of LOAD_CONST's are categorized in terms of what they load - various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures - COME_FROM instructions are added to assist parsing control structures
- MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments - operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
- some EXTENDED_ARGS instructions are removed * BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take. cause specific rules for the specific number of arguments they take.
""" """
def tokens_append(j, token): def tokens_append(j, token):

View File

@@ -42,6 +42,24 @@ class Scanner38(Scanner37):
pass pass
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
"""
Create "tokens" from the bytecode of a Python code object. Largely these
are the opcode names, but in some cases a name has been modified to make parsing
easier.
The result is a list of uncompyle6 Token's.
Some transformations are made to assist the deparsing grammar:
- various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures
- operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
* BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take.
"""
tokens, customize = super(Scanner38, self).ingest( tokens, customize = super(Scanner38, self).ingest(
co, classname, code_objects, show_asm co, classname, code_objects, show_asm
) )