Files
python-uncompyle6/uncompyle6/scanners/scanner37.py
2024-03-02 04:32:26 -05:00

201 lines
7.3 KiB
Python

# Copyright (c) 2016-2019, 2021-2023 by Rocky Bernstein
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Python 3.7 bytecode decompiler scanner.
Does some additional massaging of xdis-disassembled instructions to
make things easier for decompilation.
This sets up opcodes Python's 3.7 and calls a generalized
scanner routine for Python 3.
"""
from typing import Tuple
# bytecode verification, verify(), uses JUMP_OPs from here
from xdis.opcodes import opcode_37 as opc
from uncompyle6.scanner import CONST_COLLECTIONS, Token
from uncompyle6.scanners.scanner37base import Scanner37Base
# bytecode verification, verify(), uses JUMP_OPS from here
JUMP_OPs = opc.JUMP_OPS
class Scanner37(Scanner37Base):
def __init__(self, show_asm=None, debug="", is_pypy=False):
Scanner37Base.__init__(self, (3, 7), show_asm, debug, is_pypy)
self.debug = debug
return
pass
def bound_collection_from_tokens(
self, tokens: list, next_tokens: list, t: Token, i: int, collection_type: str
) -> list:
count = t.attr
assert isinstance(count, int)
assert count <= i
if collection_type == "CONST_DICT":
# constant dictionaries work via BUILD_CONST_KEY_MAP and
# handle the values() like sets and lists.
# However, the keys() are an LOAD_CONST of the keys.
# adjust offset to account for this
count += 1
# For small lists don't bother
if count < 5:
return next_tokens + [t]
collection_start = i - count
for j in range(collection_start, i):
if tokens[j].kind not in (
"LOAD_CODE",
"LOAD_CONST",
"LOAD_FAST",
"LOAD_GLOBAL",
"LOAD_NAME",
"LOAD_STR",
):
return next_tokens + [t]
collection_enum = CONST_COLLECTIONS.index(collection_type)
# If we get here, all instructions before tokens[i] are LOAD_CONST and we can replace
# add a boundary marker and change LOAD_CONST to something else.
new_tokens = next_tokens[:-count]
start_offset = tokens[collection_start].offset
new_tokens.append(
Token(
opname="COLLECTION_START",
attr=collection_enum,
pattr=collection_type,
offset=f"{start_offset}_0",
linestart=False,
has_arg=True,
has_extended_arg=False,
opc=self.opc,
optype="pseudo",
)
)
for j in range(collection_start, i):
new_tokens.append(
Token(
opname="ADD_VALUE",
attr=tokens[j].attr,
pattr=tokens[j].pattr,
offset=tokens[j].offset,
linestart=tokens[j].linestart,
has_arg=True,
has_extended_arg=False,
opc=self.opc,
optype=tokens[j].optype,
)
)
new_tokens.append(
Token(
opname=f"BUILD_{collection_type}",
attr=t.attr,
pattr=t.pattr,
offset=t.offset,
linestart=t.linestart,
has_arg=t.has_arg,
has_extended_arg=False,
opc=t.opc,
)
)
return new_tokens
def ingest(
self, bytecode, classname=None, code_objects={}, show_asm=None
) -> Tuple[list, dict]:
"""
Create "tokens" the bytecode of an Python code object. Largely these
are the opcode name, but in some cases that has been modified to make parsing
easier.
returning a list of uncompyle6 Token's.
Some transformations are made to assist the deparsing grammar:
- various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures
- operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
* BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take.
"""
tokens, customize = Scanner37Base.ingest(
self, bytecode, classname, code_objects, show_asm
)
new_tokens = []
for i, t in enumerate(tokens):
# things that smash new_tokens like BUILD_LIST have to come first.
if t.op in (
self.opc.BUILD_CONST_KEY_MAP,
self.opc.BUILD_LIST,
self.opc.BUILD_SET,
):
collection_type = (
"DICT"
if t.kind.startswith("BUILD_CONST_KEY_MAP")
else t.kind.split("_")[1]
)
new_tokens = self.bound_collection_from_tokens(
tokens, new_tokens, t, i, f"CONST_{collection_type}"
)
continue
# The lowest bit of flags indicates whether the
# var-keyword argument is placed at the top of the stack
if t.op == self.opc.CALL_FUNCTION_EX and t.attr & 1:
t.kind = "CALL_FUNCTION_EX_KW"
pass
elif t.op == self.opc.BUILD_STRING:
t.kind = "BUILD_STRING_%s" % t.attr
elif t.op == self.opc.CALL_FUNCTION_KW:
t.kind = "CALL_FUNCTION_KW_%s" % t.attr
elif t.op == self.opc.FORMAT_VALUE:
if t.attr & 0x4:
t.kind = "FORMAT_VALUE_ATTR"
pass
elif t.op == self.opc.BUILD_MAP_UNPACK_WITH_CALL:
t.kind = "BUILD_MAP_UNPACK_WITH_CALL_%d" % t.attr
elif not self.is_pypy and t.op == self.opc.BUILD_TUPLE_UNPACK_WITH_CALL:
t.kind = "BUILD_TUPLE_UNPACK_WITH_CALL_%d" % t.attr
new_tokens.append(t)
return new_tokens, customize
if __name__ == "__main__":
from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str
if PYTHON_VERSION_TRIPLE[:2] == (3, 7):
import inspect
co = inspect.currentframe().f_code # type: ignore
tokens, customize = Scanner37().ingest(co)
for t in tokens:
print(t.format())
pass
else:
print(f"Need to be Python 3.7 to demo; I am version {version_tuple_to_str()}.")