Partial sync of 3.7 & 3.8 scanner with decompyle3

This commit is contained in:
rocky
2022-05-14 08:42:04 -04:00
parent 87fb83de08
commit 3a9fa652b4
3 changed files with 27 additions and 28 deletions

View File

@@ -13,7 +13,7 @@
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
""" """
Python 3.7 bytecode decompiler scanner Python 3.7 bytecode decompiler scanner.
Does some additional massaging of xdis-disassembled instructions to Does some additional massaging of xdis-disassembled instructions to
make things easier for decompilation. make things easier for decompilation.
@@ -33,10 +33,11 @@ from xdis.opcodes import opcode_37 as opc
# bytecode verification, verify(), uses JUMP_OPS from here # bytecode verification, verify(), uses JUMP_OPS from here
JUMP_OPs = opc.JUMP_OPS JUMP_OPs = opc.JUMP_OPS
class Scanner37(Scanner37Base): class Scanner37(Scanner37Base):
def __init__(self, show_asm=None, is_pypy: bool=False): def __init__(self, show_asm=None, debug="", is_pypy=False):
Scanner37Base.__init__(self, (3, 7), show_asm) Scanner37Base.__init__(self, (3, 7), show_asm, debug, is_pypy)
self.is_pypy = is_pypy self.debug = debug
return return
pass pass
@@ -139,7 +140,9 @@ class Scanner37(Scanner37Base):
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take. cause specific rules for the specific number of arguments they take.
""" """
tokens, customize = Scanner37Base.ingest(self, co, classname, code_objects, show_asm) tokens, customize = Scanner37Base.ingest(
self, co, classname, code_objects, show_asm
)
new_tokens = [] new_tokens = []
for i, t in enumerate(tokens): for i, t in enumerate(tokens):
# things that smash new_tokens like BUILD_LIST have to come first. # things that smash new_tokens like BUILD_LIST have to come first.
@@ -179,6 +182,7 @@ class Scanner37(Scanner37Base):
return new_tokens, customize return new_tokens, customize
if __name__ == "__main__": if __name__ == "__main__":
from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str
@@ -191,4 +195,4 @@ if __name__ == "__main__":
print(t.format()) print(t.format())
pass pass
else: else:
print("Need to be Python 3.7 to demo; I am version %s" % version_tuple_to_str()) print(f"Need to be Python 3.7 to demo; I am version {version_tuple_to_str()}.")

View File

@@ -29,7 +29,7 @@ For example:
Finally we save token information. Finally we save token information.
""" """
from typing import Any, Dict, List, Set from typing import Any, Dict, List, Set, Tuple
from xdis import iscode, instruction_size, Instruction from xdis import iscode, instruction_size, Instruction
from xdis.bytecode import _get_const_info from xdis.bytecode import _get_const_info
@@ -48,8 +48,10 @@ globals().update(op3.opmap)
class Scanner37Base(Scanner): class Scanner37Base(Scanner):
def __init__(self, version, show_asm=None, is_pypy=False): def __init__(self, version: Tuple[int], show_asm=None, debug="", is_pypy=False):
super(Scanner37Base, self).__init__(version, show_asm, is_pypy) super(Scanner37Base, self).__init__(version, show_asm, is_pypy)
self.debug = debug
self.is_pypy = is_pypy
# Create opcode classification sets # Create opcode classification sets
# Note: super initialization above initializes self.opc # Note: super initialization above initializes self.opc
@@ -888,16 +890,6 @@ class Scanner37Base(Scanner):
pass pass
return return
def is_jump_back(self, offset, extended_arg):
"""
Return True if the code at offset is some sort of jump back.
That is, it is an absolute jump ("JUMP_ABSOLUTE") whose target
is at a lower offset than the jump itself.
"""
if self.code[offset] != self.opc.JUMP_ABSOLUTE:
return False
return offset > self.get_target(offset, extended_arg)
def next_except_jump(self, start): def next_except_jump(self, start):
""" """
Return the next jump that was generated by an except SomeException: Return the next jump that was generated by an except SomeException:

View File

@@ -22,6 +22,8 @@ This sets up opcodes Python's 3.8 and calls a generalized
scanner routine for Python 3.7 and up. scanner routine for Python 3.7 and up.
""" """
from typing import Dict, Tuple
from uncompyle6.scanners.tok import off2int from uncompyle6.scanners.tok import off2int
from uncompyle6.scanners.scanner37 import Scanner37 from uncompyle6.scanners.scanner37 import Scanner37
from uncompyle6.scanners.scanner37base import Scanner37Base from uncompyle6.scanners.scanner37base import Scanner37Base
@@ -34,14 +36,16 @@ JUMP_OPs = opc.JUMP_OPS
class Scanner38(Scanner37): class Scanner38(Scanner37):
def __init__(self, show_asm=None): def __init__(self, show_asm=None, debug="", is_pypy=False):
Scanner37Base.__init__(self, (3, 8), show_asm) Scanner37Base.__init__(self, (3, 8), show_asm, debug, is_pypy)
self.debug = False self.debug = debug
return return
pass pass
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(
self, co, classname=None, code_objects={}, show_asm=None
) -> Tuple[list, dict]:
""" """
Create "tokens" from the bytecode of a Python code object. Largely these Create "tokens" from the bytecode of a Python code object. Largely these
are the opcode name, but in some cases that has been modified to make parsing are the opcode name, but in some cases that has been modified to make parsing
@@ -69,7 +73,7 @@ class Scanner38(Scanner37):
# The value is where the loop ends. In current Python, # The value is where the loop ends. In current Python,
# JUMP_BACKS are always to loops. And blocks are ordered so that the # JUMP_BACKS are always to loops. And blocks are ordered so that the
# JUMP_BACK with the highest offset will be where the range ends. # JUMP_BACK with the highest offset will be where the range ends.
jump_back_targets = {} jump_back_targets: Dict[int, int] = {}
for token in tokens: for token in tokens:
if token.kind == "JUMP_BACK": if token.kind == "JUMP_BACK":
jump_back_targets[token.attr] = token.offset jump_back_targets[token.attr] = token.offset
@@ -88,7 +92,7 @@ class Scanner38(Scanner37):
if offset == next_end: if offset == next_end:
loop_ends.pop() loop_ends.pop()
if self.debug: if self.debug:
print("%sremove loop offset %s" % (" " * len(loop_ends), offset)) print(f"{' ' * len(loop_ends)}remove loop offset {offset}")
pass pass
next_end = ( next_end = (
loop_ends[-1] loop_ends[-1]
@@ -102,13 +106,12 @@ class Scanner38(Scanner37):
next_end = off2int(jump_back_targets[offset], prefer_last=False) next_end = off2int(jump_back_targets[offset], prefer_last=False)
if self.debug: if self.debug:
print( print(
"%sadding loop offset %s ending at %s" f"{' ' * len(loop_ends)}adding loop offset {offset} ending at {next_end}"
% (" " * len(loop_ends), offset, next_end)
) )
loop_ends.append(next_end) loop_ends.append(next_end)
# Turn JUMP opcodes into "BREAK_LOOP" opcodes. # Turn JUMP opcodes into "BREAK_LOOP" opcodes.
# FIXME: this should be replaced by proper control flow. # FIXME!!!!: this should be replaced by proper control flow.
if opname in ("JUMP_FORWARD", "JUMP_ABSOLUTE") and len(loop_ends): if opname in ("JUMP_FORWARD", "JUMP_ABSOLUTE") and len(loop_ends):
jump_target = token.attr jump_target = token.attr
@@ -162,4 +165,4 @@ if __name__ == "__main__":
print(t.format()) print(t.format())
pass pass
else: else:
print("Need to be Python 3.8 to demo; I am version %s" % version_tuple_to_str()) print(f"Need to be Python 3.8 to demo; I am version {version_tuple_to_str()}.")