Partial sync of 3.7 & 3.8 scanner with decompyle3

This commit is contained in:
rocky
2022-05-14 08:42:04 -04:00
parent 87fb83de08
commit 3a9fa652b4
3 changed files with 27 additions and 28 deletions

View File

@@ -13,7 +13,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Python 3.7 bytecode decompiler scanner
Python 3.7 bytecode decompiler scanner.
Does some additional massaging of xdis-disassembled instructions to
make things easier for decompilation.
@@ -33,10 +33,11 @@ from xdis.opcodes import opcode_37 as opc
# bytecode verification, verify(), uses JUMP_OPS from here
JUMP_OPs = opc.JUMP_OPS
class Scanner37(Scanner37Base):
def __init__(self, show_asm=None, is_pypy: bool=False):
Scanner37Base.__init__(self, (3, 7), show_asm)
self.is_pypy = is_pypy
def __init__(self, show_asm=None, debug="", is_pypy=False):
Scanner37Base.__init__(self, (3, 7), show_asm, debug, is_pypy)
self.debug = debug
return
pass
@@ -139,7 +140,9 @@ class Scanner37(Scanner37Base):
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take.
"""
tokens, customize = Scanner37Base.ingest(self, co, classname, code_objects, show_asm)
tokens, customize = Scanner37Base.ingest(
self, co, classname, code_objects, show_asm
)
new_tokens = []
for i, t in enumerate(tokens):
# things that smash new_tokens like BUILD_LIST have to come first.
@@ -179,6 +182,7 @@ class Scanner37(Scanner37Base):
return new_tokens, customize
if __name__ == "__main__":
from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str
@@ -191,4 +195,4 @@ if __name__ == "__main__":
print(t.format())
pass
else:
print("Need to be Python 3.7 to demo; I am version %s" % version_tuple_to_str())
print(f"Need to be Python 3.7 to demo; I am version {version_tuple_to_str()}.")

View File

@@ -29,7 +29,7 @@ For example:
Finally we save token information.
"""
from typing import Any, Dict, List, Set
from typing import Any, Dict, List, Set, Tuple
from xdis import iscode, instruction_size, Instruction
from xdis.bytecode import _get_const_info
@@ -48,8 +48,10 @@ globals().update(op3.opmap)
class Scanner37Base(Scanner):
def __init__(self, version, show_asm=None, is_pypy=False):
def __init__(self, version: Tuple[int], show_asm=None, debug="", is_pypy=False):
super(Scanner37Base, self).__init__(version, show_asm, is_pypy)
self.debug = debug
self.is_pypy = is_pypy
# Create opcode classification sets
# Note: super initialization above initializes self.opc
@@ -888,16 +890,6 @@ class Scanner37Base(Scanner):
pass
return
def is_jump_back(self, offset, extended_arg):
"""
Return True if the code at offset is some sort of jump back.
That is, it is a "JUMP_ABSOLUTE" instruction whose target
comes before offset, i.e. an absolute jump that goes backward.
"""
if self.code[offset] != self.opc.JUMP_ABSOLUTE:
return False
return offset > self.get_target(offset, extended_arg)
def next_except_jump(self, start):
"""
Return the next jump that was generated by an except SomeException:

View File

@@ -22,6 +22,8 @@ This sets up opcodes Python's 3.8 and calls a generalized
scanner routine for Python 3.7 and up.
"""
from typing import Dict, Tuple
from uncompyle6.scanners.tok import off2int
from uncompyle6.scanners.scanner37 import Scanner37
from uncompyle6.scanners.scanner37base import Scanner37Base
@@ -34,14 +36,16 @@ JUMP_OPs = opc.JUMP_OPS
class Scanner38(Scanner37):
def __init__(self, show_asm=None):
Scanner37Base.__init__(self, (3, 8), show_asm)
self.debug = False
def __init__(self, show_asm=None, debug="", is_pypy=False):
Scanner37Base.__init__(self, (3, 8), show_asm, debug, is_pypy)
self.debug = debug
return
pass
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
def ingest(
self, co, classname=None, code_objects={}, show_asm=None
) -> Tuple[list, dict]:
"""
Create "tokens" from the bytecode of a Python code object. Largely these
are the opcode name, but in some cases that has been modified to make parsing
@@ -69,7 +73,7 @@ class Scanner38(Scanner37):
# The value is where the loop ends. In current Python,
# JUMP_BACKS are always to loops. And blocks are ordered so that the
# JUMP_BACK with the highest offset will be where the range ends.
jump_back_targets = {}
jump_back_targets: Dict[int, int] = {}
for token in tokens:
if token.kind == "JUMP_BACK":
jump_back_targets[token.attr] = token.offset
@@ -88,7 +92,7 @@ class Scanner38(Scanner37):
if offset == next_end:
loop_ends.pop()
if self.debug:
print("%sremove loop offset %s" % (" " * len(loop_ends), offset))
print(f"{' ' * len(loop_ends)}remove loop offset {offset}")
pass
next_end = (
loop_ends[-1]
@@ -102,13 +106,12 @@ class Scanner38(Scanner37):
next_end = off2int(jump_back_targets[offset], prefer_last=False)
if self.debug:
print(
"%sadding loop offset %s ending at %s"
% (" " * len(loop_ends), offset, next_end)
f"{' ' * len(loop_ends)}adding loop offset {offset} ending at {next_end}"
)
loop_ends.append(next_end)
# Turn JUMP opcodes into "BREAK_LOOP" opcodes.
# FIXME: this should be replaced by proper control flow.
# FIXME!!!!: this should be replaced by proper control flow.
if opname in ("JUMP_FORWARD", "JUMP_ABSOLUTE") and len(loop_ends):
jump_target = token.attr
@@ -162,4 +165,4 @@ if __name__ == "__main__":
print(t.format())
pass
else:
print("Need to be Python 3.8 to demo; I am version %s" % version_tuple_to_str())
print(f"Need to be Python 3.8 to demo; I am version {version_tuple_to_str()}.")