3.6+ extended arg handling; sync with decompyle3.

Use 3.8 scanner now.

TODO: Need to investigate what's up with 3.7/01_extended_arg.py
This commit is contained in:
rocky
2020-01-23 13:35:22 -05:00
parent eeb48818f3
commit 28a80a0132
4 changed files with 88 additions and 38 deletions

View File

@@ -7,7 +7,7 @@ SKIP_TESTS=(
[test_baseexception.py]=1 # [test_baseexception.py]=1 #
[test_bdb.py]=1 # [test_bdb.py]=1 #
[test_buffer.py]=1 # parse error [test_buffer.py]=1 # parse error
[test_builtin.py]=1 # parser error [test_builtin.py]=1 # parse error
[test_clinic.py]=1 # it fails on its own [test_clinic.py]=1 # it fails on its own
[test_cmath.py]=1 # test assertion failure [test_cmath.py]=1 # test assertion failure
[test_cmd_line.py]=1 # Interactive? [test_cmd_line.py]=1 # Interactive?

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2019 by Rocky Bernstein # Copyright (c) 2019-2020 by Rocky Bernstein
# #
# This program is free software: you can redistribute it and/or modify # This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by # it under the terms of the GNU General Public License as published by
@@ -12,15 +12,17 @@
# #
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Python 3.8 bytecode decompiler scanner """
Python 3.8 bytecode decompiler scanner.
Does some token massaging of xdis-disassembled instructions to make Does some additional massaging of xdis-disassembled instructions to
things easier for decompilation. make things easier for decompilation.
This sets up opcodes Python's 3.8 and calls a generalized This sets up opcodes Python's 3.8 and calls a generalized
scanner routine for Python 3.7 and up. scanner routine for Python 3.7 and up.
""" """
from uncompyle6.scanners.tok import off2int
from uncompyle6.scanners.scanner37 import Scanner37 from uncompyle6.scanners.scanner37 import Scanner37
from uncompyle6.scanners.scanner37base import Scanner37Base from uncompyle6.scanners.scanner37base import Scanner37Base
@@ -34,6 +36,7 @@ JUMP_OPs = opc.JUMP_OPS
class Scanner38(Scanner37): class Scanner38(Scanner37):
def __init__(self, show_asm=None):
    """Set up a scanner for Python 3.8 bytecode.

    show_asm is passed through unchanged to Scanner37Base.__init__.
    """
    # The base-class initializer is invoked explicitly with the 3.8
    # version number.
    Scanner37Base.__init__(self, 3.8, show_asm)
    # When true, ingest() prints the loop ranges it detects.
    self.debug = False
pass pass
@@ -42,30 +45,77 @@ class Scanner38(Scanner37):
tokens, customize = super(Scanner38, self).ingest( tokens, customize = super(Scanner38, self).ingest(
co, classname, code_objects, show_asm co, classname, code_objects, show_asm
) )
# Hacky way to detect loop ranges.
# The key in jump_back_targets is the start of the loop.
# The value is where the loop ends. In current Python,
# JUMP_BACKS are always to loops. And blocks are ordered so that the
# JUMP_BACK with the highest offset will be where the range ends.
jump_back_targets = {}
for token in tokens:
if token.kind == "JUMP_BACK":
jump_back_targets[token.attr] = token.offset
pass
pass
if self.debug and jump_back_targets:
print(jump_back_targets)
loop_ends: List[int] = []
next_end = tokens[len(tokens)-1].off2int() + 10
for i, token in enumerate(tokens): for i, token in enumerate(tokens):
opname = token.kind opname = token.kind
if opname in ("JUMP_FORWARD", "JUMP_ABSOLUTE"): offset = token.offset
# Turn JUMPs into BREAK_LOOP if offset == next_end:
loop_ends.pop()
if self.debug:
print(f"{' ' * len(loop_ends)}remove loop offset {offset}")
pass
next_end = loop_ends[-1] if len(loop_ends) else tokens[len(tokens)-1].off2int() + 10
if offset in jump_back_targets:
next_end = off2int(jump_back_targets[offset], prefer_last=False)
if self.debug:
print(f"{' ' * len(loop_ends)}adding loop offset {offset} ending at {next_end}")
loop_ends.append(next_end)
# Turn JUMP opcodes into "BREAK_LOOP" opcodes.
# FIXME: this should be replaced by proper control flow.
if opname in ("JUMP_FORWARD", "JUMP_ABSOLUTE") and len(loop_ends):
jump_target = token.attr jump_target = token.attr
if opname == "JUMP_ABSOLUTE" and token.offset >= jump_target: if opname == "JUMP_ABSOLUTE" and jump_target <= next_end:
# Not a forward jump, so continue # Not a forward-enough jump to break out of the next loop, so continue.
# FIXME: Do we need "continue" detection? # FIXME: Do we need "continue" detection?
continue continue
# We also want to avoid confusing BREAK_LOOPS with parts of the
# grammar rules for loops. (Perhaps we should change the grammar.)
# Try to find an adjacent JUMP_BACK which is part of the normal loop end.
if i + 1 < len(tokens) and tokens[i + 1] == "JUMP_BACK": if i + 1 < len(tokens) and tokens[i + 1] == "JUMP_BACK":
# Sometimes the jump back is *after* the break... # Sometimes the jump back is after the "break" instruction..
jump_back_index = i + 1 jump_back_index = i + 1
else: else:
# and sometimes it is *before* where we jumped to. # and sometimes, because of jump-to-jump optimization, it is before the
# jump target instruction.
jump_back_index = self.offset2tok_index[jump_target] - 1 jump_back_index = self.offset2tok_index[jump_target] - 1
while tokens[jump_back_index].kind.startswith("COME_FROM_"): while tokens[jump_back_index].kind.startswith("COME_FROM_"):
jump_back_index -= 1 jump_back_index -= 1
pass pass
pass pass
jump_back_token = tokens[jump_back_index] jump_back_token = tokens[jump_back_index]
if (
# Is this a forward jump not next to a JUMP_BACK ? ...
break_loop = (
token.linestart
and jump_back_token != "JUMP_BACK"
)
# or if there is looping jump back, then that loop
# should start before where the "break" instruction sits.
if break_loop or (
jump_back_token == "JUMP_BACK" jump_back_token == "JUMP_BACK"
and jump_back_token.attr < token.offset and jump_back_token.attr < token.off2int()
): ):
token.kind = "BREAK_LOOP" token.kind = "BREAK_LOOP"
pass pass

View File

@@ -22,6 +22,28 @@ if PYTHON3:
intern = sys.intern intern = sys.intern
def off2int(offset, prefer_last=True):
    """Convert a token offset into a plain integer.

    offset is either an int, which is returned as is, or a string made
    of one or two "_"-separated integers.

    A two-part string whose second number is exactly two more than the
    first is the offset of an instruction with an extended arg; for it,
    prefer_last selects the later (True) or the earlier (False) number.
    For any other two-part string the first number is returned.
    """
    if isinstance(offset, int):
        return offset

    assert isinstance(offset, str)
    parts = [int(piece) for piece in offset.split("_")]
    if len(parts) == 1:
        return parts[0]

    assert len(parts) == 2
    first, second = parts
    # "N_N+2" marks an instruction with an extended arg. For things that
    # compare against offsets, the later offset is generally the one wanted.
    if second - first == 2 and prefer_last:
        return second
    # Otherwise this is either an extended-arg offset where the caller asked
    # for the earlier number, or probably a "COME_FROM"-type offset, where
    # the second number is just a count and not really an offset.
    return first
class Token: class Token:
""" """
Class representing a byte-code instruction. Class representing a byte-code instruction.
@@ -44,14 +66,14 @@ class Token:
op=None, op=None,
has_arg=None, has_arg=None,
opc=None, opc=None,
has_extended_arg=False has_extended_arg=False,
): ):
self.kind = intern(opname) self.kind = intern(opname)
self.has_arg = has_arg self.has_arg = has_arg
self.attr = attr self.attr = attr
self.pattr = pattr self.pattr = pattr
if has_extended_arg: if has_extended_arg:
self.offset = "%d_%d" % (offset, offset+2) self.offset = "%d_%d" % (offset, offset + 2)
else: else:
self.offset = offset self.offset = offset
@@ -165,29 +187,7 @@ class Token:
raise IndexError raise IndexError
def off2int(self, prefer_last=True):
    """Return this token's offset as a plain integer.

    self.offset is an int, or a "_"-joined string such as "N_N+2" for an
    instruction that has an extended arg. The conversion is delegated to
    the module-level off2int() function.

    prefer_last is forwarded to that function: for an extended-arg offset
    it selects the later (True) or the earlier (False) of the two numbers.
    """
    # Pass prefer_last along explicitly; calling off2int(self.offset)
    # alone would always use the default and ignore the caller's choice.
    return off2int(self.offset, prefer_last)
NoneToken = Token("LOAD_CONST", offset=-1, attr=None, pattr=None) NoneToken = Token("LOAD_CONST", offset=-1, attr=None, pattr=None)