You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-04 01:09:52 +08:00
3.6+ extended arg handling; sync with decompyle3.
Use 3.8 scanner now. TODO: Need to investigate what's up with 3.7/01_extended_arg.py
This commit is contained in:
@@ -7,7 +7,7 @@ SKIP_TESTS=(
|
||||
[test_baseexception.py]=1 #
|
||||
[test_bdb.py]=1 #
|
||||
[test_buffer.py]=1 # parse error
|
||||
[test_builtin.py]=1 # parser error
|
||||
[test_builtin.py]=1 # parse error
|
||||
[test_clinic.py]=1 # it fails on its own
|
||||
[test_cmath.py]=1 # test assertion failure
|
||||
[test_cmd_line.py]=1 # Interactive?
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2019 by Rocky Bernstein
|
||||
# Copyright (c) 2019-2020 by Rocky Bernstein
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@@ -12,15 +12,17 @@
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Python 3.8 bytecode decompiler scanner
|
||||
"""
|
||||
Python 3.8 bytecode decompiler scanner.
|
||||
|
||||
Does some token massaging of xdis-disassembled instructions to make
|
||||
things easier for decompilation.
|
||||
Does some additional massaging of xdis-disassembled instructions to
|
||||
make things easier for decompilation.
|
||||
|
||||
This sets up Python 3.8's opcodes and calls a generalized
|
||||
scanner routine for Python 3.7 and up.
|
||||
"""
|
||||
|
||||
from uncompyle6.scanners.tok import off2int
|
||||
from uncompyle6.scanners.scanner37 import Scanner37
|
||||
from uncompyle6.scanners.scanner37base import Scanner37Base
|
||||
|
||||
@@ -34,6 +36,7 @@ JUMP_OPs = opc.JUMP_OPS
|
||||
class Scanner38(Scanner37):
|
||||
def __init__(self, show_asm=None):
    """Set up a scanner for Python 3.8 bytecode.

    ``show_asm`` is handed unchanged to ``Scanner37Base.__init__``.
    """
    # Initialization goes straight to Scanner37Base, with 3.8 given as
    # the version.
    Scanner37Base.__init__(self, 3.8, show_asm)
    # Debug printing of loop ranges is off unless a caller turns it on.
    self.debug = False
|
||||
|
||||
pass
|
||||
@@ -42,30 +45,77 @@ class Scanner38(Scanner37):
|
||||
tokens, customize = super(Scanner38, self).ingest(
|
||||
co, classname, code_objects, show_asm
|
||||
)
|
||||
|
||||
# Hacky way to detect loop ranges.
|
||||
# The key in jump_back_targets is the start of the loop.
|
||||
# The value is where the loop ends. In current Python,
|
||||
# JUMP_BACKS are always to loops. And blocks are ordered so that the
|
||||
# JUMP_BACK with the highest offset will be where the range ends.
|
||||
jump_back_targets = {}
|
||||
for token in tokens:
|
||||
if token.kind == "JUMP_BACK":
|
||||
jump_back_targets[token.attr] = token.offset
|
||||
pass
|
||||
pass
|
||||
|
||||
if self.debug and jump_back_targets:
|
||||
print(jump_back_targets)
|
||||
loop_ends: List[int] = []
|
||||
next_end = tokens[len(tokens)-1].off2int() + 10
|
||||
for i, token in enumerate(tokens):
|
||||
opname = token.kind
|
||||
if opname in ("JUMP_FORWARD", "JUMP_ABSOLUTE"):
|
||||
# Turn JUMPs into BREAK_LOOP
|
||||
offset = token.offset
|
||||
if offset == next_end:
|
||||
loop_ends.pop()
|
||||
if self.debug:
|
||||
print(f"{' ' * len(loop_ends)}remove loop offset {offset}")
|
||||
pass
|
||||
next_end = loop_ends[-1] if len(loop_ends) else tokens[len(tokens)-1].off2int() + 10
|
||||
|
||||
if offset in jump_back_targets:
|
||||
next_end = off2int(jump_back_targets[offset], prefer_last=False)
|
||||
if self.debug:
|
||||
print(f"{' ' * len(loop_ends)}adding loop offset {offset} ending at {next_end}")
|
||||
loop_ends.append(next_end)
|
||||
|
||||
# Turn JUMP opcodes into "BREAK_LOOP" opcodes.
|
||||
# FIXME: this should be replaced by proper control flow.
|
||||
if opname in ("JUMP_FORWARD", "JUMP_ABSOLUTE") and len(loop_ends):
|
||||
jump_target = token.attr
|
||||
|
||||
if opname == "JUMP_ABSOLUTE" and token.offset >= jump_target:
|
||||
# Not a forward jump, so continue
|
||||
if opname == "JUMP_ABSOLUTE" and jump_target <= next_end:
|
||||
# Not a forward-enough jump to break out of the next loop, so continue.
|
||||
# FIXME: Do we need "continue" detection?
|
||||
continue
|
||||
|
||||
# We also want to avoid confusing BREAK_LOOPS with parts of the
|
||||
# grammar rules for loops. (Perhaps we should change the grammar.)
|
||||
# Try to find an adjacent JUMP_BACK which is part of the normal loop end.
|
||||
|
||||
if i + 1 < len(tokens) and tokens[i + 1] == "JUMP_BACK":
|
||||
# Sometimes the jump back is *after* the break...
|
||||
# Sometimes the jump back is after the "break" instruction..
|
||||
jump_back_index = i + 1
|
||||
else:
|
||||
# and sometimes it is *before* where we jumped to.
|
||||
# and sometimes, because of jump-to-jump optimization, it is before the
|
||||
# jump target instruction.
|
||||
jump_back_index = self.offset2tok_index[jump_target] - 1
|
||||
while tokens[jump_back_index].kind.startswith("COME_FROM_"):
|
||||
jump_back_index -= 1
|
||||
pass
|
||||
pass
|
||||
jump_back_token = tokens[jump_back_index]
|
||||
if (
|
||||
|
||||
# Is this a forward jump not next to a JUMP_BACK ? ...
|
||||
break_loop = (
|
||||
token.linestart
|
||||
and jump_back_token != "JUMP_BACK"
|
||||
)
|
||||
|
||||
# or if there is looping jump back, then that loop
|
||||
# should start before where the "break" instruction sits.
|
||||
if break_loop or (
|
||||
jump_back_token == "JUMP_BACK"
|
||||
and jump_back_token.attr < token.offset
|
||||
and jump_back_token.attr < token.off2int()
|
||||
):
|
||||
token.kind = "BREAK_LOOP"
|
||||
pass
|
||||
|
@@ -22,6 +22,28 @@ if PYTHON3:
|
||||
intern = sys.intern
|
||||
|
||||
|
||||
def off2int(offset, prefer_last=True):
    """Convert a token offset to an integer.

    ``offset`` is either an int, which is returned as is, or a string
    holding one integer or two integers joined by "_".

    For a two-part string whose second number is exactly two more than
    the first, the pair is taken to be an instruction with an extended
    arg: the second number is returned when ``prefer_last`` is true and
    the first number otherwise.  For any other two-part string, the
    second number is probably a count (a "COME_FROM"-type offset) rather
    than an offset, so the first number is returned.

    Raises TypeError if ``offset`` is neither an int nor a str, and
    ValueError if a string offset does not have one or two
    "_"-separated integer parts.
    """
    if isinstance(offset, int):
        return offset

    # Explicit checks rather than ``assert``: assertions are stripped
    # under ``python -O``, which would let bad input fail later with an
    # unrelated error.
    if not isinstance(offset, str):
        raise TypeError(
            "offset must be an int or a str, not %s" % type(offset).__name__
        )

    offsets = [int(part) for part in offset.split("_")]
    if len(offsets) == 1:
        return offsets[0]
    if len(offsets) != 2:
        raise ValueError(
            "offset %r should have at most two '_'-separated parts" % offset
        )

    offset_1, offset_2 = offsets
    if offset_1 + 2 == offset_2 and prefer_last:
        # This is an instruction with an extended arg.  For things that
        # compare against offsets, we generally want the later offset.
        return offset_2

    # Either the caller asked for the first offset of an extended-arg
    # instruction, or this is probably a "COME_FROM"-type offset where
    # the second number is just a count and not really an offset.
    return offset_1
|
||||
|
||||
|
||||
class Token:
|
||||
"""
|
||||
Class representing a byte-code instruction.
|
||||
@@ -44,7 +66,7 @@ class Token:
|
||||
op=None,
|
||||
has_arg=None,
|
||||
opc=None,
|
||||
has_extended_arg=False
|
||||
has_extended_arg=False,
|
||||
):
|
||||
self.kind = intern(opname)
|
||||
self.has_arg = has_arg
|
||||
@@ -165,29 +187,7 @@ class Token:
|
||||
raise IndexError
|
||||
|
||||
def off2int(self, prefer_last=True):
    """Return this token's offset as an integer.

    Delegates to the module-level ``off2int()`` function, passing
    ``self.offset`` and ``prefer_last``.  ``prefer_last`` selects which
    of the two numbers is returned when the offset is a string for an
    instruction with an extended arg.
    """
    # Forward ``prefer_last``: without it, a caller asking for
    # prefer_last=False would silently get the default (True) behavior.
    return off2int(self.offset, prefer_last)
|
||||
|
||||
|
||||
NoneToken = Token("LOAD_CONST", offset=-1, attr=None, pattr=None)
|
||||
|
Reference in New Issue
Block a user