You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-04 01:09:52 +08:00
Simplify BREAK_LOOP detection...
by making more use of linestart. At least for now...
This commit is contained in:
@@ -495,7 +495,8 @@ class Scanner2(Scanner):
|
|||||||
|
|
||||||
if show_asm in ("both", "after"):
|
if show_asm in ("both", "after"):
|
||||||
print("\n# ---- tokenization:")
|
print("\n# ---- tokenization:")
|
||||||
for t in new_tokens:
|
# FIXME: t.format() is changing tokens!
|
||||||
|
for t in new_tokens.copy():
|
||||||
print(t.format(line_prefix=""))
|
print(t.format(line_prefix=""))
|
||||||
print()
|
print()
|
||||||
return new_tokens, customize
|
return new_tokens, customize
|
||||||
|
@@ -353,7 +353,8 @@ class Scanner26(scan.Scanner2):
|
|||||||
|
|
||||||
if show_asm in ("both", "after"):
|
if show_asm in ("both", "after"):
|
||||||
print("\n# ---- tokenization:")
|
print("\n# ---- tokenization:")
|
||||||
for t in tokens:
|
# FIXME: t.format() is changing tokens!
|
||||||
|
for t in tokens.copy():
|
||||||
print(t.format(line_prefix=""))
|
print(t.format(line_prefix=""))
|
||||||
print()
|
print()
|
||||||
return tokens, customize
|
return tokens, customize
|
||||||
|
@@ -797,7 +797,8 @@ class Scanner3(Scanner):
|
|||||||
|
|
||||||
if show_asm in ("both", "after"):
|
if show_asm in ("both", "after"):
|
||||||
print("\n# ---- tokenization:")
|
print("\n# ---- tokenization:")
|
||||||
for t in new_tokens:
|
# FIXME: t.format() is changing tokens!
|
||||||
|
for t in new_tokens.copy():
|
||||||
print(t.format(line_prefix=""))
|
print(t.format(line_prefix=""))
|
||||||
print()
|
print()
|
||||||
return new_tokens, customize
|
return new_tokens, customize
|
||||||
|
@@ -228,13 +228,13 @@ class Scanner37Base(Scanner):
|
|||||||
|
|
||||||
if show_asm in ("both", "before"):
|
if show_asm in ("both", "before"):
|
||||||
print("\n# ---- disassembly:")
|
print("\n# ---- disassembly:")
|
||||||
self.insts = bytecode.disassemble_bytes(
|
bytecode.disassemble_bytes(
|
||||||
co.co_code,
|
co.co_code,
|
||||||
varnames=co.co_varnames,
|
varnames=co.co_varnames,
|
||||||
names=co.co_names,
|
names=co.co_names,
|
||||||
constants=co.co_consts,
|
constants=co.co_consts,
|
||||||
cells=bytecode._cell_names,
|
cells=bytecode._cell_names,
|
||||||
linestarts=bytecode._linestarts,
|
line_starts=bytecode._linestarts,
|
||||||
asm_format="extended",
|
asm_format="extended",
|
||||||
filename=co.co_filename,
|
filename=co.co_filename,
|
||||||
show_source=True,
|
show_source=True,
|
||||||
@@ -481,12 +481,17 @@ class Scanner37Base(Scanner):
|
|||||||
next_opname = self.insts[i + 1].opname
|
next_opname = self.insts[i + 1].opname
|
||||||
|
|
||||||
# 'Continue's include jumps to loops that are not
|
# 'Continue's include jumps to loops that are not
|
||||||
# and the end of a block which follow with POP_BLOCK and COME_FROM_LOOP.
|
# at the end of a block which follow with
|
||||||
# If the JUMP_ABSOLUTE is to a FOR_ITER and it is followed by another JUMP_FORWARD
|
# POP_BLOCK and COME_FROM_LOOP. If the
|
||||||
# then we'll take it as a "continue".
|
# JUMP_ABSOLUTE is to a FOR_ITER, and it is
|
||||||
is_continue = (
|
# followed by another JUMP_FORWARD then we'll take
|
||||||
self.insts[self.offset2inst_index[target]].opname == "FOR_ITER"
|
# it as a "continue".
|
||||||
and self.insts[i + 1].opname == "JUMP_FORWARD"
|
next_inst = self.insts[i + 1]
|
||||||
|
is_continue = self.insts[
|
||||||
|
self.offset2inst_index[target]
|
||||||
|
].opname == "FOR_ITER" and next_inst.opname in (
|
||||||
|
"JUMP_FORWARD",
|
||||||
|
"JUMP_ABSOLUTE",
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.version < (3, 8) and (
|
if self.version < (3, 8) and (
|
||||||
@@ -501,21 +506,65 @@ class Scanner37Base(Scanner):
|
|||||||
):
|
):
|
||||||
opname = "CONTINUE"
|
opname = "CONTINUE"
|
||||||
else:
|
else:
|
||||||
|
# "continue" versus "break_loop" detection is more complicated
|
||||||
|
# because "continue" to an outer loop is really a "break loop"
|
||||||
opname = "JUMP_BACK"
|
opname = "JUMP_BACK"
|
||||||
|
|
||||||
# FIXME: this is a hack to catch stuff like:
|
# FIXME: this is a hack to catch stuff like:
|
||||||
# if x: continue
|
# if x: continue
|
||||||
# the "continue" is not on a new line.
|
# the "continue" is not on a new line.
|
||||||
# There are other situations where we don't catch
|
#
|
||||||
# CONTINUE as well.
|
# Another situation is where we have
|
||||||
if tokens[-1].kind == "JUMP_BACK" and tokens[-1].attr <= argval:
|
# for method in methods:
|
||||||
|
# for B in method:
|
||||||
|
# if c:
|
||||||
|
# return
|
||||||
|
# break # A "continue" but not the innermost one
|
||||||
|
if tokens[-1].kind == "JUMP_LOOP" and tokens[-1].attr <= argval:
|
||||||
if tokens[-2].kind == "BREAK_LOOP":
|
if tokens[-2].kind == "BREAK_LOOP":
|
||||||
del tokens[-1]
|
del tokens[-1]
|
||||||
|
j -= 1
|
||||||
|
else:
|
||||||
|
# "intern" is used because we are
|
||||||
|
# changing the *previous* token. A
|
||||||
|
# POP_TOP suggests a "break" rather
|
||||||
|
# than a "continue"?
|
||||||
|
if tokens[-2] == "POP_TOP" and (
|
||||||
|
is_continue and next_inst.argval != tokens[-1].attr
|
||||||
|
):
|
||||||
|
tokens[-1].kind = sys.intern("BREAK_LOOP")
|
||||||
else:
|
else:
|
||||||
# intern is used because we are changing the *previous* token
|
|
||||||
tokens[-1].kind = sys.intern("CONTINUE")
|
tokens[-1].kind = sys.intern("CONTINUE")
|
||||||
if last_op_was_break and opname == "CONTINUE":
|
last_continue = tokens[-1]
|
||||||
last_op_was_break = False
|
pass
|
||||||
continue
|
pass
|
||||||
|
pass
|
||||||
|
# elif (
|
||||||
|
# last_continue is not None
|
||||||
|
# and tokens[-1].kind == "JUMP_LOOP"
|
||||||
|
# and last_continue.attr <= tokens[-1].attr
|
||||||
|
# and last_continue.offset > tokens[-1].attr
|
||||||
|
# ):
|
||||||
|
# # Handle mis-characterized "CONTINUE"
|
||||||
|
# # We have a situation like:
|
||||||
|
# # loop ... (for or while)
|
||||||
|
# # loop
|
||||||
|
# # if ...: # code below starts here
|
||||||
|
# # break # not continue
|
||||||
|
# #
|
||||||
|
# # POP_JUMP_IF_FALSE_LOOP # to outer loop
|
||||||
|
# # JUMP_LOOP # to inner loop
|
||||||
|
# # ...
|
||||||
|
# # JUMP_LOOP # to outer loop
|
||||||
|
# tokens[-2].kind = sys.intern("BREAK_LOOP")
|
||||||
|
# pass
|
||||||
|
|
||||||
|
# if last_op_was_break and opname == "CONTINUE":
|
||||||
|
# last_op_was_break = False
|
||||||
|
# continue
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
opname = "JUMP_FORWARD"
|
||||||
|
|
||||||
elif inst.offset in self.load_asserts:
|
elif inst.offset in self.load_asserts:
|
||||||
opname = "LOAD_ASSERT"
|
opname = "LOAD_ASSERT"
|
||||||
@@ -538,9 +587,10 @@ class Scanner37Base(Scanner):
|
|||||||
)
|
)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if show_asm in ("both", "after"):
|
if show_asm in ("both", "after") and self.version < (3, 8):
|
||||||
print("\n# ---- tokenization:")
|
print("\n# ---- tokenization:")
|
||||||
for t in tokens:
|
# FIXME: t.format() is changing tokens!
|
||||||
|
for t in tokens.copy():
|
||||||
print(t.format(line_prefix=""))
|
print(t.format(line_prefix=""))
|
||||||
print()
|
print()
|
||||||
return tokens, customize
|
return tokens, customize
|
||||||
|
@@ -24,13 +24,13 @@ scanner routine for Python 3.7 and up.
|
|||||||
|
|
||||||
from typing import Dict, Tuple
|
from typing import Dict, Tuple
|
||||||
|
|
||||||
from uncompyle6.scanners.tok import off2int
|
|
||||||
from uncompyle6.scanners.scanner37 import Scanner37
|
|
||||||
from uncompyle6.scanners.scanner37base import Scanner37Base
|
|
||||||
|
|
||||||
# bytecode verification, verify(), uses JUMP_OPs from here
|
# bytecode verification, verify(), uses JUMP_OPs from here
|
||||||
from xdis.opcodes import opcode_38 as opc
|
from xdis.opcodes import opcode_38 as opc
|
||||||
|
|
||||||
|
from uncompyle6.scanners.scanner37 import Scanner37
|
||||||
|
from uncompyle6.scanners.scanner37base import Scanner37Base
|
||||||
|
from uncompyle6.scanners.tok import off2int
|
||||||
|
|
||||||
# bytecode verification, verify(), uses JUMP_OPS from here
|
# bytecode verification, verify(), uses JUMP_OPS from here
|
||||||
JUMP_OPs = opc.JUMP_OPS
|
JUMP_OPs = opc.JUMP_OPS
|
||||||
|
|
||||||
@@ -121,35 +121,26 @@ class Scanner38(Scanner37):
|
|||||||
new_tokens.append(token)
|
new_tokens.append(token)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# We also want to avoid confusing BREAK_LOOPS with parts of the
|
j = i
|
||||||
# grammar rules for loops. (Perhaps we should change the grammar.)
|
while tokens[j - 1] in ("POP_TOP", "POP_BLOCK", "POP_EXCEPT"):
|
||||||
# Try to find an adjacent JUMP_BACK which is part of the normal loop end.
|
j -= 1
|
||||||
|
if tokens[j].linestart:
|
||||||
|
break
|
||||||
|
token_with_linestart = tokens[j]
|
||||||
|
|
||||||
if i + 1 < len(tokens) and tokens[i + 1] == "JUMP_BACK":
|
if token_with_linestart.linestart:
|
||||||
# Sometimes the jump back is after the "break" instruction..
|
|
||||||
jump_back_index = i + 1
|
|
||||||
else:
|
|
||||||
# and sometimes, because of jump-to-jump optimization, it is before the
|
|
||||||
# jump target instruction.
|
|
||||||
jump_back_index = self.offset2tok_index[jump_target] - 1
|
|
||||||
while tokens[jump_back_index].kind.startswith("COME_FROM_"):
|
|
||||||
jump_back_index -= 1
|
|
||||||
pass
|
|
||||||
pass
|
|
||||||
jump_back_token = tokens[jump_back_index]
|
|
||||||
|
|
||||||
# Is this a forward jump not next to a JUMP_BACK ? ...
|
|
||||||
break_loop = token.linestart and jump_back_token != "JUMP_BACK"
|
|
||||||
|
|
||||||
# or if there is looping jump back, then that loop
|
|
||||||
# should start before where the "break" instruction sits.
|
|
||||||
if break_loop or (
|
|
||||||
jump_back_token == "JUMP_BACK"
|
|
||||||
and jump_back_token.attr < token.off2int()
|
|
||||||
):
|
|
||||||
token.kind = "BREAK_LOOP"
|
token.kind = "BREAK_LOOP"
|
||||||
|
|
||||||
pass
|
pass
|
||||||
new_tokens.append(token)
|
new_tokens.append(token)
|
||||||
|
|
||||||
|
if show_asm in ("both", "after"):
|
||||||
|
print("\n# ---- tokenization:")
|
||||||
|
# FIXME: t.format() is changing tokens!
|
||||||
|
for t in new_tokens.copy():
|
||||||
|
print(t.format(line_prefix=""))
|
||||||
|
print()
|
||||||
|
|
||||||
return new_tokens, customize
|
return new_tokens, customize
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user