Merge branch 'python-3.0-to-3.2' into python-2.4-to-2.7

This commit is contained in:
rocky
2024-07-15 10:11:20 -04:00
19 changed files with 324 additions and 76 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,4 @@
# Next line is 1164
def foo():
    # Decompiler fixture: a lambda whose body is a conditional expression
    # and which closes over the local `name`. The lambda is built and
    # immediately discarded; only the shape of the compiled code matters.
    name = "bar"
    lambda x: compile(x, "<register %s's commit>" % name, "exec") if x else None

View File

@@ -0,0 +1,10 @@
# Adapted 3.5 from _bootstrap_external.py
def spec_from_file_location(loader, location):
    # Decompiler fixture exercising a `for ... else` nested inside an `if`.
    # Every path yields None: either the explicit `return None` in the
    # loop's `else` clause (loop finished without `break`), or by falling
    # off the end of the function.
    if loader:
        for _ in __file__:
            if location:
                break
        else:
            return None

View File

@@ -0,0 +1,74 @@
# From https://github.com/rocky/python-uncompyle6/issues/420
# Related to EXTENDED_ARG in whilestmt
#
# Regression fixture. The inline remarks below say that small edits (removing
# the first assignment, or the `{}` argument near the end) make the problem
# go away, so the statements are kept exactly as reported.
# NOTE(review): `self` and `INFORMATION` are not defined anywhere in the
# visible text -- presumably this file only has to compile, not run; confirm.
ERRPR_CODE_DEFINE = {} # Remove this and things works
try:
print()
except Exception:
var1 = 0
var2 = 1
if var1 or var2:
times = 1
# The `while` loop named in the header comment.
while times != False and self.scanner.is_open():
try:
try:
print()
except Exception:
print()
out = 0
count = 1
if out == 1:
break
elif out == 2:
count += 1
if times == 3:
self.func.emit({})
break
else:
continue
if out == 3 or out == b"":
if self.times == 3:
break
count += 1
if count == 3:
count = 0
if out == 4:
self.func.emit(ERRPR_CODE_DEFINE.ReceiedError())
else:
print()
break
continue
else:
count = 0
except Exception:
print("upper exception")
else:
try:
print("jump forward")
# In this loop several `break` statements are directly followed by a
# `continue`; presumably deliberate, as part of the reported shape.
while True:
out = self.func.read(count)
if out == b"":
self.func.emit(ERRPR_CODE_DEFINE.ReceiedError())
break
continue
imagedata = out[0]
if imagedata == b"\x05":
self.func.emit(INFORMATION.UnsupportedImage())
break
continue
if imagedata == b"\x15":
self.func.emit(INFORMATION.NoneImage())
break
continue
if out[1] == False:
start_index = imagedata.find(b"BM6")
self.func.emit(imagedata[start_index:], False)
continue
(imagedata, all_code) = imagedata
self.func.emit({})
self.func.emit({})
self.func.emit({}) # remove {} and this works
break
except Exception:
pass

View File

@@ -53,6 +53,10 @@ class Python34Parser(Python33Parser):
_ifstmts_jump ::= c_stmts_opt JUMP_ABSOLUTE JUMP_FORWARD COME_FROM _ifstmts_jump ::= c_stmts_opt JUMP_ABSOLUTE JUMP_FORWARD COME_FROM
genexpr_func ::= LOAD_ARG _come_froms FOR_ITER store comp_iter JUMP_BACK genexpr_func ::= LOAD_ARG _come_froms FOR_ITER store comp_iter JUMP_BACK
if_exp_lambda ::= expr jmp_false expr return_if_lambda come_froms return_stmt_lambda LAMBDA_MARKER
return_if_lambda ::= RETURN_END_IF_LAMBDA come_froms
return_if_stmt ::= return_expr RETURN_END_IF POP_BLOCK
""" """
def customize_grammar_rules(self, tokens, customize): def customize_grammar_rules(self, tokens, customize):

View File

@@ -107,7 +107,6 @@ class Python35Parser(Python34Parser):
# Python 3.5+ does jump optimization # Python 3.5+ does jump optimization
# In <.3.5 the below is a JUMP_FORWARD to a JUMP_ABSOLUTE. # In <.3.5 the below is a JUMP_FORWARD to a JUMP_ABSOLUTE.
return_if_stmt ::= return_expr RETURN_END_IF POP_BLOCK
return_if_lambda ::= RETURN_END_IF_LAMBDA COME_FROM return_if_lambda ::= RETURN_END_IF_LAMBDA COME_FROM
return ::= return_expr RETURN_END_IF return ::= return_expr RETURN_END_IF

View File

@@ -52,6 +52,8 @@ class Python36Parser(Python35Parser):
for_block ::= l_stmts_opt come_from_loops JUMP_BACK for_block ::= l_stmts_opt come_from_loops JUMP_BACK
come_from_loops ::= COME_FROM_LOOP* come_from_loops ::= COME_FROM_LOOP*
whilestmt ::= SETUP_LOOP testexpr l_stmts_opt
JUMP_BACK come_froms POP_BLOCK
whilestmt ::= SETUP_LOOP testexpr l_stmts_opt whilestmt ::= SETUP_LOOP testexpr l_stmts_opt
JUMP_BACK come_froms POP_BLOCK COME_FROM_LOOP JUMP_BACK come_froms POP_BLOCK COME_FROM_LOOP
whilestmt ::= SETUP_LOOP testexpr l_stmts_opt whilestmt ::= SETUP_LOOP testexpr l_stmts_opt

View File

@@ -491,7 +491,8 @@ class Scanner2(Scanner):
if show_asm in ("both", "after"): if show_asm in ("both", "after"):
print("\n# ---- tokenization:") print("\n# ---- tokenization:")
for t in new_tokens: # FIXME: t.format() is changing tokens!
for t in new_tokens.copy():
print(t.format(line_prefix="")) print(t.format(line_prefix=""))
print() print()
return new_tokens, customize return new_tokens, customize

View File

@@ -349,7 +349,8 @@ class Scanner26(Scanner2):
if show_asm in ("both", "after"): if show_asm in ("both", "after"):
print("\n# ---- tokenization:") print("\n# ---- tokenization:")
for t in tokens: # FIXME: t.format() is changing tokens!
for t in tokens.copy():
print(t.format(line_prefix="")) print(t.format(line_prefix=""))
print() print()
return tokens, customize return tokens, customize

View File

@@ -203,7 +203,7 @@ class Scanner3(Scanner):
self, insts, next_tokens, inst, t, i, collection_type self, insts, next_tokens, inst, t, i, collection_type
): ):
""" """
Try to a replace sequence of instruction that ends with a Try to replace a sequence of instruction that ends with a
BUILD_xxx with a sequence that can be parsed much faster, but BUILD_xxx with a sequence that can be parsed much faster, but
inserting the token boundary at the beginning of the sequence. inserting the token boundary at the beginning of the sequence.
""" """
@@ -285,7 +285,7 @@ class Scanner3(Scanner):
) )
return new_tokens return new_tokens
def bound_map_from_inst(self, insts, next_tokens, inst, t, i): def bound_map_from_inst(self, insts, next_tokens, t, i):
""" """
Try to a sequence of instruction that ends with a BUILD_MAP into Try to a sequence of instruction that ends with a BUILD_MAP into
a sequence that can be parsed much faster, but inserting the a sequence that can be parsed much faster, but inserting the
@@ -300,25 +300,19 @@ class Scanner3(Scanner):
if count < 5: if count < 5:
return None return None
if self.version >= (3, 5): # Newer Python BUILD_MAP argument's count is a
# Newer Python BUILD_MAP argument's count is a # key and value pair so it is multiplied by two.
# key and value pair so it is multiplied by two. collection_start = i - (count * 2)
collection_start = i - (count * 2) assert (count * 2) <= i
assert (count * 2) <= i
for j in range(collection_start, i, 2): for j in range(collection_start, i, 2):
if insts[j].opname not in ("LOAD_CONST",): if insts[j].opname not in ("LOAD_CONST",):
return None return None
if insts[j + 1].opname not in ("LOAD_CONST",): if insts[j + 1].opname not in ("LOAD_CONST",):
return None return None
collection_start = i - (2 * count) collection_start = i - (2 * count)
collection_enum = CONST_COLLECTIONS.index("CONST_MAP") collection_enum = CONST_COLLECTIONS.index("CONST_MAP")
# else: Older Python count is sum of all key and value pairs
# Each pair is added individually like:
# LOAD_CONST ("Max-Age")
# LOAD_CONST ("max-age")
# STORE_MAP
# If we get here, all instructions before tokens[i] are LOAD_CONST and # If we get here, all instructions before tokens[i] are LOAD_CONST and
# we can replace add a boundary marker and change LOAD_CONST to # we can replace add a boundary marker and change LOAD_CONST to
@@ -331,7 +325,7 @@ class Scanner3(Scanner):
attr=collection_enum, attr=collection_enum,
pattr="CONST_MAP", pattr="CONST_MAP",
offset="%s_0" % start_offset, offset="%s_0" % start_offset,
linestart=False, linestart=insts[collection_start].starts_line,
has_arg=True, has_arg=True,
has_extended_arg=False, has_extended_arg=False,
opc=self.opc, opc=self.opc,
@@ -349,6 +343,7 @@ class Scanner3(Scanner):
has_arg=True, has_arg=True,
has_extended_arg=False, has_extended_arg=False,
opc=self.opc, opc=self.opc,
optype="pseudo",
) )
) )
new_tokens.append( new_tokens.append(
@@ -361,7 +356,7 @@ class Scanner3(Scanner):
has_arg=True, has_arg=True,
has_extended_arg=False, has_extended_arg=False,
opc=self.opc, opc=self.opc,
optype=insts[j + 1].optype, optype="pseudo",
) )
) )
new_tokens.append( new_tokens.append(
@@ -374,7 +369,93 @@ class Scanner3(Scanner):
has_arg=t.has_arg, has_arg=t.has_arg,
has_extended_arg=False, has_extended_arg=False,
opc=t.opc, opc=t.opc,
optype=t.optype, optype="pseudo",
)
)
return new_tokens
def bound_map_from_inst_pre35(self, insts, next_tokens, t, i):
    """
    Try to replace a BUILD_MAP instruction, together with the constant
    entries that follow it, by a token sequence that can be parsed much
    faster, inserting a token boundary at the beginning of the sequence.

    Each entry is expected to be three instructions after the BUILD_MAP:
        LOAD_CONST   (used below as the value)
        LOAD_CONST   (used below as the key)
        STORE_MAP

    The signature carries no annotations so that the definition also
    parses on Python 2.x.

    insts:       list of instructions being scanned
    next_tokens: list of tokens produced so far; its first ``i`` items
                 are kept as the prefix of the result
    t:           token for the BUILD_MAP; ``t.attr`` is the entry count
    i:           integer index of the BUILD_MAP instruction in ``insts``

    Returns the new token list, or None when the sequence is too small,
    runs past the end of ``insts``, or is not made up purely of
    LOAD_CONST / LOAD_CONST / STORE_MAP triples.
    """
    count = t.attr
    assert isinstance(count, int)

    # For small maps don't bother
    if count < 10:
        return None

    # Each of the `count` entries takes three instructions
    # (two LOAD_CONSTs and a STORE_MAP), hence the factor of three.
    first_entry = i + 1
    collection_end = first_entry + count * 3

    # BUILD_MAP's count does not guarantee that this many entries actually
    # follow; without this check the scan below could index past the end
    # of `insts` and raise IndexError instead of declining.
    if collection_end > len(insts):
        return None

    for j in range(first_entry, collection_end, 3):
        if insts[j].opname != "LOAD_CONST":
            return None
        if insts[j + 1].opname != "LOAD_CONST":
            return None
        if insts[j + 2].opname != "STORE_MAP":
            return None

    collection_enum = CONST_COLLECTIONS.index("CONST_MAP")

    new_tokens = next_tokens[:i]
    start_offset = insts[i].offset
    new_tokens.append(
        Token(
            opname="COLLECTION_START",
            attr=collection_enum,
            pattr="CONST_MAP",
            offset="%s_0" % start_offset,
            linestart=insts[i].starts_line,
            has_arg=True,
            has_extended_arg=False,
            opc=self.opc,
            optype="pseudo",
        )
    )
    for j in range(first_entry, collection_end, 3):
        # The key token is emitted before the value token, although the
        # key is the *second* LOAD_CONST of the triple.
        for pseudo_opname, inst in (
            ("ADD_KEY", insts[j + 1]),
            ("ADD_VALUE", insts[j]),
        ):
            new_tokens.append(
                Token(
                    opname=pseudo_opname,
                    attr=inst.argval,
                    pattr=inst.argrepr,
                    offset=inst.offset,
                    linestart=inst.starts_line,
                    has_arg=True,
                    has_extended_arg=False,
                    opc=self.opc,
                    optype="pseudo",
                )
            )
    new_tokens.append(
        Token(
            opname="BUILD_DICT_OLDER",
            attr=t.attr,
            pattr=t.pattr,
            offset=t.offset,
            linestart=t.linestart,
            has_arg=t.has_arg,
            has_extended_arg=False,
            opc=t.opc,
            optype="pseudo",
        )
    )
    return new_tokens
@@ -483,8 +564,17 @@ class Scanner3(Scanner):
last_op_was_break = False last_op_was_break = False
new_tokens = [] new_tokens = []
skip_end_offset = None
for i, inst in enumerate(self.insts): for i, inst in enumerate(self.insts):
# BUILD_MAP for < 3.5 can skip *forward* in instructions and
# replace them. So we use the below to get up to the position
# scanned and replaced forward
if skip_end_offset and inst.offset <= skip_end_offset:
continue
skip_end_offset = None
opname = inst.opname opname = inst.opname
argval = inst.argval argval = inst.argval
pattr = inst.argrepr pattr = inst.argrepr
@@ -517,17 +607,38 @@ class Scanner3(Scanner):
if try_tokens is not None: if try_tokens is not None:
new_tokens = try_tokens new_tokens = try_tokens
continue continue
elif opname in ("BUILD_MAP",) and self.version >= (3, 5):
try_tokens = self.bound_map_from_inst( elif opname in ("BUILD_MAP",):
bound_map_from_insts_fn = (
self.bound_map_from_inst_35
if self.version >= (3, 5)
else self.bound_map_from_inst_pre35
)
try_tokens = bound_map_from_insts_fn(
self.insts, self.insts,
new_tokens, new_tokens,
inst,
t, t,
i, i,
) )
if try_tokens is not None: if try_tokens is not None:
new_tokens = try_tokens if self.version < (3, 5):
continue assert try_tokens[-1] == "BUILD_DICT_OLDER"
prev_offset = inst.offset
for j in range(i, len(self.insts)):
if self.insts[j].opname == "STORE_NAME":
new_tokens = try_tokens
skip_end_offset = prev_offset
# Set a hacky sentinel to indicate skipping to the
# next instruction
opname = "EXTENDED_ARG"
break
prev_offset = self.insts[j].offset
pass
pass
else:
new_tokens = try_tokens
continue
pass
argval = inst.argval argval = inst.argval
op = inst.opcode op = inst.opcode
@@ -786,7 +897,8 @@ class Scanner3(Scanner):
if show_asm in ("both", "after"): if show_asm in ("both", "after"):
print("\n# ---- tokenization:") print("\n# ---- tokenization:")
for t in new_tokens: # FIXME: t.format() is changing tokens!
for t in new_tokens.copy():
print(t.format(line_prefix="")) print(t.format(line_prefix=""))
print() print()
return new_tokens, customize return new_tokens, customize

View File

@@ -225,13 +225,13 @@ class Scanner37Base(Scanner):
if show_asm in ("both", "before"): if show_asm in ("both", "before"):
print("\n# ---- disassembly:") print("\n# ---- disassembly:")
self.insts = bytecode.disassemble_bytes( bytecode.disassemble_bytes(
co.co_code, co.co_code,
varnames=co.co_varnames, varnames=co.co_varnames,
names=co.co_names, names=co.co_names,
constants=co.co_consts, constants=co.co_consts,
cells=bytecode._cell_names, cells=bytecode._cell_names,
linestarts=bytecode._linestarts, line_starts=bytecode._linestarts,
asm_format="extended", asm_format="extended",
filename=co.co_filename, filename=co.co_filename,
show_source=True, show_source=True,
@@ -478,12 +478,17 @@ class Scanner37Base(Scanner):
next_opname = self.insts[i + 1].opname next_opname = self.insts[i + 1].opname
# 'Continue's include jumps to loops that are not # 'Continue's include jumps to loops that are not
# and the end of a block which follow with POP_BLOCK and COME_FROM_LOOP. # and the end of a block which follow with
# If the JUMP_ABSOLUTE is to a FOR_ITER and it is followed by another JUMP_FORWARD # POP_BLOCK and COME_FROM_LOOP. If the
# then we'll take it as a "continue". # JUMP_ABSOLUTE is to a FOR_ITER, and it is
is_continue = ( # followed by another JUMP_FORWARD then we'll take
self.insts[self.offset2inst_index[target]].opname == "FOR_ITER" # it as a "continue".
and self.insts[i + 1].opname == "JUMP_FORWARD" next_inst = self.insts[i + 1]
is_continue = self.insts[
self.offset2inst_index[target]
].opname == "FOR_ITER" and next_inst.opname in (
"JUMP_FORWARD",
"JUMP_ABSOLUTE",
) )
if self.version < (3, 8) and ( if self.version < (3, 8) and (
@@ -498,21 +503,65 @@ class Scanner37Base(Scanner):
): ):
opname = "CONTINUE" opname = "CONTINUE"
else: else:
# "continue" versus "break_loop" detection is more complicated
# because "continue" to an outer loop is really a "break loop"
opname = "JUMP_BACK" opname = "JUMP_BACK"
# FIXME: this is a hack to catch stuff like: # FIXME: this is a hack to catch stuff like:
# if x: continue # if x: continue
# the "continue" is not on a new line. # the "continue" is not on a new line.
# There are other situations where we don't catch #
# CONTINUE as well. # Another situation is where we have
if tokens[-1].kind == "JUMP_BACK" and tokens[-1].attr <= argval: # for method in methods:
# for B in method:
# if c:
# return
# break # A "continue" but not the innermost one
if tokens[-1].kind == "JUMP_LOOP" and tokens[-1].attr <= argval:
if tokens[-2].kind == "BREAK_LOOP": if tokens[-2].kind == "BREAK_LOOP":
del tokens[-1] del tokens[-1]
j -= 1
else: else:
# intern is used because we are changing the *previous* token # "intern" is used because we are
tokens[-1].kind = sys.intern("CONTINUE") # changing the *previous* token. A
if last_op_was_break and opname == "CONTINUE": # POP_TOP suggests a "break" rather
last_op_was_break = False # than a "continue"?
continue if tokens[-2] == "POP_TOP" and (
is_continue and next_inst.argval != tokens[-1].attr
):
tokens[-1].kind = sys.intern("BREAK_LOOP")
else:
tokens[-1].kind = sys.intern("CONTINUE")
last_continue = tokens[-1]
pass
pass
pass
# elif (
# last_continue is not None
# and tokens[-1].kind == "JUMP_LOOP"
# and last_continue.attr <= tokens[-1].attr
# and last_continue.offset > tokens[-1].attr
# ):
# # Handle mis-characterized "CONTINUE"
# # We have a situation like:
# # loop ... for or while)
# # loop
# # if ...: # code below starts here
# # break # not continue
# #
# # POP_JUMP_IF_FALSE_LOOP # to outer loop
# # JUMP_LOOP # to inner loop
# # ...
# # JUMP_LOOP # to outer loop
# tokens[-2].kind = sys.intern("BREAK_LOOP")
# pass
# if last_op_was_break and opname == "CONTINUE":
# last_op_was_break = False
# continue
pass
else:
opname = "JUMP_FORWARD"
elif inst.offset in self.load_asserts: elif inst.offset in self.load_asserts:
opname = "LOAD_ASSERT" opname = "LOAD_ASSERT"
@@ -535,9 +584,10 @@ class Scanner37Base(Scanner):
) )
pass pass
if show_asm in ("both", "after"): if show_asm in ("both", "after") and self.version < (3, 8):
print("\n# ---- tokenization:") print("\n# ---- tokenization:")
for t in tokens: # FIXME: t.format() is changing tokens!
for t in tokens.copy():
print(t.format(line_prefix="")) print(t.format(line_prefix=""))
print() print()
return tokens, customize return tokens, customize

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2019-2022 by Rocky Bernstein # Copyright (c) 2019-2022, 2024 by Rocky Bernstein
# #
# This program is free software: you can redistribute it and/or modify # This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by # it under the terms of the GNU General Public License as published by
@@ -117,35 +117,26 @@ class Scanner38(Scanner37):
new_tokens.append(token) new_tokens.append(token)
continue continue
# We also want to avoid confusing BREAK_LOOPS with parts of the j = i
# grammar rules for loops. (Perhaps we should change the grammar.) while tokens[j - 1] in ("POP_TOP", "POP_BLOCK", "POP_EXCEPT"):
# Try to find an adjacent JUMP_BACK which is part of the normal loop end. j -= 1
if tokens[j].linestart:
break
token_with_linestart = tokens[j]
if i + 1 < len(tokens) and tokens[i + 1] == "JUMP_BACK": if token_with_linestart.linestart:
# Sometimes the jump back is after the "break" instruction..
jump_back_index = i + 1
else:
# and sometimes, because of jump-to-jump optimization, it is before the
# jump target instruction.
jump_back_index = self.offset2tok_index[jump_target] - 1
while tokens[jump_back_index].kind.startswith("COME_FROM_"):
jump_back_index -= 1
pass
pass
jump_back_token = tokens[jump_back_index]
# Is this a forward jump not next to a JUMP_BACK ? ...
break_loop = token.linestart and jump_back_token != "JUMP_BACK"
# or if there is looping jump back, then that loop
# should start before where the "break" instruction sits.
if break_loop or (
jump_back_token == "JUMP_BACK"
and jump_back_token.attr < token.off2int()
):
token.kind = "BREAK_LOOP" token.kind = "BREAK_LOOP"
pass pass
new_tokens.append(token) new_tokens.append(token)
if show_asm in ("both", "after"):
print("\n# ---- tokenization:")
# FIXME: t.format() is changing tokens!
for t in new_tokens.copy():
print(t.format(line_prefix=""))
print()
return new_tokens, customize return new_tokens, customize

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2016-2021, 2023 by Rocky Bernstein # Copyright (c) 2016-2021, 2023-2024 by Rocky Bernstein
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com> # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 1999 John Aycock # Copyright (c) 1999 John Aycock
# #