Merge branch 'python-3.0-to-3.2' into python-2.4-to-2.7

This commit is contained in:
rocky
2024-07-15 10:11:20 -04:00
19 changed files with 324 additions and 76 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,4 @@
# Next line is 1164
# Decompiler fixture: the function assigns a local and then evaluates a
# lambda expression whose body is a conditional expression
# ("compile(...) if x else None"). The lambda is an expression statement;
# its value is not bound to any name or returned.
def foo():
name = "bar"
# The lambda refers to "name", the variable assigned on the previous line.
lambda x: compile(x, "<register %s's commit>" % name, "exec") if x else None

View File

@@ -0,0 +1,10 @@
# Adapted 3.5 from _bootstrap_external.py
# Decompiler fixture: an "if" guarding a "for" loop that contains a
# conditional "break", followed by an "else:" clause that returns None.
# NOTE(review): indentation is not visible here, so whether the "else:"
# pairs with the "for" (a for/else) or with "if loader" cannot be told from
# this view -- confirm against the original file.
def spec_from_file_location(loader, location):
if loader:
# The loop variable is unused; the loop iterates over the __file__ string.
for _ in __file__:
if location:
break
else:
return None

View File

@@ -0,0 +1,74 @@
# From https://github.com/rocky/python-uncompyle6/issues/420
# Related to EXTENDED_ARG in whilestmt
#
# Decompiler regression input: a "while" loop with a compound condition
# whose body mixes nested try/except, "break" and "continue", plus a later
# "else:" section containing a second "while True" loop.
# NOTE(review): "self" and "INFORMATION" are used below but are never
# defined in these lines; presumably this file is only compiled and
# decompiled, never executed -- confirm.
ERRPR_CODE_DEFINE = {} # Remove this and things work
try:
print()
except Exception:
var1 = 0
var2 = 1
if var1 or var2:
times = 1
while times != False and self.scanner.is_open():
try:
try:
print()
except Exception:
print()
out = 0
count = 1
if out == 1:
break
elif out == 2:
count += 1
if times == 3:
self.func.emit({})
break
else:
continue
if out == 3 or out == b"":
if self.times == 3:
break
count += 1
if count == 3:
count = 0
if out == 4:
self.func.emit(ERRPR_CODE_DEFINE.ReceiedError())
else:
print()
break
continue
else:
count = 0
except Exception:
print("upper exception")
else:
try:
print("jump forward")
while True:
out = self.func.read(count)
if out == b"":
self.func.emit(ERRPR_CODE_DEFINE.ReceiedError())
break
continue
imagedata = out[0]
if imagedata == b"\x05":
self.func.emit(INFORMATION.UnsupportedImage())
break
continue
if imagedata == b"\x15":
self.func.emit(INFORMATION.NoneImage())
break
continue
if out[1] == False:
start_index = imagedata.find(b"BM6")
self.func.emit(imagedata[start_index:], False)
continue
(imagedata, all_code) = imagedata
self.func.emit({})
self.func.emit({})
self.func.emit({}) # remove {} and this works
break
except Exception:
pass

View File

@@ -53,6 +53,10 @@ class Python34Parser(Python33Parser):
_ifstmts_jump ::= c_stmts_opt JUMP_ABSOLUTE JUMP_FORWARD COME_FROM
genexpr_func ::= LOAD_ARG _come_froms FOR_ITER store comp_iter JUMP_BACK
if_exp_lambda ::= expr jmp_false expr return_if_lambda come_froms return_stmt_lambda LAMBDA_MARKER
return_if_lambda ::= RETURN_END_IF_LAMBDA come_froms
return_if_stmt ::= return_expr RETURN_END_IF POP_BLOCK
"""
def customize_grammar_rules(self, tokens, customize):

View File

@@ -107,7 +107,6 @@ class Python35Parser(Python34Parser):
# Python 3.5+ does jump optimization
# In <.3.5 the below is a JUMP_FORWARD to a JUMP_ABSOLUTE.
return_if_stmt ::= return_expr RETURN_END_IF POP_BLOCK
return_if_lambda ::= RETURN_END_IF_LAMBDA COME_FROM
return ::= return_expr RETURN_END_IF

View File

@@ -52,6 +52,8 @@ class Python36Parser(Python35Parser):
for_block ::= l_stmts_opt come_from_loops JUMP_BACK
come_from_loops ::= COME_FROM_LOOP*
whilestmt ::= SETUP_LOOP testexpr l_stmts_opt
JUMP_BACK come_froms POP_BLOCK
whilestmt ::= SETUP_LOOP testexpr l_stmts_opt
JUMP_BACK come_froms POP_BLOCK COME_FROM_LOOP
whilestmt ::= SETUP_LOOP testexpr l_stmts_opt

View File

@@ -491,7 +491,8 @@ class Scanner2(Scanner):
if show_asm in ("both", "after"):
print("\n# ---- tokenization:")
for t in new_tokens:
# FIXME: t.format() is changing tokens!
for t in new_tokens.copy():
print(t.format(line_prefix=""))
print()
return new_tokens, customize

View File

@@ -349,7 +349,8 @@ class Scanner26(Scanner2):
if show_asm in ("both", "after"):
print("\n# ---- tokenization:")
for t in tokens:
# FIXME: t.format() is changing tokens!
for t in tokens.copy():
print(t.format(line_prefix=""))
print()
return tokens, customize

View File

@@ -203,7 +203,7 @@ class Scanner3(Scanner):
self, insts, next_tokens, inst, t, i, collection_type
):
"""
Try to a replace sequence of instruction that ends with a
Try to replace a sequence of instruction that ends with a
BUILD_xxx with a sequence that can be parsed much faster, but
inserting the token boundary at the beginning of the sequence.
"""
@@ -285,7 +285,7 @@ class Scanner3(Scanner):
)
return new_tokens
def bound_map_from_inst(self, insts, next_tokens, inst, t, i):
def bound_map_from_inst(self, insts, next_tokens, t, i):
"""
Try to turn a sequence of instructions that ends with a BUILD_MAP into
a sequence that can be parsed much faster, by inserting the
@@ -300,25 +300,19 @@ class Scanner3(Scanner):
if count < 5:
return None
if self.version >= (3, 5):
# Newer Python BUILD_MAP argument's count is a
# key and value pair so it is multiplied by two.
collection_start = i - (count * 2)
assert (count * 2) <= i
# Newer Python BUILD_MAP argument's count is a
# key and value pair so it is multiplied by two.
collection_start = i - (count * 2)
assert (count * 2) <= i
for j in range(collection_start, i, 2):
if insts[j].opname not in ("LOAD_CONST",):
return None
if insts[j + 1].opname not in ("LOAD_CONST",):
return None
for j in range(collection_start, i, 2):
if insts[j].opname not in ("LOAD_CONST",):
return None
if insts[j + 1].opname not in ("LOAD_CONST",):
return None
collection_start = i - (2 * count)
collection_enum = CONST_COLLECTIONS.index("CONST_MAP")
# else: Older Python count is sum of all key and value pairs
# Each pair is added individually like:
# LOAD_CONST ("Max-Age")
# LOAD_CONST ("max-age")
# STORE_MAP
collection_start = i - (2 * count)
collection_enum = CONST_COLLECTIONS.index("CONST_MAP")
# If we get here, all instructions before tokens[i] are LOAD_CONST and
# we can add a boundary marker and change LOAD_CONST to
@@ -331,7 +325,7 @@ class Scanner3(Scanner):
attr=collection_enum,
pattr="CONST_MAP",
offset="%s_0" % start_offset,
linestart=False,
linestart=insts[collection_start].starts_line,
has_arg=True,
has_extended_arg=False,
opc=self.opc,
@@ -349,6 +343,7 @@ class Scanner3(Scanner):
has_arg=True,
has_extended_arg=False,
opc=self.opc,
optype="pseudo",
)
)
new_tokens.append(
@@ -361,7 +356,7 @@ class Scanner3(Scanner):
has_arg=True,
has_extended_arg=False,
opc=self.opc,
optype=insts[j + 1].optype,
optype="pseudo",
)
)
new_tokens.append(
@@ -374,7 +369,93 @@ class Scanner3(Scanner):
has_arg=t.has_arg,
has_extended_arg=False,
opc=t.opc,
optype=t.optype,
optype="pseudo",
)
)
return new_tokens
def bound_map_from_inst_pre35(
    self, insts: list, next_tokens: list, t: Token, i: int
):
    """
    Try to turn a sequence of instructions that starts with a BUILD_MAP
    into a sequence that can be parsed much faster, by inserting a
    token boundary at the beginning of the sequence.

    The shape recognized is: the instruction at index ``i`` followed by
    ``t.attr`` consecutive groups of LOAD_CONST, LOAD_CONST, STORE_MAP.

    insts: the instruction list being scanned.
    next_tokens: tokens built so far; only ``next_tokens[:i]`` is kept.
    t: token whose ``attr`` is the number of key/value entries and whose
       fields are copied into the final BUILD_DICT_OLDER token.
    i: index in ``insts`` of the instruction the scan starts after.

    Returns a new token list of the form
    COLLECTION_START, (ADD_KEY, ADD_VALUE)*, BUILD_DICT_OLDER appended to
    ``next_tokens[:i]``, or None when the map is too small or the
    instructions do not match the expected shape.
    """
    count = t.attr
    assert isinstance(count, int)

    # For small maps don't bother
    if count < 10:
        return None

    # Older Python emits, for each entry, a value LOAD_CONST, a key
    # LOAD_CONST and a STORE_MAP. So the count is multiplied by three.
    collection_end = i + 1 + count * 3

    # Not enough instructions left to hold ``count`` complete groups:
    # this cannot be the constant-map shape, and scanning further would
    # index past the end of ``insts``.
    if collection_end > len(insts):
        return None

    for j in range(i + 1, collection_end, 3):
        if (
            insts[j].opname != "LOAD_CONST"
            or insts[j + 1].opname != "LOAD_CONST"
            or insts[j + 2].opname != "STORE_MAP"
        ):
            return None

    collection_enum = CONST_COLLECTIONS.index("CONST_MAP")

    new_tokens = next_tokens[:i]
    start_offset = insts[i].offset
    new_tokens.append(
        Token(
            opname="COLLECTION_START",
            attr=collection_enum,
            pattr="CONST_MAP",
            offset="%s_0" % start_offset,
            linestart=insts[i].starts_line,
            has_arg=True,
            has_extended_arg=False,
            opc=self.opc,
            optype="pseudo",
        )
    )
    for j in range(i + 1, collection_end, 3):
        # Within a group the second LOAD_CONST supplies the ADD_KEY token
        # and the first supplies the ADD_VALUE token; the key token is
        # emitted first.
        new_tokens.append(
            Token(
                opname="ADD_KEY",
                attr=insts[j + 1].argval,
                pattr=insts[j + 1].argrepr,
                offset=insts[j + 1].offset,
                linestart=insts[j + 1].starts_line,
                has_arg=True,
                has_extended_arg=False,
                opc=self.opc,
                optype="pseudo",
            )
        )
        new_tokens.append(
            Token(
                opname="ADD_VALUE",
                attr=insts[j].argval,
                pattr=insts[j].argrepr,
                offset=insts[j].offset,
                linestart=insts[j].starts_line,
                has_arg=True,
                has_extended_arg=False,
                opc=self.opc,
                optype="pseudo",
            )
        )
    new_tokens.append(
        Token(
            opname="BUILD_DICT_OLDER",
            attr=t.attr,
            pattr=t.pattr,
            offset=t.offset,
            linestart=t.linestart,
            has_arg=t.has_arg,
            has_extended_arg=False,
            opc=t.opc,
            optype="pseudo",
        )
    )
    return new_tokens
@@ -483,8 +564,17 @@ class Scanner3(Scanner):
last_op_was_break = False
new_tokens = []
skip_end_offset = None
for i, inst in enumerate(self.insts):
# BUILD_MAP for < 3.5 can skip *forward* in instructions and
# replace them. So we use the below to get up to the position
# scanned and replaced forward
if skip_end_offset and inst.offset <= skip_end_offset:
continue
skip_end_offset = None
opname = inst.opname
argval = inst.argval
pattr = inst.argrepr
@@ -517,17 +607,38 @@ class Scanner3(Scanner):
if try_tokens is not None:
new_tokens = try_tokens
continue
elif opname in ("BUILD_MAP",) and self.version >= (3, 5):
try_tokens = self.bound_map_from_inst(
elif opname in ("BUILD_MAP",):
bound_map_from_insts_fn = (
self.bound_map_from_inst_35
if self.version >= (3, 5)
else self.bound_map_from_inst_pre35
)
try_tokens = bound_map_from_insts_fn(
self.insts,
new_tokens,
inst,
t,
i,
)
if try_tokens is not None:
new_tokens = try_tokens
continue
if self.version < (3, 5):
assert try_tokens[-1] == "BUILD_DICT_OLDER"
prev_offset = inst.offset
for j in range(i, len(self.insts)):
if self.insts[j].opname == "STORE_NAME":
new_tokens = try_tokens
skip_end_offset = prev_offset
# Set a hacky sentinel to indicate skipping to the
# next instruction
opname = "EXTENDED_ARG"
break
prev_offset = self.insts[j].offset
pass
pass
else:
new_tokens = try_tokens
continue
pass
argval = inst.argval
op = inst.opcode
@@ -786,7 +897,8 @@ class Scanner3(Scanner):
if show_asm in ("both", "after"):
print("\n# ---- tokenization:")
for t in new_tokens:
# FIXME: t.format() is changing tokens!
for t in new_tokens.copy():
print(t.format(line_prefix=""))
print()
return new_tokens, customize

View File

@@ -225,13 +225,13 @@ class Scanner37Base(Scanner):
if show_asm in ("both", "before"):
print("\n# ---- disassembly:")
self.insts = bytecode.disassemble_bytes(
bytecode.disassemble_bytes(
co.co_code,
varnames=co.co_varnames,
names=co.co_names,
constants=co.co_consts,
cells=bytecode._cell_names,
linestarts=bytecode._linestarts,
line_starts=bytecode._linestarts,
asm_format="extended",
filename=co.co_filename,
show_source=True,
@@ -478,12 +478,17 @@ class Scanner37Base(Scanner):
next_opname = self.insts[i + 1].opname
# 'Continue's include jumps to loops that are not
# and the end of a block which follow with POP_BLOCK and COME_FROM_LOOP.
# If the JUMP_ABSOLUTE is to a FOR_ITER and it is followed by another JUMP_FORWARD
# then we'll take it as a "continue".
is_continue = (
self.insts[self.offset2inst_index[target]].opname == "FOR_ITER"
and self.insts[i + 1].opname == "JUMP_FORWARD"
# and the end of a block which follow with
# POP_BLOCK and COME_FROM_LOOP. If the
# JUMP_ABSOLUTE is to a FOR_ITER, and it is
# followed by another JUMP_FORWARD then we'll take
# it as a "continue".
next_inst = self.insts[i + 1]
is_continue = self.insts[
self.offset2inst_index[target]
].opname == "FOR_ITER" and next_inst.opname in (
"JUMP_FORWARD",
"JUMP_ABSOLUTE",
)
if self.version < (3, 8) and (
@@ -498,21 +503,65 @@ class Scanner37Base(Scanner):
):
opname = "CONTINUE"
else:
# "continue" versus "break_loop" detection is more complicated
# because "continue" to an outer loop is really a "break loop"
opname = "JUMP_BACK"
# FIXME: this is a hack to catch stuff like:
# if x: continue
# the "continue" is not on a new line.
# There are other situations where we don't catch
# CONTINUE as well.
if tokens[-1].kind == "JUMP_BACK" and tokens[-1].attr <= argval:
#
# Another situation is where we have
# for method in methods:
# for B in method:
# if c:
# return
# break # A "continue" but not the innermost one
if tokens[-1].kind == "JUMP_LOOP" and tokens[-1].attr <= argval:
if tokens[-2].kind == "BREAK_LOOP":
del tokens[-1]
j -= 1
else:
# intern is used because we are changing the *previous* token
tokens[-1].kind = sys.intern("CONTINUE")
if last_op_was_break and opname == "CONTINUE":
last_op_was_break = False
continue
# "intern" is used because we are
# changing the *previous* token. A
# POP_TOP suggests a "break" rather
# than a "continue"?
if tokens[-2] == "POP_TOP" and (
is_continue and next_inst.argval != tokens[-1].attr
):
tokens[-1].kind = sys.intern("BREAK_LOOP")
else:
tokens[-1].kind = sys.intern("CONTINUE")
last_continue = tokens[-1]
pass
pass
pass
# elif (
# last_continue is not None
# and tokens[-1].kind == "JUMP_LOOP"
# and last_continue.attr <= tokens[-1].attr
# and last_continue.offset > tokens[-1].attr
# ):
# # Handle mis-characterized "CONTINUE"
# # We have a situation like:
# # loop ... for or while)
# # loop
# # if ...: # code below starts here
# # break # not continue
# #
# # POP_JUMP_IF_FALSE_LOOP # to outer loop
# # JUMP_LOOP # to inner loop
# # ...
# # JUMP_LOOP # to outer loop
# tokens[-2].kind = sys.intern("BREAK_LOOP")
# pass
# if last_op_was_break and opname == "CONTINUE":
# last_op_was_break = False
# continue
pass
else:
opname = "JUMP_FORWARD"
elif inst.offset in self.load_asserts:
opname = "LOAD_ASSERT"
@@ -535,9 +584,10 @@ class Scanner37Base(Scanner):
)
pass
if show_asm in ("both", "after"):
if show_asm in ("both", "after") and self.version < (3, 8):
print("\n# ---- tokenization:")
for t in tokens:
# FIXME: t.format() is changing tokens!
for t in tokens.copy():
print(t.format(line_prefix=""))
print()
return tokens, customize

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2019-2022 by Rocky Bernstein
# Copyright (c) 2019-2022, 2024 by Rocky Bernstein
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -117,35 +117,26 @@ class Scanner38(Scanner37):
new_tokens.append(token)
continue
# We also want to avoid confusing BREAK_LOOPS with parts of the
# grammar rules for loops. (Perhaps we should change the grammar.)
# Try to find an adjacent JUMP_BACK which is part of the normal loop end.
j = i
while tokens[j - 1] in ("POP_TOP", "POP_BLOCK", "POP_EXCEPT"):
j -= 1
if tokens[j].linestart:
break
token_with_linestart = tokens[j]
if i + 1 < len(tokens) and tokens[i + 1] == "JUMP_BACK":
# Sometimes the jump back is after the "break" instruction..
jump_back_index = i + 1
else:
# and sometimes, because of jump-to-jump optimization, it is before the
# jump target instruction.
jump_back_index = self.offset2tok_index[jump_target] - 1
while tokens[jump_back_index].kind.startswith("COME_FROM_"):
jump_back_index -= 1
pass
pass
jump_back_token = tokens[jump_back_index]
# Is this a forward jump not next to a JUMP_BACK ? ...
break_loop = token.linestart and jump_back_token != "JUMP_BACK"
# or if there is looping jump back, then that loop
# should start before where the "break" instruction sits.
if break_loop or (
jump_back_token == "JUMP_BACK"
and jump_back_token.attr < token.off2int()
):
if token_with_linestart.linestart:
token.kind = "BREAK_LOOP"
pass
new_tokens.append(token)
if show_asm in ("both", "after"):
print("\n# ---- tokenization:")
# FIXME: t.format() is changing tokens!
for t in new_tokens.copy():
print(t.format(line_prefix=""))
print()
return new_tokens, customize

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2016-2021, 2023 by Rocky Bernstein
# Copyright (c) 2016-2021, 2023-2024 by Rocky Bernstein
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 1999 John Aycock
#