From ea36ff9bb13ce7041dc0c77e7155677abca6b647 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 13 Jul 2024 15:57:24 -0400 Subject: [PATCH 1/9] Add grammar rule involving RETURN_END_IF --- test/simple_source/bug35/02_for_else_bug.py | 10 ++++++++++ uncompyle6/parsers/parse35.py | 2 ++ uncompyle6/semantics/consts.py | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 test/simple_source/bug35/02_for_else_bug.py diff --git a/test/simple_source/bug35/02_for_else_bug.py b/test/simple_source/bug35/02_for_else_bug.py new file mode 100644 index 00000000..c8f85ad8 --- /dev/null +++ b/test/simple_source/bug35/02_for_else_bug.py @@ -0,0 +1,10 @@ +# Adapted 3.5 from _bootstrap_external.py + + +def spec_from_file_location(loader, location): + if loader: + for _ in __file__: + if location: + break + else: + return None diff --git a/uncompyle6/parsers/parse35.py b/uncompyle6/parsers/parse35.py index fd2bd5cc..e5bc9b10 100644 --- a/uncompyle6/parsers/parse35.py +++ b/uncompyle6/parsers/parse35.py @@ -111,6 +111,8 @@ class Python35Parser(Python34Parser): return_if_stmt ::= return_expr RETURN_END_IF POP_BLOCK return_if_lambda ::= RETURN_END_IF_LAMBDA COME_FROM + return ::= return_expr RETURN_END_IF + jb_else ::= JUMP_BACK ELSE ifelsestmtc ::= testexpr c_stmts_opt JUMP_FORWARD else_suitec ifelsestmtl ::= testexpr c_stmts_opt jb_else else_suitel diff --git a/uncompyle6/semantics/consts.py b/uncompyle6/semantics/consts.py index 7b9cd7d1..7ea9967b 100644 --- a/uncompyle6/semantics/consts.py +++ b/uncompyle6/semantics/consts.py @@ -431,7 +431,7 @@ TABLE_DIRECT = { "mkfuncdeco": ("%|@%c\n%c", (0, "expr"), 1), # A custom rule in n_function def distinguishes whether to call this or # function_def_async - "mkfuncdeco0": ("%|def %c\n", (0, "mkfunc")), + "mkfuncdeco0": ("%|def %c\n", (0, ("mkfunc", "mkfunc_annotate"))), # In cases where we desire an explict new line. 
# After docstrings which are followed by a "def" is From 7787166ddfaf8d849d1d9312d3ad7135aceacb3b Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 13 Jul 2024 15:57:24 -0400 Subject: [PATCH 2/9] Add grammar rule involving RETURN_END_IF --- test/bytecode_3.5/02_for_else_bug.pyc | Bin 0 -> 301 bytes test/simple_source/bug35/02_for_else_bug.py | 10 ++++++++++ uncompyle6/parsers/parse35.py | 2 ++ uncompyle6/semantics/consts.py | 2 +- 4 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 test/bytecode_3.5/02_for_else_bug.pyc create mode 100644 test/simple_source/bug35/02_for_else_bug.py diff --git a/test/bytecode_3.5/02_for_else_bug.pyc b/test/bytecode_3.5/02_for_else_bug.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ace423162f9be783d1773c539ef26c88162d7ad7 GIT binary patch literal 301 zcmYk0!Ait16h&`Zr&9(3?gqCm2D))XaVH|^T7+2)2olCNm11gYG6jWk=NI@F{!+Gn zgBw@gR7QN{+`Q!F-Mp!klZUVOIso`2e~~adCw`YwaS6NuH%t;NinNUN7%7SQ1@SY= z25|d$-oqNWyp?!Oun}+)*H02lIxQlFxGF@*m8u(~l`7JZ8M~^q3%Rl7YTek!Ak+uk zh?d_o-?SaydE2{E&+q#CPD->ub79Bx8^$b=-L b#Ep6448uPB@=r3>LVnuXsyF&<=#I@lP;o$h literal 0 HcmV?d00001 diff --git a/test/simple_source/bug35/02_for_else_bug.py b/test/simple_source/bug35/02_for_else_bug.py new file mode 100644 index 00000000..c8f85ad8 --- /dev/null +++ b/test/simple_source/bug35/02_for_else_bug.py @@ -0,0 +1,10 @@ +# Adapted 3.5 from _bootstrap_external.py + + +def spec_from_file_location(loader, location): + if loader: + for _ in __file__: + if location: + break + else: + return None diff --git a/uncompyle6/parsers/parse35.py b/uncompyle6/parsers/parse35.py index fd2bd5cc..e5bc9b10 100644 --- a/uncompyle6/parsers/parse35.py +++ b/uncompyle6/parsers/parse35.py @@ -111,6 +111,8 @@ class Python35Parser(Python34Parser): return_if_stmt ::= return_expr RETURN_END_IF POP_BLOCK return_if_lambda ::= RETURN_END_IF_LAMBDA COME_FROM + return ::= return_expr RETURN_END_IF + jb_else ::= JUMP_BACK ELSE ifelsestmtc ::= testexpr c_stmts_opt JUMP_FORWARD 
else_suitec ifelsestmtl ::= testexpr c_stmts_opt jb_else else_suitel diff --git a/uncompyle6/semantics/consts.py b/uncompyle6/semantics/consts.py index 7b9cd7d1..7ea9967b 100644 --- a/uncompyle6/semantics/consts.py +++ b/uncompyle6/semantics/consts.py @@ -431,7 +431,7 @@ TABLE_DIRECT = { "mkfuncdeco": ("%|@%c\n%c", (0, "expr"), 1), # A custom rule in n_function def distinguishes whether to call this or # function_def_async - "mkfuncdeco0": ("%|def %c\n", (0, "mkfunc")), + "mkfuncdeco0": ("%|def %c\n", (0, ("mkfunc", "mkfunc_annotate"))), # In cases where we desire an explict new line. # After docstrings which are followed by a "def" is From 389fc2360a238deea3c0664e5ebf033c94a3007d Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 13 Jul 2024 21:42:49 -0400 Subject: [PATCH 3/9] 3.6 bug related to large whilestmt --- test/bytecode_3.6/09_long_whilestmt.pyc | Bin 0 -> 1120 bytes test/simple_source/bug36/09_long_whilestmt.py | 74 ++++++++++++++++++ uncompyle6/parsers/parse36.py | 2 + 3 files changed, 76 insertions(+) create mode 100644 test/bytecode_3.6/09_long_whilestmt.pyc create mode 100644 test/simple_source/bug36/09_long_whilestmt.py diff --git a/test/bytecode_3.6/09_long_whilestmt.pyc b/test/bytecode_3.6/09_long_whilestmt.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7612eb31e20b1f7f1460b1762342405a4c797951 GIT binary patch literal 1120 zcmZ`%&rcIU6rP!Ge{GkxAXM?h5sVQPg3%CyN=YzKnyLXNCe7M;#Vze@ncYIm(Z-vH zCjJ9ny&KOS`2#%d)r-B#&696-+eS^?$-I5@zHh$w=FO~3O___gKh*s+A>YX#-*xmi zaeB`fAr9fBX94XKN)E_?$yI{@J23}LZBdL-A_$KO$_v1)>{0KsXO7k6%}7g~MM_cph)8y*Rnw?r0@@R%-j|8Ig?$$nF_5Y1vXohcEn5|JK|##BGl zl!>UaGC5zDhUOlS)Admws9GCWFOs@~Bg77IVz;Oyy&phDO2@lFsG9||PkDM$S@+*; zW!!hVi#VAlJO??E4y*b&;^iCDQB@;ePM9J7b%M|Qg=7Eb3EqP~=TFEru89ohpB=Gt zFe|KgOf&k7MwxT37|-#!h!J)5e|}WM0lo}bn7oUPs2|Zij&9R>PL`&I`jHJOn(ZMz zEi#Ouxxz_?2AYdpr`-m%0Non2eaDp-R-@BuFVtOiZYy!6H^xId=jokK_SQ;E1wLJ@ zRLYg=)BUYtb*s3&Qz{1WwsNE&q>E!EZk*ZbUTE(Jao=fy7Z@Hi>p{}1*;0byNyn?Y 
zZID6C?f5~w=5|oQsCQ&7Frel5+^Rqg91umNToo7!Y{An`%RUBS`!>()l(zRPdyil2 z?3Z}{rS$MHuJS?bXbMl4TnV9zqK*_in&Gwny{h!;=M&QpUGFS=~3W%m~Wnnz8qaiEu#*r*yg%Gij Ir$?Ut21SklmH+?% literal 0 HcmV?d00001 diff --git a/test/simple_source/bug36/09_long_whilestmt.py b/test/simple_source/bug36/09_long_whilestmt.py new file mode 100644 index 00000000..1cf98b6b --- /dev/null +++ b/test/simple_source/bug36/09_long_whilestmt.py @@ -0,0 +1,74 @@ +# From https://github.com/rocky/python-uncompyle6/issues/420 +# Related to EXTENDED_ARG in whilestmt +ERRPR_CODE_DEFINE = {} # Remove this and things works + +try: + print() +except Exception: + var1 = 0 + var2 = 1 + if var1 or var2: + times = 1 + while times != False and self.scanner.is_open(): + try: + try: + print() + except Exception: + print() + + out = 0 + count = 1 + if out == 1: + break + elif out == 2: + count += 1 + if times == 3: + self.func.emit({}) + break + else: + continue + if out == 3 or out == b"": + if self.times == 3: + break + count += 1 + if count == 3: + count = 0 + if out == 4: + self.func.emit(ERRPR_CODE_DEFINE.ReceiedError()) + else: + print() + break + continue + else: + count = 0 + except Exception: + print("upper exception") + else: + try: + print("jump forward") + while True: + out = self.func.read(count) + if out == b"": + self.func.emit(ERRPR_CODE_DEFINE.ReceiedError()) + break + continue + imagedata = out[0] + if imagedata == b"\x05": + self.func.emit(INFORMATION.UnsupportedImage()) + break + continue + if imagedata == b"\x15": + self.func.emit(INFORMATION.NoneImage()) + break + continue + if out[1] == False: + start_index = imagedata.find(b"BM6") + self.func.emit(imagedata[start_index:], False) + continue + (imagedata, all_code) = imagedata + self.func.emit({}) + self.func.emit({}) + self.func.emit({}) # remove {} and this works + break + except Exception: + pass diff --git a/uncompyle6/parsers/parse36.py b/uncompyle6/parsers/parse36.py index a76a3749..09980695 100644 --- a/uncompyle6/parsers/parse36.py +++ 
b/uncompyle6/parsers/parse36.py @@ -53,6 +53,8 @@ class Python36Parser(Python35Parser): for_block ::= l_stmts_opt come_from_loops JUMP_BACK come_from_loops ::= COME_FROM_LOOP* + whilestmt ::= SETUP_LOOP testexpr l_stmts_opt + JUMP_BACK come_froms POP_BLOCK whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK come_froms POP_BLOCK COME_FROM_LOOP whilestmt ::= SETUP_LOOP testexpr l_stmts_opt From 9c6f2ee838d492babb54b0719506d393cd2bcd67 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 13 Jul 2024 22:29:58 -0400 Subject: [PATCH 4/9] Improve 3.4 ifelse inside a lambda --- uncompyle6/parsers/parse34.py | 4 ++++ uncompyle6/parsers/parse35.py | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/uncompyle6/parsers/parse34.py b/uncompyle6/parsers/parse34.py index caae5e2e..53cdd8ea 100644 --- a/uncompyle6/parsers/parse34.py +++ b/uncompyle6/parsers/parse34.py @@ -53,6 +53,10 @@ class Python34Parser(Python33Parser): _ifstmts_jump ::= c_stmts_opt JUMP_ABSOLUTE JUMP_FORWARD COME_FROM genexpr_func ::= LOAD_ARG _come_froms FOR_ITER store comp_iter JUMP_BACK + + if_exp_lambda ::= expr jmp_false expr return_if_lambda come_froms return_stmt_lambda LAMBDA_MARKER + + return_if_stmt ::= return_expr RETURN_END_IF POP_BLOCK """ def customize_grammar_rules(self, tokens, customize): diff --git a/uncompyle6/parsers/parse35.py b/uncompyle6/parsers/parse35.py index e5bc9b10..929d920d 100644 --- a/uncompyle6/parsers/parse35.py +++ b/uncompyle6/parsers/parse35.py @@ -108,7 +108,6 @@ class Python35Parser(Python34Parser): # Python 3.5+ does jump optimization # In <.3.5 the below is a JUMP_FORWARD to a JUMP_ABSOLUTE. 
- return_if_stmt ::= return_expr RETURN_END_IF POP_BLOCK return_if_lambda ::= RETURN_END_IF_LAMBDA COME_FROM return ::= return_expr RETURN_END_IF From 04da2fb8dfa20f119c2b84c3f487f168a7b8564e Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 13 Jul 2024 22:29:58 -0400 Subject: [PATCH 5/9] Improve 3.4 ifelse inside a lambda Fixes #426 --- test/bytecode_3.3/03_ifelse_in_lambda.pyc | Bin 0 -> 577 bytes test/bytecode_3.4/03_ifelse_in_lambda.pyc | Bin 0 -> 414 bytes test/bytecode_3.5/03_ifelse_in_lambda.pyc | Bin 0 -> 414 bytes test/simple_source/bug34/03_ifelse_in_lambda.py | 4 ++++ uncompyle6/parsers/parse34.py | 4 ++++ uncompyle6/parsers/parse35.py | 1 - 6 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 test/bytecode_3.3/03_ifelse_in_lambda.pyc create mode 100644 test/bytecode_3.4/03_ifelse_in_lambda.pyc create mode 100644 test/bytecode_3.5/03_ifelse_in_lambda.pyc create mode 100644 test/simple_source/bug34/03_ifelse_in_lambda.py diff --git a/test/bytecode_3.3/03_ifelse_in_lambda.pyc b/test/bytecode_3.3/03_ifelse_in_lambda.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7628206f9671abb45bfe1574cabaff24d936783a GIT binary patch literal 577 zcmb7AO-sZu5S?sAMHB>a4||Zkh{eOMP>-ef3wr6zOX)|t6w;m88omjWhXHUzp9#cNjW{vc*lf4F* zy6KcmjkbLum$_X9x67L#tt+XFOzSpPS(BI9qIqGG!tp y?nLA~w!q!x_X4|m^JBn;CpD}=VqdryMll{|`=M+(GH+KSH)Nwrt7PQdSY!j+266WQ literal 0 HcmV?d00001 diff --git a/test/bytecode_3.4/03_ifelse_in_lambda.pyc b/test/bytecode_3.4/03_ifelse_in_lambda.pyc new file mode 100644 index 0000000000000000000000000000000000000000..01df985d072edbf0d3d7ac709b8d9c9d1bd1be54 GIT binary patch literal 414 zcmYLF%SyvQ6ur}lHHd;BcF~1&BN7*_P?sV21=(cRO~_+X0y9mRNw5|-qWD<*75-AT z?)?B)o=K&>%sKaP=bXzhKgTCW_w&!|T@9e_0CNh;aTvW2Eg%LIc|5~(R~*X1 zj6pDd5WNu<%zAT9uS5(RBF_mIJWX`Q{*$&lz!_jJK#xRvOieGa=bdPT70;ZaKF`C= z$k$?7nnvi!rMWVbtWv6Mg3b|Zk$J2;#Go#vfMjcsvr3fsqTg#%s#=QJRISd$ENz$b z+u6-LE(;-zh|5(h6P4!4w0`JZf2#xgHUrS+yr`)LJvV5 
hN5cN0u_Izbrd{+99^3b!t?DDLud2M2BIt8A_6KC@Qds~1 literal 0 HcmV?d00001 diff --git a/test/bytecode_3.5/03_ifelse_in_lambda.pyc b/test/bytecode_3.5/03_ifelse_in_lambda.pyc new file mode 100644 index 0000000000000000000000000000000000000000..33b8757f6958d6caf66ebe5534b3a39cf98d75f9 GIT binary patch literal 414 zcmYLF%SyvQ6ur}lwFm`4tk8vYBN7*_5SJnN1=(cRO~_+X0+UXdNw5|-qWD<*75-AT z{)H>gq|#pIoO`%)&SjX3@%ZGy`zoG^=m*%>0O}qvr=T2%(JRpsVnC70Gfelzp)AZ4 z1mh>sJ5j-`H|O+5#IPZ9op8z1L}% zxb@<)5YmXaT*or0(ma{hkKM@M>A=3t0JJ?Xl$!feWr;KaMDGps5$+D;GHnxj2k literal 0 HcmV?d00001 diff --git a/test/simple_source/bug34/03_ifelse_in_lambda.py b/test/simple_source/bug34/03_ifelse_in_lambda.py new file mode 100644 index 00000000..b498a009 --- /dev/null +++ b/test/simple_source/bug34/03_ifelse_in_lambda.py @@ -0,0 +1,4 @@ +# Next line is 1164 +def foo(): + name = "bar" + lambda x: compile(x, "" % name, "exec") if x else None diff --git a/uncompyle6/parsers/parse34.py b/uncompyle6/parsers/parse34.py index caae5e2e..a381448a 100644 --- a/uncompyle6/parsers/parse34.py +++ b/uncompyle6/parsers/parse34.py @@ -53,6 +53,10 @@ class Python34Parser(Python33Parser): _ifstmts_jump ::= c_stmts_opt JUMP_ABSOLUTE JUMP_FORWARD COME_FROM genexpr_func ::= LOAD_ARG _come_froms FOR_ITER store comp_iter JUMP_BACK + + if_exp_lambda ::= expr jmp_false expr return_if_lambda return_stmt_lambda LAMBDA_MARKER + return_if_lambda ::= RETURN_END_IF_LAMBDA come_froms + return_if_stmt ::= return_expr RETURN_END_IF POP_BLOCK """ def customize_grammar_rules(self, tokens, customize): diff --git a/uncompyle6/parsers/parse35.py b/uncompyle6/parsers/parse35.py index e5bc9b10..929d920d 100644 --- a/uncompyle6/parsers/parse35.py +++ b/uncompyle6/parsers/parse35.py @@ -108,7 +108,6 @@ class Python35Parser(Python34Parser): # Python 3.5+ does jump optimization # In <.3.5 the below is a JUMP_FORWARD to a JUMP_ABSOLUTE. 
- return_if_stmt ::= return_expr RETURN_END_IF POP_BLOCK return_if_lambda ::= RETURN_END_IF_LAMBDA COME_FROM return ::= return_expr RETURN_END_IF From d731d32c11729182573d06dbb389aaf3e1d7a4b3 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 14 Jul 2024 14:45:25 -0400 Subject: [PATCH 6/9] Simplify BREAK_LOOP detection... by making more us of linestart. At least for now... --- uncompyle6/scanners/scanner2.py | 3 +- uncompyle6/scanners/scanner26.py | 3 +- uncompyle6/scanners/scanner3.py | 3 +- uncompyle6/scanners/scanner37base.py | 86 ++++++++++++++++++++++------ uncompyle6/scanners/scanner38.py | 49 +++++++--------- 5 files changed, 94 insertions(+), 50 deletions(-) diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 265bda5d..03f905a6 100644 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -495,7 +495,8 @@ class Scanner2(Scanner): if show_asm in ("both", "after"): print("\n# ---- tokenization:") - for t in new_tokens: + # FIXME: t.format() is changing tokens! + for t in new_tokens.copy(): print(t.format(line_prefix="")) print() return new_tokens, customize diff --git a/uncompyle6/scanners/scanner26.py b/uncompyle6/scanners/scanner26.py index c37b7fde..797db2f4 100755 --- a/uncompyle6/scanners/scanner26.py +++ b/uncompyle6/scanners/scanner26.py @@ -353,7 +353,8 @@ class Scanner26(scan.Scanner2): if show_asm in ("both", "after"): print("\n# ---- tokenization:") - for t in tokens: + # FIXME: t.format() is changing tokens! + for t in tokens.copy(): print(t.format(line_prefix="")) print() return tokens, customize diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 51f43ac0..738cf650 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -797,7 +797,8 @@ class Scanner3(Scanner): if show_asm in ("both", "after"): print("\n# ---- tokenization:") - for t in new_tokens: + # FIXME: t.format() is changing tokens! 
+ for t in new_tokens.copy(): print(t.format(line_prefix="")) print() return new_tokens, customize diff --git a/uncompyle6/scanners/scanner37base.py b/uncompyle6/scanners/scanner37base.py index 6c3e22d9..cd5d5107 100644 --- a/uncompyle6/scanners/scanner37base.py +++ b/uncompyle6/scanners/scanner37base.py @@ -228,13 +228,13 @@ class Scanner37Base(Scanner): if show_asm in ("both", "before"): print("\n# ---- disassembly:") - self.insts = bytecode.disassemble_bytes( + bytecode.disassemble_bytes( co.co_code, varnames=co.co_varnames, names=co.co_names, constants=co.co_consts, cells=bytecode._cell_names, - linestarts=bytecode._linestarts, + line_starts=bytecode._linestarts, asm_format="extended", filename=co.co_filename, show_source=True, @@ -481,12 +481,17 @@ class Scanner37Base(Scanner): next_opname = self.insts[i + 1].opname # 'Continue's include jumps to loops that are not - # and the end of a block which follow with POP_BLOCK and COME_FROM_LOOP. - # If the JUMP_ABSOLUTE is to a FOR_ITER and it is followed by another JUMP_FORWARD - # then we'll take it as a "continue". - is_continue = ( - self.insts[self.offset2inst_index[target]].opname == "FOR_ITER" - and self.insts[i + 1].opname == "JUMP_FORWARD" + # and the end of a block which follow with + # POP_BLOCK and COME_FROM_LOOP. If the + # JUMP_ABSOLUTE is to a FOR_ITER, and it is + # followed by another JUMP_FORWARD then we'll take + # it as a "continue". + next_inst = self.insts[i + 1] + is_continue = self.insts[ + self.offset2inst_index[target] + ].opname == "FOR_ITER" and next_inst.opname in ( + "JUMP_FORWARD", + "JUMP_ABSOLUTE", ) if self.version < (3, 8) and ( @@ -501,21 +506,65 @@ class Scanner37Base(Scanner): ): opname = "CONTINUE" else: + # "continue" versus "break_loop" dectction is more complicated + # because "continue" to an outer loop is really a "break loop" opname = "JUMP_BACK" + # FIXME: this is a hack to catch stuff like: # if x: continue # the "continue" is not on a new line. 
- # There are other situations where we don't catch - # CONTINUE as well. - if tokens[-1].kind == "JUMP_BACK" and tokens[-1].attr <= argval: + # + # Another situation is where we have + # for method in methods: + # for B in method: + # if c: + # return + # break # A "continue" but not the innermost one + if tokens[-1].kind == "JUMP_LOOP" and tokens[-1].attr <= argval: if tokens[-2].kind == "BREAK_LOOP": del tokens[-1] + j -= 1 else: - # intern is used because we are changing the *previous* token - tokens[-1].kind = sys.intern("CONTINUE") - if last_op_was_break and opname == "CONTINUE": - last_op_was_break = False - continue + # "intern" is used because we are + # changing the *previous* token. A + # POP_TOP suggests a "break" rather + # than a "continue"? + if tokens[-2] == "POP_TOP" and ( + is_continue and next_inst.argval != tokens[-1].attr + ): + tokens[-1].kind = sys.intern("BREAK_LOOP") + else: + tokens[-1].kind = sys.intern("CONTINUE") + last_continue = tokens[-1] + pass + pass + pass + # elif ( + # last_continue is not None + # and tokens[-1].kind == "JUMP_LOOP" + # and last_continue.attr <= tokens[-1].attr + # and last_continue.offset > tokens[-1].attr + # ): + # # Handle mis-characterized "CONTINUE" + # # We have a situation like: + # # loop ... for or while) + # # loop + # # if ...: # code below starts here + # # break # not continue + # # + # # POP_JUMP_IF_FALSE_LOOP # to outer loop + # # JUMP_LOOP # to inner loop + # # ... + # # JUMP_LOOP # to outer loop + # tokens[-2].kind = sys.intern("BREAK_LOOP") + # pass + + # if last_op_was_break and opname == "CONTINUE": + # last_op_was_break = False + # continue + pass + else: + opname = "JUMP_FORWARD" elif inst.offset in self.load_asserts: opname = "LOAD_ASSERT" @@ -538,9 +587,10 @@ class Scanner37Base(Scanner): ) pass - if show_asm in ("both", "after"): + if show_asm in ("both", "after") and self.version < (3, 8): print("\n# ---- tokenization:") - for t in tokens: + # FIXME: t.format() is changing tokens! 
+ for t in tokens.copy(): print(t.format(line_prefix="")) print() return tokens, customize diff --git a/uncompyle6/scanners/scanner38.py b/uncompyle6/scanners/scanner38.py index 98fb090d..022f38fa 100644 --- a/uncompyle6/scanners/scanner38.py +++ b/uncompyle6/scanners/scanner38.py @@ -24,13 +24,13 @@ scanner routine for Python 3.7 and up. from typing import Dict, Tuple -from uncompyle6.scanners.tok import off2int -from uncompyle6.scanners.scanner37 import Scanner37 -from uncompyle6.scanners.scanner37base import Scanner37Base - # bytecode verification, verify(), uses JUMP_OPs from here from xdis.opcodes import opcode_38 as opc +from uncompyle6.scanners.scanner37 import Scanner37 +from uncompyle6.scanners.scanner37base import Scanner37Base +from uncompyle6.scanners.tok import off2int + # bytecode verification, verify(), uses JUMP_OPS from here JUMP_OPs = opc.JUMP_OPS @@ -121,35 +121,26 @@ class Scanner38(Scanner37): new_tokens.append(token) continue - # We also want to avoid confusing BREAK_LOOPS with parts of the - # grammar rules for loops. (Perhaps we should change the grammar.) - # Try to find an adjacent JUMP_BACK which is part of the normal loop end. + j = i + while tokens[j - 1] in ("POP_TOP", "POP_BLOCK", "POP_EXCEPT"): + j -= 1 + if tokens[j].linestart: + break + token_with_linestart = tokens[j] - if i + 1 < len(tokens) and tokens[i + 1] == "JUMP_BACK": - # Sometimes the jump back is after the "break" instruction.. - jump_back_index = i + 1 - else: - # and sometimes, because of jump-to-jump optimization, it is before the - # jump target instruction. - jump_back_index = self.offset2tok_index[jump_target] - 1 - while tokens[jump_back_index].kind.startswith("COME_FROM_"): - jump_back_index -= 1 - pass - pass - jump_back_token = tokens[jump_back_index] - - # Is this a forward jump not next to a JUMP_BACK ? ... 
- break_loop = token.linestart and jump_back_token != "JUMP_BACK" - - # or if there is looping jump back, then that loop - # should start before where the "break" instruction sits. - if break_loop or ( - jump_back_token == "JUMP_BACK" - and jump_back_token.attr < token.off2int() - ): + if token_with_linestart.linestart: token.kind = "BREAK_LOOP" + pass new_tokens.append(token) + + if show_asm in ("both", "after"): + print("\n# ---- tokenization:") + # FIXME: t.format() is changing tokens! + for t in new_tokens.copy(): + print(t.format(line_prefix="")) + print() + return new_tokens, customize From 164437016513d7f401c0eaf728ab62cf6d479bf3 Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 15 Jul 2024 09:46:48 -0400 Subject: [PATCH 7/9] Handle long dict litereals in 3.4- better... We detect them in tokenization and turn this into pseudo instructions --- test/bytecode_3.3/03_map.pyc | Bin 746 -> 746 bytes test/bytecode_3.4/03_map.pyc | Bin 632 -> 632 bytes uncompyle6/scanners/scanner3.py | 165 +++++++++++++++++++++++++------ uncompyle6/scanners/scanner38.py | 2 +- uncompyle6/scanners/tok.py | 2 +- 5 files changed, 139 insertions(+), 30 deletions(-) diff --git a/test/bytecode_3.3/03_map.pyc b/test/bytecode_3.3/03_map.pyc index 9712c1eaa8444333271ba545d686fc6aa5e39583..ef181d74d0449df07af52f790cdf2bf3d8e3a53e 100644 GIT binary patch delta 17 YcmaFG`ihlf9uF^ diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 69ca691c..f8cbf0a1 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -297,9 +297,8 @@ class Scanner3(Scanner): ) return new_tokens - def bound_map_from_inst( - self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int - ): + # FIXME: consider moving to scanner35 + def bound_map_from_inst_35(self, insts: list, next_tokens: list, t: Token, i: int): """ Try to a sequence of instruction that ends with a BUILD_MAP into a sequence that can be parsed much faster, but inserting the @@ -314,25 
+313,19 @@ class Scanner3(Scanner): if count < 5: return None - if self.version >= (3, 5): - # Newer Python BUILD_MAP argument's count is a - # key and value pair so it is multiplied by two. - collection_start = i - (count * 2) - assert (count * 2) <= i + # Newer Python BUILD_MAP argument's count is a + # key and value pair so it is multiplied by two. + collection_start = i - (count * 2) + assert (count * 2) <= i - for j in range(collection_start, i, 2): - if insts[j].opname not in ("LOAD_CONST",): - return None - if insts[j + 1].opname not in ("LOAD_CONST",): - return None + for j in range(collection_start, i, 2): + if insts[j].opname not in ("LOAD_CONST",): + return None + if insts[j + 1].opname not in ("LOAD_CONST",): + return None - collection_start = i - (2 * count) - collection_enum = CONST_COLLECTIONS.index("CONST_MAP") - # else: Older Python count is sum of all key and value pairs - # Each pair is added individually like: - # LOAD_CONST ("Max-Age") - # LOAD_CONST ("max-age") - # STORE_MAP + collection_start = i - (2 * count) + collection_enum = CONST_COLLECTIONS.index("CONST_MAP") # If we get here, all instructions before tokens[i] are LOAD_CONST and # we can replace add a boundary marker and change LOAD_CONST to @@ -345,7 +338,7 @@ class Scanner3(Scanner): attr=collection_enum, pattr="CONST_MAP", offset="%s_0" % start_offset, - linestart=False, + linestart=insts[collection_start].starts_line, has_arg=True, has_extended_arg=False, opc=self.opc, @@ -363,6 +356,7 @@ class Scanner3(Scanner): has_arg=True, has_extended_arg=False, opc=self.opc, + optype="pseudo", ) ) new_tokens.append( @@ -375,7 +369,7 @@ class Scanner3(Scanner): has_arg=True, has_extended_arg=False, opc=self.opc, - optype=insts[j + 1].optype, + optype="pseudo", ) ) new_tokens.append( @@ -388,7 +382,93 @@ class Scanner3(Scanner): has_arg=t.has_arg, has_extended_arg=False, opc=t.opc, - optype=t.optype, + optype="pseudo", + ) + ) + return new_tokens + + def bound_map_from_inst_pre35( + self, 
insts: list, next_tokens: list, t: Token, i: int + ): + """ + Try to a sequence of instruction that ends with a BUILD_MAP into + a sequence that can be parsed much faster, but inserting the + token boundary at the beginning of the sequence. + """ + count = t.attr + assert isinstance(count, int) + + # For small lists don't bother + if count < 10: + return None + + # Older Python BUILD_MAP argument's count is a + # key and value pair and STORE_MAP. So it is multiplied by three. + collection_end = i + 1 + count * 3 + + for j in range(i + 1, collection_end, 3): + if insts[j].opname not in ("LOAD_CONST",): + return None + if insts[j + 1].opname not in ("LOAD_CONST",): + return None + if insts[j + 2].opname not in ("STORE_MAP",): + return None + + collection_enum = CONST_COLLECTIONS.index("CONST_MAP") + + new_tokens = next_tokens[:i] + start_offset = insts[i].offset + new_tokens.append( + Token( + opname="COLLECTION_START", + attr=collection_enum, + pattr="CONST_MAP", + offset="%s_0" % start_offset, + linestart=insts[i].starts_line, + has_arg=True, + has_extended_arg=False, + opc=self.opc, + optype="pseudo", + ) + ) + for j in range(i + 1, collection_end, 3): + new_tokens.append( + Token( + opname="ADD_KEY", + attr=insts[j + 1].argval, + pattr=insts[j + 1].argrepr, + offset=insts[j + 1].offset, + linestart=insts[j + 1].starts_line, + has_arg=True, + has_extended_arg=False, + opc=self.opc, + optype="pseudo", + ) + ) + new_tokens.append( + Token( + opname="ADD_VALUE", + attr=insts[j].argval, + pattr=insts[j].argrepr, + offset=insts[j].offset, + linestart=insts[j].starts_line, + has_arg=True, + has_extended_arg=False, + opc=self.opc, + optype="pseudo", + ) + ) + new_tokens.append( + Token( + opname="BUILD_DICT_OLDER", + attr=t.attr, + pattr=t.pattr, + offset=t.offset, + linestart=t.linestart, + has_arg=t.has_arg, + has_extended_arg=False, + opc=t.opc, + optype="pseudo", ) ) return new_tokens @@ -494,8 +574,17 @@ class Scanner3(Scanner): last_op_was_break = False new_tokens 
= [] + skip_end_offset = None for i, inst in enumerate(self.insts): + + # BUILD_MAP for < 3.5 can skip *forward* in instructions and + # replace them. So we use the below to get up to the position + # scanned and replaced forward + if skip_end_offset and inst.offset <= skip_end_offset: + continue + skip_end_offset = None + opname = inst.opname argval = inst.argval pattr = inst.argrepr @@ -529,17 +618,37 @@ class Scanner3(Scanner): if try_tokens is not None: new_tokens = try_tokens continue - elif opname in ("BUILD_MAP",) and self.version >= (3, 5): - try_tokens = self.bound_map_from_inst( + elif opname in ("BUILD_MAP",): + bound_map_from_insts_fn = ( + self.bound_map_from_inst_35 + if self.version >= (3, 5) + else self.bound_map_from_inst_pre35 + ) + try_tokens = bound_map_from_insts_fn( self.insts, new_tokens, - inst, t, i, ) if try_tokens is not None: - new_tokens = try_tokens - continue + if self.version < (3, 5): + assert try_tokens[-1] == "BUILD_DICT_OLDER" + prev_offset = inst.offset + for j in range(i, len(self.insts)): + if self.insts[j].opname == "STORE_NAME": + new_tokens = try_tokens + skip_end_offset = prev_offset + # Set a hacky sentinal to indicate skipping to the + # next instruction + opname = "EXTENDED_ARG" + break + prev_offset = self.insts[j].offset + pass + pass + else: + new_tokens = try_tokens + continue + pass argval = inst.argval op = inst.opcode diff --git a/uncompyle6/scanners/scanner38.py b/uncompyle6/scanners/scanner38.py index 062ab668..55873c46 100644 --- a/uncompyle6/scanners/scanner38.py +++ b/uncompyle6/scanners/scanner38.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022 by Rocky Bernstein +# Copyright (c) 2019-2022, 2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/uncompyle6/scanners/tok.py b/uncompyle6/scanners/tok.py index efa93a8a..2cb6d587 100644 --- a/uncompyle6/scanners/tok.py +++ 
b/uncompyle6/scanners/tok.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2021, 2023 by Rocky Bernstein +# Copyright (c) 2016-2021, 2023-2024 by Rocky Bernstein # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock # From 81922bdb23fb3f9f2dc3ac35e09e5c9cc7e834c9 Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 15 Jul 2024 10:01:32 -0400 Subject: [PATCH 8/9] Handle long dict litereals in 3.4- better... Bracket in pseudo op COLLECTION_START ... BUILD_xx --- uncompyle6/scanners/scanner3.py | 166 +++++++++++++++++++++++++------ uncompyle6/scanners/scanner38.py | 2 +- 2 files changed, 139 insertions(+), 29 deletions(-) diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 738cf650..41d51efa 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -216,7 +216,7 @@ class Scanner3(Scanner): collection_type: str, ) -> Optional[list]: """ - Try to a replace sequence of instruction that ends with a + Try to replace a sequence of instruction that ends with a BUILD_xxx with a sequence that can be parsed much faster, but inserting the token boundary at the beginning of the sequence. """ @@ -298,8 +298,8 @@ class Scanner3(Scanner): ) return new_tokens - def bound_map_from_inst( - self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int + def bound_map_from_inst_35( + self, insts: list, next_tokens: list, t: Token, i: int ) -> Optional[list]: """ Try to a sequence of instruction that ends with a BUILD_MAP into @@ -315,25 +315,19 @@ class Scanner3(Scanner): if count < 5: return None - if self.version >= (3, 5): - # Newer Python BUILD_MAP argument's count is a - # key and value pair so it is multiplied by two. - collection_start = i - (count * 2) - assert (count * 2) <= i + # Newer Python BUILD_MAP argument's count is a + # key and value pair so it is multiplied by two. 
+ collection_start = i - (count * 2) + assert (count * 2) <= i - for j in range(collection_start, i, 2): - if insts[j].opname not in ("LOAD_CONST",): - return None - if insts[j + 1].opname not in ("LOAD_CONST",): - return None + for j in range(collection_start, i, 2): + if insts[j].opname not in ("LOAD_CONST",): + return None + if insts[j + 1].opname not in ("LOAD_CONST",): + return None - collection_start = i - (2 * count) - collection_enum = CONST_COLLECTIONS.index("CONST_MAP") - # else: Older Python count is sum of all key and value pairs - # Each pair is added individually like: - # LOAD_CONST ("Max-Age") - # LOAD_CONST ("max-age") - # STORE_MAP + collection_start = i - (2 * count) + collection_enum = CONST_COLLECTIONS.index("CONST_MAP") # If we get here, all instructions before tokens[i] are LOAD_CONST and # we can replace add a boundary marker and change LOAD_CONST to @@ -346,7 +340,7 @@ class Scanner3(Scanner): attr=collection_enum, pattr="CONST_MAP", offset=f"{start_offset}_0", - linestart=False, + linestart=insts[collection_start].starts_line, has_arg=True, has_extended_arg=False, opc=self.opc, @@ -364,6 +358,7 @@ class Scanner3(Scanner): has_arg=True, has_extended_arg=False, opc=self.opc, + optype="pseudo", ) ) new_tokens.append( @@ -376,7 +371,7 @@ class Scanner3(Scanner): has_arg=True, has_extended_arg=False, opc=self.opc, - optype=insts[j + 1].optype, + optype="pseudo", ) ) new_tokens.append( @@ -389,7 +384,93 @@ class Scanner3(Scanner): has_arg=t.has_arg, has_extended_arg=False, opc=t.opc, - optype=t.optype, + optype="pseudo", + ) + ) + return new_tokens + + def bound_map_from_inst_pre35( + self, insts: list, next_tokens: list, t: Token, i: int + ): + """ + Try to a sequence of instruction that ends with a BUILD_MAP into + a sequence that can be parsed much faster, but inserting the + token boundary at the beginning of the sequence. 
+ """ + count = t.attr + assert isinstance(count, int) + + # For small lists don't bother + if count < 10: + return None + + # Older Python BUILD_MAP argument's count is a + # key and value pair and STORE_MAP. So it is multiplied by three. + collection_end = i + 1 + count * 3 + + for j in range(i + 1, collection_end, 3): + if insts[j].opname not in ("LOAD_CONST",): + return None + if insts[j + 1].opname not in ("LOAD_CONST",): + return None + if insts[j + 2].opname not in ("STORE_MAP",): + return None + + collection_enum = CONST_COLLECTIONS.index("CONST_MAP") + + new_tokens = next_tokens[:i] + start_offset = insts[i].offset + new_tokens.append( + Token( + opname="COLLECTION_START", + attr=collection_enum, + pattr="CONST_MAP", + offset=f"{start_offset}_0", + linestart=insts[i].starts_line, + has_arg=True, + has_extended_arg=False, + opc=self.opc, + optype="pseudo", + ) + ) + for j in range(i + 1, collection_end, 3): + new_tokens.append( + Token( + opname="ADD_KEY", + attr=insts[j + 1].argval, + pattr=insts[j + 1].argrepr, + offset=insts[j + 1].offset, + linestart=insts[j + 1].starts_line, + has_arg=True, + has_extended_arg=False, + opc=self.opc, + optype="pseudo", + ) + ) + new_tokens.append( + Token( + opname="ADD_VALUE", + attr=insts[j].argval, + pattr=insts[j].argrepr, + offset=insts[j].offset, + linestart=insts[j].starts_line, + has_arg=True, + has_extended_arg=False, + opc=self.opc, + optype="pseudo", + ) + ) + new_tokens.append( + Token( + opname="BUILD_DICT_OLDER", + attr=t.attr, + pattr=t.pattr, + offset=t.offset, + linestart=t.linestart, + has_arg=t.has_arg, + has_extended_arg=False, + opc=t.opc, + optype="pseudo", ) ) return new_tokens @@ -497,8 +578,16 @@ class Scanner3(Scanner): last_op_was_break = False new_tokens = [] + skip_end_offset = None for i, inst in enumerate(self.insts): + # BUILD_MAP for < 3.5 can skip *forward* in instructions and + # replace them. 
So we use the below to get up to the position + # scanned and replaced forward + if skip_end_offset and inst.offset <= skip_end_offset: + continue + skip_end_offset = None + opname = inst.opname argval = inst.argval pattr = inst.argrepr @@ -532,17 +621,38 @@ class Scanner3(Scanner): if try_tokens is not None: new_tokens = try_tokens continue - elif opname in ("BUILD_MAP",) and self.version >= (3, 5): - try_tokens = self.bound_map_from_inst( + + elif opname in ("BUILD_MAP",): + bound_map_from_insts_fn = ( + self.bound_map_from_inst_35 + if self.version >= (3, 5) + else self.bound_map_from_inst_pre35 + ) + try_tokens = bound_map_from_insts_fn( self.insts, new_tokens, - inst, t, i, ) if try_tokens is not None: - new_tokens = try_tokens - continue + if self.version < (3, 5): + assert try_tokens[-1] == "BUILD_DICT_OLDER" + prev_offset = inst.offset + for j in range(i, len(self.insts)): + if self.insts[j].opname == "STORE_NAME": + new_tokens = try_tokens + skip_end_offset = prev_offset + # Set a hacky sentinel to indicate skipping to the + # next instruction + opname = "EXTENDED_ARG" + break + prev_offset = self.insts[j].offset + pass + pass + else: + new_tokens = try_tokens + continue + pass argval = inst.argval op = inst.opcode diff --git a/uncompyle6/scanners/scanner38.py b/uncompyle6/scanners/scanner38.py index 022f38fa..a5a0410d 100644 --- a/uncompyle6/scanners/scanner38.py +++ b/uncompyle6/scanners/scanner38.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022 by Rocky Bernstein +# Copyright (c) 2019-2022, 2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by From b7e1c51243a4907d23923d99fe029031dbe09559 Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 15 Jul 2024 10:06:30 -0400 Subject: [PATCH 9/9] Merge --- uncompyle6/scanners/scanner3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/uncompyle6/scanners/scanner3.py
b/uncompyle6/scanners/scanner3.py index f4cd8222..e613e875 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -299,7 +299,7 @@ class Scanner3(Scanner): def bound_map_from_inst_35( self, insts: list, next_tokens: list, t: Token, i: int - ) -> Optional[list]: + ): """ Try to a sequence of instruction that ends with a BUILD_MAP into a sequence that can be parsed much faster, but inserting the @@ -424,7 +424,7 @@ class Scanner3(Scanner): opname="COLLECTION_START", attr=collection_enum, pattr="CONST_MAP", - offset=f"{start_offset}_0", + offset="%s_0" % start_offset, linestart=insts[i].starts_line, has_arg=True, has_extended_arg=False,