From c6ddefcef514cd94c000845805db508d85857f49 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 8 Dec 2019 13:12:05 -0500 Subject: [PATCH] Merging in recent 3.7 and 3.8 improvements from decompyle6 This rebases 3.7, 3.8 ...decompilation off of 3.7ish rather than a 3.4 base. Add more 3.7 and 3.8 tests --- pytest/test_grammar.py | 16 +- ...ns.pyc => 15_mixed_expressions.pyc-notyet} | Bin test/bytecode_3.7_run/00_chained-compare.pyc | Bin 0 -> 231 bytes .../03_ifelse_chained_for.pyc | Bin 0 -> 562 bytes test/bytecode_3.7_run/06_while_return.pyc | Bin 0 -> 675 bytes .../bytecode_3.7_run/14_mixed_expressions.pyc | Bin 0 -> 851 bytes test/bytecode_3.8_run/00_chained-compare.pyc | Bin 231 -> 235 bytes test/bytecode_3.8_run/00_docstring.pyc | Bin 0 -> 2274 bytes test/bytecode_3.8_run/01_chained_compare.pyc | Bin 0 -> 783 bytes test/bytecode_3.8_run/01_extra_iter.pyc | Bin 0 -> 554 bytes test/bytecode_3.8_run/01_fstring.pyc | Bin 0 -> 1609 bytes test/bytecode_3.8_run/04_def_annotate.pyc | Bin 0 -> 9426 bytes test/bytecode_3.8_run/04_for_no_jump_back.pyc | Bin 0 -> 446 bytes .../bytecode_3.8_run/14_mixed_expressions.pyc | Bin 0 -> 855 bytes .../bug37/03_ifelse_chained_for.py | 24 + test/simple_source/bug38/01_named_expr.py | 5 + .../bug38/04_for_no_jump_back.py | 17 + .../expression/14_mixed_expressions.py | 53 + uncompyle6/parsers/parse37.py | 1239 +++++++++++++++- uncompyle6/parsers/parse37base.py | 1304 +++++++++++++++++ uncompyle6/parsers/parse38.py | 123 +- uncompyle6/scanners/scanner37.py | 34 +- uncompyle6/scanners/scanner37base.py | 1110 ++++++++++++++ uncompyle6/scanners/scanner38.py | 48 +- uncompyle6/scanners/tok.py | 7 + uncompyle6/semantics/consts.py | 25 +- uncompyle6/semantics/customize3.py | 4 +- uncompyle6/semantics/customize38.py | 17 +- uncompyle6/semantics/pysource.py | 15 +- uncompyle6/semantics/transform.py | 27 +- 30 files changed, 3971 insertions(+), 97 deletions(-) rename test/bytecode_2.7_run/{15_mixed_expressions.pyc => 
15_mixed_expressions.pyc-notyet} (100%) create mode 100644 test/bytecode_3.7_run/00_chained-compare.pyc create mode 100644 test/bytecode_3.7_run/03_ifelse_chained_for.pyc create mode 100644 test/bytecode_3.7_run/06_while_return.pyc create mode 100644 test/bytecode_3.7_run/14_mixed_expressions.pyc create mode 100644 test/bytecode_3.8_run/00_docstring.pyc create mode 100644 test/bytecode_3.8_run/01_chained_compare.pyc create mode 100644 test/bytecode_3.8_run/01_extra_iter.pyc create mode 100644 test/bytecode_3.8_run/01_fstring.pyc create mode 100644 test/bytecode_3.8_run/04_def_annotate.pyc create mode 100644 test/bytecode_3.8_run/04_for_no_jump_back.pyc create mode 100644 test/bytecode_3.8_run/14_mixed_expressions.pyc create mode 100644 test/simple_source/bug37/03_ifelse_chained_for.py create mode 100644 test/simple_source/bug38/01_named_expr.py create mode 100644 test/simple_source/bug38/04_for_no_jump_back.py create mode 100644 test/simple_source/expression/14_mixed_expressions.py create mode 100644 uncompyle6/parsers/parse37base.py create mode 100644 uncompyle6/scanners/scanner37base.py diff --git a/pytest/test_grammar.py b/pytest/test_grammar.py index e2f3d2e7..5248d8a1 100644 --- a/pytest/test_grammar.py +++ b/pytest/test_grammar.py @@ -20,8 +20,12 @@ def test_grammar(): (lhs, rhs, tokens, right_recursive, dup_rhs) = p.check_sets() # We have custom rules that create the below - expect_lhs = set(["pos_arg", "attribute"]) + expect_lhs = set(["pos_arg"]) + if PYTHON_VERSION < 3.8: + if PYTHON_VERSION < 3.7: + expect_lhs.add("attribute") + expect_lhs.add("get_iter") else: expect_lhs.add("async_with_as_stmt") @@ -31,7 +35,7 @@ def test_grammar(): expect_right_recursive = set([("designList", ("store", "DUP_TOP", "designList"))]) - if PYTHON_VERSION < 3.7: + if PYTHON_VERSION <= 3.7: unused_rhs.add("call") if PYTHON_VERSION > 2.6: @@ -50,9 +54,11 @@ def test_grammar(): ) ) if PYTHON_VERSION >= 3.0: - expect_lhs.add("annotate_arg") - expect_lhs.add("annotate_tuple") - 
unused_rhs.add("mkfunc_annotate") + if PYTHON_VERSION < 3.7: + expect_lhs.add("annotate_arg") + expect_lhs.add("annotate_tuple") + unused_rhs.add("mkfunc_annotate") + unused_rhs.add("dict_comp") unused_rhs.add("classdefdeco1") unused_rhs.add("tryelsestmtl") diff --git a/test/bytecode_2.7_run/15_mixed_expressions.pyc b/test/bytecode_2.7_run/15_mixed_expressions.pyc-notyet similarity index 100% rename from test/bytecode_2.7_run/15_mixed_expressions.pyc rename to test/bytecode_2.7_run/15_mixed_expressions.pyc-notyet diff --git a/test/bytecode_3.7_run/00_chained-compare.pyc b/test/bytecode_3.7_run/00_chained-compare.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e27d3021c42c4959a7aec77fdea5637c2f3b319c GIT binary patch literal 231 zcmX|5y{f`c3{Fl<5k>JC+&X9rE)F6p=-}XTw>lJULoM2#lG7rtzKySRR;T+4cQO^O zU&!|ZA0g|0zY7BH1&3Pn)rNmKVwSRICzO500R<667^w^hTyWr`ctZGfO7g695lSzG z3?pf^dQoOuI}c3p2YmI5a&Ygv7TMy>BH6*p7~(qD<%U`F6WEi=hyHw%7uA(wqZ>{r pI5pWc48kx@&d0o@be!m-Ix%;!Hr1O>cWKx3W6Y^&7H<#OzMmkm2{(ifeS(Z zz<<#nn60ZU`~g?qsakt3=P)@r+&Lth6B9*XOkaJTTsZ(A_TdjXKGd1|iG_gh0ggce zIpmTX!o47I=IGf=yhC<#lpk?de%-NQ*5mB%h?^%bMBZWj)`;&?Vd&ouecnxC;1@Xm zXniUkzKT76$n1r$oHE!n*fJOxj0`G+v9i4e z^0lBl=iNy9I_arEF131LZFOn6=6BjM($Wu3o1Iva-%iwG_r{cBKZu%IlWf=Q4qave z6j;F`>)$wzaMU=z$?Ok2RJKXRgq`sH&BCiI+g#b6d-Fe8Gr6I)RF|D3-c~A6|0RD; ZCC(ae?L47gB-aPG+bg`OGTN!|d;>f&c})NS literal 0 HcmV?d00001 diff --git a/test/bytecode_3.7_run/06_while_return.pyc b/test/bytecode_3.7_run/06_while_return.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7384dbf3ae6365834eb7798d5f224eabec81b950 GIT binary patch literal 675 zcma)4&1w`u5U%Q(*_~ao%Nj%kk3wKBi9Z_=8ARjHF$Y&r7GX)Y8y%9J3Dq+(k?~@D z86UuxnX4e|D|qs&SrRultLdulZ|d)>p1r}KPmud}e}4FFh<@qiQX(9lVf!yomMnj# zDOqC!SaxJXtjKmQV2_O7D$TbQ8rg1(c7dkX&=|fE&^Im0n&HB<(e?_7fT+)@BDOc7 zq(2YQfQs4}`vtb&gE~uVlC)+?&qtPPwtSoJ5Ys!#Z-!LTQD3$hcSiRUdQWvAkFEJ? 
zYBPb4C1elwmyAqHu&0EZ8&fC6%j`7sS&_dIDI^ISK4|!`;rTSqtHsQ_y!=7ZMZlje zpDj*~-OLx2%-zmm_3`2E&b>#o&z~}&;>t?$?UVCnkmXsK9hC5Nx$Pj-kD!SAY& z?wF86mL9OVwxXi;itZC!&c>iS2Ho-h(Y5O|MQO^4rUW=AMs%Q}@7%IkDXrKHul4(2 z6PVwCT5nkN*ZG(A^-`0*-A{s+Mff?EIp literal 0 HcmV?d00001 diff --git a/test/bytecode_3.7_run/14_mixed_expressions.pyc b/test/bytecode_3.7_run/14_mixed_expressions.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a67177d9cfabae9e0656ff80d1dc880944f374f4 GIT binary patch literal 851 zcmY+C&ui2`6vy98CYxBZaoxt+TD^HGXj?(>VClwcK~c71i)cVXai+_5e`Lw*mR%1b z)`Le8FN%MFf(QQq5B>?BbG0Bj2p&a*_03Gy>V&*}-{*bb%Y?~VxjY59KJI;gP_qEO zs${Y-hY)Z1fdFveqzw@W*d-k1%#+Rmx8~s_L{w1DTDCmbx1JJiM~s&Q;}AfL%1|-P z>;Qz#oqd9AOS6*6c`1N>!d>L;zx)W2L;g(9okBly^u5yg(Z~@F_ojiDk-gKs4)?Eu z=EsnI(moIOQxR+)xQFbEX2&Cjd-9M~ksWB}8+Hcta~k>YfBR4A{ue}6H4TjWv~l|_ ziWT$y#he-7RMR!(S8uy)yoTPJ&{g0S?b|Jgf7AF*X!@j}v^Md*oRC|_XS((GbFv8w zGr2m$dXDD?4J852hK)C`)hdULw*z>-a&YN!Q3A-UW_KsEy4&4H>^#b%@MAnO&65~} zIYr(4HXe_2^?UqZ597}{Lz>$diWw{{#LrspsG5sm!Obi=l9{(BQrS$pF}^d2{M+}p zu5Z-iyX$v0Z*0^v|B93%?PGmCO_MYu!_3}kC)<5&?qfQ74`~&r>F{e=Haoqxh-ETJ zABrV0?4?4g;Y$}T#+~L+@c59)#oj1$S2_tFw8g60uDpi9C9~9Hj8sTKX7Q?6O1nfe Usx(s>s;XALRxa^r6}gFj01CgtBLDyZ literal 0 HcmV?d00001 diff --git a/test/bytecode_3.8_run/00_chained-compare.pyc b/test/bytecode_3.8_run/00_chained-compare.pyc index 39d6db471fad0329e6bd61abdd5c03b398bcceae..0dec6d9fe4ec7edec44f2f7d164f40f79ad33f10 100644 GIT binary patch delta 29 fcmaFP_?l5Pl$V!_0SJn?^`lFG^kWcbqTn?EUXTVk delta 25 dcmaFO_?%JDlb4r^0SMwt^rB0E^y7)5*8olJ27&+p diff --git a/test/bytecode_3.8_run/00_docstring.pyc b/test/bytecode_3.8_run/00_docstring.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a64cdf185947483bb225c3bb835afea5246748cf GIT binary patch literal 2274 zcmbW2OOMk?5P)sx<;l#1HwUC$wTy&g6hdZrE@DQDP$Uj0NDN%iAj`Nq6GLK$?lw+_L@I2nk)Z;3siMXFry$VwB;&Sr@QN`PIdMD)z!Lz?aqxe5C2^< zj8A0bn}x=o_{z5^z<6MQ3D!Tx0~1P6Mr}a_s;Em)0~>W2mY|Nh0?V+1x(ch%KwX1F 
za2T}>N8l*xB{&AhQP;Wow_zIG(z2vwS<8x+RV`~;+FCAYStki6whZs&=d=AZ4%v7Z zgnW<;feUtyJ&)9YH66#s2ez983)Y#ef5kRnudGK(aVt zyx-@cVoA(g#a}45w_75=Yl3`=5{Vi0^s7RBm;eT)!xYWZ_M&KlViPurIWCA|y^(W4 z?iO|7Ef&4g^K|qV3Zh>|6x!}@(UZL>nvQVq+XZ;8>wZ6>Xm}cZ+tJVL=uT~xp{j<~x6at+<|Z4x9P+rCI+OR4-N}EGkCXo$4}ZFZZ-#rN0QV63 zab~y`LG~};{;?45-iN)9dmj#>UM)bSBH7BJlD#-;@D7VA?$5Nf+im@Ue?IMJR@Ty%J+*#uFoHtTcMqfab4bg?ey23 zYixuw`Yq<{WX2AD53=uDZ@l`<;XNdh(^8#0dW@x6vLsb90*-tv5FEk7VTxt^+HB6j z&{SKbF$L1h0kxQ$_4+#M@iT=-h@ED=B<6Zy`6L>TIkac%63bmK)1i{heh>{gY;;;I zP>PSn3XQ9jzFG1<3SJdt#1YJZ%ajpxjTw!yX?+pY*YlRAGpS!+B!Wu4prR_M)Cnpm rf{vz6ctNS8XU@Gg?CYZh(jniZdrCG@lr6htmvc|uuG_y=&4bTZWD=*t literal 0 HcmV?d00001 diff --git a/test/bytecode_3.8_run/01_chained_compare.pyc b/test/bytecode_3.8_run/01_chained_compare.pyc new file mode 100644 index 0000000000000000000000000000000000000000..51796abebd85d5d6fd563a23845e11986c92c1a9 GIT binary patch literal 783 zcmah{OK;Oa5T02(scX_AP*oKPBny>DzEBWITnZ`zE*!!kJS(cm@vK^7$2RNuKxG6X z{2Bg|ubliBxP+PQLj?}ZYIh!H;lXnS@Kc&6oOXNr~V;|P5v0d zOEK~jXCk)4`w8!A$OmBqE{9Sy8>CXKZsvS*4&;#9gav*>0%)5wcJADr#79w*YnsHxtc;AlU4Ck>{1ux-J+4qm8Nf|6ebzBI_*}$3C%DZ{114aS zcuI0hNSYvb5ptKDF*o1ROE~pwvRp2|bn=1)8mbnf)+weRtb20E*MDJ6S28bBAf z(8&1z04AUWw(b0VHs;Xa0-wUc@Uv>zZq&7tNhq^Tm^#K2kRLssqZ+feK%G%142{fz z8|`E|4>^;)7OBPLzd+-4&OHS>!0Tu(v)>80PO7{#@`sE<*LK#juo*@&E)*9g#2e^)X^wn=|W08gc literal 0 HcmV?d00001 diff --git a/test/bytecode_3.8_run/01_extra_iter.pyc b/test/bytecode_3.8_run/01_extra_iter.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af3c17b9c60874308746ee14259ad2f9f3e94bb8 GIT binary patch literal 554 zcmcJL%}N6?6or$_^oL?aT)A{1#e(hh2TRemD(cpaC{ie;PU19lrkN%gX-m6NU%)r0 zOJBp4FW_djzC~Bwh)8kog>$&~Lhd2oRW_SJP#-t%oi~iorx{i&0mCs!?SWC$Mif)x zkv3+STGW0*3@r#vyx=qRhzCUNYU=^hjiKmP+JUbXI&%3U@Fh+VFkEQXUf)9|^{hFmLU z($io`M9>;Ynqi4^7kRr)Myj1;0 zGL36AxFOShlj&}VVh1SUjOQdn|B}p|(SgNnF(z%4$Hl~XUgjBznR&kRwvTvsK+lU}_5x&~ zLd2cxi1#q!?=0f3@PZOAi8($d=J~jIUzVOuoZpmV*|`=5mK?{p4=`?AEb>WF<+tP{ 
zo_Gm&F5_xvWRMdd?$H+zf0gCD$ID__mc>VBOwD^$uj-zhvItV^X`X;XuQowuEe_#cr5L)r*2wBE7WW#fj>Mo!GksFSWgD?{NJ= zI`^i}!7G>QVbwbvx^RZMRyC~RRme8}Ygp6JxP;BrZTCYziJ~B`U&7KQUB=Y4G{3g_ zeYH{DNz1Rz*Q%vPO+!n=t_EL&(C{6n>ms&ru3Ec(N=vTJwPL^9jVY>1ZtUP^z2u^? zoD+(pIk{_egIXVhk`|{)@ zwTDbPg|?B>Gkj)$nZq)vwfgY_w^keC5*B=OzR@x~z5wzFcg&C+Q^Xy%e8sE?IPmUy?fE5Tp8x;KnJd6w0`eGlJd31*>37N}aBSWn$tVoi&-4d|MQEDe zJTjTjh$`!yD71`}y_Bk0D7AMN+HJ~P=3jyBM%bar^mc39sCBEJ>H zTgZ%DQ(Bv^4RO)zY19p|8N1z;fr#HO;EeKoYSNdH5MV&@&&4wK6}9L;>|Zs9`t*~x zkZ4`^(ZV*Cz1?eRHYDAtadY%`789?pzry;kx>=Ucn6VbmRyg&7a- z22rP}qJD@dn32hvQl8;nP2iBrpm#R*Bhe4!mccyUM#2yo2GfX(Lnon}E5aDeKoN?} iMg0U6sj<(XozWt5GVe*05X~#bjLow!-g5GeV53P7e}DGxF^g7)8#* zD4SHdD%+%3WUF70O@2hS>20dw(mFd9NxtuN_q;S31Vwu4-nq|n&OP5b=ia`3r?+>b zh36mp|9UsrW?BDarSa)PWfUoxa4kz7`<8Es?c3r!^~4o#DN}G{A#Rn-Q&+Omg*I1y zm5lUsWVdWUYsU9#ts9?aWRvtPd4;SjwzO}SEg0P;Kd@c4K6PZ9Y)4BsYW+`b*&zd% zw?TdlY8z!I`gh6e=-(v2kzU#T)K13jk-bY@g&x_5ah|*Z>2mTWX7{>CqIMOv*HODB*HOEU+HTZt$Oou>fZ86^K9rlN-9&9KY9BS##^e_I zZlP}<`aYIVQ2PY6H&FXj#!(wbEr;4|xr5pr)ZSFh?T6%_0qxJ_3&?jso=}@;`w}u2 zVhkmrBO>Wq&4hJ2-O+{!Plc<{D`t))t(8kWN8%Dvo! 
zMBo9F;mWtTZX+VM?P!1Q7*(H zkHl0ru(yrShVd+xXoM?CC88cCRXP;-P#7CY1z<&NXvZ1*`Okmivtu;o?X8>LadJ-C zZ9y4Ywo5HKFa+oMa+qEWQvsGeL+!MZse#n6-V>aZ3V>j4t-L5Rvo&R_yc=krq!20X zew4@ZPF=7j^CKusmpFquiIFY4O_;j%*^x*4i8<=J>JXmVP9lj>4WOL274%6IYY)4b z6iHeN8J3+UcFuEbj9x=|Noqp_q9TbY#Q+G~vL@1Mi>%q~z#`M!>F z-j3|YlPNF$(>4jD^4g3-Xe~OS4VQMynPia=l!PL8x)SESLI%za-`RgnWnVy{vYXOyur=c=^h!oIYV*ZQRS!(_g z>pN5Mrm8_$@aC!}G+d>57g^*zcqBo4kWZZ;RkE=hL#ZO$MA;j+E|vm+*^UDR)sQ|c zGhIUJ5DX)`9C@=<@uLkB^X0N{u<^GFU{}wQi1N10DU=uXl`1(n!2GOV33K68Da>JO zF3rz`K`yN3PC_6?g_^yi-0V@rhaSZvj#z0dv7^i!1PBd7V5<~fxmq1QhmKSb3boJ^ zPWd^RemE_DMF>v#c+QW!DSu{eVJPhjswQ`6c0LGm6CBKCCURGD$MOq%InMY2#LrdB zF*TllO1X*0IYgq^2{oJ$-Wuj8t7lO$s^-Lt?cG>W-d^avta2rEbUn+--lfD^oY}=P z-Uq38eV+Lx%9BV<4|$Cu1>6dKt6+DA{=lY$K7JoQ!IWrUz&+~%w@1>5J>gHplZ$|yOQ$nwzdAsOl4%1 zsqjGDWGXNZM~#K1;^7@FBiWj%bj7A(-o`9MYsHlaH=#|-h=Udi`mE1hl$vgeYBWlw zjlJ9i$rPM+_A;_wdolD#bCFpK8-k;ezJm>PU^8Y2U<1)Qx;N1A0g^5N=(7IG&^`VN z(M2>+x0_ayb*K@_B05Zd9fvG-aU>N5!SyglnmLj8s@vlCDe0p=qtw;}Fr zWR+g6hlCjBXYFG4$rp!N!5t+71^gmQEe3WOZ3UzR=4b6-?!dVN%wwSXWl9+^v;HzL z#}jSLP9G|11CshRJ+#TMafmBpU&cW$#x}bS?@rzF{|fN8kTg-VVFXOvaoLTXP$3#c z`CzqRb2W&BDx{{BS#;ta)bb8&cL@gW7$}?1b8x^`eH??~Wi%H!x}kQU^vIK;o7*l~P8$W1XNiJueH=*_Q&z za06d8vjaNj9454G5%j1kQYUQAt`{4_)6AL?uF}A!9Wou*bU@~FP@PRF0~yv=Aw#1d zgnRs_7Rz!A%;zEo>T}pX0?bo??hI@8uAblt}yU8Zt0WGxd+hXss#ChNZ+d}6@Y_Di>cUhr}mcd$!hZcNz3{KTr zGHAi)w%|Hj(%TdjU>^v_M`7~T_u+gNSId>kaI~eEe(IX8Rs!4&Y`zc#z6nvgVoWu@ zyQm*zu_hk%TNl<6W_Z$hgrpbFmc31XhOIaht4O-9=$VeoPalfMy1wvI|EW5{SRHPn z4kuasV3@X0L9sf}vS=Z8>0?J4EMko->+F3Wnbg%z6#s2#Jd>u z15)!6**b~T^l(L^NS$_!?QO1hn(UoKUhQGb@cp*NgQ|bdOVyf3>fdv56cBGt@3mRT zdV^MSQZt`|(webrse#$MFT|`v=;9gfNlF-@i}e+%BXTE72H#;^Tnn2LTe;2^YD?q5 zih4h9;qM{gIWW3HODSO90@m{_%0D01=il$csLH4tTFBVXWqq7cv~sRtL{q!evryzr zP2(u^78>z4!9Gq~*jzfCdz&AH4(E7%mv=CogX%*JcgKZI4TB0B^lp;h0@M#M}8f?HpuKCvms`AX73;?Y%X5Da`nQUo41QMZ+vv) zwtlJ5PX>n9^^qsjlcC;h>z$=uZRrh&z69%=jvnsx@SyvY2JAS@F-#dcvRZ-BkU&Tq z;yyu-y3fV;splCU0zo$5_PYc4?R2-fn~?^Py4`NC8|{6bXFIlIIsfg+^tpZRMjOBX j^kg?@R{#35{b=7~583<0Q3bi? 
literal 0 HcmV?d00001 diff --git a/test/bytecode_3.8_run/04_for_no_jump_back.pyc b/test/bytecode_3.8_run/04_for_no_jump_back.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e42d91e4e95891a50848950b95a9fefc54fb209d GIT binary patch literal 446 zcmY+9zfJ-{5XNWj_Kph)6vh~1tn9E5qZSm#NcTRS4Si6a@?L%A z4?VLx!xz+`{679r!Sra22F2Zw>CHbHaEE~Dy`!qshFX;PZ1fw>ou`5ePP4}{hfC;g zFZ?;uhQb9-#oQx-JWU&TkKDm;EknM!YNqExR5> ztOt)GUKIZT1rPoK9{dwL=W0Q65Il+q>zkRZtrPO{eV_MzFB2yBDwQd~^=aqF!@33V zO(h2xG6?XN9|-^lPTCNHfNjEI&OGTHaBCipLr4YXtYynHed`(FcF1^1Fb+Pns04UT#Z*MsOw&XDFdhGGT_3h}F+msQQdu;65t9Ldbx5vh!mZiMelLhsJT z`i<2_bbsaU+RfEQ=3S9eq!+m+WbIAoT(jFBqw$t+$~OKFE_ UMwMnNLsiqt)5;+ptsyt@FIptRHUIzs literal 0 HcmV?d00001 diff --git a/test/simple_source/bug37/03_ifelse_chained_for.py b/test/simple_source/bug37/03_ifelse_chained_for.py new file mode 100644 index 00000000..e82d978a --- /dev/null +++ b/test/simple_source/bug37/03_ifelse_chained_for.py @@ -0,0 +1,24 @@ +# From decompyle3/semantics/customize3.py +# The bug is handling "for" loop inside the +# chained compare ifelse +def n_classdef3(a, b, c, l): + r = 1 + if 3.0 <= a <= 3.2: + for n in l: + if b: + break + elif c: + r = 2 + pass + pass + else: + r = 3 + pass + return r + +assert n_classdef3(10, True, True, []) == 3 +assert n_classdef3(0, False, True, []) == 3 +assert n_classdef3(3.1, True, True, []) == 1 +assert n_classdef3(3.1, True, False, [1]) == 1 +assert n_classdef3(3.1, True, True, [2]) == 1 +assert n_classdef3(3.1, False, True, [3]) == 2 diff --git a/test/simple_source/bug38/01_named_expr.py b/test/simple_source/bug38/01_named_expr.py new file mode 100644 index 00000000..30893d77 --- /dev/null +++ b/test/simple_source/bug38/01_named_expr.py @@ -0,0 +1,5 @@ +(x, y) = "foo", 0 +if x := __name__: + y = 1 +assert x == "__main__", "Walrus operator changes value" +assert y == 1, "Walrus operator branch taken" diff --git a/test/simple_source/bug38/04_for_no_jump_back.py b/test/simple_source/bug38/04_for_no_jump_back.py 
new file mode 100644 index 00000000..84d425dc --- /dev/null +++ b/test/simple_source/bug38/04_for_no_jump_back.py @@ -0,0 +1,17 @@ +# from mult_by_const/instruction.py +# Bug in 3.8 was handling no JUMP_BACK in "for" loop. It is +# in the "if" instead +def instruction_sequence_value(instrs, a, b): + for instr in instrs: + if a: + a = 6 + elif b: + return 0 + pass + + return a + +assert instruction_sequence_value([], True, True) == 1 +assert instruction_sequence_value([1], True, True) == 6 +assert instruction_sequence_value([1], False, True) == 0 +assert instruction_sequence_value([1], False, False) == False diff --git a/test/simple_source/expression/14_mixed_expressions.py b/test/simple_source/expression/14_mixed_expressions.py new file mode 100644 index 00000000..f3f20a55 --- /dev/null +++ b/test/simple_source/expression/14_mixed_expressions.py @@ -0,0 +1,53 @@ +# Covers a large number of operators +# +# This code is RUNNABLE! + +import sys +PYTHON_VERSION = sys.version_info[0] + (sys.version_info[1] / 10.0) + +assert PYTHON_VERSION >= 3.7 + +# some floats (from 01_float.py) + +x = 1e300 +assert 0.0 == x * 0 +assert x * 1e300 == float("inf") +assert str(float("inf") * 0.0) == "nan" +assert str(float("-inf") * 0.0) == "nan" +assert -1e300 * 1e300 == float("-inf") + +# Complex (adapted from 02_complex.py) +y = 5j +assert y ** 2 == -25 +y **= 3 +assert y == (-0-125j) + + +# Tests BINARY_TRUE_DIVIDE and INPLACE_TRUE_DIVIDE (from 02_try_divide.py) +x = 2 +assert 4 / x == 2 + +x = 5 +assert x / 2 == 2.5 +x = 3 +x /= 2 +assert x == 1.5 + +x = 2 +assert 4 // x == 2 +x = 7 +x //= 2 +assert x == 3 + +x = 3 +assert x % 2 == 1 +x %= 2 +assert x == 1 + +assert x << 2 == 4 +x <<= 3 +assert x == 8 + +assert x >> 1 == 4 +x >>= 1 +assert x == 4 diff --git a/uncompyle6/parsers/parse37.py b/uncompyle6/parsers/parse37.py index 1859ba66..4e80de69 100644 --- a/uncompyle6/parsers/parse37.py +++ b/uncompyle6/parsers/parse37.py @@ -13,28 +13,592 @@ # You should have received a copy of the 
GNU General Public License # along with this program. If not, see . """ -spark grammar differences over Python 3.6 for Python 3.7 +Python 3.7 grammar for the spark Earley-algorithm parser. """ from __future__ import print_function -from uncompyle6.parser import PythonParserSingle +from uncompyle6.parser import PythonParserSingle, nop_func from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG -from uncompyle6.parsers.parse36 import Python36Parser +from uncompyle6.parsers.parse37base import Python37BaseParser -class Python37Parser(Python36Parser): +class Python37Parser(Python37BaseParser): def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): super(Python37Parser, self).__init__(debug_parser) self.customized = {} + ############################################### + # Python 3.7 grammar rules + ############################################### + def p_start(self, args): + """ + # The start or goal symbol + stmts ::= sstmt+ + """ + + def p_call_stmt(self, args): + """ + # eval-mode compilation. Single-mode interactive compilation + # adds another rule. 
+ call_stmt ::= expr POP_TOP + """ + + def p_stmt(self, args): + """ + pass ::= + + _stmts ::= stmt+ + + # statements with continue + c_stmts ::= _stmts + c_stmts ::= _stmts lastc_stmt + c_stmts ::= lastc_stmt + c_stmts ::= continues + + lastc_stmt ::= iflaststmt + lastc_stmt ::= forelselaststmt + lastc_stmt ::= ifelsestmtc + + c_stmts_opt ::= c_stmts + c_stmts_opt ::= pass + + # statements inside a loop + l_stmts ::= _stmts + l_stmts ::= returns + l_stmts ::= continues + l_stmts ::= _stmts lastl_stmt + l_stmts ::= lastl_stmt + + lastl_stmt ::= iflaststmtl + lastl_stmt ::= ifelsestmtl + lastl_stmt ::= forelselaststmtl + lastl_stmt ::= tryelsestmtl + + l_stmts_opt ::= l_stmts + l_stmts_opt ::= pass + + suite_stmts ::= _stmts + suite_stmts ::= returns + suite_stmts ::= continues + + suite_stmts_opt ::= suite_stmts + + # passtmt is needed for semantic actions to add "pass" + suite_stmts_opt ::= pass + + else_suite ::= suite_stmts + else_suitel ::= l_stmts + else_suitec ::= c_stmts + else_suitec ::= returns + + stmt ::= assert + + stmt ::= classdef + stmt ::= call_stmt + + stmt ::= ifstmt + stmt ::= ifelsestmt + + stmt ::= whilestmt + stmt ::= while1stmt + stmt ::= whileelsestmt + stmt ::= while1elsestmt + stmt ::= for + stmt ::= forelsestmt + stmt ::= try_except + stmt ::= tryelsestmt + stmt ::= tryfinallystmt + + stmt ::= del_stmt + del_stmt ::= DELETE_FAST + del_stmt ::= DELETE_NAME + del_stmt ::= DELETE_GLOBAL + + stmt ::= return + return ::= ret_expr RETURN_VALUE + + # "returns" nonterminal is a sequence of statements that ends in a RETURN statement. + # In later Python versions with jump optimization, this can cause JUMPs + # that would normally appear to be omitted. 
+ + returns ::= return + returns ::= _stmts return + + stmt ::= genexpr_func + genexpr_func ::= LOAD_FAST _come_froms FOR_ITER store comp_iter JUMP_BACK + """ + pass + + def p_expr(self, args): + """ + expr ::= _mklambda + expr ::= LOAD_CODE + expr ::= LOAD_CONST + expr ::= LOAD_DEREF + expr ::= LOAD_FAST + expr ::= LOAD_GLOBAL + expr ::= LOAD_NAME + expr ::= LOAD_STR + expr ::= binary_expr + expr ::= list + expr ::= compare + expr ::= dict + expr ::= and + expr ::= or + expr ::= unary_expr + expr ::= call + expr ::= unary_not + expr ::= subscript + expr ::= subscript2 + expr ::= yield + expr ::= generator_exp + + binary_expr ::= expr expr binary_op + binary_op ::= BINARY_ADD + binary_op ::= BINARY_MULTIPLY + binary_op ::= BINARY_AND + binary_op ::= BINARY_OR + binary_op ::= BINARY_XOR + binary_op ::= BINARY_SUBTRACT + binary_op ::= BINARY_TRUE_DIVIDE + binary_op ::= BINARY_FLOOR_DIVIDE + binary_op ::= BINARY_MODULO + binary_op ::= BINARY_LSHIFT + binary_op ::= BINARY_RSHIFT + binary_op ::= BINARY_POWER + + unary_expr ::= expr unary_op + unary_op ::= UNARY_POSITIVE + unary_op ::= UNARY_NEGATIVE + unary_op ::= UNARY_INVERT + + unary_not ::= expr UNARY_NOT + + subscript ::= expr expr BINARY_SUBSCR + + get_iter ::= expr GET_ITER + + yield ::= expr YIELD_VALUE + + _mklambda ::= mklambda + + expr ::= conditional + + ret_expr ::= expr + ret_expr ::= ret_and + ret_expr ::= ret_or + + ret_expr_or_cond ::= ret_expr + ret_expr_or_cond ::= ret_cond + + stmt ::= return_lambda + + return_lambda ::= ret_expr RETURN_VALUE_LAMBDA LAMBDA_MARKER + return_lambda ::= ret_expr RETURN_VALUE_LAMBDA + + compare ::= compare_chained + compare ::= compare_single + compare_single ::= expr expr COMPARE_OP + + # A compare_chained is two comparisons like x <= y <= z + compare_chained ::= expr compare_chained1 ROT_TWO POP_TOP _come_froms + compare_chained2 ::= expr COMPARE_OP JUMP_FORWARD + + # Non-null kvlist items are broken out in the individual grammars + kvlist ::= + + # Positional arguments 
in make_function + pos_arg ::= expr + """ + + def p_function_def(self, args): + """ + stmt ::= function_def + function_def ::= mkfunc store + stmt ::= function_def_deco + function_def_deco ::= mkfuncdeco store + mkfuncdeco ::= expr mkfuncdeco CALL_FUNCTION_1 + mkfuncdeco ::= expr mkfuncdeco0 CALL_FUNCTION_1 + mkfuncdeco0 ::= mkfunc + load_closure ::= load_closure LOAD_CLOSURE + load_closure ::= LOAD_CLOSURE + """ + + def p_generator_exp(self, args): + """ + """ + + def p_jump(self, args): + """ + _jump ::= JUMP_ABSOLUTE + _jump ::= JUMP_FORWARD + _jump ::= JUMP_BACK + + # Zero or more COME_FROMs - loops can have this + _come_froms ::= COME_FROM* + _come_froms ::= _come_froms COME_FROM_LOOP + + # One or more COME_FROMs - joins of tryelse's have this + come_froms ::= COME_FROM+ + + # Zero or one COME_FROM + # And/or expressions have this + come_from_opt ::= COME_FROM? + """ + + def p_augmented_assign(self, args): + """ + stmt ::= aug_assign1 + stmt ::= aug_assign2 + + # This is odd in that other aug_assign1's have only 3 slots + # The store isn't used as that's supposed to be also + # indicated in the first expr + aug_assign1 ::= expr expr + inplace_op store + aug_assign1 ::= expr expr + inplace_op ROT_THREE STORE_SUBSCR + aug_assign2 ::= expr DUP_TOP LOAD_ATTR expr + inplace_op ROT_TWO STORE_ATTR + + inplace_op ::= INPLACE_ADD + inplace_op ::= INPLACE_SUBTRACT + inplace_op ::= INPLACE_MULTIPLY + inplace_op ::= INPLACE_TRUE_DIVIDE + inplace_op ::= INPLACE_FLOOR_DIVIDE + inplace_op ::= INPLACE_MODULO + inplace_op ::= INPLACE_POWER + inplace_op ::= INPLACE_LSHIFT + inplace_op ::= INPLACE_RSHIFT + inplace_op ::= INPLACE_AND + inplace_op ::= INPLACE_XOR + inplace_op ::= INPLACE_OR + """ + + def p_assign(self, args): + """ + stmt ::= assign + assign ::= expr DUP_TOP designList + assign ::= expr store + + stmt ::= assign2 + stmt ::= assign3 + assign2 ::= expr expr ROT_TWO store store + assign3 ::= expr expr expr ROT_THREE ROT_TWO store store store + """ + + def 
p_forstmt(self, args): + """ + get_for_iter ::= GET_ITER _come_froms FOR_ITER + + for_block ::= l_stmts_opt _come_froms JUMP_BACK + + forelsestmt ::= SETUP_LOOP expr get_for_iter store + for_block POP_BLOCK else_suite _come_froms + + forelselaststmt ::= SETUP_LOOP expr get_for_iter store + for_block POP_BLOCK else_suitec _come_froms + + forelselaststmtl ::= SETUP_LOOP expr get_for_iter store + for_block POP_BLOCK else_suitel _come_froms + """ + + def p_import20(self, args): + """ + stmt ::= import + stmt ::= import_from + stmt ::= import_from_star + stmt ::= importmultiple + + importlist ::= importlist alias + importlist ::= alias + alias ::= IMPORT_NAME store + alias ::= IMPORT_FROM store + alias ::= IMPORT_NAME attributes store + + import ::= LOAD_CONST LOAD_CONST alias + import_from_star ::= LOAD_CONST LOAD_CONST IMPORT_NAME IMPORT_STAR + import_from ::= LOAD_CONST LOAD_CONST IMPORT_NAME importlist POP_TOP + importmultiple ::= LOAD_CONST LOAD_CONST alias imports_cont + + imports_cont ::= import_cont+ + import_cont ::= LOAD_CONST LOAD_CONST alias + + attributes ::= LOAD_ATTR+ + """ + + def p_list_comprehension(self, args): + """ + expr ::= list_comp + + list_iter ::= list_for + list_iter ::= list_if + list_iter ::= list_if_not + list_iter ::= lc_body + + list_if ::= expr jmp_false list_iter + list_if_not ::= expr jmp_true list_iter + """ + + def p_set_comp(self, args): + """ + comp_iter ::= comp_for + comp_body ::= gen_comp_body + gen_comp_body ::= expr YIELD_VALUE POP_TOP + + comp_if ::= expr jmp_false comp_iter + """ + + def p_store(self, args): + """ + # Note. 
The below is right-recursive: + designList ::= store store + designList ::= store DUP_TOP designList + + ## Can we replace with left-recursive, and redo with: + ## + ## designList ::= designLists store store + ## designLists ::= designLists store DUP_TOP + ## designLists ::= + ## Will need to redo semantic action + + store ::= STORE_FAST + store ::= STORE_NAME + store ::= STORE_GLOBAL + store ::= STORE_DEREF + store ::= expr STORE_ATTR + store ::= store_subscript + store_subscript ::= expr expr STORE_SUBSCR + store ::= unpack + """ + + def p_32on(self, args): + """ + conditional ::= expr jmp_false expr jump_forward_else expr COME_FROM + + # compare_chained2 is used in a "chained_compare": x <= y <= z + # used exclusively in compare_chained + compare_chained2 ::= expr COMPARE_OP RETURN_VALUE + compare_chained2 ::= expr COMPARE_OP RETURN_VALUE_LAMBDA + + # Python < 3.5 no POP BLOCK + whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK COME_FROM_LOOP + + # Python 3.5+ has jump optimization to remove the redundant + # jump_excepts. But in 3.3 we need them added + + except_handler ::= JUMP_FORWARD COME_FROM_EXCEPT except_stmts + END_FINALLY + + tryelsestmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK + except_handler else_suite + jump_excepts come_from_except_clauses + + jump_excepts ::= jump_except+ + + subscript2 ::= expr expr DUP_TOP_TWO BINARY_SUBSCR + + # Python 3.2+ has more loop optimization that removes + # JUMP_FORWARD in some cases, and hence we also don't + # see COME_FROM + _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD _come_froms + + kv3 ::= expr expr STORE_MAP + """ + return + + def p_33on(self, args): + """ + # Python 3.3+ adds yield from. + expr ::= yield_from + yield_from ::= expr GET_YIELD_FROM_ITER LOAD_CONST YIELD_FROM + + # We do the grammar hackery below for semantics + # actions that want c_stmts_opt at index 1 + + # Python 3.5+ has jump optimization to remove the redundant + # jump_excepts. 
But in 3.3 we need them added + + try_except ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK + except_handler + jump_excepts come_from_except_clauses + """ + + def p_34on(self, args): + """ + whilestmt ::= setup_loop testexpr returns come_froms POP_BLOCK COME_FROM_LOOP + + # Seems to be needed starting 3.4.4 or so + while1stmt ::= setup_loop l_stmts + COME_FROM JUMP_BACK POP_BLOCK COME_FROM_LOOP + while1stmt ::= setup_loop l_stmts + POP_BLOCK COME_FROM_LOOP + + # FIXME the below masks a bug in not detecting COME_FROM_LOOP + # grammar rules with COME_FROM -> COME_FROM_LOOP already exist + whileelsestmt ::= setup_loop testexpr l_stmts_opt JUMP_BACK POP_BLOCK + else_suitel COME_FROM + + while1elsestmt ::= setup_loop l_stmts JUMP_BACK _come_froms POP_BLOCK else_suitel + COME_FROM_LOOP + + # Python 3.4+ optimizes the trailing two JUMPS away + + _ifstmts_jump ::= c_stmts_opt JUMP_ABSOLUTE JUMP_FORWARD _come_froms + """ + + def p_35on(self, args): + """ + + while1elsestmt ::= setup_loop l_stmts JUMP_BACK + POP_BLOCK else_suite COME_FROM_LOOP + + # The following rule is for Python 3.5+ where we can have stuff like + # while .. + # if + # ... + # the end of the if will jump back to the loop and there will be a COME_FROM + # after the jump + l_stmts ::= lastl_stmt come_froms l_stmts + + # Python 3.5+ Await statement + expr ::= await_expr + await_expr ::= expr GET_AWAITABLE LOAD_CONST YIELD_FROM + + stmt ::= await_stmt + await_stmt ::= await_expr POP_TOP + + # Python 3.5+ async additions + + inplace_op ::= INPLACE_MATRIX_MULTIPLY + binary_op ::= BINARY_MATRIX_MULTIPLY + + # Python 3.5+ does jump optimization + # In <.3.5 the below is a JUMP_FORWARD to a JUMP_ABSOLUTE. 
+ + return_if_stmt ::= ret_expr RETURN_END_IF POP_BLOCK + return_if_lambda ::= RETURN_END_IF_LAMBDA COME_FROM + + jb_else ::= JUMP_BACK ELSE + jb_else ::= JUMP_BACK COME_FROM + ifelsestmtc ::= testexpr c_stmts_opt JUMP_FORWARD else_suitec + ifelsestmtl ::= testexpr c_stmts_opt jb_else else_suitel + + # 3.5 Has jump optimization which can route the end of an + # "if/then" back to a loop just before an else. + jump_absolute_else ::= jb_else + jump_absolute_else ::= CONTINUE ELSE + + # Our hacky "ELSE" determination doesn't do a good job and really + # determine the start of an "else". It could also be the end of an + # "if-then" which ends in a "continue". Perhaps with real control-flow + # analysis we'll sort this out. Or call "ELSE" something more appropriate. + _ifstmts_jump ::= c_stmts_opt ELSE + + # ifstmt ::= testexpr c_stmts_opt + + iflaststmt ::= testexpr c_stmts_opt JUMP_FORWARD + """ + + def p_36misc(self, args): + """ + sstmt ::= sstmt RETURN_LAST + + # 3.6 redoes how return_closure works. FIXME: Isolate to LOAD_CLOSURE + return_closure ::= LOAD_CLOSURE DUP_TOP STORE_NAME RETURN_VALUE RETURN_LAST + + for_block ::= l_stmts_opt come_from_loops JUMP_BACK + come_from_loops ::= COME_FROM_LOOP* + + whilestmt ::= setup_loop testexpr l_stmts_opt + JUMP_BACK come_froms POP_BLOCK COME_FROM_LOOP + whilestmt ::= setup_loop testexpr l_stmts_opt + come_froms JUMP_BACK come_froms POP_BLOCK COME_FROM_LOOP + + # 3.6 due to jump optimization, we sometimes add RETURN_END_IF where + # RETURN_VALUE is meant. Specifically this can happen in + # ifelsestmt -> ...else_suite _. suite_stmts... 
(last) stmt + return ::= ret_expr RETURN_END_IF + return ::= ret_expr RETURN_VALUE COME_FROM + return_stmt_lambda ::= ret_expr RETURN_VALUE_LAMBDA COME_FROM + + # A COME_FROM is dropped off because of JUMP-to-JUMP optimization + and ::= expr jmp_false expr + and ::= expr jmp_false expr jmp_false + + jf_cf ::= JUMP_FORWARD COME_FROM + cf_jf_else ::= come_froms JUMP_FORWARD ELSE + + conditional ::= expr jmp_false expr jf_cf expr COME_FROM + + async_for_stmt ::= setup_loop expr + GET_AITER + LOAD_CONST YIELD_FROM SETUP_EXCEPT GET_ANEXT LOAD_CONST + YIELD_FROM + store + POP_BLOCK JUMP_FORWARD COME_FROM_EXCEPT DUP_TOP + LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_FALSE + POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_BLOCK + JUMP_ABSOLUTE END_FINALLY COME_FROM + for_block POP_BLOCK + COME_FROM_LOOP + + # Adds a COME_FROM_ASYNC_WITH over 3.5 + # FIXME: remove corresponding rule for 3.5? + + except_suite ::= c_stmts_opt COME_FROM POP_EXCEPT jump_except COME_FROM + + jb_cfs ::= come_from_opt JUMP_BACK come_froms + ifelsestmtl ::= testexpr c_stmts_opt jb_cfs else_suitel + ifelsestmtl ::= testexpr c_stmts_opt cf_jf_else else_suitel + + # In 3.6+, A sequence of statements ending in a RETURN can cause + # JUMP_FORWARD END_FINALLY to be omitted from try middle + + except_return ::= POP_TOP POP_TOP POP_TOP returns + except_handler ::= JUMP_FORWARD COME_FROM_EXCEPT except_return + + # Try middle following a returns + except_handler36 ::= COME_FROM_EXCEPT except_stmts END_FINALLY + + stmt ::= try_except36 + try_except36 ::= SETUP_EXCEPT returns except_handler36 + opt_come_from_except + try_except36 ::= SETUP_EXCEPT suite_stmts + try_except36 ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK + except_handler36 come_from_opt + + # 3.6 omits END_FINALLY sometimes + except_handler36 ::= COME_FROM_EXCEPT except_stmts + except_handler36 ::= JUMP_FORWARD COME_FROM_EXCEPT except_stmts + except_handler ::= jmp_abs COME_FROM_EXCEPT except_stmts + + stmt ::= tryfinally36 + tryfinally36 ::= SETUP_FINALLY returns + 
COME_FROM_FINALLY suite_stmts + tryfinally36 ::= SETUP_FINALLY returns + COME_FROM_FINALLY suite_stmts_opt END_FINALLY + except_suite_finalize ::= SETUP_FINALLY returns + COME_FROM_FINALLY suite_stmts_opt END_FINALLY _jump + + stmt ::= tryfinally_return_stmt + tryfinally_return_stmt ::= SETUP_FINALLY suite_stmts_opt POP_BLOCK LOAD_CONST + COME_FROM_FINALLY + + compare_chained2 ::= expr COMPARE_OP come_froms JUMP_FORWARD + """ + def p_37misc(self, args): """ - # Where does the POP_TOP really belong? stmt ::= import37 stmt ::= async_for_stmt37 + stmt ::= async_for_stmt + stmt ::= async_forelse_stmt + + # Where does the POP_TOP really belong? import37 ::= import POP_TOP - async_for_stmt ::= SETUP_LOOP expr + async_for_stmt ::= setup_loop expr GET_AITER SETUP_EXCEPT GET_ANEXT LOAD_CONST YIELD_FROM @@ -48,7 +612,7 @@ class Python37Parser(Python36Parser): COME_FROM_LOOP # Order of LOAD_CONST YIELD_FROM is switched from 3.6 to save a LOAD_CONST - async_for_stmt37 ::= SETUP_LOOP expr + async_for_stmt37 ::= setup_loop expr GET_AITER SETUP_EXCEPT GET_ANEXT LOAD_CONST YIELD_FROM @@ -60,7 +624,7 @@ class Python37Parser(Python36Parser): POP_TOP POP_BLOCK COME_FROM_LOOP - async_forelse_stmt ::= SETUP_LOOP expr + async_forelse_stmt ::= setup_loop expr GET_AITER SETUP_EXCEPT GET_ANEXT LOAD_CONST YIELD_FROM @@ -85,9 +649,6 @@ class Python37Parser(Python36Parser): # sort this out better. 
except_suite ::= c_stmts_opt POP_EXCEPT jump_except ELSE - # FIXME: generalize and specialize - call ::= expr CALL_METHOD_0 - testtrue ::= compare_chained37 testfalse ::= compare_chained37_false @@ -101,7 +662,7 @@ class Python37Parser(Python36Parser): compare_chained1a_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE compare_chained1a_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE - compare_chained2a_37 ELSE POP_TOP COME_FROM + compare_chained2a_37 COME_FROM POP_TOP COME_FROM compare_chained1b_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE compare_chained2b_37 POP_TOP JUMP_FORWARD COME_FROM compare_chained1c_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE @@ -109,14 +670,17 @@ class Python37Parser(Python36Parser): compare_chained1_false_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE compare_chained2c_37 POP_TOP JUMP_FORWARD COME_FROM + compare_chained1_false_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE + compare_chained2b_37 POP_TOP _jump COME_FROM + compare_chained2_false_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE - compare_chained2a_false_37 ELSE POP_TOP JUMP_BACK COME_FROM + compare_chained2a_false_37 POP_TOP JUMP_BACK COME_FROM - compare_chained2a_37 ::= expr COMPARE_OP POP_JUMP_IF_TRUE JUMP_FORWARD - compare_chained2a_37 ::= expr COMPARE_OP POP_JUMP_IF_TRUE JUMP_BACK - compare_chained2a_false_37 ::= expr COMPARE_OP POP_JUMP_IF_FALSE jf_cfs + compare_chained2a_37 ::= expr COMPARE_OP come_from_opt POP_JUMP_IF_TRUE JUMP_FORWARD + compare_chained2a_37 ::= expr COMPARE_OP come_from_opt POP_JUMP_IF_TRUE JUMP_BACK + compare_chained2a_false_37 ::= expr COMPARE_OP come_from_opt POP_JUMP_IF_FALSE jf_cfs - compare_chained2b_37 ::= expr COMPARE_OP come_from_opt POP_JUMP_IF_FALSE JUMP_FORWARD ELSE + compare_chained2b_37 ::= expr COMPARE_OP come_from_opt POP_JUMP_IF_FALSE JUMP_FORWARD COME_FROM compare_chained2b_37 ::= expr COMPARE_OP come_from_opt POP_JUMP_IF_FALSE JUMP_FORWARD 
compare_chained2c_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP come_from_opt POP_JUMP_IF_FALSE @@ -133,20 +697,655 @@ class Python37Parser(Python36Parser): _ifstmts_jump ::= c_stmts_opt come_froms and_not ::= expr jmp_false expr POP_JUMP_IF_TRUE + testfalse ::= and_not expr ::= if_exp_37a expr ::= if_exp_37b - if_exp_37a ::= and_not expr JUMP_FORWARD COME_FROM expr COME_FROM + if_exp_37a ::= and_not expr JUMP_FORWARD come_froms expr COME_FROM if_exp_37b ::= expr jmp_false expr POP_JUMP_IF_FALSE jump_forward_else expr """ + def p_comprehension3(self, args): + """ + # Python3 scanner adds LOAD_LISTCOMP. Python3 does list comprehension like + # other comprehensions (set, dictionary). + + # Our "continue" heuristic - in two successive JUMP_BACKS, the first + # one may be a continue - sometimes classifies a JUMP_BACK + # as a CONTINUE. The two are kind of the same in a comprehension. + + comp_for ::= expr get_for_iter store comp_iter CONTINUE + comp_for ::= expr get_for_iter store comp_iter JUMP_BACK + + for_iter ::= _come_froms FOR_ITER + + list_comp ::= BUILD_LIST_0 list_iter + lc_body ::= expr LIST_APPEND + list_for ::= expr for_iter store list_iter jb_or_c + + # This is seen in PyPy, but possibly it appears on other Python 3? 
+ list_if ::= expr jmp_false list_iter COME_FROM + list_if_not ::= expr jmp_true list_iter COME_FROM + + jb_or_c ::= JUMP_BACK + jb_or_c ::= CONTINUE + + stmt ::= set_comp_func + + set_comp_func ::= BUILD_SET_0 LOAD_FAST for_iter store comp_iter + JUMP_BACK RETURN_VALUE RETURN_LAST + + set_comp_func ::= BUILD_SET_0 LOAD_FAST for_iter store comp_iter + COME_FROM JUMP_BACK RETURN_VALUE RETURN_LAST + + comp_body ::= dict_comp_body + comp_body ::= set_comp_body + dict_comp_body ::= expr expr MAP_ADD + set_comp_body ::= expr SET_ADD + + # See also common Python p_list_comprehension + """ + + def p_dict_comp3(self, args): + """" + expr ::= dict_comp + stmt ::= dict_comp_func + dict_comp_func ::= BUILD_MAP_0 LOAD_FAST for_iter store + comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST + + comp_iter ::= comp_if + comp_iter ::= comp_if_not + comp_if_not ::= expr jmp_true comp_iter + comp_iter ::= comp_body + """ + + def p_grammar(self, args): + """ + sstmt ::= stmt + sstmt ::= ifelsestmtr + sstmt ::= return RETURN_LAST + + return_if_stmts ::= return_if_stmt come_from_opt + return_if_stmts ::= _stmts return_if_stmt _come_froms + return_if_stmt ::= ret_expr RETURN_END_IF + returns ::= _stmts return_if_stmt + + stmt ::= break + break ::= BREAK_LOOP + + stmt ::= continue + continue ::= CONTINUE + continues ::= _stmts lastl_stmt continue + continues ::= lastl_stmt continue + continues ::= continue + + + kwarg ::= LOAD_STR expr + kwargs ::= kwarg+ + + classdef ::= build_class store + + # FIXME: we need to add these because don't detect this properly + # in custom rules. Specifically if one of the exprs is CALL_FUNCTION + # then we'll mistake that for the final CALL_FUNCTION. 
+ # We can fix by triggering on the CALL_FUNCTION op + # Python3 introduced LOAD_BUILD_CLASS + # Other definitions are in a custom rule + build_class ::= LOAD_BUILD_CLASS mkfunc expr call CALL_FUNCTION_3 + build_class ::= LOAD_BUILD_CLASS mkfunc expr call expr CALL_FUNCTION_4 + + stmt ::= classdefdeco + classdefdeco ::= classdefdeco1 store + + expr ::= LOAD_ASSERT + assert ::= assert_expr jmp_true LOAD_ASSERT RAISE_VARARGS_1 COME_FROM + stmt ::= assert2 + assert2 ::= assert_expr jmp_true LOAD_ASSERT expr + CALL_FUNCTION_1 RAISE_VARARGS_1 COME_FROM + + assert_expr ::= expr + assert_expr ::= assert_expr_or + assert_expr ::= assert_expr_and + assert_expr_or ::= assert_expr jmp_true expr + assert_expr_and ::= assert_expr jmp_false expr + + ifstmt ::= testexpr _ifstmts_jump + + testexpr ::= testfalse + testexpr ::= testtrue + testfalse ::= expr jmp_false + testtrue ::= expr jmp_true + + _ifstmts_jump ::= return_if_stmts + _ifstmts_jump ::= c_stmts_opt COME_FROM + + iflaststmt ::= testexpr c_stmts + iflaststmt ::= testexpr c_stmts JUMP_ABSOLUTE + + iflaststmtl ::= testexpr c_stmts JUMP_BACK + iflaststmtl ::= testexpr c_stmts JUMP_BACK COME_FROM_LOOP + iflaststmtl ::= testexpr c_stmts JUMP_BACK POP_BLOCK + + # These are used to keep parse tree indices the same + jump_forward_else ::= JUMP_FORWARD ELSE + jump_forward_else ::= JUMP_FORWARD COME_FROM + jump_absolute_else ::= JUMP_ABSOLUTE ELSE + jump_absolute_else ::= JUMP_ABSOLUTE _come_froms + jump_absolute_else ::= come_froms _jump COME_FROM + + # Note: in if/else kinds of statements, we err on the side + # of missing "else" clauses. Therefore we include grammar + # rules with and without ELSE. 
+ + ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD + else_suite opt_come_from_except + ifelsestmt ::= testexpr c_stmts_opt jump_forward_else + else_suite _come_froms + + # This handles the case where a "JUMP_ABSOLUTE" is part + # of an inner if in c_stmts_opt + ifelsestmt ::= testexpr c_stmts_opt come_froms + else_suite come_froms + + # ifelsestmt ::= testexpr c_stmts_opt jump_forward_else + # pass _come_froms + + ifelsestmtc ::= testexpr c_stmts_opt JUMP_ABSOLUTE else_suitec + ifelsestmtc ::= testexpr c_stmts_opt jump_absolute_else else_suitec + + ifelsestmtr ::= testexpr return_if_stmts returns + + ifelsestmtl ::= testexpr c_stmts_opt cf_jump_back else_suitel + + cf_jump_back ::= COME_FROM JUMP_BACK + + # FIXME: this feels like a hack. Is it just 1 or two + # COME_FROMs? the parsed tree for this and even with just the + # one COME_FROM for Python 2.7 seems to associate the + # COME_FROM targets from the wrong places + + # this is nested inside a try_except + tryfinallystmt ::= SETUP_FINALLY suite_stmts_opt + POP_BLOCK LOAD_CONST + COME_FROM_FINALLY suite_stmts_opt END_FINALLY + + except_handler ::= jmp_abs COME_FROM except_stmts + _come_froms END_FINALLY + except_handler ::= jmp_abs COME_FROM_EXCEPT except_stmts + _come_froms END_FINALLY + + # FIXME: remove this + except_handler ::= JUMP_FORWARD COME_FROM except_stmts + come_froms END_FINALLY come_from_opt + + except_stmts ::= except_stmts except_stmt + except_stmts ::= except_stmt + + except_stmt ::= except_cond1 except_suite come_from_opt + except_stmt ::= except_cond2 except_suite come_from_opt + except_stmt ::= except_cond2 except_suite_finalize + except_stmt ::= except + + ## FIXME: what's except_pop_except? + except_stmt ::= except_pop_except + + # Python3 introduced POP_EXCEPT + except_suite ::= c_stmts_opt POP_EXCEPT jump_except + jump_except ::= JUMP_ABSOLUTE + jump_except ::= JUMP_BACK + jump_except ::= JUMP_FORWARD + jump_except ::= CONTINUE + + # This is used in Python 3 in + # "except ... 
as e" to remove 'e' after the c_stmts_opt finishes + except_suite_finalize ::= SETUP_FINALLY c_stmts_opt except_var_finalize + END_FINALLY _jump + + except_var_finalize ::= POP_BLOCK POP_EXCEPT LOAD_CONST COME_FROM_FINALLY + LOAD_CONST store del_stmt + + except_suite ::= returns + + except_cond1 ::= DUP_TOP expr COMPARE_OP + jmp_false POP_TOP POP_TOP POP_TOP + + except_cond2 ::= DUP_TOP expr COMPARE_OP + jmp_false POP_TOP store POP_TOP come_from_opt + + except ::= POP_TOP POP_TOP POP_TOP c_stmts_opt POP_EXCEPT _jump + except ::= POP_TOP POP_TOP POP_TOP returns + + jmp_abs ::= JUMP_ABSOLUTE + jmp_abs ::= JUMP_BACK + + """ + + def p_misc3(self, args): + """ + except_handler ::= JUMP_FORWARD COME_FROM_EXCEPT except_stmts + come_froms END_FINALLY + + for_block ::= l_stmts_opt COME_FROM_LOOP JUMP_BACK + for_block ::= l_stmts + for_block ::= l_stmts JUMP_BACK + iflaststmtl ::= testexpr c_stmts + """ + + def p_come_from3(self, args): + """ + opt_come_from_except ::= COME_FROM_EXCEPT + opt_come_from_except ::= _come_froms + opt_come_from_except ::= come_from_except_clauses + + come_from_except_clauses ::= COME_FROM_EXCEPT_CLAUSE+ + """ + + def p_jump3(self, args): + """ + jmp_false ::= POP_JUMP_IF_FALSE + jmp_true ::= POP_JUMP_IF_TRUE + + # FIXME: Common with 2.7 + ret_and ::= expr JUMP_IF_FALSE_OR_POP ret_expr_or_cond COME_FROM + ret_or ::= expr JUMP_IF_TRUE_OR_POP ret_expr_or_cond COME_FROM + ret_cond ::= expr POP_JUMP_IF_FALSE expr RETURN_END_IF COME_FROM ret_expr_or_cond + + or ::= expr JUMP_IF_TRUE_OR_POP expr COME_FROM + or ::= expr JUMP_IF_TRUE expr COME_FROM + and ::= expr JUMP_IF_FALSE_OR_POP expr COME_FROM + and ::= expr JUMP_IF_FALSE expr COME_FROM + + ## FIXME: Is the below needed or is it covered above?? 
+ and ::= expr jmp_false expr COME_FROM + or ::= expr jmp_true expr COME_FROM + + # compare_chained1 is used exclusively in chained_compare + compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP JUMP_IF_FALSE_OR_POP + compare_chained1 COME_FROM + compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP JUMP_IF_FALSE_OR_POP + compare_chained2 COME_FROM + """ + + def p_stmt3(self, args): + """ + stmt ::= if_expr_lambda + stmt ::= conditional_not_lambda + if_expr_lambda ::= expr jmp_false expr return_if_lambda + return_stmt_lambda LAMBDA_MARKER + conditional_not_lambda + ::= expr jmp_true expr return_if_lambda + return_stmt_lambda LAMBDA_MARKER + + return_stmt_lambda ::= ret_expr RETURN_VALUE_LAMBDA + return_if_lambda ::= RETURN_END_IF_LAMBDA + + stmt ::= return_closure + return_closure ::= LOAD_CLOSURE RETURN_VALUE RETURN_LAST + + stmt ::= whileTruestmt + ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD else_suite _come_froms + """ + + def p_loop_stmt3(self, args): + """ + setup_loop ::= SETUP_LOOP _come_froms + for ::= setup_loop expr get_for_iter store for_block POP_BLOCK + for ::= setup_loop expr get_for_iter store for_block POP_BLOCK + COME_FROM_LOOP + + + forelsestmt ::= setup_loop expr get_for_iter store for_block POP_BLOCK else_suite + COME_FROM_LOOP + + forelselaststmt ::= setup_loop expr get_for_iter store for_block POP_BLOCK else_suitec + COME_FROM_LOOP + + forelselaststmtl ::= setup_loop expr get_for_iter store for_block POP_BLOCK else_suitel + COME_FROM_LOOP + + whilestmt ::= setup_loop testexpr l_stmts_opt COME_FROM JUMP_BACK POP_BLOCK + COME_FROM_LOOP + + whilestmt ::= setup_loop testexpr l_stmts_opt JUMP_BACK POP_BLOCK + COME_FROM_LOOP + + whilestmt ::= setup_loop testexpr returns POP_BLOCK + COME_FROM_LOOP + + while1elsestmt ::= setup_loop l_stmts JUMP_BACK + else_suitel + + whileelsestmt ::= setup_loop testexpr l_stmts_opt JUMP_BACK POP_BLOCK + else_suitel COME_FROM_LOOP + + whileTruestmt ::= setup_loop l_stmts_opt JUMP_BACK POP_BLOCK + 
COME_FROM_LOOP + + # FIXME: Python 3.? starts adding branch optimization? Put this starting there. + + while1stmt ::= setup_loop l_stmts COME_FROM_LOOP + while1stmt ::= setup_loop l_stmts COME_FROM JUMP_BACK COME_FROM_LOOP + + while1elsestmt ::= setup_loop l_stmts JUMP_BACK + else_suite COME_FROM_LOOP + + # FIXME: investigate - can code really produce a NOP? + for ::= setup_loop expr get_for_iter store for_block POP_BLOCK NOP + COME_FROM_LOOP + """ + + def p_generator_exp3(self, args): + """ + load_genexpr ::= LOAD_GENEXPR + load_genexpr ::= BUILD_TUPLE_1 LOAD_GENEXPR LOAD_STR + """ + + def p_expr3(self, args): + """ + expr ::= conditionalnot + conditionalnot ::= expr jmp_true expr jump_forward_else expr COME_FROM + + # a JUMP_FORWARD to another JUMP_FORWARD can get turned into + # a JUMP_ABSOLUTE with no COME_FROM + conditional ::= expr jmp_false expr jump_absolute_else expr + + # if_expr_true are for conditions which always evaluate true + # There is dead or non-optional remnants of the condition code though, + # and we use that to match on to reconstruct the source more accurately + expr ::= if_expr_true + if_expr_true ::= expr JUMP_FORWARD expr COME_FROM + """ + def customize_grammar_rules(self, tokens, customize): super(Python37Parser, self).customize_grammar_rules(tokens, customize) + self.check_reduce["call_kw"] = "AST" + + for i, token in enumerate(tokens): + opname = token.kind + + if opname == "LOAD_ASSERT": + if "PyPy" in customize: + rules_str = """ + stmt ::= JUMP_IF_NOT_DEBUG stmts COME_FROM + """ + self.add_unique_doc_rules(rules_str, customize) + elif opname == "FORMAT_VALUE": + rules_str = """ + expr ::= formatted_value1 + formatted_value1 ::= expr FORMAT_VALUE + """ + self.add_unique_doc_rules(rules_str, customize) + elif opname == "FORMAT_VALUE_ATTR": + rules_str = """ + expr ::= formatted_value2 + formatted_value2 ::= expr expr FORMAT_VALUE_ATTR + """ + self.add_unique_doc_rules(rules_str, customize) + elif opname == "MAKE_FUNCTION_8": + if 
"LOAD_DICTCOMP" in self.seen_ops: + # Is there something general going on here? + rule = """ + dict_comp ::= load_closure LOAD_DICTCOMP LOAD_STR + MAKE_FUNCTION_8 expr + GET_ITER CALL_FUNCTION_1 + """ + self.addRule(rule, nop_func) + elif "LOAD_SETCOMP" in self.seen_ops: + rule = """ + set_comp ::= load_closure LOAD_SETCOMP LOAD_STR + MAKE_FUNCTION_8 expr + GET_ITER CALL_FUNCTION_1 + """ + self.addRule(rule, nop_func) + + elif opname == "BEFORE_ASYNC_WITH": + rules_str = """ + stmt ::= async_with_stmt + async_with_as_stmt ::= expr + BEFORE_ASYNC_WITH GET_AWAITABLE LOAD_CONST YIELD_FROM + SETUP_ASYNC_WITH store + suite_stmts_opt + POP_BLOCK LOAD_CONST + COME_FROM_ASYNC_WITH + WITH_CLEANUP_START + GET_AWAITABLE LOAD_CONST YIELD_FROM + WITH_CLEANUP_FINISH END_FINALLY + stmt ::= async_with_as_stmt + async_with_stmt ::= expr + BEFORE_ASYNC_WITH GET_AWAITABLE LOAD_CONST YIELD_FROM + SETUP_ASYNC_WITH POP_TOP suite_stmts_opt + POP_BLOCK LOAD_CONST + COME_FROM_ASYNC_WITH + WITH_CLEANUP_START + GET_AWAITABLE LOAD_CONST YIELD_FROM + WITH_CLEANUP_FINISH END_FINALLY + """ + self.addRule(rules_str, nop_func) + + elif opname.startswith("BUILD_STRING"): + v = token.attr + rules_str = """ + expr ::= joined_str + joined_str ::= %sBUILD_STRING_%d + """ % ( + "expr " * v, + v, + ) + self.add_unique_doc_rules(rules_str, customize) + if "FORMAT_VALUE_ATTR" in self.seen_ops: + rules_str = """ + formatted_value_attr ::= expr expr FORMAT_VALUE_ATTR expr BUILD_STRING + expr ::= formatted_value_attr + """ + self.add_unique_doc_rules(rules_str, customize) + elif opname.startswith("BUILD_MAP_UNPACK_WITH_CALL"): + v = token.attr + rule = "build_map_unpack_with_call ::= %s%s" % ("expr " * v, opname) + self.addRule(rule, nop_func) + elif opname.startswith("BUILD_TUPLE_UNPACK_WITH_CALL"): + v = token.attr + rule = ( + "build_tuple_unpack_with_call ::= " + + "expr1024 " * int(v // 1024) + + "expr32 " * int((v // 32) % 32) + + "expr " * (v % 32) + + opname + ) + self.addRule(rule, nop_func) + rule = 
"starred ::= %s %s" % ("expr " * v, opname) + self.addRule(rule, nop_func) + elif opname == "SETUP_WITH": + rules_str = """ + withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt COME_FROM_WITH + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + + # Removes POP_BLOCK LOAD_CONST from 3.6- + withasstmt ::= expr SETUP_WITH store suite_stmts_opt COME_FROM_WITH + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + """ + if self.version < 3.8: + rules_str += """ + withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt POP_BLOCK + LOAD_CONST + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + """ + else: + rules_str += """ + withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt POP_BLOCK + BEGIN_FINALLY COME_FROM_WITH + WITH_CLEANUP_START WITH_CLEANUP_FINISH + END_FINALLY + """ + self.addRule(rules_str, nop_func) + pass + pass + + def custom_classfunc_rule(self, opname, token, customize, next_token): + + args_pos, args_kw = self.get_pos_kw(token) + + # Additional exprs for * and ** args: + # 0 if neither + # 1 for CALL_FUNCTION_VAR or CALL_FUNCTION_KW + # 2 for * and ** args (CALL_FUNCTION_VAR_KW). + # Yes, this computation based on instruction name is a little bit hoaky. 
+ nak = (len(opname) - len("CALL_FUNCTION")) // 3 + uniq_param = args_kw + args_pos + + if frozenset(("GET_AWAITABLE", "YIELD_FROM")).issubset(self.seen_ops): + rule = ( + "async_call ::= expr " + + ("pos_arg " * args_pos) + + ("kwarg " * args_kw) + + "expr " * nak + + token.kind + + " GET_AWAITABLE LOAD_CONST YIELD_FROM" + ) + self.add_unique_rule(rule, token.kind, uniq_param, customize) + self.add_unique_rule( + "expr ::= async_call", token.kind, uniq_param, customize + ) + + if opname.startswith("CALL_FUNCTION_KW"): + self.addRule("expr ::= call_kw36", nop_func) + values = "expr " * token.attr + rule = "call_kw36 ::= expr {values} LOAD_CONST {opname}".format(**locals()) + self.add_unique_rule(rule, token.kind, token.attr, customize) + elif opname == "CALL_FUNCTION_EX_KW": + # Note: this doesn't exist in 3.7 and later + self.addRule( + """expr ::= call_ex_kw4 + call_ex_kw4 ::= expr + expr + expr + CALL_FUNCTION_EX_KW + """, + nop_func, + ) + if "BUILD_MAP_UNPACK_WITH_CALL" in self.seen_op_basenames: + self.addRule( + """expr ::= call_ex_kw + call_ex_kw ::= expr expr build_map_unpack_with_call + CALL_FUNCTION_EX_KW + """, + nop_func, + ) + if "BUILD_TUPLE_UNPACK_WITH_CALL" in self.seen_op_basenames: + # FIXME: should this be parameterized by EX value? + self.addRule( + """expr ::= call_ex_kw3 + call_ex_kw3 ::= expr + build_tuple_unpack_with_call + expr + CALL_FUNCTION_EX_KW + """, + nop_func, + ) + if "BUILD_MAP_UNPACK_WITH_CALL" in self.seen_op_basenames: + # FIXME: should this be parameterized by EX value? 
+ self.addRule( + """expr ::= call_ex_kw2 + call_ex_kw2 ::= expr + build_tuple_unpack_with_call + build_map_unpack_with_call + CALL_FUNCTION_EX_KW + """, + nop_func, + ) + + elif opname == "CALL_FUNCTION_EX": + self.addRule( + """ + expr ::= call_ex + starred ::= expr + call_ex ::= expr starred CALL_FUNCTION_EX + """, + nop_func, + ) + if "BUILD_MAP_UNPACK_WITH_CALL" in self.seen_ops: + self.addRule( + """ + expr ::= call_ex_kw + call_ex_kw ::= expr expr + build_map_unpack_with_call CALL_FUNCTION_EX + """, + nop_func, + ) + if "BUILD_TUPLE_UNPACK_WITH_CALL" in self.seen_ops: + self.addRule( + """ + expr ::= call_ex_kw3 + call_ex_kw3 ::= expr + build_tuple_unpack_with_call + %s + CALL_FUNCTION_EX + """ + % "expr " + * token.attr, + nop_func, + ) + pass + + # FIXME: Is this right? + self.addRule( + """ + expr ::= call_ex_kw4 + call_ex_kw4 ::= expr + expr + expr + CALL_FUNCTION_EX + """, + nop_func, + ) + pass + else: + super(Python37Parser, self).custom_classfunc_rule( + opname, token, customize, next_token + ) + + def reduce_is_invalid(self, rule, ast, tokens, first, last): + invalid = super(Python37Parser, self).reduce_is_invalid( + rule, ast, tokens, first, last + ) + if invalid: + return invalid + if rule[0] == "call_kw": + # Make sure we don't derive call_kw + nt = ast[0] + while not isinstance(nt, Token): + if nt[0] == "call_kw": + return True + nt = nt[0] + pass + pass + return False + +def info(args): + # Check grammar + p = Python37Parser() + if len(args) > 0: + arg = args[0] + if arg == "3.7": + from decompyle3.parser.parse37 import Python37Parser + + p = Python37Parser() + elif arg == "3.8": + from decompyle3.parser.parse38 import Python38Parser + + p = Python38Parser() + else: + raise RuntimeError("Only 3.7 and 3.8 supported") + p.check_grammar() + if len(sys.argv) > 1 and sys.argv[1] == "dump": + print("-" * 50) + p.dump_grammar() + class Python37ParserSingle(Python37Parser, PythonParserSingle): pass -if __name__ == '__main__': + +if __name__ == 
"__main__": # Check grammar # FIXME: DRY this with other parseXX.py routines p = Python37Parser() @@ -174,7 +1373,9 @@ if __name__ == '__main__': remain_tokens = set(remain_tokens) - opcode_set print(remain_tokens) import sys + if len(sys.argv) > 1: from spark_parser.spark import rule2str + for rule in sorted(p.rule2name.items()): print(rule2str(rule[0])) diff --git a/uncompyle6/parsers/parse37base.py b/uncompyle6/parsers/parse37base.py new file mode 100644 index 00000000..d604430c --- /dev/null +++ b/uncompyle6/parsers/parse37base.py @@ -0,0 +1,1304 @@ +# Copyright (c) 2016-2017, 2019 Rocky Bernstein +""" +Python 3.7 base code. We keep non-custom-generated grammar rules out of this file. +""" +from uncompyle6.scanners.tok import Token +from uncompyle6.parser import PythonParser, PythonParserSingle, nop_func +from uncompyle6.parsers.treenode import SyntaxTree +from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG + + +class Python37BaseParser(PythonParser): + def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): + self.added_rules = set() + super(Python37BaseParser, self).__init__( + SyntaxTree, "stmts", debug=debug_parser + ) + self.new_rules = set() + + @staticmethod + def call_fn_name(token): + """Customize CALL_FUNCTION to add the number of positional arguments""" + if token.attr is not None: + return "%s_%i" % (token.kind, token.attr) + else: + return "%s_0" % (token.kind) + + def add_make_function_rule(self, rule, opname, attr, customize): + """Python 3.3 added a an addtional LOAD_STR before MAKE_FUNCTION and + this has an effect on many rules. + """ + new_rule = rule % "LOAD_STR " + self.add_unique_rule(new_rule, opname, attr, customize) + + def custom_build_class_rule(self, opname, i, token, tokens, customize): + """ + # Should the first rule be somehow folded into the 2nd one? 
+ build_class ::= LOAD_BUILD_CLASS mkfunc + LOAD_CLASSNAME {expr}^n-1 CALL_FUNCTION_n + LOAD_CONST CALL_FUNCTION_n + build_class ::= LOAD_BUILD_CLASS mkfunc + expr + call + CALL_FUNCTION_3 + """ + # FIXME: I bet this can be simplified + # look for next MAKE_FUNCTION + for i in range(i + 1, len(tokens)): + if tokens[i].kind.startswith("MAKE_FUNCTION"): + break + elif tokens[i].kind.startswith("MAKE_CLOSURE"): + break + pass + assert i < len( + tokens + ), "build_class needs to find MAKE_FUNCTION or MAKE_CLOSURE" + assert ( + tokens[i + 1].kind == "LOAD_STR" + ), "build_class expecting CONST after MAKE_FUNCTION/MAKE_CLOSURE" + call_fn_tok = None + for i in range(i, len(tokens)): + if tokens[i].kind.startswith("CALL_FUNCTION"): + call_fn_tok = tokens[i] + break + if not call_fn_tok: + raise RuntimeError( + "build_class custom rule for %s needs to find CALL_FUNCTION" % opname + ) + + # customize build_class rule + # FIXME: What's the deal with the two rules? Different Python versions? + # Different situations? Note that the above rule is based on the CALL_FUNCTION + # token found, while this one doesn't. + # 3.6+ handling + call_function = call_fn_tok.kind + if call_function.startswith("CALL_FUNCTION_KW"): + self.addRule("classdef ::= build_class_kw store", nop_func) + rule = "build_class_kw ::= LOAD_BUILD_CLASS mkfunc %sLOAD_CONST %s" % ( + "expr " * (call_fn_tok.attr - 1), + call_function, + ) + else: + call_function = self.call_fn_name(call_fn_tok) + rule = "build_class ::= LOAD_BUILD_CLASS mkfunc %s%s" % ( + "expr " * (call_fn_tok.attr - 1), + call_function, + ) + self.addRule(rule, nop_func) + return + + # FIXME FIXME FIXME: The below is an utter mess. Come up with a better + # organization for this. For example, arrange organize by opcode base? + + def customize_grammar_rules(self, tokens, customize): + + is_pypy = False + + # For a rough break out on the first word. 
This may + # include instructions that don't need customization, + # but we'll do a finer check after the rough breakout. + customize_instruction_basenames = frozenset( + ( + "BEFORE", + "BUILD", + "CALL", + "CONTINUE", + "DELETE", + "FORMAT", + "GET", + "JUMP", + "LOAD", + "LOOKUP", + "MAKE", + "RETURN", + "RAISE", + "SETUP", + "UNPACK", + ) + ) + + # Opcode names in the custom_ops_processed set have rules that get added + # unconditionally and the rules are constant. So they need to be done + # only once and if we see the opcode a second time we don't have to consider + # adding more rules. + # + # Note: BUILD_TUPLE_UNPACK_WITH_CALL gets considered by + # default because it starts with BUILD. So we'll set to ignore it from + # the start. + custom_ops_processed = set(("BUILD_TUPLE_UNPACK_WITH_CALL",)) + + # A set of instruction operation names that exist in the token stream. + # We use this to customize the grammar that we create. + # 2.6-compatible set comprehensions + self.seen_ops = frozenset([t.kind for t in tokens]) + self.seen_op_basenames = frozenset( + [opname[: opname.rfind("_")] for opname in self.seen_ops] + ) + + # Loop over instructions adding custom grammar rules based on + # a specific instruction seen. + + if "PyPy" in customize: + is_pypy = True + self.addRule( + """ + stmt ::= assign3_pypy + stmt ::= assign2_pypy + assign3_pypy ::= expr expr expr store store store + assign2_pypy ::= expr expr store store + stmt ::= if_expr_lambda + stmt ::= conditional_not_lambda + if_expr_lambda ::= expr jmp_false expr return_if_lambda + return_lambda LAMBDA_MARKER + conditional_not_lambda + ::= expr jmp_true expr return_if_lambda + return_lambda LAMBDA_MARKER + """, + nop_func, + ) + + n = len(tokens) + + # Determine if we have an iteration CALL_FUNCTION_1.
+ has_get_iter_call_function1 = False + for i, token in enumerate(tokens): + if ( + token == "GET_ITER" + and i < n - 2 + and self.call_fn_name(tokens[i + 1]) == "CALL_FUNCTION_1" + ): + has_get_iter_call_function1 = True + + for i, token in enumerate(tokens): + opname = token.kind + + # Do a quick breakout before testing potentially + # each of the dozen or so instruction in if elif. + if ( + opname[: opname.find("_")] not in customize_instruction_basenames + or opname in custom_ops_processed + ): + continue + + opname_base = opname[: opname.rfind("_")] + + # The order of opname listed is roughly sorted below + + if opname == "LOAD_ASSERT" and "PyPy" in customize: + rules_str = """ + stmt ::= JUMP_IF_NOT_DEBUG stmts COME_FROM + """ + self.add_unique_doc_rules(rules_str, customize) + + elif opname == "BEFORE_ASYNC_WITH": + rules_str = """ + stmt ::= async_with_stmt + stmt ::= async_with_as_stmt + """ + + if self.version < 3.8: + rules_str += """ + async_with_stmt ::= expr + BEFORE_ASYNC_WITH GET_AWAITABLE LOAD_CONST YIELD_FROM + SETUP_ASYNC_WITH POP_TOP suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM_ASYNC_WITH + WITH_CLEANUP_START + GET_AWAITABLE LOAD_CONST YIELD_FROM + WITH_CLEANUP_FINISH END_FINALLY + async_with_as_stmt ::= expr + BEFORE_ASYNC_WITH GET_AWAITABLE LOAD_CONST YIELD_FROM + SETUP_ASYNC_WITH store suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM_ASYNC_WITH + WITH_CLEANUP_START + GET_AWAITABLE LOAD_CONST YIELD_FROM + WITH_CLEANUP_FINISH END_FINALLY + """ + else: + rules_str += """ + async_with_stmt ::= expr + BEFORE_ASYNC_WITH GET_AWAITABLE LOAD_CONST YIELD_FROM + SETUP_ASYNC_WITH POP_TOP suite_stmts + POP_TOP POP_BLOCK BEGIN_FINALLY COME_FROM_ASYNC_WITH + WITH_CLEANUP_START + GET_AWAITABLE LOAD_CONST YIELD_FROM + WITH_CLEANUP_FINISH END_FINALLY + async_with_as_stmt ::= expr + BEFORE_ASYNC_WITH GET_AWAITABLE LOAD_CONST YIELD_FROM + SETUP_ASYNC_WITH store suite_stmts + POP_TOP POP_BLOCK BEGIN_FINALLY COME_FROM_ASYNC_WITH + WITH_CLEANUP_START + 
GET_AWAITABLE LOAD_CONST YIELD_FROM + WITH_CLEANUP_FINISH END_FINALLY + """ + self.addRule(rules_str, nop_func) + + elif opname_base == "BUILD_CONST_KEY_MAP": + kvlist_n = "expr " * (token.attr) + rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname) + self.addRule(rule, nop_func) + + elif opname.startswith("BUILD_LIST_UNPACK"): + v = token.attr + rule = "build_list_unpack ::= %s%s" % ("expr " * v, opname) + self.addRule(rule, nop_func) + rule = "expr ::= build_list_unpack" + self.addRule(rule, nop_func) + + elif opname_base in ("BUILD_MAP", "BUILD_MAP_UNPACK"): + + if opname == "BUILD_MAP_UNPACK": + self.addRule( + """ + expr ::= unmap_dict + unmap_dict ::= dict_comp BUILD_MAP_UNPACK + """, + nop_func, + ) + pass + elif opname.startswith("BUILD_MAP_UNPACK_WITH_CALL"): + v = token.attr + rule = "build_map_unpack_with_call ::= %s%s" % ("expr " * v, opname) + self.addRule(rule, nop_func) + + kvlist_n = "kvlist_%s" % token.attr + if opname == "BUILD_MAP_n": + # PyPy sometimes has no count. Sigh. + rule = ( + "dict_comp_func ::= BUILD_MAP_n LOAD_FAST for_iter store " + "comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST" + ) + self.add_unique_rule(rule, "dict_comp_func", 1, customize) + + kvlist_n = "kvlist_n" + rule = "kvlist_n ::= kvlist_n kv3" + self.add_unique_rule(rule, "kvlist_n", 0, customize) + rule = "kvlist_n ::=" + self.add_unique_rule(rule, "kvlist_n", 1, customize) + rule = "dict ::= BUILD_MAP_n kvlist_n" + + if not opname.startswith("BUILD_MAP_WITH_CALL"): + # FIXME: Use the attr + # so this doesn't run into exponential parsing time. + if opname.startswith("BUILD_MAP_UNPACK"): + # FIXME: start here. The LHS should be unmap_dict, not dict. + # FIXME: really we need a combination of dict_entry-like things. 
+ # It just so happens the most common case is not to mix + # dictionary comprehensions with dictionary elements + if "LOAD_DICTCOMP" in self.seen_ops: + rule = "dict ::= %s%s" % ("dict_comp " * token.attr, opname) + self.addRule(rule, nop_func) + rule = """ + expr ::= unmap_dict + unmap_dict ::= %s%s + """ % ( + "expr " * token.attr, + opname, + ) + else: + rule = "%s ::= %s %s" % ( + kvlist_n, + "expr " * (token.attr * 2), + opname, + ) + self.add_unique_rule(rule, opname, token.attr, customize) + rule = "dict ::= %s" % kvlist_n + self.add_unique_rule(rule, opname, token.attr, customize) + + elif opname.startswith("BUILD_MAP_UNPACK_WITH_CALL"): + v = token.attr + rule = "build_map_unpack_with_call ::= %s%s" % ("expr " * v, opname) + self.addRule(rule, nop_func) + + elif opname.startswith("BUILD_TUPLE_UNPACK_WITH_CALL"): + v = token.attr + rule = ( + "build_tuple_unpack_with_call ::= " + + "expr1024 " * int(v // 1024) + + "expr32 " * int((v // 32) % 32) + + "expr " * (v % 32) + + opname + ) + self.addRule(rule, nop_func) + rule = "starred ::= %s %s" % ("expr " * v, opname) + self.addRule(rule, nop_func) + + elif opname_base in ( + "BUILD_LIST", + "BUILD_SET", + "BUILD_TUPLE", + "BUILD_TUPLE_UNPACK", + ): + v = token.attr + + is_LOAD_CLOSURE = False + if opname_base == "BUILD_TUPLE": + # If is part of a "load_closure", then it is not part of a + # "list". + is_LOAD_CLOSURE = True + for j in range(v): + if tokens[i - j - 1].kind != "LOAD_CLOSURE": + is_LOAD_CLOSURE = False + break + if is_LOAD_CLOSURE: + rule = "load_closure ::= %s%s" % (("LOAD_CLOSURE " * v), opname) + self.add_unique_rule(rule, opname, token.attr, customize) + if not is_LOAD_CLOSURE or v == 0: + # We do this complicated test to speed up parsing of + # pathologically long literals, especially those over 1024.
+ build_count = token.attr + thousands = build_count // 1024 + thirty32s = (build_count // 32) % 32 + if thirty32s > 0: + rule = "expr32 ::=%s" % (" expr" * 32) + self.add_unique_rule(rule, opname_base, build_count, customize) + pass + if thousands > 0: + self.add_unique_rule( + "expr1024 ::=%s" % (" expr32" * 32), + opname_base, + build_count, + customize, + ) + pass + collection = opname_base[opname_base.find("_") + 1 :].lower() + rule = ( + ("%s ::= " % collection) + + "expr1024 " * thousands + + "expr32 " * thirty32s + + "expr " * (build_count % 32) + + opname + ) + self.add_unique_rules(["expr ::= %s" % collection, rule], customize) + continue + continue + elif opname_base == "BUILD_SLICE": + if token.attr == 2: + self.add_unique_rules( + [ + "expr ::= build_slice2", + "build_slice2 ::= expr expr BUILD_SLICE_2", + ], + customize, + ) + else: + assert token.attr == 3, ( + "BUILD_SLICE value must be 2 or 3; is %s" % v + ) + self.add_unique_rules( + [ + "expr ::= build_slice3", + "build_slice3 ::= expr expr expr BUILD_SLICE_3", + ], + customize, + ) + + elif opname.startswith("BUILD_STRING"): + v = token.attr + rules_str = """ + expr ::= joined_str + joined_str ::= %sBUILD_STRING_%d + """ % ( + "expr " * v, + v, + ) + self.add_unique_doc_rules(rules_str, customize) + if "FORMAT_VALUE_ATTR" in self.seen_ops: + rules_str = """ + formatted_value_attr ::= expr expr FORMAT_VALUE_ATTR expr BUILD_STRING + expr ::= formatted_value_attr + """ + self.add_unique_doc_rules(rules_str, customize) + + elif opname in frozenset( + ( + "CALL_FUNCTION", + "CALL_FUNCTION_EX", + "CALL_FUNCTION_EX_KW", + "CALL_FUNCTION_VAR", + "CALL_FUNCTION_VAR_KW", + ) + ) or opname.startswith("CALL_FUNCTION_KW"): + + if opname == "CALL_FUNCTION" and token.attr == 1: + rule = """ + dict_comp ::= LOAD_DICTCOMP LOAD_STR MAKE_FUNCTION_0 expr + GET_ITER CALL_FUNCTION_1 + classdefdeco1 ::= expr classdefdeco2 CALL_FUNCTION_1 + """ + self.addRule(rule, nop_func) + + self.custom_classfunc_rule(opname, 
token, customize, tokens[i + 1]) + # Note: don't add to custom_ops_processed. + + elif opname_base == "CALL_METHOD": + # PyPy and Python 3.7+ only - DRY with parse2 + + args_pos, args_kw = self.get_pos_kw(token) + + # number of apply equiv arguments: + nak = (len(opname_base) - len("CALL_METHOD")) // 3 + rule = ( + "call ::= expr " + + ("pos_arg " * args_pos) + + ("kwarg " * args_kw) + + "expr " * nak + + opname + ) + self.add_unique_rule(rule, opname, token.attr, customize) + + elif opname == "CONTINUE": + self.addRule("continue ::= CONTINUE", nop_func) + custom_ops_processed.add(opname) + elif opname == "CONTINUE_LOOP": + self.addRule("continue ::= CONTINUE_LOOP", nop_func) + custom_ops_processed.add(opname) + elif opname == "DELETE_ATTR": + self.addRule("del_stmt ::= expr DELETE_ATTR", nop_func) + custom_ops_processed.add(opname) + elif opname == "DELETE_DEREF": + self.addRule( + """ + stmt ::= del_deref_stmt + del_deref_stmt ::= DELETE_DEREF + """, + nop_func, + ) + custom_ops_processed.add(opname) + elif opname == "DELETE_SUBSCR": + self.addRule( + """ + del_stmt ::= delete_subscript + delete_subscript ::= expr expr DELETE_SUBSCR + """, + nop_func, + ) + custom_ops_processed.add(opname) + + elif opname == "FORMAT_VALUE": + rules_str = """ + expr ::= formatted_value1 + formatted_value1 ::= expr FORMAT_VALUE + """ + self.add_unique_doc_rules(rules_str, customize) + + elif opname == "FORMAT_VALUE_ATTR": + rules_str = """ + expr ::= formatted_value2 + formatted_value2 ::= expr expr FORMAT_VALUE_ATTR + """ + self.add_unique_doc_rules(rules_str, customize) + + elif opname == "GET_ITER": + self.addRule( + """ + expr ::= get_iter + attribute ::= expr GET_ITER + """, + nop_func, + ) + custom_ops_processed.add(opname) + elif opname == "JUMP_IF_NOT_DEBUG": + v = token.attr + self.addRule( + """ + stmt ::= assert_pypy + stmt ::= assert2_pypy", nop_func) + assert_pypy ::= JUMP_IF_NOT_DEBUG assert_expr jmp_true + LOAD_ASSERT RAISE_VARARGS_1 COME_FROM + assert2_pypy ::= 
JUMP_IF_NOT_DEBUG assert_expr jmp_true + LOAD_ASSERT expr CALL_FUNCTION_1 + RAISE_VARARGS_1 COME_FROM + assert2_pypy ::= JUMP_IF_NOT_DEBUG assert_expr jmp_true + LOAD_ASSERT expr CALL_FUNCTION_1 + RAISE_VARARGS_1 COME_FROM, + """, + nop_func, + ) + custom_ops_processed.add(opname) + elif opname == "LOAD_BUILD_CLASS": + self.custom_build_class_rule(opname, i, token, tokens, customize) + # Note: don't add to custom_ops_processed. + elif opname == "LOAD_CLASSDEREF": + # Python 3.4+ + self.addRule("expr ::= LOAD_CLASSDEREF", nop_func) + custom_ops_processed.add(opname) + elif opname == "LOAD_CLASSNAME": + self.addRule("expr ::= LOAD_CLASSNAME", nop_func) + custom_ops_processed.add(opname) + elif opname == "LOAD_DICTCOMP": + if has_get_iter_call_function1: + rule_pat = ( + "dict_comp ::= LOAD_DICTCOMP %sMAKE_FUNCTION_0 expr " + "GET_ITER CALL_FUNCTION_1" + ) + self.add_make_function_rule(rule_pat, opname, token.attr, customize) + pass + custom_ops_processed.add(opname) + elif opname == "LOAD_ATTR": + self.addRule( + """ + expr ::= attribute + attribute ::= expr LOAD_ATTR + """, + nop_func, + ) + custom_ops_processed.add(opname) + elif opname == "LOAD_LISTCOMP": + self.add_unique_rule("expr ::= listcomp", opname, token.attr, customize) + custom_ops_processed.add(opname) + elif opname == "LOAD_SETCOMP": + # Should this be generalized and put under MAKE_FUNCTION? 
+ if has_get_iter_call_function1: + self.addRule("expr ::= set_comp", nop_func) + rule_pat = ( + "set_comp ::= LOAD_SETCOMP %sMAKE_FUNCTION_0 expr " + "GET_ITER CALL_FUNCTION_1" + ) + self.add_make_function_rule(rule_pat, opname, token.attr, customize) + pass + custom_ops_processed.add(opname) + elif opname == "LOOKUP_METHOD": + # A PyPy speciality - DRY with parse3 + self.addRule( + """ + expr ::= attribute + attribute ::= expr LOOKUP_METHOD + """, + nop_func, + ) + custom_ops_processed.add(opname) + elif opname.startswith("MAKE_CLOSURE"): + # DRY with MAKE_FUNCTION + # Note: this probably doesn't handle kwargs proprerly + + if opname == "MAKE_CLOSURE_0" and "LOAD_DICTCOMP" in self.seen_ops: + # Is there something general going on here? + # Note that 3.6+ doesn't do this, but we'll remove + # this rule in parse36.py + rule = """ + dict_comp ::= load_closure LOAD_DICTCOMP LOAD_STR + MAKE_CLOSURE_0 expr + GET_ITER CALL_FUNCTION_1 + """ + self.addRule(rule, nop_func) + + args_pos, args_kw, annotate_args = token.attr + + # FIXME: Fold test into add_make_function_rule + j = 2 + if is_pypy or (i >= j and tokens[i - j] == "LOAD_LAMBDA"): + rule_pat = "mklambda ::= %sload_closure LOAD_LAMBDA %%s%s" % ( + "pos_arg " * args_pos, + opname, + ) + self.add_make_function_rule(rule_pat, opname, token.attr, customize) + + if has_get_iter_call_function1: + rule_pat = ( + "generator_exp ::= %sload_closure load_genexpr %%s%s expr " + "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * args_pos, opname) + ) + self.add_make_function_rule(rule_pat, opname, token.attr, customize) + + if has_get_iter_call_function1: + if is_pypy or (i >= j and tokens[i - j] == "LOAD_LISTCOMP"): + # In the tokens we saw: + # LOAD_LISTCOMP LOAD_CONST MAKE_FUNCTION (>= 3.3) or + # LOAD_LISTCOMP MAKE_FUNCTION (< 3.3) or + # and have GET_ITER CALL_FUNCTION_1 + # Todo: For Pypy we need to modify this slightly + rule_pat = ( + "listcomp ::= %sload_closure LOAD_LISTCOMP %%s%s expr " + "GET_ITER CALL_FUNCTION_1" + % 
("pos_arg " * args_pos, opname) + ) + self.add_make_function_rule( + rule_pat, opname, token.attr, customize + ) + if is_pypy or (i >= j and tokens[i - j] == "LOAD_SETCOMP"): + rule_pat = ( + "set_comp ::= %sload_closure LOAD_SETCOMP %%s%s expr " + "GET_ITER CALL_FUNCTION_1" + % ("pos_arg " * args_pos, opname) + ) + self.add_make_function_rule( + rule_pat, opname, token.attr, customize + ) + if is_pypy or (i >= j and tokens[i - j] == "LOAD_DICTCOMP"): + self.add_unique_rule( + "dict_comp ::= %sload_closure LOAD_DICTCOMP %s " + "expr GET_ITER CALL_FUNCTION_1" + % ("pos_arg " * args_pos, opname), + opname, + token.attr, + customize, + ) + + if args_kw > 0: + kwargs_str = "kwargs " + else: + kwargs_str = "" + + rule = "mkfunc ::= %s%s%s load_closure LOAD_CODE LOAD_STR %s" % ( + "expr " * args_pos, + kwargs_str, + "expr " * annotate_args, + opname, + ) + + self.add_unique_rule(rule, opname, token.attr, customize) + + if args_kw == 0: + rule = "mkfunc ::= %sload_closure load_genexpr %s" % ( + "pos_arg " * args_pos, + opname, + ) + self.add_unique_rule(rule, opname, token.attr, customize) + + pass + elif opname_base.startswith("MAKE_FUNCTION"): + args_pos, args_kw, annotate_args, closure = token.attr + stack_count = args_pos + args_kw + annotate_args + if closure: + if args_pos: + rule = "mklambda ::= %s%s%s%s" % ( + "expr " * stack_count, + "load_closure " * closure, + "BUILD_TUPLE_1 LOAD_LAMBDA LOAD_STR ", + opname, + ) + else: + rule = "mklambda ::= %s%s%s" % ( + "load_closure " * closure, + "LOAD_LAMBDA LOAD_STR ", + opname, + ) + self.add_unique_rule(rule, opname, token.attr, customize) + + else: + rule = "mklambda ::= %sLOAD_LAMBDA LOAD_STR %s" % ( + ("expr " * stack_count), + opname, + ) + self.add_unique_rule(rule, opname, token.attr, customize) + + rule = "mkfunc ::= %s%s%s%s" % ( + "expr " * stack_count, + "load_closure " * closure, + "LOAD_CODE LOAD_STR ", + opname, + ) + self.add_unique_rule(rule, opname, token.attr, customize) + + if 
has_get_iter_call_function1: + rule_pat = ( + "generator_exp ::= %sload_genexpr %%s%s expr " + "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * args_pos, opname) + ) + self.add_make_function_rule(rule_pat, opname, token.attr, customize) + rule_pat = ( + "generator_exp ::= %sload_closure load_genexpr %%s%s expr " + "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * args_pos, opname) + ) + self.add_make_function_rule(rule_pat, opname, token.attr, customize) + if is_pypy or (i >= 2 and tokens[i - 2] == "LOAD_LISTCOMP"): + # 3.6+ sometimes bundles all of the + # 'exprs' in the rule above into a + # tuple. + rule_pat = ( + "listcomp ::= load_closure LOAD_LISTCOMP %%s%s " + "expr GET_ITER CALL_FUNCTION_1" % (opname,) + ) + self.add_make_function_rule( + rule_pat, opname, token.attr, customize + ) + rule_pat = ( + "listcomp ::= %sLOAD_LISTCOMP %%s%s expr " + "GET_ITER CALL_FUNCTION_1" % ("expr " * args_pos, opname) + ) + self.add_make_function_rule( + rule_pat, opname, token.attr, customize + ) + + if is_pypy or (i >= 2 and tokens[i - 2] == "LOAD_LAMBDA"): + rule_pat = "mklambda ::= %s%sLOAD_LAMBDA %%s%s" % ( + ("pos_arg " * args_pos), + ("kwarg " * args_kw), + opname, + ) + self.add_make_function_rule(rule_pat, opname, token.attr, customize) + continue + + args_pos, args_kw, annotate_args, closure = token.attr + + j = 2 + + if has_get_iter_call_function1: + rule_pat = ( + "generator_exp ::= %sload_genexpr %%s%s expr " + "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * args_pos, opname) + ) + self.add_make_function_rule(rule_pat, opname, token.attr, customize) + + if is_pypy or (i >= j and tokens[i - j] == "LOAD_LISTCOMP"): + # In the tokens we saw: + # LOAD_LISTCOMP LOAD_CONST MAKE_FUNCTION (>= 3.3) or + # LOAD_LISTCOMP MAKE_FUNCTION (< 3.3) or + # and have GET_ITER CALL_FUNCTION_1 + # Todo: For Pypy we need to modify this slightly + rule_pat = ( + "listcomp ::= %sLOAD_LISTCOMP %%s%s expr " + "GET_ITER CALL_FUNCTION_1" % ("expr " * args_pos, opname) + ) + self.add_make_function_rule( + 
rule_pat, opname, token.attr, customize + ) + + # FIXME: Fold test into add_make_function_rule + if is_pypy or (i >= j and tokens[i - j] == "LOAD_LAMBDA"): + rule_pat = "mklambda ::= %s%sLOAD_LAMBDA %%s%s" % ( + ("pos_arg " * args_pos), + ("kwarg " * args_kw), + opname, + ) + self.add_make_function_rule(rule_pat, opname, token.attr, customize) + + if args_kw == 0: + kwargs = "no_kwargs" + self.add_unique_rule("no_kwargs ::=", opname, token.attr, customize) + else: + kwargs = "kwargs" + + # positional args before keyword args + rule = "mkfunc ::= %s%s %s%s" % ( + "pos_arg " * args_pos, + kwargs, + "LOAD_CODE LOAD_STR ", + opname, + ) + self.add_unique_rule(rule, opname, token.attr, customize) + + elif opname == "MAKE_FUNCTION_8": + if "LOAD_DICTCOMP" in self.seen_ops: + # Is there something general going on here? + rule = """ + dict_comp ::= load_closure LOAD_DICTCOMP LOAD_STR + MAKE_FUNCTION_8 expr + GET_ITER CALL_FUNCTION_1 + """ + self.addRule(rule, nop_func) + elif "LOAD_SETCOMP" in self.seen_ops: + rule = """ + set_comp ::= load_closure LOAD_SETCOMP LOAD_STR + MAKE_FUNCTION_8 expr + GET_ITER CALL_FUNCTION_1 + """ + self.addRule(rule, nop_func) + + elif opname == "RETURN_VALUE_LAMBDA": + self.addRule( + """ + return_lambda ::= ret_expr RETURN_VALUE_LAMBDA + """, + nop_func, + ) + custom_ops_processed.add(opname) + elif opname == "RAISE_VARARGS_0": + self.addRule( + """ + stmt ::= raise_stmt0 + raise_stmt0 ::= RAISE_VARARGS_0 + """, + nop_func, + ) + custom_ops_processed.add(opname) + elif opname == "RAISE_VARARGS_1": + self.addRule( + """ + stmt ::= raise_stmt1 + raise_stmt1 ::= expr RAISE_VARARGS_1 + """, + nop_func, + ) + custom_ops_processed.add(opname) + elif opname == "RAISE_VARARGS_2": + self.addRule( + """ + stmt ::= raise_stmt2 + raise_stmt2 ::= expr expr RAISE_VARARGS_2 + """, + nop_func, + ) + custom_ops_processed.add(opname) + + elif opname == "SETUP_EXCEPT": + self.addRule( + """ + try_except ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK + 
except_handler opt_come_from_except + + tryelsestmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK + except_handler else_suite come_from_except_clauses + + tryelsestmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK + except_handler else_suite come_froms + + tryelsestmtl ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK + except_handler else_suitel come_from_except_clauses + + stmt ::= tryelsestmtl3 + tryelsestmtl3 ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK + except_handler COME_FROM else_suitel + opt_come_from_except + """, + nop_func, + ) + custom_ops_processed.add(opname) + + elif opname == "SETUP_WITH": + rules_str = """ + stmt ::= withstmt + stmt ::= withasstmt + + withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt COME_FROM_WITH + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + withasstmt ::= expr SETUP_WITH store suite_stmts_opt COME_FROM_WITH + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + + withstmt ::= expr + SETUP_WITH POP_TOP suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM_WITH + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + withasstmt ::= expr + SETUP_WITH store suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM_WITH + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + + withstmt ::= expr + SETUP_WITH POP_TOP suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM_WITH + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + withasstmt ::= expr + SETUP_WITH store suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM_WITH + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + """ + if self.version < 3.8: + rules_str += """ + withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt POP_BLOCK + LOAD_CONST + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + """ + else: + rules_str += """ + withstmt ::= expr + SETUP_WITH POP_TOP suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM_WITH + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + + withasstmt ::= expr + SETUP_WITH store suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM_WITH + + withstmt ::= expr SETUP_WITH 
POP_TOP suite_stmts_opt POP_BLOCK + BEGIN_FINALLY COME_FROM_WITH + WITH_CLEANUP_START WITH_CLEANUP_FINISH + END_FINALLY + """ + self.addRule(rules_str, nop_func) + + elif opname_base in ("UNPACK_EX",): + before_count, after_count = token.attr + rule = ( + "unpack ::= " + opname + " store" * (before_count + after_count + 1) + ) + self.addRule(rule, nop_func) + + elif opname_base in ("UNPACK_TUPLE", "UNPACK_SEQUENCE"): + rule = "unpack ::= " + opname + " store" * token.attr + self.addRule(rule, nop_func) + + elif opname_base == "UNPACK_LIST": + rule = "unpack_list ::= " + opname + " store" * token.attr + self.addRule(rule, nop_func) + custom_ops_processed.add(opname) + pass + + pass + + self.check_reduce["and"] = "AST" + self.check_reduce["aug_assign1"] = "AST" + self.check_reduce["aug_assign2"] = "AST" + self.check_reduce["while1stmt"] = "noAST" + self.check_reduce["while1elsestmt"] = "noAST" + self.check_reduce["_ifstmts_jump"] = "AST" + self.check_reduce["ifelsestmt"] = "AST" + self.check_reduce["iflaststmt"] = "AST" + self.check_reduce["iflaststmtl"] = "AST" + self.check_reduce["ifstmt"] = "AST" + self.check_reduce["annotate_tuple"] = "noAST" + + # FIXME: remove parser errors caused by the below + # self.check_reduce['while1elsestmt'] = 'noAST' + + return + + def custom_classfunc_rule(self, opname, token, customize, next_token): + """ + call ::= expr {expr}^n CALL_FUNCTION_n + call ::= expr {expr}^n CALL_FUNCTION_VAR_n + call ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n + call ::= expr {expr}^n CALL_FUNCTION_KW_n + + classdefdeco2 ::= LOAD_BUILD_CLASS mkfunc {expr}^n-1 CALL_FUNCTION_n + """ + args_pos, args_kw = self.get_pos_kw(token) + + # Additional exprs for * and ** args: + # 0 if neither + # 1 for CALL_FUNCTION_VAR or CALL_FUNCTION_KW + # 2 for * and ** args (CALL_FUNCTION_VAR_KW). + # Yes, this computation based on instruction name is a little bit hoaky. 
+ nak = (len(opname) - len("CALL_FUNCTION")) // 3 + uniq_param = args_kw + args_pos + + if frozenset(("GET_AWAITABLE", "YIELD_FROM")).issubset(self.seen_ops): + rule = ( + "async_call ::= expr " + + ("pos_arg " * args_pos) + + ("kwarg " * args_kw) + + "expr " * nak + + token.kind + + " GET_AWAITABLE LOAD_CONST YIELD_FROM" + ) + self.add_unique_rule(rule, token.kind, uniq_param, customize) + self.add_unique_rule( + "expr ::= async_call", token.kind, uniq_param, customize + ) + + if opname.startswith("CALL_FUNCTION_VAR"): + token.kind = self.call_fn_name(token) + if opname.endswith("KW"): + kw = "expr " + else: + kw = "" + rule = ( + "call ::= expr expr " + + ("pos_arg " * args_pos) + + ("kwarg " * args_kw) + + kw + + token.kind + ) + + # Note: semantic actions make use of the fact of wheter "args_pos" + # zero or not in creating a template rule. + self.add_unique_rule(rule, token.kind, args_pos, customize) + else: + token.kind = self.call_fn_name(token) + uniq_param = args_kw + args_pos + + # Note: 3.5+ have subclassed this method; so we don't handle + # 'CALL_FUNCTION_VAR' or 'CALL_FUNCTION_EX' here. 
+ rule = ( + "call ::= expr " + + ("pos_arg " * args_pos) + + ("kwarg " * args_kw) + + "expr " * nak + + token.kind + ) + + self.add_unique_rule(rule, token.kind, uniq_param, customize) + + if "LOAD_BUILD_CLASS" in self.seen_ops: + if ( + next_token == "CALL_FUNCTION" + and next_token.attr == 1 + and args_pos > 1 + ): + rule = "classdefdeco2 ::= LOAD_BUILD_CLASS mkfunc %s%s_%d" % ( + ("expr " * (args_pos - 1)), + opname, + args_pos, + ) + self.add_unique_rule(rule, token.kind, uniq_param, customize) + + def reduce_is_invalid(self, rule, ast, tokens, first, last): + lhs = rule[0] + n = len(tokens) + + if lhs == "and" and ast: + # FIXME: put in a routine somewhere + # Compare with parse30.py of uncompyle6 + jmp = ast[1] + if jmp.kind.startswith("jmp_"): + if last == n: + return True + jmp_target = jmp[0].attr + jmp_offset = jmp[0].offset + + if tokens[first].off2int() <= jmp_target < tokens[last].off2int(): + return True + if rule == ("and", ("expr", "jmp_false", "expr", "jmp_false")): + jmp2_target = ast[3][0].attr + return jmp_target != jmp2_target + elif rule == ("and", ("expr", "jmp_false", "expr")): + if tokens[last] == "POP_JUMP_IF_FALSE": + return jmp_target != tokens[last].attr + elif rule == ("and", ("expr", "jmp_false", "expr", "COME_FROM")): + return ast[-1].attr != jmp_offset + # elif rule == ("and", ("expr", "jmp_false", "expr", "COME_FROM")): + # return jmp_offset != tokens[first+3].attr + + return jmp_target != tokens[last].off2int() + return False + + elif lhs in ("aug_assign1", "aug_assign2") and ast[0][0] == "and": + return True + elif lhs == "annotate_tuple": + return not isinstance(tokens[first].attr, tuple) + elif lhs == "while1elsestmt": + + if last == n: + # Adjust for fuzziness in parsing + last -= 1 + + if tokens[last] == "COME_FROM_LOOP": + last -= 1 + elif tokens[last - 1] == "COME_FROM_LOOP": + last -= 2 + if tokens[last] in ("JUMP_BACK", "CONTINUE"): + # These indicate inside a loop, but token[last] + # should not be in a loop. 
+ # FIXME: Not quite right: refine by using target + return True + + # if SETUP_LOOP target spans the else part, then this is + # not while1else. Also do for whileTrue? + last += 1 + # 3.8+ Doesn't have SETUP_LOOP + return self.version < 3.8 and tokens[first].attr > tokens[last].off2int() + + elif lhs == "while1stmt": + + # If there is a fall through to the COME_FROM_LOOP, then this is + # not a while 1. So the instruction before should either be a + # JUMP_BACK or the instruction before should not be the target of a + # jump. (Well that last clause i not quite right; that target could be + # from dead code. Ugh. We need a more uniform control flow analysis.) + if last == n or tokens[last - 1] == "COME_FROM_LOOP": + cfl = last - 1 + else: + cfl = last + assert tokens[cfl] == "COME_FROM_LOOP" + + for i in range(cfl - 1, first, -1): + if tokens[i] != "POP_BLOCK": + break + if tokens[i].kind not in ("JUMP_BACK", "RETURN_VALUE"): + if not tokens[i].kind.startswith("COME_FROM"): + return True + + # Check that the SETUP_LOOP jumps to the offset after the + # COME_FROM_LOOP + if 0 <= last < n and tokens[last] in ("COME_FROM_LOOP", "JUMP_BACK"): + # jump_back should be right before COME_FROM_LOOP? + last += 1 + if last == n: + last -= 1 + offset = tokens[last].off2int() + assert tokens[first] == "SETUP_LOOP" + if offset != tokens[first].attr: + return True + return False + elif lhs == "_ifstmts_jump" and len(rule[1]) > 1 and ast: + come_froms = ast[-1] + # Make sure all of the "come froms" offset at the + # end of the "if" come from somewhere inside the "if". + # Since the come_froms are ordered so that lowest + # offset COME_FROM is last, it is sufficient to test + # just the last one. + + # This is complicated, but note that the JUMP_IF instruction comes immediately + # *before* _ifstmts_jump so that's what we have to test + # the COME_FROM against. 
This can be complicated by intervening + # POP_TOP, and pseudo COME_FROM, ELSE instructions + # + pop_jump_index = first - 1 + while pop_jump_index > 0 and tokens[pop_jump_index] in ( + "ELSE", + "POP_TOP", + "JUMP_FORWARD", + "COME_FROM", + ): + pop_jump_index -= 1 + come_froms = ast[-1] + + # FIXME: something is fishy when and EXTENDED ARG is needed before the + # pop_jump_index instruction to get the argment. In this case, the + # _ifsmtst_jump can jump to a spot beyond the come_froms. + # That is going on in the non-EXTENDED_ARG case is that the POP_JUMP_IF + # jumps to a JUMP_(FORWARD) which is changed into an EXTENDED_ARG POP_JUMP_IF + # to the jumped forwareded address + if tokens[pop_jump_index].attr > 256: + return False + + if isinstance(come_froms, Token): + return ( + come_froms.attr is not None + and tokens[pop_jump_index].offset > come_froms.attr + ) + + elif len(come_froms) == 0: + return False + else: + return tokens[pop_jump_index].offset > come_froms[-1].attr + + elif lhs == "ifstmt" and ast: + # FIXME: put in a routine somewhere + testexpr = ast[0] + + if (last + 1) < n and tokens[last + 1] == "COME_FROM_LOOP": + # iflastsmtl jumped outside of loop. No good. + return True + + if testexpr[0] in ("testtrue", "testfalse"): + test = testexpr[0] + if len(test) > 1 and test[1].kind.startswith("jmp_"): + if last == n: + last -= 1 + jmp_target = test[1][0].attr + if tokens[first].off2int() <= jmp_target < tokens[last].off2int(): + return True + # jmp_target less than tokens[first] is okay - is to a loop + # jmp_target equal tokens[last] is also okay: normal non-optimized non-loop jump + if jmp_target > tokens[last].off2int(): + # One more weird case to look out for + # if c1: + # if c2: # Jumps around the *outer* "else" + # ... 
+ # else: + if jmp_target == tokens[last - 1].attr: + return False + if last < n and tokens[last].kind.startswith("JUMP"): + return False + return True + + pass + return False + elif lhs in ("iflaststmt", "iflaststmtl") and ast: + # FIXME: put in a routine somewhere + testexpr = ast[0] + + if testexpr[0] in ("testtrue", "testfalse"): + + test = testexpr[0] + if len(test) > 1 and test[1].kind.startswith("jmp_"): + if last == n: + last -= 1 + jmp_target = test[1][0].attr + if tokens[first].off2int() <= jmp_target < tokens[last].off2int(): + return True + # jmp_target less than tokens[first] is okay - is to a loop + # jmp_target equal tokens[last] is also okay: normal non-optimized non-loop jump + + if (last + 1) < n and tokens[last - 1] != "JUMP_BACK" and tokens[last + 1] == "COME_FROM_LOOP": + # iflastsmtl is not at the end of a loop, but jumped outside of loop. No good. + # FIXME: check that tokens[last] == "POP_BLOCK"? Or allow for it not to appear? + return True + + # If the instruction before "first" is a "POP_JUMP_IF_FALSE" which goes + # to the same target as jmp_target, then this not nested "if .. if .." + # but rather "if ... and ..." + if first > 0 and tokens[first - 1] == "POP_JUMP_IF_FALSE": + return tokens[first - 1].attr == jmp_target + + if jmp_target > tokens[last].off2int(): + # One more weird case to look out for + # if c1: + # if c2: # Jumps around the *outer* "else" + # ... + # else: + if jmp_target == tokens[last - 1].attr: + return False + if last < n and tokens[last].kind.startswith("JUMP"): + return False + return True + + pass + return False + + # FIXME: put in a routine somewhere + elif lhs == "ifelsestmt": + + if (last + 1) < n and tokens[last + 1] == "COME_FROM_LOOP": + # ifelsestmt jumped outside of loop. No good. 
+ return True + + if rule not in ( + ( + "ifelsestmt", + ( + "testexpr", + "c_stmts_opt", + "jump_forward_else", + "else_suite", + "_come_froms", + ), + ), + ( + "ifelsestmt", + ( + "testexpr", + "c_stmts_opt", + "jf_cfs", + "else_suite", + "opt_come_from_except", + ), + ), + ): + return False + + # Make sure all of the "come froms" offset at the + # end of the "if" come from somewhere inside the "if". + # Since the come_froms are ordered so that lowest + # offset COME_FROM is last, it is sufficient to test + # just the last one. + come_froms = ast[-1] + if come_froms == "opt_come_from_except" and len(come_froms) > 0: + come_froms = come_froms[0] + if not isinstance(come_froms, Token): + return tokens[first].offset > come_froms[-1].attr + elif tokens[first].offset > come_froms.attr: + return True + + # For mysterious reasons a COME_FROM in tokens[last+1] might be part of the grammar rule + # even though it is not found in come_froms. + # Work around this. + if ( + last < n + and tokens[last] == "COME_FROM" + and tokens[first].offset > tokens[last].attr + ): + return True + + testexpr = ast[0] + + # Check that the condition portion of the "if" + # jumps to the "else" part. 
+ # Compare with parse30.py of uncompyle6 + if testexpr[0] in ("testtrue", "testfalse"): + test = testexpr[0] + if len(test) > 1 and test[1].kind.startswith("jmp_"): + if last == n: + last -= 1 + jmp = test[1] + jmp_target = jmp[0].attr + if tokens[first].off2int() > jmp_target: + return True + return (jmp_target > tokens[last].off2int()) and tokens[ + last + ] != "JUMP_FORWARD" + + return False + + return False diff --git a/uncompyle6/parsers/parse38.py b/uncompyle6/parsers/parse38.py index 5cea1baf..3aa235ba 100644 --- a/uncompyle6/parsers/parse38.py +++ b/uncompyle6/parsers/parse38.py @@ -22,6 +22,12 @@ from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG from uncompyle6.parsers.parse37 import Python37Parser class Python38Parser(Python37Parser): + def p_38walrus(self, args): + """ + # named_expr is also known as the "walrus op" := + expr ::= named_expr + named_expr ::= expr DUP_TOP store + """ def p_38misc(self, args): """ @@ -38,6 +44,12 @@ class Python38Parser(Python37Parser): stmt ::= whilestmt38 stmt ::= whileTruestmt38 stmt ::= call + stmt ::= ifstmtl + + break ::= POP_BLOCK BREAK_LOOP + break ::= POP_BLOCK POP_TOP BREAK_LOOP + break ::= POP_TOP BREAK_LOOP + break ::= POP_EXCEPT BREAK_LOOP # FIXME: this should be restricted to being inside a try block stmt ::= except_ret38 @@ -89,27 +101,39 @@ class Python38Parser(Python37Parser): return ::= ret_expr ROT_TWO POP_TOP RETURN_VALUE + # 3.8 can push a looping JUMP_BACK into into a JUMP_ from a statement that jumps to it + lastl_stmt ::= ifpoplaststmtl + ifpoplaststmtl ::= testexpr POP_TOP c_stmts_opt JUMP_BACK + ifelsestmtl ::= testexpr c_stmts_opt jb_cfs else_suitel JUMP_BACK come_froms + + _ifstmts_jumpl ::= c_stmts JUMP_BACK + _ifstmts_jumpl ::= _ifstmts_jump + ifstmtl ::= testexpr _ifstmts_jumpl + for38 ::= expr get_iter store for_block JUMP_BACK - for38 ::= expr for_iter store for_block JUMP_BACK - for38 ::= expr for_iter store for_block JUMP_BACK POP_BLOCK - for38 ::= expr for_iter store 
for_block + for38 ::= expr get_for_iter store for_block JUMP_BACK + for38 ::= expr get_for_iter store for_block JUMP_BACK POP_BLOCK + for38 ::= expr get_for_iter store for_block - forelsestmt38 ::= expr for_iter store for_block POP_BLOCK else_suite - forelselaststmt38 ::= expr for_iter store for_block POP_BLOCK else_suitec - forelselaststmtl38 ::= expr for_iter store for_block POP_BLOCK else_suitel + forelsestmt38 ::= expr get_for_iter store for_block POP_BLOCK else_suite + forelselaststmt38 ::= expr get_for_iter store for_block POP_BLOCK else_suitec + forelselaststmtl38 ::= expr get_for_iter store for_block POP_BLOCK else_suitel - whilestmt38 ::= testexpr l_stmts_opt COME_FROM JUMP_BACK POP_BLOCK - whilestmt38 ::= testexpr l_stmts_opt JUMP_BACK POP_BLOCK - whilestmt38 ::= testexpr returns POP_BLOCK - whilestmt38 ::= testexpr l_stmts JUMP_BACK + whilestmt38 ::= _come_froms testexpr l_stmts_opt COME_FROM JUMP_BACK POP_BLOCK + whilestmt38 ::= _come_froms testexpr l_stmts_opt JUMP_BACK POP_BLOCK + whilestmt38 ::= _come_froms testexpr l_stmts_opt JUMP_BACK come_froms + whilestmt38 ::= _come_froms testexpr returns POP_BLOCK + whilestmt38 ::= _come_froms testexpr l_stmts JUMP_BACK + whilestmt38 ::= _come_froms testexpr l_stmts come_froms # while1elsestmt ::= l_stmts JUMP_BACK - whileTruestmt ::= l_stmts JUMP_BACK POP_BLOCK - while1stmt ::= l_stmts COME_FROM_LOOP - while1stmt ::= l_stmts COME_FROM JUMP_BACK COME_FROM_LOOP - whileTruestmt38 ::= l_stmts JUMP_BACK + whileTruestmt ::= _come_froms l_stmts JUMP_BACK POP_BLOCK + while1stmt ::= _come_froms l_stmts COME_FROM_LOOP + while1stmt ::= _come_froms l_stmts COME_FROM JUMP_BACK COME_FROM_LOOP + whileTruestmt38 ::= _come_froms l_stmts JUMP_BACK + whileTruestmt38 ::= _come_froms l_stmts JUMP_BACK COME_FROM_EXCEPT_CLAUSE - for_block ::= l_stmts_opt _come_from_loops JUMP_BACK + for_block ::= _come_froms l_stmts_opt _come_from_loops JUMP_BACK except_cond1 ::= DUP_TOP expr COMPARE_OP jmp_false POP_TOP POP_TOP POP_TOP @@ -134,7 
+158,8 @@ class Python38Parser(Python37Parser): except_ret38a ::= COME_FROM_FINALLY POP_TOP POP_TOP POP_TOP expr ROT_FOUR POP_EXCEPT RETURN_VALUE END_FINALLY - except_handler38 ::= JUMP_FORWARD COME_FROM_FINALLY + + except_handler38 ::= _jump COME_FROM_FINALLY except_stmts END_FINALLY opt_come_from_except except_handler38a ::= COME_FROM_FINALLY POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP stmts END_FINALLY @@ -160,13 +185,15 @@ class Python38Parser(Python37Parser): self.customized = {} def remove_rules_38(self): - self.remove_rules(""" + self.remove_rules( + """ stmt ::= async_for_stmt37 stmt ::= for stmt ::= forelsestmt stmt ::= try_except36 + stmt ::= async_forelse_stmt - async_for_stmt ::= SETUP_LOOP expr + async_for_stmt ::= setup_loop expr GET_AITER SETUP_EXCEPT GET_ANEXT LOAD_CONST YIELD_FROM @@ -178,7 +205,7 @@ class Python38Parser(Python37Parser): COME_FROM POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP POP_BLOCK COME_FROM_LOOP - async_for_stmt37 ::= SETUP_LOOP expr + async_for_stmt37 ::= setup_loop expr GET_AITER SETUP_EXCEPT GET_ANEXT LOAD_CONST YIELD_FROM @@ -190,7 +217,7 @@ class Python38Parser(Python37Parser): POP_TOP POP_BLOCK COME_FROM_LOOP - async_forelse_stmt ::= SETUP_LOOP expr + async_forelse_stmt ::= setup_loop expr GET_AITER SETUP_EXCEPT GET_ANEXT LOAD_CONST YIELD_FROM @@ -203,13 +230,13 @@ class Python38Parser(Python37Parser): POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP POP_BLOCK else_suite COME_FROM_LOOP - for ::= SETUP_LOOP expr for_iter store for_block POP_BLOCK - for ::= SETUP_LOOP expr for_iter store for_block POP_BLOCK NOP + for ::= setup_loop expr get_for_iter store for_block POP_BLOCK + for ::= setup_loop expr get_for_iter store for_block POP_BLOCK NOP for_block ::= l_stmts_opt COME_FROM_LOOP JUMP_BACK - forelsestmt ::= SETUP_LOOP expr for_iter store for_block POP_BLOCK else_suite - forelselaststmt ::= SETUP_LOOP expr for_iter store for_block POP_BLOCK else_suitec - forelselaststmtl ::= SETUP_LOOP expr for_iter store for_block POP_BLOCK 
else_suitel + forelsestmt ::= setup_loop expr get_for_iter store for_block POP_BLOCK else_suite + forelselaststmt ::= setup_loop expr get_for_iter store for_block POP_BLOCK else_suitec + forelselaststmtl ::= setup_loop expr get_for_iter store for_block POP_BLOCK else_suitel tryelsestmtl3 ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK except_handler COME_FROM else_suitel @@ -223,15 +250,16 @@ class Python38Parser(Python37Parser): COME_FROM_FINALLY suite_stmts_opt END_FINALLY tryfinally_return_stmt ::= SETUP_FINALLY suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM_FINALLY - - - """) + """ + ) def customize_grammar_rules(self, tokens, customize): super(Python37Parser, self).customize_grammar_rules(tokens, customize) self.remove_rules_38() - self.check_reduce['ifstmt'] = 'tokens' - self.check_reduce['whileTruestmt38'] = 'tokens' + self.check_reduce["ifstmt"] = "tokens" + self.check_reduce["whileTruestmt38"] = "tokens" + self.check_reduce["whilestmt38"] = "tokens" + self.check_reduce["ifstmtl"] = "tokens" def reduce_is_invalid(self, rule, ast, tokens, first, last): invalid = super(Python38Parser, @@ -240,34 +268,47 @@ class Python38Parser(Python37Parser): self.remove_rules_38() if invalid: return invalid - if rule[0] == 'ifstmt': + lhs = rule[0] + if lhs == "ifstmt": # Make sure jumps don't extend beyond the end of the if statement. 
l = last if l == len(tokens): l -= 1 if isinstance(tokens[l].offset, str): - last_offset = int(tokens[l].offset.split('_')[0], 10) + last_offset = int(tokens[l].offset.split("_")[0], 10) else: last_offset = tokens[l].offset for i in range(first, l): t = tokens[i] - if t.kind == 'POP_JUMP_IF_FALSE': + if t.kind == "POP_JUMP_IF_FALSE": if t.attr > last_offset: return True pass pass pass - elif rule[0] == 'whileTruestmt38': - t = tokens[last-1] - if t.kind == 'JUMP_BACK': - return t.attr != tokens[first].offset + elif lhs == "ifstmtl": + if last == len(tokens): + last -= 1 + if (tokens[last].attr and isinstance(tokens[last].attr, int)): + return tokens[first].offset < tokens[last].attr + pass + elif lhs in ("whileTruestmt38", "whilestmt38"): + jb_index = last - 1 + while jb_index > 0 and tokens[jb_index].kind.startswith("COME_FROM"): + jb_index -= 1 + t = tokens[jb_index] + if t.kind != "JUMP_BACK": + return True + return t.attr != tokens[first].off2int() pass return False + class Python38ParserSingle(Python38Parser, PythonParserSingle): pass + if __name__ == "__main__": # Check grammar # FIXME: DRY this with other parseXX.py routines @@ -284,9 +325,11 @@ if __name__ == "__main__": opcode_set = set(s.opc.opname).union( set( """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM - LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME - LAMBDA_MARKER RETURN_LAST - """.split())) + LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME + LAMBDA_MARKER RETURN_LAST + """.split() + ) + ) remain_tokens = set(tokens) - opcode_set import re diff --git a/uncompyle6/scanners/scanner37.py b/uncompyle6/scanners/scanner37.py index ad86e23a..44dfdd76 100644 --- a/uncompyle6/scanners/scanner37.py +++ b/uncompyle6/scanners/scanner37.py @@ -22,24 +22,50 @@ This sets up opcodes Python's 3.7 and calls a generalized scanner routine for Python 3. 
""" -from uncompyle6.scanners.scanner36 import Scanner36 -from uncompyle6.scanners.scanner3 import Scanner3 +from uncompyle6.scanners.scanner37base import Scanner37Base # bytecode verification, verify(), uses JUMP_OPs from here from xdis.opcodes import opcode_37 as opc + +# bytecode verification, verify(), uses JUMP_OPS from here JUMP_OPs = opc.JUMP_OPS -class Scanner37(Scanner36): +class Scanner37(Scanner37Base): def __init__(self, show_asm=None): - Scanner3.__init__(self, 3.7, show_asm) + Scanner37Base.__init__(self, 3.7, show_asm) return + pass + def ingest(self, co, classname=None, code_objects={}, show_asm=None): + tokens, customize = Scanner37Base.ingest(self, co, classname, code_objects, show_asm) + for t in tokens: + # The lowest bit of flags indicates whether the + # var-keyword argument is placed at the top of the stack + if t.op == self.opc.CALL_FUNCTION_EX and t.attr & 1: + t.kind = "CALL_FUNCTION_EX_KW" + pass + elif t.op == self.opc.BUILD_STRING: + t.kind = "BUILD_STRING_%s" % t.attr + elif t.op == self.opc.CALL_FUNCTION_KW: + t.kind = "CALL_FUNCTION_KW_%s" % t.attr + elif t.op == self.opc.FORMAT_VALUE: + if t.attr & 0x4: + t.kind = "FORMAT_VALUE_ATTR" + pass + elif t.op == self.opc.BUILD_MAP_UNPACK_WITH_CALL: + t.kind = "BUILD_MAP_UNPACK_WITH_CALL_%d" % t.attr + elif t.op == self.opc.BUILD_TUPLE_UNPACK_WITH_CALL: + t.kind = "BUILD_TUPLE_UNPACK_WITH_CALL_%d" % t.attr + pass + return tokens, customize + if __name__ == "__main__": from uncompyle6 import PYTHON_VERSION if PYTHON_VERSION == 3.7: import inspect + co = inspect.currentframe().f_code tokens, customize = Scanner37().ingest(co) for t in tokens: diff --git a/uncompyle6/scanners/scanner37base.py b/uncompyle6/scanners/scanner37base.py new file mode 100644 index 00000000..f6fc5a8c --- /dev/null +++ b/uncompyle6/scanners/scanner37base.py @@ -0,0 +1,1110 @@ +# Copyright (c) 2015-2019 by Rocky Bernstein +# Copyright (c) 2005 by Dan Pascu +# Copyright (c) 2000-2002 by hartmut Goebel +# +# This program 
is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. +""" +Python 37 bytecode scanner/deparser base. + +Also we *modify* the instruction sequence to assist deparsing code. +For example: + - we add "COME_FROM" instructions to help in figuring out + conditional branching and looping. + - LOAD_CONSTs are classified further into the type of thing + they load: + lambda's, genexpr's, {dict,set,list} comprehension's, + - PARAMETER counts appended {CALL,MAKE}_FUNCTION, BUILD_{TUPLE,SET,SLICE} + +Finally we save token information. +""" + +from xdis.code import iscode +from xdis.bytecode import instruction_size, _get_const_info + +from uncompyle6.scanner import Token +import xdis + +# Get all the opcodes into globals +import xdis.opcodes.opcode_37 as op3 + +from uncompyle6.scanner import Scanner + +import sys + +globals().update(op3.opmap) + + +class Scanner37Base(Scanner): + def __init__(self, version, show_asm=None, is_pypy=False): + super(Scanner37Base, self).__init__(version, show_asm, is_pypy) + + # Create opcode classification sets + # Note: super initialization above initializes self.opc + + # Ops that start SETUP_ ... We will COME_FROM with these names + # Some blocks and END_ statements.
And they can start + # a new statement + if self.version < 3.8: + setup_ops = [ + self.opc.SETUP_LOOP, + self.opc.SETUP_EXCEPT, + self.opc.SETUP_FINALLY, + ] + self.setup_ops_no_loop = frozenset(setup_ops) - frozenset( + [self.opc.SETUP_LOOP] + ) + else: + setup_ops = [self.opc.SETUP_FINALLY] + self.setup_ops_no_loop = frozenset(setup_ops) + + # Add back these opcodes which help us detect "break" and + # "continue" statements via parsing. + self.opc.BREAK_LOOP = 80 + self.opc.CONTINUE_LOOP = 119 + pass + + setup_ops.append(self.opc.SETUP_WITH) + self.setup_ops = frozenset(setup_ops) + + self.pop_jump_tf = frozenset([self.opc.PJIF, self.opc.PJIT]) + self.not_continue_follow = ("END_FINALLY", "POP_BLOCK") + + # Opcodes that can start a statement. + statement_opcodes = [ + self.opc.POP_BLOCK, + self.opc.STORE_FAST, + self.opc.DELETE_FAST, + self.opc.STORE_DEREF, + self.opc.STORE_GLOBAL, + self.opc.DELETE_GLOBAL, + self.opc.STORE_NAME, + self.opc.DELETE_NAME, + self.opc.STORE_ATTR, + self.opc.DELETE_ATTR, + self.opc.STORE_SUBSCR, + self.opc.POP_TOP, + self.opc.DELETE_SUBSCR, + self.opc.END_FINALLY, + self.opc.RETURN_VALUE, + self.opc.RAISE_VARARGS, + self.opc.PRINT_EXPR, + self.opc.JUMP_ABSOLUTE, + # These are phony for 3.8+ + self.opc.BREAK_LOOP, + self.opc.CONTINUE_LOOP, + ] + + self.statement_opcodes = frozenset(statement_opcodes) | self.setup_ops_no_loop + + # Opcodes that can start a "store" non-terminal. + # FIXME: JUMP_ABSOLUTE is weird. What's up with that? 
+ self.designator_ops = frozenset( + [ + self.opc.STORE_FAST, + self.opc.STORE_NAME, + self.opc.STORE_GLOBAL, + self.opc.STORE_DEREF, + self.opc.STORE_ATTR, + self.opc.STORE_SUBSCR, + self.opc.UNPACK_SEQUENCE, + self.opc.JUMP_ABSOLUTE, + self.opc.UNPACK_EX, + ] + ) + + self.jump_if_pop = frozenset( + [self.opc.JUMP_IF_FALSE_OR_POP, self.opc.JUMP_IF_TRUE_OR_POP] + ) + + self.pop_jump_if_pop = frozenset( + [ + self.opc.JUMP_IF_FALSE_OR_POP, + self.opc.JUMP_IF_TRUE_OR_POP, + self.opc.POP_JUMP_IF_TRUE, + self.opc.POP_JUMP_IF_FALSE, + ] + ) + # Not really a set, but still clasification-like + self.statement_opcode_sequences = [ + (self.opc.POP_JUMP_IF_FALSE, self.opc.JUMP_FORWARD), + (self.opc.POP_JUMP_IF_FALSE, self.opc.JUMP_ABSOLUTE), + (self.opc.POP_JUMP_IF_TRUE, self.opc.JUMP_FORWARD), + (self.opc.POP_JUMP_IF_TRUE, self.opc.JUMP_ABSOLUTE), + ] + + # FIXME: remove this and use instead info from xdis. + # Opcodes that take a variable number of arguments + # (expr's) + varargs_ops = set( + [ + self.opc.BUILD_LIST, + self.opc.BUILD_TUPLE, + self.opc.BUILD_SET, + self.opc.BUILD_SLICE, + self.opc.BUILD_MAP, + self.opc.UNPACK_SEQUENCE, + self.opc.RAISE_VARARGS, + ] + ) + + varargs_ops.add(self.opc.CALL_METHOD) + varargs_ops |= set( + [ + self.opc.BUILD_SET_UNPACK, + self.opc.BUILD_MAP_UNPACK, # we will handle this later + self.opc.BUILD_LIST_UNPACK, + self.opc.BUILD_TUPLE_UNPACK, + ] + ) + varargs_ops.add(self.opc.BUILD_CONST_KEY_MAP) + # Below is in bit order, "default = bit 0, closure = bit 3 + self.MAKE_FUNCTION_FLAGS = tuple( + """ + default keyword-only annotation closure""".split() + ) + + self.varargs_ops = frozenset(varargs_ops) + # FIXME: remove the above in favor of: + # self.varargs_ops = frozenset(self.opc.hasvargs) + return + + def ingest(self, co, classname=None, code_objects={}, show_asm=None): + """ + Pick out tokens from an uncompyle6 code object, and transform them, + returning a list of uncompyle6 Token's. 
+ + The transformations are made to assist the deparsing grammar. + Specifically: + - various types of LOAD_CONST's are categorized in terms of what they load + - COME_FROM instructions are added to assist parsing control structures + - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments + - some EXTENDED_ARGS instructions are removed + + Also, when we encounter certain tokens, we add them to a set which will cause custom + grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST + cause specific rules for the specific number of arguments they take. + """ + + def tokens_append(j, token): + tokens.append(token) + self.offset2tok_index[token.offset] = j + j += 1 + assert j == len(tokens) + return j + + if not show_asm: + show_asm = self.show_asm + + bytecode = self.build_instructions(co) + + # show_asm = 'both' + if show_asm in ("both", "before"): + for instr in bytecode.get_instructions(co): + print(instr.disassemble()) + + # "customize" is in the process of going away here + customize = {} + + if self.is_pypy: + customize["PyPy"] = 0 + + # Scan for assertions. Later we will + # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'. + # 'LOAD_ASSERT' is used in assert statements. + self.load_asserts = set() + + # list of tokens/instructions + tokens = [] + self.offset2tok_index = {} + + n = len(self.insts) + for i, inst in enumerate(self.insts): + + # We need to detect the difference between: + # raise AssertionError + # and + # assert ... + # If we have a JUMP_FORWARD after the + # RAISE_VARARGS then we have a "raise" statement + # else we have an "assert" statement.
+ assert_can_follow = inst.opname == "POP_JUMP_IF_TRUE" and i + 1 < n + if assert_can_follow: + next_inst = self.insts[i + 1] + if ( + next_inst.opname == "LOAD_GLOBAL" + and next_inst.argval == "AssertionError" + and inst.argval + ): + raise_idx = self.offset2inst_index[self.prev_op[inst.argval]] + raise_inst = self.insts[raise_idx] + if raise_inst.opname.startswith("RAISE_VARARGS"): + self.load_asserts.add(next_inst.offset) + pass + pass + + # Get jump targets + # Format: {target offset: [jump offsets]} + jump_targets = self.find_jump_targets(show_asm) + # print("XXX2", jump_targets) + + last_op_was_break = False + + j = 0 + for i, inst in enumerate(self.insts): + + argval = inst.argval + op = inst.opcode + + if inst.opname == "EXTENDED_ARG": + # FIXME: The EXTENDED_ARG is used to signal annotation + # parameters + if i + 1 < n and self.insts[i + 1].opcode != self.opc.MAKE_FUNCTION: + continue + + if inst.offset in jump_targets: + jump_idx = 0 + # We want to process COME_FROMs to the same offset to be in *descending* + # offset order so we have the larger range or biggest instruction interval + # last. (I think they are sorted in increasing order, but for safety + # we sort them). That way, specific COME_FROM tags will match up + # properly. For example, a "loop" with an "if" nested in it should have the + # "loop" tag last so the grammar rule matches that properly. 
+ for jump_offset in sorted(jump_targets[inst.offset], reverse=True): + come_from_name = "COME_FROM" + opname = self.opname_for_offset(jump_offset) + if opname == "EXTENDED_ARG": + k = xdis.next_offset(op, self.opc, jump_offset) + opname = self.opname_for_offset(k) + + if opname.startswith("SETUP_"): + come_from_type = opname[len("SETUP_") :] + come_from_name = "COME_FROM_%s" % come_from_type + pass + elif inst.offset in self.except_targets: + come_from_name = "COME_FROM_EXCEPT_CLAUSE" + j = tokens_append( + j, + Token( + come_from_name, + jump_offset, + repr(jump_offset), + offset="%s_%s" % (inst.offset, jump_idx), + has_arg=True, + opc=self.opc, + ), + ) + jump_idx += 1 + pass + pass + elif inst.offset in self.else_start: + end_offset = self.else_start[inst.offset] + j = tokens_append( + j, + Token( + "ELSE", + None, + repr(end_offset), + offset="%s" % (inst.offset), + has_arg=True, + opc=self.opc, + ), + ) + + pass + + pattr = inst.argrepr + opname = inst.opname + + if op in self.opc.CONST_OPS: + const = argval + if iscode(const): + if const.co_name == "": + assert opname == "LOAD_CONST" + opname = "LOAD_LAMBDA" + elif const.co_name == "": + opname = "LOAD_GENEXPR" + elif const.co_name == "": + opname = "LOAD_DICTCOMP" + elif const.co_name == "": + opname = "LOAD_SETCOMP" + elif const.co_name == "": + opname = "LOAD_LISTCOMP" + else: + opname = "LOAD_CODE" + # verify() uses 'pattr' for comparison, since 'attr' + # now holds Code(const) and thus can not be used + # for comparison (todo: think about changing this) + # pattr = 'code_object @ 0x%x %s->%s' %\ + # (id(const), const.co_filename, const.co_name) + pattr = "" + elif isinstance(const, str): + opname = "LOAD_STR" + else: + if isinstance(inst.arg, int) and inst.arg < len(co.co_consts): + argval, _ = _get_const_info(inst.arg, co.co_consts) + # Why don't we use _ above for "pattr" rather than "const"? 
+ # This *is* a little hoaky, but we have to coordinate with + # other parts like n_LOAD_CONST in pysource.py for example. + pattr = const + pass + elif opname in ("MAKE_FUNCTION", "MAKE_CLOSURE"): + flags = argval + opname = "MAKE_FUNCTION_%d" % (flags) + attr = [] + for flag in self.MAKE_FUNCTION_FLAGS: + bit = flags & 1 + attr.append(bit) + flags >>= 1 + attr = attr[:4] # remove last value: attr[5] == False + j = tokens_append( + j, + Token( + opname=opname, + attr=attr, + pattr=pattr, + offset=inst.offset, + linestart=inst.starts_line, + op=op, + has_arg=inst.has_arg, + opc=self.opc, + ), + ) + continue + elif op in self.varargs_ops: + pos_args = argval + if self.is_pypy and not pos_args and opname == "BUILD_MAP": + opname = "BUILD_MAP_n" + else: + opname = "%s_%d" % (opname, pos_args) + + elif self.is_pypy and opname == "JUMP_IF_NOT_DEBUG": + # The value in the dict is in special cases in semantic actions, such + # as JUMP_IF_NOT_DEBUG. The value is not used in these cases, so we put + # in arbitrary value 0. + customize[opname] = 0 + elif opname == "UNPACK_EX": + # FIXME: try with scanner and parser by + # changing argval + before_args = argval & 0xFF + after_args = (argval >> 8) & 0xFF + pattr = "%d before vararg, %d after" % (before_args, after_args) + argval = (before_args, after_args) + opname = "%s_%d+%d" % (opname, before_args, after_args) + + elif op == self.opc.JUMP_ABSOLUTE: + # Further classify JUMP_ABSOLUTE into backward jumps + # which are used in loops, and "CONTINUE" jumps which + # may appear in a "continue" statement. The loop-type + # and continue-type jumps will help us classify loop + # boundaries The continue-type jumps help us get + # "continue" statements with would otherwise be turned + # into a "pass" statement because JUMPs are sometimes + # ignored in rules as just boundary overhead. In + # comprehensions we might sometimes classify JUMP_BACK + # as CONTINUE, but that's okay since we add a grammar + # rule for that. 
+ pattr = argval + target = self.get_target(inst.offset) + if target <= inst.offset: + next_opname = self.insts[i + 1].opname + + # 'Continue's include jumps to loops that are not + # at the end of a block which follow with POP_BLOCK and COME_FROM_LOOP. + # If the JUMP_ABSOLUTE is to a FOR_ITER and it is followed by another JUMP_FORWARD + # then we'll take it as a "continue". + is_continue = ( + self.insts[self.offset2inst_index[target]].opname == "FOR_ITER" + and self.insts[i + 1].opname == "JUMP_FORWARD" + ) + + if self.version < 3.8 and ( + is_continue + or ( + inst.offset in self.stmts + and ( + inst.starts_line + and next_opname not in self.not_continue_follow + ) + ) + ): + opname = "CONTINUE" + else: + opname = "JUMP_BACK" + # FIXME: this is a hack to catch stuff like: + # if x: continue + # the "continue" is not on a new line. + # There are other situations where we don't catch + # CONTINUE as well. + if tokens[-1].kind == "JUMP_BACK" and tokens[-1].attr <= argval: + if tokens[-2].kind == "BREAK_LOOP": + del tokens[-1] + else: + # intern is used because we are changing the *previous* token + tokens[-1].kind = sys.intern("CONTINUE") + if last_op_was_break and opname == "CONTINUE": + last_op_was_break = False + continue + + elif inst.offset in self.load_asserts: + opname = "LOAD_ASSERT" + + last_op_was_break = opname == "BREAK_LOOP" + j = tokens_append( + j, + Token( + opname=opname, + attr=argval, + pattr=pattr, + offset=inst.offset, + linestart=inst.starts_line, + op=op, + has_arg=inst.has_arg, + opc=self.opc, + ), + ) + pass + + if show_asm in ("both", "after"): + for t in tokens: + print(t.format(line_prefix="L.")) + print() + return tokens, customize + + def find_jump_targets(self, debug): + """ + Detect all offsets in a byte code which are jump targets + where we might insert a COME_FROM instruction. + + Return a dict mapping each target offset to the list of + offsets of the instructions that jump to it. + + An instruction can be jumped to from multiple instructions.
+ """ + code = self.code + n = len(code) + self.structs = [{"type": "root", "start": 0, "end": n - 1}] + + # All loop entry points + self.loops = [] + + # Map fixed jumps to their real destination + self.fixed_jumps = {} + self.except_targets = {} + self.ignore_if = set() + self.build_statement_indices() + self.else_start = {} + + # Containers filled by detect_control_flow() + self.not_continue = set() + self.return_end_ifs = set() + self.setup_loop_targets = {} # target given setup_loop offset + self.setup_loops = {} # setup_loop offset given target + + targets = {} + for i, inst in enumerate(self.insts): + offset = inst.offset + op = inst.opcode + + # Determine structures and fix jumps in Python versions + # since 2.3 + self.detect_control_flow(offset, targets, i) + + if inst.has_arg: + label = self.fixed_jumps.get(offset) + oparg = inst.arg + if self.code[offset] == self.opc.EXTENDED_ARG: + j = xdis.next_offset(op, self.opc, offset) + next_offset = xdis.next_offset(op, self.opc, j) + else: + next_offset = xdis.next_offset(op, self.opc, offset) + + if label is None: + if op in self.opc.hasjrel and op != self.opc.FOR_ITER: + label = next_offset + oparg + elif op in self.opc.hasjabs: + if op in self.jump_if_pop: + if oparg > offset: + label = oparg + + if label is not None and label != -1: + targets[label] = targets.get(label, []) + [offset] + elif op == self.opc.END_FINALLY and offset in self.fixed_jumps: + label = self.fixed_jumps[offset] + targets[label] = targets.get(label, []) + [offset] + pass + + pass # for loop + + # DEBUG: + if debug in ("both", "after"): + import pprint as pp + + pp.pprint(self.structs) + + return targets + + def build_statement_indices(self): + code = self.code + start = 0 + end = codelen = len(code) + + # Compose preliminary list of indices with statements, + # using plain statement opcodes + prelim = self.inst_matches(start, end, self.statement_opcodes) + + # Initialize final container with statements with + # preliminary data + stmts 
= self.stmts = set(prelim) + + # Same for opcode sequences + pass_stmts = set() + for sequence in self.statement_opcode_sequences: + for i in self.op_range(start, end - (len(sequence) + 1)): + match = True + for elem in sequence: + if elem != code[i]: + match = False + break + i += instruction_size(code[i], self.opc) + + if match is True: + i = self.prev_op[i] + stmts.add(i) + pass_stmts.add(i) + + # Initialize statement list with the full data we've gathered so far + if pass_stmts: + stmt_offset_list = list(stmts) + stmt_offset_list.sort() + else: + stmt_offset_list = prelim + # 'List-map' which contains offset of start of + # next statement, when op offset is passed as index + self.next_stmt = slist = [] + last_stmt_offset = -1 + i = 0 + # Go through all statement offsets + for stmt_offset in stmt_offset_list: + # Process absolute jumps, but do not remove 'pass' statements + # from the set + if ( + code[stmt_offset] == self.opc.JUMP_ABSOLUTE + and stmt_offset not in pass_stmts + ): + # If absolute jump occurs in forward direction or it takes off from the + # same line as previous statement, this is not a statement + # FIXME: 0 isn't always correct + target = self.get_target(stmt_offset) + if ( + target > stmt_offset + or self.lines[last_stmt_offset].l_no == self.lines[stmt_offset].l_no + ): + stmts.remove(stmt_offset) + continue + # Rewing ops till we encounter non-JUMP_ABSOLUTE one + j = self.prev_op[stmt_offset] + while code[j] == self.opc.JUMP_ABSOLUTE: + j = self.prev_op[j] + # If we got here, then it's list comprehension which + # is not a statement too + if code[j] == self.opc.LIST_APPEND: + stmts.remove(stmt_offset) + continue + # Exclude ROT_TWO + POP_TOP + elif ( + code[stmt_offset] == self.opc.POP_TOP + and code[self.prev_op[stmt_offset]] == self.opc.ROT_TWO + ): + stmts.remove(stmt_offset) + continue + # Exclude FOR_ITER + designators + elif code[stmt_offset] in self.designator_ops: + j = self.prev_op[stmt_offset] + while code[j] in 
self.designator_ops: + j = self.prev_op[j] + if code[j] == self.opc.FOR_ITER: + stmts.remove(stmt_offset) + continue + # Add to list another list with offset of current statement, + # equal to length of previous statement + slist += [stmt_offset] * (stmt_offset - i) + last_stmt_offset = stmt_offset + i = stmt_offset + # Finish filling the list for last statement + slist += [codelen] * (codelen - len(slist)) + + def detect_control_flow(self, offset, targets, inst_index): + """ + Detect type of block structures and their boundaries to fix optimized jumps + in python2.3+ + """ + + code = self.code + inst = self.insts[inst_index] + op = inst.opcode + + # Detect parent structure + parent = self.structs[0] + start = parent["start"] + end = parent["end"] + + # Pick inner-most parent for our offset + for struct in self.structs: + current_start = struct["start"] + current_end = struct["end"] + if (current_start <= offset < current_end) and ( + current_start >= start and current_end <= end + ): + start = current_start + end = current_end + parent = struct + + if self.version < 3.8 and op == self.opc.SETUP_LOOP: + # We categorize loop types: 'for', 'while', 'while 1' with + # possibly suffixes '-loop' and '-else' + # Try to find the jump_back instruction of the loop. + # It could be a return instruction. 
+ + start += inst.inst_size + target = self.get_target(offset) + end = self.restrict_to_parent(target, parent) + self.setup_loops[target] = offset + + if target != end: + self.fixed_jumps[offset] = end + + (line_no, next_line_byte) = self.lines[offset] + jump_back = self.last_instr( + start, end, self.opc.JUMP_ABSOLUTE, next_line_byte, False + ) + + if jump_back: + jump_forward_offset = xdis.next_offset( + code[jump_back], self.opc, jump_back + ) + else: + jump_forward_offset = None + + return_val_offset1 = self.prev[self.prev[end]] + + if ( + jump_back + and jump_back != self.prev_op[end] + and self.is_jump_forward(jump_forward_offset) + ): + if code[self.prev_op[end]] == self.opc.RETURN_VALUE or ( + code[self.prev_op[end]] == self.opc.POP_BLOCK + and code[return_val_offset1] == self.opc.RETURN_VALUE + ): + jump_back = None + if not jump_back: + # loop suite ends in return + jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE) + if not jump_back: + return + + jb_inst = self.get_inst(jump_back) + jump_back = self.next_offset(jb_inst.opcode, jump_back) + + if_offset = None + if code[self.prev_op[next_line_byte]] not in self.pop_jump_tf: + if_offset = self.prev[next_line_byte] + if if_offset: + loop_type = "while" + self.ignore_if.add(if_offset) + else: + loop_type = "for" + target = next_line_byte + end = xdis.next_offset(code[jump_back], self.opc, jump_back) + else: + if self.get_target(jump_back) >= next_line_byte: + jump_back = self.last_instr( + start, end, self.opc.JUMP_ABSOLUTE, start, False + ) + + jb_inst = self.get_inst(jump_back) + + jb_next_offset = self.next_offset(jb_inst.opcode, jump_back) + if end > jb_next_offset and self.is_jump_forward(end): + if self.is_jump_forward(jb_next_offset): + if self.get_target(jb_next_offset) == self.get_target(end): + self.fixed_jumps[offset] = jb_next_offset + end = jb_next_offset + elif target < offset: + self.fixed_jumps[offset] = jb_next_offset + end = jb_next_offset + + target = self.get_target(jump_back) 
+ + if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER): + loop_type = "for" + else: + loop_type = "while" + test = self.prev_op[next_line_byte] + + if test == offset: + loop_type = "while 1" + elif self.code[test] in self.opc.JUMP_OPs: + self.ignore_if.add(test) + test_target = self.get_target(test) + if test_target > (jump_back + 3): + jump_back = test_target + self.not_continue.add(jump_back) + self.loops.append(target) + self.structs.append( + {"type": loop_type + "-loop", "start": target, "end": jump_back} + ) + after_jump_offset = xdis.next_offset(code[jump_back], self.opc, jump_back) + if after_jump_offset != end: + self.structs.append( + { + "type": loop_type + "-else", + "start": after_jump_offset, + "end": end, + } + ) + elif op in self.pop_jump_tf: + start = offset + inst.inst_size + target = inst.argval + rtarget = self.restrict_to_parent(target, parent) + prev_op = self.prev_op + + # Do not let jump to go out of parent struct bounds + if target != rtarget and parent["type"] == "and/or": + self.fixed_jumps[offset] = rtarget + return + + # Does this jump to right after another conditional jump that is + # not myself? If so, it's part of a larger conditional. + # rocky: if we have a conditional jump to the next instruction, then + # possibly I am "skipping over" a "pass" or null statement. + pretarget = self.get_inst(prev_op[target]) + + if ( + pretarget.opcode in self.pop_jump_if_pop + and (target > offset) + and pretarget.offset != offset + ): + + # FIXME: hack upon hack... + # In some cases the pretarget can be a jump to the next instruction + # and these aren't and/or's either. We limit to 3.5+ since we experienced there + # but it might be earlier versions, or might be a general principle. + if pretarget.argval != target: + # FIXME: this is not accurate The commented out below + # is what it should be. However grammar rules right now + # assume the incorrect offsets. 
+ # self.fixed_jumps[offset] = target + self.fixed_jumps[offset] = pretarget.offset + self.structs.append( + {"type": "and/or", "start": start, "end": pretarget.offset} + ) + return + + # The opcode *two* instructions before the target jump offset is important + # in making a determination of what we have. Save that. + pre_rtarget = prev_op[rtarget] + + if op == self.opc.POP_JUMP_IF_FALSE: + self.fixed_jumps[offset] = target + + # op == POP_JUMP_IF_TRUE + else: + next = self.next_stmt[offset] + if prev_op[next] == offset: + pass + elif self.is_jump_forward(next) and target == self.get_target(next): + if code[prev_op[next]] == self.opc.POP_JUMP_IF_FALSE: + if ( + code[next] == self.opc.JUMP_FORWARD + or target != rtarget + or code[prev_op[pre_rtarget]] + not in (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE) + ): + self.fixed_jumps[offset] = prev_op[next] + return + elif ( + code[next] == self.opc.JUMP_ABSOLUTE + and self.is_jump_forward(target) + and self.get_target(target) == self.get_target(next) + ): + self.fixed_jumps[offset] = prev_op[next] + return + + rtarget_is_ja = code[pre_rtarget] == self.opc.JUMP_ABSOLUTE + if ( + rtarget_is_ja + and pre_rtarget in self.stmts + and pre_rtarget != offset + and prev_op[pre_rtarget] != offset + and not ( + code[rtarget] == self.opc.JUMP_ABSOLUTE + and code[rtarget + 3] == self.opc.POP_BLOCK + and code[prev_op[pre_rtarget]] != self.opc.JUMP_ABSOLUTE + ) + ): + rtarget = pre_rtarget + + # Does the "jump if" jump beyond a jump op? + # That is, we have something like: + # POP_JUMP_IF_FALSE HERE + # ... + # JUMP_FORWARD + # HERE: + # + # If so, this can be block inside an "if" statement + # or a conditional assignment like: + # x = 1 if x else 2 + # + # For 3.5, for JUMP_FORWARD above we could have also + # JUMP_BACK or CONTINUE + # + # There are other situations we may need to consider, like + # if the condition jump is to a forward location. 
+ # Also the existence of a jump to the instruction after "END_FINALLY" + # will distinguish "try/else" from "try". + rtarget_break = (self.opc.RETURN_VALUE, self.opc.BREAK_LOOP) + + if self.is_jump_forward(pre_rtarget) or (rtarget_is_ja): + if_end = self.get_target(pre_rtarget) + + # If the jump target is back, we are looping + if ( + if_end < pre_rtarget + and self.version < 3.8 + and (code[prev_op[if_end]] == self.opc.SETUP_LOOP) + ): + if if_end > start: + return + + end = self.restrict_to_parent(if_end, parent) + + self.structs.append( + {"type": "if-then", "start": start, "end": pre_rtarget} + ) + + # FIXME: add this + # self.fixed_jumps[offset] = rtarget + self.not_continue.add(pre_rtarget) + + if rtarget < end and ( + code[rtarget] not in (self.opc.END_FINALLY, self.opc.JUMP_ABSOLUTE) + and code[prev_op[pre_rtarget]] + not in (self.opc.POP_EXCEPT, self.opc.END_FINALLY) + ): + self.structs.append({"type": "else", "start": rtarget, "end": end}) + self.else_start[rtarget] = end + elif self.is_jump_back(pre_rtarget, 0): + if_end = rtarget + self.structs.append( + {"type": "if-then", "start": start, "end": pre_rtarget} + ) + self.not_continue.add(pre_rtarget) + elif code[pre_rtarget] in rtarget_break: + self.structs.append({"type": "if-then", "start": start, "end": rtarget}) + # It is important to distingish if this return is inside some sort + # except block return + jump_prev = prev_op[offset] + if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP: + if self.opc.cmp_op[code[jump_prev + 1]] == "exception-match": + return + pass + + # Check that next instruction after pops and jump is + # not from SETUP_EXCEPT + next_op = rtarget + if code[next_op] == self.opc.POP_BLOCK: + next_op += instruction_size(self.code[next_op], self.opc) + if code[next_op] == self.opc.JUMP_ABSOLUTE: + next_op += instruction_size(self.code[next_op], self.opc) + if next_op in targets: + for try_op in targets[next_op]: + come_from_op = code[try_op] + if self.version < 3.8 and 
come_from_op == self.opc.SETUP_EXCEPT: + return + pass + + self.fixed_jumps[offset] = rtarget + + if code[pre_rtarget] == self.opc.RETURN_VALUE: + # If we are at some sort of POP_JUMP_IF and the instruction before was + # COMPARE_OP exception-match, then pre_rtarget is not an end_if + if not ( + inst_index > 0 + and self.insts[inst_index - 1].argval == "exception-match" + ): + self.return_end_ifs.add(pre_rtarget) + else: + self.fixed_jumps[offset] = rtarget + self.not_continue.add(pre_rtarget) + else: + + if target > offset: + self.fixed_jumps[offset] = target + pass + + elif self.version < 3.8 and op == self.opc.SETUP_EXCEPT: + target = self.get_target(offset) + end = self.restrict_to_parent(target, parent) + self.fixed_jumps[offset] = end + elif op == self.opc.POP_EXCEPT: + next_offset = xdis.next_offset(op, self.opc, offset) + target = self.get_target(next_offset) + if target > next_offset: + next_op = code[next_offset] + if ( + self.opc.JUMP_ABSOLUTE == next_op + and self.opc.END_FINALLY + != code[xdis.next_offset(next_op, self.opc, next_offset)] + ): + self.fixed_jumps[next_offset] = target + self.except_targets[target] = next_offset + + elif op == self.opc.SETUP_FINALLY: + target = self.get_target(offset) + end = self.restrict_to_parent(target, parent) + self.fixed_jumps[offset] = end + elif op in self.jump_if_pop: + target = self.get_target(offset) + if target > offset: + unop_target = self.last_instr( + offset, target, self.opc.JUMP_FORWARD, target + ) + if unop_target and code[unop_target + 3] != self.opc.ROT_TWO: + self.fixed_jumps[offset] = unop_target + else: + self.fixed_jumps[offset] = self.restrict_to_parent(target, parent) + pass + pass + else: + # 3.5+ has Jump optimization which too often causes RETURN_VALUE to get + # misclassified as RETURN_END_IF. Handle that here. 
+ # In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF + if op == self.opc.RETURN_VALUE: + next_offset = xdis.next_offset(op, self.opc, offset) + if next_offset < len(code) and ( + code[next_offset] == self.opc.JUMP_ABSOLUTE + and offset in self.return_end_ifs + ): + self.return_end_ifs.remove(offset) + pass + pass + elif op == self.opc.JUMP_FORWARD: + # If we have: + # JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x: + # then RETURN_VALUE is not RETURN_END_IF + rtarget = self.get_target(offset) + rtarget_prev = self.prev[rtarget] + if ( + code[rtarget_prev] == self.opc.RETURN_VALUE + and rtarget_prev in self.return_end_ifs + ): + i = rtarget_prev + while i != offset: + if code[i] in [op3.JUMP_FORWARD, op3.JUMP_ABSOLUTE]: + return + i = self.prev[i] + self.return_end_ifs.remove(rtarget_prev) + pass + return + + def is_jump_back(self, offset, extended_arg): + """ + Return True if the code at offset is some sort of jump back. + That is, it is a "JUMP_ABSOLUTE" instruction whose target offset + is lower than the offset of the jump itself. + """ + if self.code[offset] != self.opc.JUMP_ABSOLUTE: + return False + return offset > self.get_target(offset, extended_arg) + + def next_except_jump(self, start): + """ + Return the next jump that was generated by an except SomeException: + construct in a try...except...else clause or None if not found.
+ """ + + if self.code[start] == self.opc.DUP_TOP: + except_match = self.first_instr( + start, len(self.code), self.opc.POP_JUMP_IF_FALSE + ) + if except_match: + jmp = self.prev_op[self.get_target(except_match)] + self.ignore_if.add(except_match) + self.not_continue.add(jmp) + return jmp + + count_END_FINALLY = 0 + count_SETUP_ = 0 + for i in self.op_range(start, len(self.code)): + op = self.code[i] + if op == self.opc.END_FINALLY: + if count_END_FINALLY == count_SETUP_: + assert self.code[self.prev_op[i]] in frozenset( + [ + self.opc.JUMP_ABSOLUTE, + self.opc.JUMP_FORWARD, + self.opc.RETURN_VALUE, + ] + ) + self.not_continue.add(self.prev_op[i]) + return self.prev_op[i] + count_END_FINALLY += 1 + elif op in self.setup_opts_no_loop: + count_SETUP_ += 1 + + def rem_or(self, start, end, instr, target=None, include_beyond_target=False): + """ + Find offsets of all requested "instr" between "start" and "end", + optionally targeting the specified "target" offset, and return the list of + found offsets which are not within any POP_JUMP_IF_TRUE jumps.
+ """ + assert start >= 0 and end <= len(self.code) and start <= end + + # Find all offsets of requested instructions + instr_offsets = self.inst_matches( + start, end, instr, target, include_beyond_target + ) + # Get all POP_JUMP_IF_TRUE (or) offsets + jump_true_op = self.opc.POP_JUMP_IF_TRUE + pjit_offsets = self.inst_matches(start, end, jump_true_op) + filtered = [] + for pjit_offset in pjit_offsets: + pjit_tgt = self.get_target(pjit_offset) - 3 + for instr_offset in instr_offsets: + if instr_offset <= pjit_offset or instr_offset >= pjit_tgt: + filtered.append(instr_offset) + instr_offsets = filtered + filtered = [] + return instr_offsets + + +if __name__ == "__main__": + from uncompyle6 import PYTHON_VERSION + + if PYTHON_VERSION >= 3.7: + import inspect + + co = inspect.currentframe().f_code + from uncompyle6 import PYTHON_VERSION + + tokens, customize = Scanner37Base(PYTHON_VERSION).ingest(co) + for t in tokens: + print(t) + else: + print( + "Need to be Python 3.7 or greater to demo; I am version {PYTHON_VERSION}." + % PYTHON_VERSION + ) + pass diff --git a/uncompyle6/scanners/scanner38.py b/uncompyle6/scanners/scanner38.py index 598f8f0a..73505bde 100644 --- a/uncompyle6/scanners/scanner38.py +++ b/uncompyle6/scanners/scanner38.py @@ -23,23 +23,63 @@ scanner routine for Python 3. 
""" from uncompyle6.scanners.scanner37 import Scanner37 -from uncompyle6.scanners.scanner3 import Scanner3 +from uncompyle6.scanners.scanner37base import Scanner37Base # bytecode verification, verify(), uses JUMP_OPs from here from xdis.opcodes import opcode_38 as opc + +# bytecode verification, verify(), uses JUMP_OPS from here JUMP_OPs = opc.JUMP_OPS -class Scanner38(Scanner37): +class Scanner38(Scanner37): def __init__(self, show_asm=None): - Scanner3.__init__(self, 3.8, show_asm) + Scanner37Base.__init__(self, 3.8, show_asm) return + pass + def ingest(self, co, classname=None, code_objects={}, show_asm=None): + tokens, customize = super(Scanner38, self).ingest( + co, classname, code_objects, show_asm + ) + for i, token in enumerate(tokens): + opname = token.kind + if opname in ("JUMP_FORWARD", "JUMP_ABSOLUTE"): + # Turn JUMPs into BREAK_LOOP + jump_target = token.attr + + if opname == "JUMP_ABSOLUTE" and token.offset >= jump_target: + # Not a forward jump, so continue + # FIXME: Do we need "continue" detection? + continue + if i + 1 < len(tokens) and tokens[i + 1] == "JUMP_BACK": + # Sometimes the jump back is *after* the break... + jump_back_index = i + 1 + else: + # and sometimes it is *before* where we jumped to. 
+ jump_back_index = self.offset2tok_index[jump_target] - 1 + while tokens[jump_back_index].kind.startswith("COME_FROM_"): + jump_back_index -= 1 + pass + pass + jump_back_token = tokens[jump_back_index] + if ( + jump_back_token == "JUMP_BACK" + and jump_back_token.attr < token.offset + ): + token.kind = "BREAK_LOOP" + pass + pass + return tokens, customize + + if __name__ == "__main__": - from uncompyle6 import PYTHON_VERSION + from decompyle3 import PYTHON_VERSION + if PYTHON_VERSION == 3.8: import inspect + co = inspect.currentframe().f_code tokens, customize = Scanner38().ingest(co) for t in tokens: diff --git a/uncompyle6/scanners/tok.py b/uncompyle6/scanners/tok.py index 77e5e8ee..25356070 100644 --- a/uncompyle6/scanners/tok.py +++ b/uncompyle6/scanners/tok.py @@ -159,5 +159,12 @@ class Token: def __getitem__(self, i): raise IndexError + def off2int(self): + if isinstance(self.offset, int): + return self.offset + else: + assert isinstance(self.offset, str) + return(int(self.offset.split("_")[0])) + NoneToken = Token("LOAD_CONST", offset=-1, attr=None, pattr=None) diff --git a/uncompyle6/semantics/consts.py b/uncompyle6/semantics/consts.py index d6d004d7..31588857 100644 --- a/uncompyle6/semantics/consts.py +++ b/uncompyle6/semantics/consts.py @@ -344,8 +344,12 @@ TABLE_DIRECT = { # 'return': ( '%|return %c\n', 0), 'return_if_stmt': ( 'return %c\n', 0), - 'ifstmt': ( '%|if %c:\n%+%c%-', 0, 1 ), - 'iflaststmt': ( '%|if %c:\n%+%c%-', 0, 1 ), + 'ifstmt': ( '%|if %c:\n%+%c%-', + 0, # "testexpr" or "testexpr_then" + 1, # "_ifstmts_jump" or "return_stmts" + ), + + 'iflaststmt': ( '%|if %c:\n%+%c%-', 0, 1 ), 'iflaststmtl': ( '%|if %c:\n%+%c%-', 0, 1 ), 'testtrue': ( 'not %p', (0, PRECEDENCE['unary_not']) ), @@ -359,18 +363,18 @@ TABLE_DIRECT = { 'ifelsestmt': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 3 ), 'ifelsestmtc': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 3 ), 'ifelsestmtl': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 3 ), - 'ifelsestmtr': ( '%|if 
%c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 2 ), - 'ifelsestmtr2': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-\n\n', 0, 1, 3 ), # has COME_FROM in position 2 - - # "elif" forms are not generated by the parser but are created through tree - # transformations. See "n_ifelsestmt". - 'ifelifstmt': ( '%|if %c:\n%+%c%-%c', 0, 1, 3 ), + # These are created only via transformation + 'ifelifstmt': ( '%|if %c:\n%+%c%-%c', + 0, # "testexpr" or "testexpr_then" + 1, 3 ), 'elifelifstmt': ( '%|elif %c:\n%+%c%-%c', 0, 1, 3 ), 'elifstmt': ( '%|elif %c:\n%+%c%-', 0, 1 ), 'elifelsestmt': ( '%|elif %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 3 ), + 'ifelsestmtr': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 2 ), + 'ifelsestmtr2': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-\n\n', 0, 1, 3 ), # has COME_FROM 'elifelsestmtr': ( '%|elif %c:\n%+%c%-%|else:\n%+%c%-\n\n', 0, 1, 2 ), - 'elifelsestmtr2': ( '%|elif %c:\n%+%c%-%|else:\n%+%c%-\n\n', 0, 1, 3 ), # has COME_FROM in position 2 + 'elifelsestmtr2': ( '%|elif %c:\n%+%c%-%|else:\n%+%c%-\n\n', 0, 1, 3 ), # has COME_FROM 'whileTruestmt': ( '%|while True:\n%+%c%-\n\n', 1 ), 'whilestmt': ( '%|while %c:\n%+%c%-\n\n', 1, 2 ), @@ -414,10 +418,11 @@ TABLE_DIRECT = { (1, 'expr'), (5, 'store') ), 'except_suite': ( '%+%c%-%C', 0, (1, maxint, '') ), - # In Python 3.6, this is more complicated in the presence of "returns" + # In Python 3.6+, this is more complicated in the presence of "returns" 'except_suite_finalize': ( '%+%c%-%C', 1, (3, maxint, '') ), 'pass': ( '%|pass\n', ), + 'STORE_FAST': ( '%{pattr}', ), 'kv': ( '%c: %c', 3, 1 ), 'kv2': ( '%c: %c', 1, 2 ), 'import': ( '%|import %c\n', 2), diff --git a/uncompyle6/semantics/customize3.py b/uncompyle6/semantics/customize3.py index 9875126d..6a6fc135 100644 --- a/uncompyle6/semantics/customize3.py +++ b/uncompyle6/semantics/customize3.py @@ -37,6 +37,7 @@ def customize_for_version3(self, version): (0, "expr"), (4, "expr"), ), + "except_cond2": ("%|except %c as %c:\n", (1, "expr"), (5, "store")), "function_def_annotate": ("\n\n%|def 
%c%c\n", -1, 0), # When a generator is a single parameter of a function, # it doesn't need the surrounding parenethesis. @@ -331,7 +332,8 @@ def customize_for_version3(self, version): (1, "suite_stmts_opt"), (3, "except_handler"), (5, "else_suitel"), - ) + ), + "LOAD_CLASSDEREF": ("%{pattr}",), } ) if version >= 3.4: diff --git a/uncompyle6/semantics/customize38.py b/uncompyle6/semantics/customize38.py index aa46e396..2e587d93 100644 --- a/uncompyle6/semantics/customize38.py +++ b/uncompyle6/semantics/customize38.py @@ -12,7 +12,7 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Isolate Python 3.6 version-specific semantic actions here. +"""Isolate Python 3.8 version-specific semantic actions here. """ ######################## @@ -80,10 +80,18 @@ def customize_for_version38(self, version): (0, 'expr'), (3, 'for_block'), -2 ), + 'ifpoplaststmtl': ( '%|if %c:\n%+%c%-', + (0, "testexpr"), + (2, "c_stmts" ) ), + + 'ifstmtl': ( '%|if %c:\n%+%c%-', + (0, "testexpr"), + (1, "_ifstmts_jumpl") ), + 'whilestmt38': ( '%|while %c:\n%+%c%-\n\n', - (0, 'testexpr'), (1, 'l_stmts') ), + (1, 'testexpr'), (2, 'l_stmts') ), 'whileTruestmt38': ( '%|while True:\n%+%c%-\n\n', - (0, 'l_stmts') ), + (1, 'l_stmts') ), 'try_elsestmtl38': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-', (1, 'suite_stmts_opt'), @@ -98,4 +106,7 @@ def customize_for_version38(self, version): 'tryfinally38': ( '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', (3, 'returns'), 6 ), + "named_expr": ( # AKA "walrus operator" + "%c := %c", (2, "store"), (0, "expr") + ) }) diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index b5bb0a3f..a410d4d7 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -1130,11 +1130,15 @@ class SourceWalker(GenericASTTraversal, object): def n_generator_exp(self, node): self.write("(") + iter_index = 3 if self.version > 3.2: code_index = -6 + if self.version > 3.6: + # Python 
3.7+ adds optional "come_froms" at node[0] + iter_index = 4 else: code_index = -5 - self.comprehension_walk(node, iter_index=3, code_index=code_index) + self.comprehension_walk(node, iter_index=iter_index, code_index=code_index) self.write(")") self.prune() @@ -2015,8 +2019,13 @@ class SourceWalker(GenericASTTraversal, object): self.default(node) def n_except_cond2(self, node): - if node[-2][0] == "unpack": - node[-2][0].kind = "unpack_w_parens" + if node[-1] == "come_from_opt": + unpack_node = -3 + else: + unpack_node = -2 + + if node[unpack_node][0] == "unpack": + node[unpack_node][0].kind = "unpack_w_parens" self.default(node) def template_engine(self, entry, startnode): diff --git a/uncompyle6/semantics/transform.py b/uncompyle6/semantics/transform.py index f80fe23d..63d1755d 100644 --- a/uncompyle6/semantics/transform.py +++ b/uncompyle6/semantics/transform.py @@ -181,6 +181,7 @@ class TreeTransform(GenericASTTraversal, object): n = else_suite[0] old_stmts = None + else_suite_index = 1 if len(n) == 1 == len(n[0]) and n[0] == "stmt": n = n[0][0] @@ -192,9 +193,12 @@ class TreeTransform(GenericASTTraversal, object): "iflaststmtl", "ifelsestmtl", "ifelsestmtc", + "ifpoplaststmtl", ): - # This seems needed for Python 2.5-2.7 n = n[0] + if n.kind == "ifpoplaststmtl": + old_stmts = n[2] + else_suite_index = 2 pass pass elif len(n) > 1 and 1 == len(n[0]) and n[0] == "stmt" and n[1].kind == "stmt": @@ -206,7 +210,7 @@ class TreeTransform(GenericASTTraversal, object): else: return node - if n.kind in ("ifstmt", "iflaststmt", "iflaststmtl"): + if n.kind in ("ifstmt", "iflaststmt", "iflaststmtl", "ifpoplaststmtl"): node.kind = "ifelifstmt" n.kind = "elifstmt" elif n.kind in ("ifelsestmtr",): @@ -223,17 +227,24 @@ class TreeTransform(GenericASTTraversal, object): if old_stmts: if n.kind == "elifstmt": trailing_else = SyntaxTree("stmts", old_stmts[1:]) - # We use elifelsestmtr because it has 3 nodes - elifelse_stmt = SyntaxTree( - "elifelsestmtr", [n[0], n[1], 
trailing_else] - ) - node[3] = elifelse_stmt + if len(trailing_else): + # We use elifelsestmtr because it has 3 nodes + elifelse_stmt = SyntaxTree( + "elifelsestmtr", [n[0], n[else_suite_index], trailing_else] + ) + node[3] = elifelse_stmt + else: + elif_stmt = SyntaxTree( + "elifstmt", [n[0], n[else_suite_index]] + ) + node[3] = elif_stmt + + node.transformed_by = "n_ifelsestmt" pass else: # Other cases for n.kind may happen here pass pass - node.transformed_by = "n_ifelsestmt" return node n_ifelsestmtc = n_ifelsestmtl = n_ifelsestmt