Merge branch 'python-3.3-to-3.5' into python-2.4

This commit is contained in:
rocky
2022-04-25 07:57:15 -04:00
22 changed files with 1270 additions and 259 deletions

Binary file not shown.

Binary file not shown.

View File

@@ -1,3 +0,0 @@
# Long lists pose a slowdown in uncompiling.
# NOTE: the list below must stay spelled out element by element -- the
# bytecode that a long literal list produces is what this test exercises.
x = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
print(x)

View File

@@ -0,0 +1,720 @@
# Long lists pose a slowdown in uncompiling.
"This program is self-checking!"
# Try an empty list to check that long-matching detection doesn't mess that up.
# In theory this should work even though we put a cap on short lists which
# is checked below.
x = []
assert len(x) == 0 and isinstance(x, list)
# Try a short list to check that long-matching detection doesn't mess that up.
# This is a more general situation of the above.
x = [1, 1, 1]
# Until we have better "and" rules (which we have
# around, but not in decompyle3 or uncompyle6 yet)
# avoid 3-term "and"s
assert len(x) == 3
assert isinstance(x, list) and all(x)
# fmt: off
# Try a long list. This should not be slow
# as it has been in the past.
# NOTE: the 300 elements are deliberately written out one by one; the
# decompiler's handling of a long run of constant loads is what is tested.
x = [
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
]
assert all(x)
assert len(x) == 300 and isinstance(x, list)
# Try a long set. This should not be slow
# as it has been in the past.
# The 90 duplicate elements collapse, so the resulting set is just {1}.
x = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
}
assert x == {1} and isinstance(x, set)
# Try using variables rather than constants
a = 1
# First, a list
# As above, 300 elements written out explicitly; here each element loads the
# variable "a" instead of a constant, exercising a different load opcode.
x = [
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
]
assert all(x)
assert len(x) == 300 and isinstance(x, list)
# Next, a set
# All 300 elements are the same variable, so the set collapses to {1}.
x = {
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
    a, a, a, a, a, a, a, a, a, a,
}
assert x == {1} and isinstance(x, set)
# Check some dictionary keys.
# Ensure that in a dictionary we produce quoted strings
x = {
    "b": 1,
    "c": 2,
    "e": 3,
    "g": 6,
    "h": 7,
    "j": 9,
    "k": 11,
    # "return" is a Python keyword: it must still come out quoted,
    # never as a bare identifier key.
    "return": 12,
}
# NOTE: relies on dicts preserving insertion order of keys.
assert tuple(x.keys()) == ("b", "c", "e", "g", "h", "j", "k", "return")
# Ensure that in a dictionary we produce integers, not strings
x = {1: 2, 3: 4}
assert tuple(x.keys()) == (1, 3)
# Try a long dictionary.
# This should not be slow as it has been in the past
values = {
"value1": x, # Note this is LOAD_NAME
"value2": 2 + 1, # Constant should be folded into "LOAD_CONST"
"value3": 3 + 1,
"value4": 4 + 1,
"value5": 5 + 1,
"value6": 6 + 1,
"value7": 7 + 1,
"value8": 8 + 1,
"value9": 9 + 1,
"value10": 10 + 1,
"value11": 11 + 1,
"value12": 12 + 1,
"value13": 13 + 1,
"value14": 14 + 1,
"value15": 15 + 1,
"value16": 16 + 1,
"value17": 17 + 1,
"value18": 18 + 1,
"value19": 19 + 1,
"value20": 20 + 1,
"value21": 21 + 1,
"value22": 22 + 1,
"value23": 23 + 1,
"value24": 24 + 1,
"value25": 25 + 1,
"value26": 26 + 1,
"value27": 27 + 1,
"value28": 28 + 1,
"value29": 29 + 1,
"value30": 30 + 1,
"value31": 31 + 1,
"value32": 32 + 1,
"value33": 33 + 1,
"value34": 34 + 1,
"value35": 35 + 1,
"value36": 36 + 1,
"value37": 37 + 1,
"value38": 38 + 1,
"value39": 39 + 1,
"value40": 40 + 1,
"value41": 41 + 1,
"value42": 42 + 1,
"value43": 43 + 1,
"value44": 44 + 1,
"value45": 45 + 1,
"value46": 46 + 1,
"value47": 47 + 1,
"value48": 48 + 1,
"value49": 49 + 1,
"value50": 50 + 1,
"value51": 51 + 1,
"value52": 52 + 1,
"value53": 53 + 1,
"value54": 54 + 1,
"value55": 55 + 1,
"value56": 56 + 1,
"value57": 57 + 1,
"value58": 58 + 1,
"value59": 59 + 1,
"value60": 60 + 1,
"value61": 61 + 1,
"value62": 62 + 1,
"value63": 63 + 1,
"value64": 64 + 1,
"value65": 65 + 1,
"value66": 66 + 1,
"value67": 67 + 1,
"value68": 68 + 1,
"value69": 69 + 1,
"value70": 70 + 1,
"value71": 71 + 1,
"value72": 72 + 1,
"value73": 73 + 1,
"value74": 74 + 1,
"value75": 75 + 1,
"value76": 76 + 1,
"value77": 77 + 1,
"value78": 78 + 1,
"value79": 79 + 1,
"value80": 80 + 1,
"value81": 81 + 1,
"value82": 82 + 1,
"value83": 83 + 1,
"value84": 84 + 1,
"value85": 85 + 1,
"value86": 86 + 1,
"value87": 87 + 1,
"value88": 88 + 1,
"value89": 89 + 1,
"value90": 90 + 1,
"value91": 91 + 1,
"value92": 92 + 1,
"value93": 93 + 1,
"value94": 94 + 1,
"value95": 95 + 1,
"value96": 96 + 1,
"value97": 97 + 1,
"value98": 98 + 1,
"value99": 99 + 1,
"value100": 100 + 1,
"value101": 101 + 1,
"value102": 102 + 1,
"value103": 103 + 1,
"value104": 104 + 1,
"value105": 105 + 1,
"value106": 106 + 1,
"value107": 107 + 1,
"value108": 108 + 1,
"value109": 109 + 1,
"value110": 110 + 1,
"value111": 111 + 1,
"value112": 112 + 1,
"value113": 113 + 1,
"value114": 114 + 1,
"value115": 115 + 1,
"value116": 116 + 1,
"value117": 117 + 1,
"value118": 118 + 1,
"value119": 119 + 1,
"value120": 120 + 1,
"value121": 121 + 1,
"value122": 122 + 1,
"value123": 123 + 1,
"value124": 124 + 1,
"value125": 125 + 1,
"value126": 126 + 1,
"value127": 127 + 1,
"value128": 128 + 1,
"value129": 129 + 1,
"value130": 130 + 1,
"value131": 131 + 1,
"value132": 132 + 1,
"value133": 133 + 1,
"value134": 134 + 1,
"value135": 135 + 1,
"value136": 136 + 1,
"value137": 137 + 1,
"value138": 138 + 1,
"value139": 139 + 1,
"value140": 140 + 1,
"value141": 141 + 1,
"value142": 142 + 1,
"value143": 143 + 1,
"value144": 144 + 1,
"value145": 145 + 1,
"value146": 146 + 1,
"value147": 147 + 1,
"value148": 148 + 1,
"value149": 149 + 1,
"value150": 150 + 1,
"value151": 151 + 1,
"value152": 152 + 1,
"value153": 153 + 1,
"value154": 154 + 1,
"value155": 155 + 1,
"value156": 156 + 1,
"value157": 157 + 1,
"value158": 158 + 1,
"value159": 159 + 1,
"value160": 160 + 1,
"value161": 161 + 1,
"value162": 162 + 1,
"value163": 163 + 1,
"value164": 164 + 1,
"value165": 165 + 1,
"value166": 166 + 1,
"value167": 167 + 1,
"value168": 168 + 1,
"value169": 169 + 1,
"value170": 170 + 1,
"value171": 171 + 1,
"value172": 172 + 1,
"value173": 173 + 1,
"value174": 174 + 1,
"value175": 175 + 1,
"value176": 176 + 1,
"value177": 177 + 1,
"value178": 178 + 1,
"value179": 179 + 1,
"value180": 180 + 1,
"value181": 181 + 1,
"value182": 182 + 1,
"value183": 183 + 1,
"value184": 184 + 1,
"value185": 185 + 1,
"value186": 186 + 1,
"value187": 187 + 1,
"value188": 188 + 1,
"value189": 189 + 1,
"value190": 190 + 1,
"value191": 191 + 1,
"value192": 192 + 1,
"value193": 193 + 1,
"value194": 194 + 1,
"value195": 195 + 1,
"value196": 196 + 1,
"value197": 197 + 1,
"value198": 198 + 1,
"value199": 199 + 1,
"value200": 200 + 1,
"value201": 201 + 1,
"value202": 202 + 1,
"value203": 203 + 1,
"value204": 204 + 1,
"value205": 205 + 1,
"value206": 206 + 1,
"value207": 207 + 1,
"value208": 208 + 1,
"value209": 209 + 1,
"value210": 210 + 1,
"value211": 211 + 1,
"value212": 212 + 1,
"value213": 213 + 1,
"value214": 214 + 1,
"value215": 215 + 1,
"value216": 216 + 1,
"value217": 217 + 1,
"value218": 218 + 1,
"value219": 219 + 1,
"value220": 220 + 1,
"value221": 221 + 1,
"value222": 222 + 1,
"value223": 223 + 1,
"value224": 224 + 1,
"value225": 225 + 1,
"value226": 226 + 1,
"value227": 227 + 1,
"value228": 228 + 1,
"value229": 229 + 1,
"value230": 230 + 1,
"value231": 231 + 1,
"value232": 232 + 1,
"value233": 233 + 1,
"value234": 234 + 1,
"value235": 235 + 1,
"value236": 236 + 1,
"value237": 237 + 1,
"value238": 238 + 1,
"value239": 239 + 1,
"value240": 240 + 1,
"value241": 241 + 1,
"value242": 242 + 1,
"value243": 243 + 1,
"value244": 244 + 1,
"value245": 245 + 1,
"value246": 246 + 1,
"value247": 247 + 1,
"value248": 248 + 1,
"value249": 249 + 1,
"value250": 250 + 1,
"value251": 251 + 1,
"value252": 252 + 1,
"value253": 253 + 1,
"value254": 254 + 1,
"value255": 255 + 1,
"value256": 256 + 1,
"value257": 257 + 1,
"value258": 258 + 1,
"value259": 259 + 1,
"value260": 260 + 1,
"value261": 261 + 1,
"value262": 262 + 1,
"value263": 263 + 1,
"value264": 264 + 1,
"value265": 265 + 1,
"value266": 266 + 1,
"value267": 267 + 1,
"value268": 268 + 1,
"value269": 269 + 1,
"value270": 270 + 1,
"value271": 271 + 1,
"value272": 272 + 1,
"value273": 273 + 1,
"value274": 274 + 1,
"value275": 275 + 1,
"value276": 276 + 1,
"value277": 277 + 1,
"value278": 278 + 1,
"value279": 279 + 1,
"value280": 280 + 1,
"value281": 281 + 1,
"value282": 282 + 1,
"value283": 283 + 1,
"value284": 284 + 1,
"value285": 285 + 1,
"value286": 286 + 1,
"value287": 287 + 1,
"value288": 288 + 1,
"value289": 289 + 1,
"value290": 290 + 1,
"value291": 291 + 1,
"value292": 292 + 1,
"value293": 293 + 1,
"value294": 294 + 1,
"value295": 295 + 1,
"value296": 296 + 1,
"value297": 297 + 1,
"value298": 298 + 1,
"value299": 299 + 1,
"value300": 300 + 1,
"value301": 301 + 1,
"value302": 302 + 1,
"value303": 303 + 1,
"value304": 304 + 1,
"value305": 305 + 1,
"value306": 306 + 1,
"value307": 307 + 1,
"value308": 308 + 1,
"value309": 309 + 1,
"value310": 310 + 1,
"value311": 311 + 1,
"value312": 312 + 1,
"value313": 313 + 1,
"value314": 314 + 1,
"value315": 315 + 1,
"value316": 316 + 1,
"value317": 317 + 1,
"value318": 318 + 1,
"value319": 319 + 1,
"value320": 320 + 1,
"value321": 321 + 1,
"value322": 322 + 1,
"value323": 323 + 1,
"value324": 324 + 1,
"value325": 325 + 1,
"value326": 326 + 1,
"value327": 327 + 1,
"value328": 328 + 1,
"value329": 329 + 1,
"value330": 330 + 1,
"value331": 331 + 1,
"value332": 332 + 1,
"value333": 333 + 1,
"value334": 334 + 1,
"value335": 335 + 1,
"value336": 336 + 1,
"value337": 337 + 1,
"value338": 338 + 1,
"value339": 339 + 1,
"value340": 340 + 1,
"value341": 341 + 1,
"value342": 342 + 1,
"value343": 343 + 1,
"value344": 344 + 1,
"value345": 345 + 1,
"value346": 346 + 1,
"value347": 347 + 1,
"value348": 348 + 1,
"value349": 349 + 1,
"value350": 350 + 1,
"value351": 351 + 1,
"value352": 352 + 1,
"value353": 353 + 1,
"value354": 354 + 1,
"value355": 355 + 1,
"value356": 356 + 1,
"value357": 357 + 1,
"value358": 358 + 1,
"value359": 359 + 1,
"value360": 360 + 1,
"value361": 361 + 1,
"value362": 362 + 1,
"value363": 363 + 1,
"value364": 364 + 1,
"value365": 365 + 1,
"value366": 366 + 1,
"value367": 367 + 1,
"value368": 368 + 1,
"value369": 369 + 1,
"value370": 370 + 1,
"value371": 371 + 1,
"value372": 372 + 1,
"value373": 373 + 1,
"value374": 374 + 1,
"value375": 375 + 1,
"value376": 376 + 1,
"value377": 377 + 1,
"value378": 378 + 1,
"value379": 379 + 1,
"value380": 380 + 1,
"value381": 381 + 1,
"value382": 382 + 1,
"value383": 383 + 1,
"value384": 384 + 1,
"value385": 385 + 1,
"value386": 386 + 1,
"value387": 387 + 1,
"value388": 388 + 1,
"value389": 389 + 1,
"value390": 390 + 1,
"value391": 391 + 1,
"value392": 392 + 1,
"value393": 393 + 1,
"value394": 394 + 1,
"value395": 395 + 1,
"value396": 396 + 1,
"value397": 397 + 1,
"value398": 398 + 1,
"value399": 399 + 1,
"value400": 400 + 1,
"value401": 401 + 1,
"value402": 402 + 1,
"value403": 403 + 1,
"value404": 404 + 1,
"value405": 405 + 1,
"value406": 406 + 1,
"value407": 407 + 1,
"value408": 408 + 1,
"value409": 409 + 1,
"value410": 410 + 1,
"value411": 411 + 1,
"value412": 412 + 1,
"value413": 413 + 1,
"value414": 414 + 1,
"value415": 415 + 1,
"value416": 416 + 1,
"value417": 417 + 1,
"value418": 418 + 1,
"value419": 419 + 1,
"value420": 420 + 1,
"value421": 421 + 1,
"value422": 422 + 1,
"value423": 423 + 1,
"value424": 424 + 1,
"value425": 425 + 1,
"value426": 426 + 1,
"value427": 427 + 1,
"value428": 428 + 1,
"value429": 429 + 1,
"value430": 430 + 1,
"value431": 431 + 1,
"value432": 432 + 1,
"value433": 433 + 1,
"value434": 434 + 1,
"value435": 435 + 1,
"value436": 436 + 1,
"value437": 437 + 1,
"value438": 438 + 1,
"value439": 439 + 1,
"value440": 440 + 1,
"value441": 441 + 1,
"value442": 442 + 1,
"value443": 443 + 1,
"value444": 444 + 1,
"value445": 445 + 1,
"value446": 446 + 1,
"value447": 447 + 1,
"value448": 448 + 1,
"value449": 449 + 1,
"value450": 450 + 1,
"value451": 451 + 1,
"value452": 452 + 1,
"value453": 453 + 1,
"value454": 454 + 1,
"value455": 455 + 1,
"value456": 456 + 1,
"value457": 457 + 1,
"value458": 458 + 1,
"value459": 459 + 1,
"value460": 460 + 1,
"value461": 461 + 1,
"value462": 462 + 1,
"value463": 463 + 1,
"value464": 464 + 1,
"value465": 465 + 1,
"value466": 466 + 1,
"value467": 467 + 1,
"value468": 468 + 1,
"value469": 469 + 1,
"value470": 470 + 1,
"value471": 471 + 1,
"value472": 472 + 1,
"value473": 473 + 1,
"value474": 474 + 1,
"value475": 475 + 1,
"value476": 476 + 1,
"value477": 477 + 1,
"value478": 478 + 1,
"value479": 479 + 1,
"value480": 480 + 1,
"value481": 481 + 1,
"value482": 482 + 1,
"value483": 483 + 1,
"value484": 484 + 1,
"value485": 485 + 1,
"value486": 486 + 1,
"value487": 487 + 1,
"value488": 488 + 1,
"value489": 489 + 1,
"value490": 490 + 1,
"value491": 491 + 1,
"value492": 492 + 1,
"value493": 493 + 1,
"value494": 494 + 1,
"value495": 495 + 1,
"value496": 496 + 1,
"value497": 497 + 1,
"value498": 498 + 1,
"value499": 499 + 1,
"value500": 500 + 1,
"value501": 501 + 1,
"value502": 502 + 1,
}
assert list(values.values())[1:] == list(range(3, 502 + 2))
# Try a long dictionary that fails because we have a binary op.
# We can get an expr32 grouping speedup
# which is slower than if this were all constant.
# The above was not implemented at the time this test was written.
values = {
    "value1": a + 1,  # This is a binary op, not a constant
    "value2": 2,
    "value3": 3,
    "value4": 4,
    "value5": 5,
    "value6": 6,
    "value7": 7,
    "value8": 8,
    "value9": 9,
    "value10": 10,
    "value11": 11,
    "value12": 12,
    "value13": 13,
    "value14": 14,
    "value15": 15,
    "value16": 16,
    "value17": 17,
    "value18": 18,
    "value19": 19,
    "value20": 20,
    "value21": 21,
    "value22": 22,
    "value23": 23,
    "value24": 24,
    "value25": 25,
    "value26": 26,
    "value27": 27,
    "value28": 28,
    "value29": 29,
    "value30": 30,
    "value31": 31,
    "value32": 32,
    "value33": 33,
}
# Skip value1 (its value depends on "a") and check the constant tail.
assert list(values.values())[1:] == list(range(2, 34))

View File

@@ -68,7 +68,7 @@ def disco_loop(disasm, queue, real_out):
queue.append(t.pattr) queue.append(t.pattr)
elif iscode(t.attr): elif iscode(t.attr):
queue.append(t.attr) queue.append(t.attr)
real_out.write(t) real_out.write(str(t) + "\n")
pass pass
pass pass

View File

@@ -56,6 +56,7 @@ class PythonParser(GenericASTBuilder):
"_come_froms", "_come_froms",
"_stmts", "_stmts",
"attributes", "attributes",
"add_consts",
"come_froms", "come_froms",
"except_stmts", "except_stmts",
"exprlist", "exprlist",

View File

@@ -319,6 +319,22 @@ class Python37BaseParser(PythonParser):
""" """
self.addRule(rules_str, nop_func) self.addRule(rules_str, nop_func)
elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"):
if opname == "BUILD_CONST_DICT":
rule = """
add_consts ::= ADD_VALUE*
const_list ::= COLLECTION_START add_consts %s
dict ::= const_list
expr ::= dict
""" % opname
else:
rule = """
add_consts ::= ADD_VALUE*
const_list ::= COLLECTION_START add_consts %s
expr ::= const_list
""" % opname
self.addRule(rule, nop_func)
elif opname_base == "BUILD_CONST_KEY_MAP": elif opname_base == "BUILD_CONST_KEY_MAP":
kvlist_n = "expr " * (token.attr) kvlist_n = "expr " * (token.attr)
rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname) rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname)

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2016, 2018-2021 by Rocky Bernstein # Copyright (c) 2016, 2018-2022 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org> # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com> # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 1999 John Aycock # Copyright (c) 1999 John Aycock
@@ -85,6 +85,9 @@ def long(num):
return num return num
CONST_COLLECTIONS = ["CONST_LIST", "CONST_SET", "CONST_DICT"]
class Code(object): class Code(object):
""" """
Class for representing code-objects. Class for representing code-objects.
@@ -125,6 +128,80 @@ class Scanner(object):
# FIXME: This weird Python2 behavior is not Python3 # FIXME: This weird Python2 behavior is not Python3
self.resetTokenClass() self.resetTokenClass()
def bound_collection(
    self, tokens, next_tokens, t, i, collection_type
):
    """
    Try to collapse the run of simple load instructions that feeds the
    collection-building token ``t`` (found at index ``i`` of ``tokens``)
    into a COLLECTION_START / ADD_VALUE* / BUILD_<collection_type>
    pseudo-instruction sequence appended to ``next_tokens``.

    Returns the resulting token list.  If the collection is small, or any
    feeding instruction is not a simple load, ``next_tokens`` is returned
    with ``t`` appended unchanged.

    ``collection_type`` is one of the CONST_COLLECTIONS names, e.g.
    "CONST_LIST", "CONST_SET", or "CONST_DICT".
    """
    # Number of stack values the BUILD_* instruction consumes.
    count = t.attr
    assert isinstance(count, int)
    assert count <= i
    if collection_type == "CONST_DICT":
        # Constant dictionaries work via BUILD_CONST_KEY_MAP and
        # handle the values() like sets and lists.
        # However the keys() are a LOAD_CONST of the keys.
        # Adjust the count to account for this extra instruction.
        count += 1
    # For small lists don't bother
    if count < 5:
        return next_tokens + [t]
    collection_start = i - count
    # Bail out unless every instruction feeding the BUILD_* is a
    # simple load of a constant or a variable.
    for j in range(collection_start, i):
        if tokens[j].kind not in (
            "LOAD_CONST",
            "LOAD_FAST",
            "LOAD_GLOBAL",
            "LOAD_NAME",
        ):
            return next_tokens + [t]
    collection_enum = CONST_COLLECTIONS.index(collection_type)
    # If we get here, all instructions feeding tokens[i] are simple loads,
    # so we can drop the copies already in next_tokens, add a boundary
    # marker, and re-emit the loads as ADD_VALUE pseudo-instructions.
    new_tokens = next_tokens[:-count]
    start_offset = tokens[collection_start].offset
    # Boundary marker; offset "<start>_0" keeps it sorted just before the
    # first load it replaces.
    new_tokens.append(
        Token(
            opname="COLLECTION_START",
            attr=collection_enum,
            pattr=collection_type,
            offset="%s_0" % start_offset,
            has_arg=True,
            opc=self.opc,
            has_extended_arg=False,
        )
    )
    # One ADD_VALUE per collection element, preserving the original
    # operand, offset and line-start info.
    for j in range(collection_start, i):
        new_tokens.append(
            Token(
                opname="ADD_VALUE",
                attr=tokens[j].attr,
                pattr=tokens[j].pattr,
                offset=tokens[j].offset,
                has_arg=True,
                linestart=tokens[j].linestart,
                opc=self.opc,
                has_extended_arg=False,
            )
        )
    # Finally, the collection terminator, e.g. BUILD_CONST_LIST.
    new_tokens.append(
        Token(
            opname="BUILD_%s" % collection_type,
            attr=t.attr,
            pattr=t.pattr,
            offset=t.offset,
            has_arg=t.has_arg,
            linestart=t.linestart,
            opc=t.opc,
            has_extended_arg=False,
        )
    )
    return new_tokens
def build_instructions(self, co): def build_instructions(self, co):
""" """
Create a list of instructions (a structured object rather than Create a list of instructions (a structured object rather than

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2016-2018, 2021 by Rocky Bernstein # Copyright (c) 2016-2018, 2021-2022 by Rocky Bernstein
""" """
Python 1.5 bytecode decompiler massaging. Python 1.5 bytecode decompiler massaging.
@@ -28,10 +28,22 @@ class Scanner15(scan.Scanner21):
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
""" """
Pick out tokens from an uncompyle6 code object, and transform them, Create "tokens" the bytecode of an Python code object. Largely these
are the opcode name, but in some cases that has been modified to make parsing
easier.
returning a list of uncompyle6 Token's. returning a list of uncompyle6 Token's.
The transformations are made to assist the deparsing grammar. Some transformations are made to assist the deparsing grammar:
- various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures
- operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
* BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take.
""" """
tokens, customize = scan.Scanner21.ingest(self, co, classname, code_objects, show_asm) tokens, customize = scan.Scanner21.ingest(self, co, classname, code_objects, show_asm)
for t in tokens: for t in tokens:

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2019, 2021 by Rocky Bernstein # Copyright (c) 2019, 2021-2022 by Rocky Bernstein
""" """
Python 1.6 bytecode decompiler massaging. Python 1.6 bytecode decompiler massaging.
@@ -28,10 +28,22 @@ class Scanner16(scan.Scanner21):
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
""" """
Pick out tokens from an uncompyle6 code object, and transform them, Create "tokens" the bytecode of an Python code object. Largely these
are the opcode name, but in some cases that has been modified to make parsing
easier.
returning a list of uncompyle6 Token's. returning a list of uncompyle6 Token's.
The transformations are made to assist the deparsing grammar. Some transformations are made to assist the deparsing grammar:
- various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures
- operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
* BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take.
""" """
tokens, customize = scan.Scanner21.ingest(self, co, classname, code_objects, show_asm) tokens, customize = scan.Scanner21.ingest(self, co, classname, code_objects, show_asm)
for t in tokens: for t in tokens:

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2015-2021 by Rocky Bernstein # Copyright (c) 2015-2022 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org> # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com> # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# #
@@ -180,15 +180,18 @@ class Scanner2(Scanner):
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
""" """
Pick out tokens from an uncompyle6 code object, and transform them, Create "tokens" the bytecode of an Python code object. Largely these
are the opcode name, but in some cases that has been modified to make parsing
easier.
returning a list of uncompyle6 Token's. returning a list of uncompyle6 Token's.
The transformations are made to assist the deparsing grammar. Some transformations are made to assist the deparsing grammar:
Specificially:
- various types of LOAD_CONST's are categorized in terms of what they load - various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures - COME_FROM instructions are added to assist parsing control structures
- MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments - operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
- some EXTENDED_ARGS instructions are removed * BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2016-2018, 2021 by Rocky Bernstein # Copyright (c) 2016-2018, 2021-2022 by Rocky Bernstein
""" """
Python 2.2 bytecode massaging. Python 2.2 bytecode massaging.
@@ -29,6 +29,24 @@ class Scanner22(scan.Scanner23):
return return
def ingest22(self, co, classname=None, code_objects={}, show_asm=None): def ingest22(self, co, classname=None, code_objects={}, show_asm=None):
"""
Create "tokens" the bytecode of an Python code object. Largely these
are the opcode name, but in some cases that has been modified to make parsing
easier.
returning a list of uncompyle6 Token's.
Some transformations are made to assist the deparsing grammar:
- various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures
- operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
* BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take.
"""
tokens, customize = self.parent_ingest(co, classname, code_objects, show_asm) tokens, customize = self.parent_ingest(co, classname, code_objects, show_asm)
tokens = [t for t in tokens if t.kind != 'SET_LINENO'] tokens = [t for t in tokens if t.kind != 'SET_LINENO']
return tokens, customize return tokens, customize

View File

@@ -47,14 +47,18 @@ class Scanner26(scan.Scanner2):
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
""" """
Pick out tokens from an uncompyle6 code object, and transform them, Create "tokens" the bytecode of an Python code object. Largely these
returning a list of uncompyle6 'Token's. are the opcode name, but in some cases that has been modified to make parsing
easier.
returning a list of uncompyle6 Token's.
The transformations are made to assist the deparsing grammar. Some transformations are made to assist the deparsing grammar:
Specificially:
- various types of LOAD_CONST's are categorized in terms of what they load - various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures - COME_FROM instructions are added to assist parsing control structures
- MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments - operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
* BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2015-2019, 2021 by Rocky Bernstein # Copyright (c) 2015-2019, 2021-2022 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org> # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com> # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# #

View File

@@ -22,6 +22,24 @@ class Scanner36(Scanner3):
return return
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
"""
Create "tokens" the bytecode of an Python code object. Largely these
are the opcode name, but in some cases that has been modified to make parsing
easier.
returning a list of uncompyle6 Token's.
Some transformations are made to assist the deparsing grammar:
- various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures
- operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
* BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take.
"""
tokens, customize = Scanner3.ingest(self, co, classname, code_objects, show_asm) tokens, customize = Scanner3.ingest(self, co, classname, code_objects, show_asm)
not_pypy36 = not (self.version[:2] == (3, 6) and self.is_pypy) not_pypy36 = not (self.version[:2] == (3, 6) and self.is_pypy)
for t in tokens: for t in tokens:

View File

@@ -30,6 +30,8 @@ from xdis.opcodes import opcode_37 as opc
# bytecode verification, verify(), uses JUMP_OPS from here # bytecode verification, verify(), uses JUMP_OPS from here
JUMP_OPs = opc.JUMP_OPS JUMP_OPs = opc.JUMP_OPS
CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")
class Scanner37(Scanner37Base): class Scanner37(Scanner37Base):
def __init__(self, show_asm=None, is_pypy=False): def __init__(self, show_asm=None, is_pypy=False):
@@ -39,9 +41,45 @@ class Scanner37(Scanner37Base):
pass pass
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(
self, co, classname=None, code_objects={}, show_asm=None
):
"""
Create "tokens" the bytecode of an Python code object. Largely these
are the opcode name, but in some cases that has been modified to make parsing
easier.
returning a list of uncompyle6 Token's.
Some transformations are made to assist the deparsing grammar:
- various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures
- operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
* BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take.
"""
tokens, customize = Scanner37Base.ingest(self, co, classname, code_objects, show_asm) tokens, customize = Scanner37Base.ingest(self, co, classname, code_objects, show_asm)
for t in tokens: new_tokens = []
for i, t in enumerate(tokens):
# things that smash new_tokens like BUILD_LIST have to come first.
if t.op in (
self.opc.BUILD_CONST_KEY_MAP,
self.opc.BUILD_LIST,
self.opc.BUILD_SET,
):
if t.kind.startswith("BUILD_CONST_KEY_MAP"):
collection_type = "DICT"
else:
collection_type = t.kind.split("_")[1]
new_tokens = self.bound_collection(
tokens, new_tokens, t, i, "CONST_%s" % collection_type
)
continue
# The lowest bit of flags indicates whether the # The lowest bit of flags indicates whether the
# var-keyword argument is placed at the top of the stack # var-keyword argument is placed at the top of the stack
if t.op == self.opc.CALL_FUNCTION_EX and t.attr & 1: if t.op == self.opc.CALL_FUNCTION_EX and t.attr & 1:
@@ -59,8 +97,9 @@ class Scanner37(Scanner37Base):
t.kind = "BUILD_MAP_UNPACK_WITH_CALL_%d" % t.attr t.kind = "BUILD_MAP_UNPACK_WITH_CALL_%d" % t.attr
elif not self.is_pypy and t.op == self.opc.BUILD_TUPLE_UNPACK_WITH_CALL: elif not self.is_pypy and t.op == self.opc.BUILD_TUPLE_UNPACK_WITH_CALL:
t.kind = "BUILD_TUPLE_UNPACK_WITH_CALL_%d" % t.attr t.kind = "BUILD_TUPLE_UNPACK_WITH_CALL_%d" % t.attr
pass new_tokens.append(t)
return tokens, customize
return new_tokens, customize
if __name__ == "__main__": if __name__ == "__main__":
from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2015-2020 by Rocky Bernstein # Copyright (c) 2015-2020, 2022 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org> # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com> # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# #
@@ -181,20 +181,22 @@ class Scanner37Base(Scanner):
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
""" """
Pick out tokens from an uncompyle6 code object, and transform them, Create "tokens" the bytecode of an Python code object. Largely these
are the opcode name, but in some cases that has been modified to make parsing
easier.
returning a list of uncompyle6 Token's. returning a list of uncompyle6 Token's.
The transformations are made to assist the deparsing grammar. Some transformations are made to assist the deparsing grammar:
Specificially:
- various types of LOAD_CONST's are categorized in terms of what they load - various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures - COME_FROM instructions are added to assist parsing control structures
- MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments - operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
- some EXTENDED_ARGS instructions are removed * BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take. cause specific rules for the specific number of arguments they take.
""" """
def tokens_append(j, token): def tokens_append(j, token):
@@ -212,7 +214,7 @@ class Scanner37Base(Scanner):
# show_asm = 'both' # show_asm = 'both'
if show_asm in ("both", "before"): if show_asm in ("both", "before"):
for instr in bytecode.get_instructions(co): for instr in bytecode.get_instructions(co):
print(instr.disassemble()) print(instr.disassemble(self.opc))
# "customize" is in the process of going away here # "customize" is in the process of going away here
customize = {} customize = {}
@@ -316,6 +318,7 @@ class Scanner37Base(Scanner):
# "loop" tag last so the grammar rule matches that properly. # "loop" tag last so the grammar rule matches that properly.
for jump_offset in sorted(jump_targets[inst.offset], reverse=True): for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
come_from_name = "COME_FROM" come_from_name = "COME_FROM"
opname = self.opname_for_offset(jump_offset) opname = self.opname_for_offset(jump_offset)
if opname == "EXTENDED_ARG": if opname == "EXTENDED_ARG":
k = xdis.next_offset(op, self.opc, jump_offset) k = xdis.next_offset(op, self.opc, jump_offset)
@@ -342,22 +345,6 @@ class Scanner37Base(Scanner):
jump_idx += 1 jump_idx += 1
pass pass
pass pass
elif inst.offset in self.else_start:
end_offset = self.else_start[inst.offset]
j = tokens_append(
j,
Token(
"ELSE",
None,
repr(end_offset),
offset="%s" % (inst.offset),
has_arg=True,
opc=self.opc,
has_extended_arg=inst.has_extended_arg,
),
)
pass
pattr = inst.argrepr pattr = inst.argrepr
opname = inst.opname opname = inst.opname
@@ -444,17 +431,24 @@ class Scanner37Base(Scanner):
opname = "%s_%d+%d" % (opname, before_args, after_args) opname = "%s_%d+%d" % (opname, before_args, after_args)
elif op == self.opc.JUMP_ABSOLUTE: elif op == self.opc.JUMP_ABSOLUTE:
# Further classify JUMP_ABSOLUTE into backward jumps # Refine JUMP_ABSOLUTE further in into:
# which are used in loops, and "CONTINUE" jumps which #
# may appear in a "continue" statement. The loop-type # * "JUMP_LOOP" - which are are used in loops. This is sometimes
# and continue-type jumps will help us classify loop # found at the end of a looping construct
# boundaries The continue-type jumps help us get # * "BREAK_LOOP" - which are are used to break loops.
# "continue" statements with would otherwise be turned # * "CONTINUE" - jumps which may appear in a "continue" statement.
# into a "pass" statement because JUMPs are sometimes # It is okay to confuse this with JUMP_LOOP. The
# ignored in rules as just boundary overhead. In # grammar should tolerate this.
# comprehensions we might sometimes classify JUMP_BACK # * "JUMP_FORWARD - forward jumps that are not BREAK_LOOP jumps.
# as CONTINUE, but that's okay since we add a grammar #
# rule for that. # The loop-type and continue-type jumps will help us
# classify loop boundaries The continue-type jumps
# help us get "continue" statements with would
# otherwise be turned into a "pass" statement because
# JUMPs are sometimes ignored in rules as just
# boundary overhead. Again, in comprehensions we might
# sometimes classify JUMP_LOOP as CONTINUE, but that's
# okay since grammar rules should tolerate that.
pattr = argval pattr = argval
target = inst.argval target = inst.argval
if target <= inst.offset: if target <= inst.offset:
@@ -545,7 +539,6 @@ class Scanner37Base(Scanner):
self.except_targets = {} self.except_targets = {}
self.ignore_if = set() self.ignore_if = set()
self.build_statement_indices() self.build_statement_indices()
self.else_start = {}
# Containers filled by detect_control_flow() # Containers filled by detect_control_flow()
self.not_continue = set() self.not_continue = set()
@@ -655,9 +648,9 @@ class Scanner37Base(Scanner):
): ):
stmts.remove(stmt_offset) stmts.remove(stmt_offset)
continue continue
# Rewing ops till we encounter non-JUMP_ABSOLUTE one # Scan back bytecode ops till we encounter non-JUMP_ABSOLUTE op
j = self.prev_op[stmt_offset] j = self.prev_op[stmt_offset]
while code[j] == self.opc.JUMP_ABSOLUTE: while code[j] == self.opc.JUMP_ABSOLUTE and j > 0:
j = self.prev_op[j] j = self.prev_op[j]
# If we got here, then it's list comprehension which # If we got here, then it's list comprehension which
# is not a statement too # is not a statement too
@@ -687,7 +680,9 @@ class Scanner37Base(Scanner):
# Finish filling the list for last statement # Finish filling the list for last statement
slist += [codelen] * (codelen - len(slist)) slist += [codelen] * (codelen - len(slist))
def detect_control_flow(self, offset, targets, inst_index): def detect_control_flow(
self, offset, targets, inst_index
):
""" """
Detect type of block structures and their boundaries to fix optimized jumps Detect type of block structures and their boundaries to fix optimized jumps
in python2.3+ in python2.3+
@@ -933,20 +928,16 @@ class Scanner37Base(Scanner):
if __name__ == "__main__": if __name__ == "__main__":
from uncompyle6 import PYTHON_VERSION from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str
if PYTHON_VERSION >= 3.7: if PYTHON_VERSION_TRIPLE[:2] == (3, 7):
import inspect import inspect
co = inspect.currentframe().f_code co = inspect.currentframe().f_code # type: ignore
from uncompyle6 import PYTHON_VERSION
tokens, customize = Scanner37Base(PYTHON_VERSION).ingest(co) tokens, customize = Scanner37Base(PYTHON_VERSION_TRIPLE).ingest(co)
for t in tokens: for t in tokens:
print(t) print(t)
else: else:
print( print("Need to be Python 3.7 to demo; I am version %s." % version_tuple_to_str())
"Need to be Python 3.7 or greater to demo; I am version {PYTHON_VERSION}."
% PYTHON_VERSION
)
pass pass

View File

@@ -42,6 +42,24 @@ class Scanner38(Scanner37):
pass pass
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
"""
Create "tokens" the bytecode of an Python code object. Largely these
are the opcode name, but in some cases that has been modified to make parsing
easier.
returning a list of uncompyle6 Token's.
Some transformations are made to assist the deparsing grammar:
- various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures
- operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
* BUILD_LIST, BUILD_SET
* MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
- EXTENDED_ARGS instructions are removed
Also, when we encounter certain tokens, we add them to a set which will cause custom
grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
cause specific rules for the specific number of arguments they take.
"""
tokens, customize = super(Scanner38, self).ingest( tokens, customize = super(Scanner38, self).ingest(
co, classname, code_objects, show_asm co, classname, code_objects, show_asm
) )
@@ -62,6 +80,8 @@ class Scanner38(Scanner37):
print(jump_back_targets) print(jump_back_targets)
loop_ends = [] loop_ends = []
next_end = tokens[len(tokens) - 1].off2int() + 10 next_end = tokens[len(tokens) - 1].off2int() + 10
new_tokens = []
for i, token in enumerate(tokens): for i, token in enumerate(tokens):
opname = token.kind opname = token.kind
offset = token.offset offset = token.offset
@@ -75,6 +95,8 @@ class Scanner38(Scanner37):
else: else:
next_end = tokens[len(tokens)-1].off2int() + 10 next_end = tokens[len(tokens)-1].off2int() + 10
# things that smash new_tokens like BUILD_LIST have to come first.
if offset in jump_back_targets: if offset in jump_back_targets:
next_end = off2int(jump_back_targets[offset], prefer_last=False) next_end = off2int(jump_back_targets[offset], prefer_last=False)
if self.debug: if self.debug:
@@ -92,6 +114,7 @@ class Scanner38(Scanner37):
if opname == "JUMP_ABSOLUTE" and jump_target <= next_end: if opname == "JUMP_ABSOLUTE" and jump_target <= next_end:
# Not a forward-enough jump to break out of the next loop, so continue. # Not a forward-enough jump to break out of the next loop, so continue.
# FIXME: Do we need "continue" detection? # FIXME: Do we need "continue" detection?
new_tokens.append(token)
continue continue
# We also want to avoid confusing BREAK_LOOPS with parts of the # We also want to avoid confusing BREAK_LOOPS with parts of the
@@ -122,8 +145,8 @@ class Scanner38(Scanner37):
): ):
token.kind = "BREAK_LOOP" token.kind = "BREAK_LOOP"
pass pass
pass new_tokens.append(token)
return tokens, customize return new_tokens, customize
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -281,6 +281,7 @@ TABLE_DIRECT = {
"comp_if": (" if %c%c", 0, 2), "comp_if": (" if %c%c", 0, 2),
"comp_if_not": (" if not %p%c", (0, "expr", PRECEDENCE["unary_not"]), 2), "comp_if_not": (" if not %p%c", (0, "expr", PRECEDENCE["unary_not"]), 2),
"comp_body": ("",), # ignore when recusing "comp_body": ("",), # ignore when recusing
"set_comp_body": ("%c", 0), "set_comp_body": ("%c", 0),
"gen_comp_body": ("%c", 0), "gen_comp_body": ("%c", 0),
"dict_comp_body": ("%c:%c", 1, 0), "dict_comp_body": ("%c:%c", 1, 0),

View File

@@ -279,8 +279,16 @@ def make_function36(self, node, is_lambda, nested=1, code_node=None):
# FIXME: handle free_tup, ann_dict, and default_tup # FIXME: handle free_tup, ann_dict, and default_tup
if kw_dict: if kw_dict:
assert kw_dict == "dict" assert kw_dict == "dict"
defaults = [self.traverse(n, indent="") for n in kw_dict[:-2]] const_list = kw_dict[0]
names = eval(self.traverse(kw_dict[-2])) if kw_dict[0] == "const_list":
add_consts = const_list[1]
assert add_consts == "add_consts"
names = add_consts[-1].attr
defaults = [v.pattr for v in add_consts[:-1]]
else:
defaults = [self.traverse(n, indent="") for n in kw_dict[:-2]]
names = eval(self.traverse(kw_dict[-2]))
assert len(defaults) == len(names) assert len(defaults) == len(names)
# FIXME: possibly handle line breaks # FIXME: possibly handle line breaks
for i, n in enumerate(names): for i, n in enumerate(names):

View File

@@ -202,6 +202,68 @@ class NonterminalActions:
n_classdefdeco2 = n_classdef n_classdefdeco2 = n_classdef
def n_const_list(self, node):
"""
prettyprint a constant dict, list, set or tuple.
"""
p = self.prec
lastnodetype = node[2].kind
flat_elems = node[1]
is_dict = lastnodetype.endswith("DICT")
if lastnodetype.endswith("LIST"):
self.write("[")
endchar = "]"
elif lastnodetype.endswith("SET") or is_dict:
self.write("{")
endchar = "}"
else:
# from trepan.api import debug; debug()
raise TypeError(
"Internal Error: n_const_list expects dict, list set, or set; got %s" % lastnodetype
)
self.indent_more(INDENT_PER_LEVEL)
sep = ""
if is_dict:
keys = flat_elems[-1].pattr
assert isinstance(keys, tuple)
assert len(keys) == len(flat_elems) - 1
for i, elem in enumerate(flat_elems[:-1]):
assert elem.kind == "ADD_VALUE"
value = elem.pattr
if elem.linestart is not None:
if elem.linestart != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
self.line_number = elem.linestart
else:
if sep != "":
sep += " "
self.write("%s %s: %s" % (sep, repr(keys[i]), value))
sep = ","
else:
for elem in flat_elems:
if elem.kind != "ADD_VALUE":
from trepan.api import debug; debug()
assert elem.kind == "ADD_VALUE"
value = elem.pattr
if elem.linestart is not None:
if elem.linestart != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
self.line_number = elem.linestart
else:
if sep != "":
sep += " "
self.write(sep, value)
sep = ","
self.write(endchar)
self.indent_less(INDENT_PER_LEVEL)
self.prec = p
self.prune()
return
def n_delete_subscript(self, node): def n_delete_subscript(self, node):
if node[-2][0] == "build_list" and node[-2][0][-1].kind.startswith( if node[-2][0] == "build_list" and node[-2][0][-1].kind.startswith(
"BUILD_TUPLE" "BUILD_TUPLE"
@@ -212,6 +274,189 @@ class NonterminalActions:
n_store_subscript = n_subscript = n_delete_subscript n_store_subscript = n_subscript = n_delete_subscript
def n_dict(self, node):
"""
Prettyprint a dict.
'dict' is something like k = {'a': 1, 'b': 42}"
We will use source-code line breaks to guide us when to break.
"""
if len(node) == 1 and node[0] == "const_list":
self.preorder(node[0])
self.prune()
return
p = self.prec
self.prec = 100
self.indent_more(INDENT_PER_LEVEL)
sep = INDENT_PER_LEVEL[:-1]
if node[0] != "dict_entry":
self.write("{")
line_number = self.line_number
if self.version >= (3, 0) and not self.is_pypy:
if node[0].kind.startswith("kvlist"):
# Python 3.5+ style key/value list in dict
kv_node = node[0]
l = list(kv_node)
length = len(l)
if kv_node[-1].kind.startswith("BUILD_MAP"):
length -= 1
i = 0
# Respect line breaks from source
while i < length:
self.write(sep)
name = self.traverse(l[i], indent="")
if i > 0:
line_number = self.indent_if_source_nl(
line_number, self.indent + INDENT_PER_LEVEL[:-1]
)
line_number = self.line_number
self.write(name, ": ")
value = self.traverse(
l[i + 1], indent=self.indent + (len(name) + 2) * " "
)
self.write(value)
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
line_number = self.line_number
i += 2
pass
pass
elif len(node) > 1 and node[1].kind.startswith("kvlist"):
# Python 3.0..3.4 style key/value list in dict
kv_node = node[1]
l = list(kv_node)
if len(l) > 0 and l[0].kind == "kv3":
# Python 3.2 does this
kv_node = node[1][0]
l = list(kv_node)
i = 0
while i < len(l):
self.write(sep)
name = self.traverse(l[i + 1], indent="")
if i > 0:
line_number = self.indent_if_source_nl(
line_number, self.indent + INDENT_PER_LEVEL[:-1]
)
pass
line_number = self.line_number
self.write(name, ": ")
value = self.traverse(
l[i], indent=self.indent + (len(name) + 2) * " "
)
self.write(value)
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
line_number = self.line_number
else:
sep += " "
i += 3
pass
pass
elif node[-1].kind.startswith("BUILD_CONST_KEY_MAP"):
# Python 3.6+ style const map
keys = node[-2].pattr
values = node[:-2]
# FIXME: Line numbers?
for key, value in zip(keys, values):
self.write(sep)
self.write(repr(key))
line_number = self.line_number
self.write(":")
self.write(self.traverse(value[0]))
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
line_number = self.line_number
else:
sep += " "
pass
pass
if sep.startswith(",\n"):
self.write(sep[1:])
pass
elif node[0].kind.startswith("dict_entry"):
assert self.version >= (3, 5)
template = ("%C", (0, len(node[0]), ", **"))
self.template_engine(template, node[0])
sep = ""
elif node[-1].kind.startswith("BUILD_MAP_UNPACK") or node[
-1
].kind.startswith("dict_entry"):
assert self.version >= (3, 5)
# FIXME: I think we can intermingle dict_comp's with other
# dictionary kinds of things. The most common though is
# a sequence of dict_comp's
kwargs = node[-1].attr
template = ("**%C", (0, kwargs, ", **"))
self.template_engine(template, node)
sep = ""
pass
else:
# Python 2 style kvlist. Find beginning of kvlist.
indent = self.indent + " "
line_number = self.line_number
if node[0].kind.startswith("BUILD_MAP"):
if len(node) > 1 and node[1].kind in ("kvlist", "kvlist_n"):
kv_node = node[1]
else:
kv_node = node[1:]
self.kv_map(kv_node, sep, line_number, indent)
else:
sep = ""
opname = node[-1].kind
if self.is_pypy and self.version >= (3, 5):
if opname.startswith("BUILD_CONST_KEY_MAP"):
keys = node[-2].attr
# FIXME: DRY this and the above
for i in range(len(keys)):
key = keys[i]
value = self.traverse(node[i], indent="")
self.write(sep, key, ": ", value)
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + " "
line_number = self.line_number
pass
pass
pass
else:
if opname.startswith("kvlist"):
list_node = node[0]
else:
list_node = node
assert list_node[-1].kind.startswith("BUILD_MAP")
for i in range(0, len(list_node) - 1, 2):
key = self.traverse(list_node[i], indent="")
value = self.traverse(list_node[i + 1], indent="")
self.write(sep, key, ": ", value)
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + " "
line_number = self.line_number
pass
pass
pass
elif opname.startswith("kvlist"):
kv_node = node[-1]
self.kv_map(node[-1], sep, line_number, indent)
pass
pass
if sep.startswith(",\n"):
self.write(sep[1:])
if node[0] != "dict_entry":
self.write("}")
self.indent_less(INDENT_PER_LEVEL)
self.prec = p
self.prune()
def n_docstring(self, node): def n_docstring(self, node):
indent = self.indent indent = self.indent
@@ -501,6 +746,11 @@ class NonterminalActions:
""" """
prettyprint a dict, list, set or tuple. prettyprint a dict, list, set or tuple.
""" """
if len(node) == 1 and node[0] == "const_list":
self.preorder(node[0])
self.prune()
return
p = self.prec p = self.prec
self.prec = PRECEDENCE["yield"] - 1 self.prec = PRECEDENCE["yield"] - 1
lastnode = node.pop() lastnode = node.pop()
@@ -550,7 +800,6 @@ class NonterminalActions:
self.write("(") self.write("(")
endchar = ")" endchar = ")"
else: else:
# from trepan.api import debug; debug()
raise TypeError( raise TypeError(
"Internal Error: n_build_list expects list, tuple, set, or unpack" "Internal Error: n_build_list expects list, tuple, set, or unpack"
) )

View File

@@ -708,184 +708,6 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
pass pass
pass pass
def n_dict(self, node):
"""
prettyprint a dict
'dict' is something like k = {'a': 1, 'b': 42}"
We will use source-code line breaks to guide us when to break.
"""
p = self.prec
self.prec = 100
self.indent_more(INDENT_PER_LEVEL)
sep = INDENT_PER_LEVEL[:-1]
if node[0] != "dict_entry":
self.write("{")
line_number = self.line_number
if self.version >= (3, 0) and not self.is_pypy:
if node[0].kind.startswith("kvlist"):
# Python 3.5+ style key/value list in dict
kv_node = node[0]
l = list(kv_node)
length = len(l)
if kv_node[-1].kind.startswith("BUILD_MAP"):
length -= 1
i = 0
# Respect line breaks from source
while i < length:
self.write(sep)
name = self.traverse(l[i], indent="")
if i > 0:
line_number = self.indent_if_source_nl(
line_number, self.indent + INDENT_PER_LEVEL[:-1]
)
line_number = self.line_number
self.write(name, ": ")
value = self.traverse(
l[i + 1], indent=self.indent + (len(name) + 2) * " "
)
self.write(value)
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
line_number = self.line_number
i += 2
pass
pass
elif len(node) > 1 and node[1].kind.startswith("kvlist"):
# Python 3.0..3.4 style key/value list in dict
kv_node = node[1]
l = list(kv_node)
if len(l) > 0 and l[0].kind == "kv3":
# Python 3.2 does this
kv_node = node[1][0]
l = list(kv_node)
i = 0
while i < len(l):
self.write(sep)
name = self.traverse(l[i + 1], indent="")
if i > 0:
line_number = self.indent_if_source_nl(
line_number, self.indent + INDENT_PER_LEVEL[:-1]
)
pass
line_number = self.line_number
self.write(name, ": ")
value = self.traverse(
l[i], indent=self.indent + (len(name) + 2) * " "
)
self.write(value)
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
line_number = self.line_number
else:
sep += " "
i += 3
pass
pass
elif node[-1].kind.startswith("BUILD_CONST_KEY_MAP"):
# Python 3.6+ style const map
keys = node[-2].pattr
values = node[:-2]
# FIXME: Line numbers?
for key, value in zip(keys, values):
self.write(sep)
self.write(repr(key))
line_number = self.line_number
self.write(":")
self.write(self.traverse(value[0]))
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
line_number = self.line_number
else:
sep += " "
pass
pass
if sep.startswith(",\n"):
self.write(sep[1:])
pass
elif node[0].kind.startswith("dict_entry"):
assert self.version >= (3, 5)
template = ("%C", (0, len(node[0]), ", **"))
self.template_engine(template, node[0])
sep = ""
elif node[-1].kind.startswith("BUILD_MAP_UNPACK") or node[
-1
].kind.startswith("dict_entry"):
assert self.version >= (3, 5)
# FIXME: I think we can intermingle dict_comp's with other
# dictionary kinds of things. The most common though is
# a sequence of dict_comp's
kwargs = node[-1].attr
template = ("**%C", (0, kwargs, ", **"))
self.template_engine(template, node)
sep = ""
pass
else:
# Python 2 style kvlist. Find beginning of kvlist.
indent = self.indent + " "
line_number = self.line_number
if node[0].kind.startswith("BUILD_MAP"):
if len(node) > 1 and node[1].kind in ("kvlist", "kvlist_n"):
kv_node = node[1]
else:
kv_node = node[1:]
self.kv_map(kv_node, sep, line_number, indent)
else:
sep = ""
opname = node[-1].kind
if self.is_pypy and self.version >= (3, 5):
if opname.startswith("BUILD_CONST_KEY_MAP"):
keys = node[-2].attr
# FIXME: DRY this and the above
for i in range(len(keys)):
key = keys[i]
value = self.traverse(node[i], indent="")
self.write(sep, key, ": ", value)
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + " "
line_number = self.line_number
pass
pass
pass
else:
if opname.startswith("kvlist"):
list_node = node[0]
else:
list_node = node
assert list_node[-1].kind.startswith("BUILD_MAP")
for i in range(0, len(list_node) - 1, 2):
key = self.traverse(list_node[i], indent="")
value = self.traverse(list_node[i + 1], indent="")
self.write(sep, key, ": ", value)
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + " "
line_number = self.line_number
pass
pass
pass
elif opname.startswith("kvlist"):
kv_node = node[-1]
self.kv_map(node[-1], sep, line_number, indent)
pass
pass
if sep.startswith(",\n"):
self.write(sep[1:])
if node[0] != "dict_entry":
self.write("}")
self.indent_less(INDENT_PER_LEVEL)
self.prec = p
self.prune()
def template_engine(self, entry, startnode): def template_engine(self, entry, startnode):
"""The format template interpetation engine. See the comment at the """The format template interpetation engine. See the comment at the
beginning of this module for the how we interpret format beginning of this module for the how we interpret format