diff --git a/pytest/test_disasm.py-notyet b/pytest/test_disasm.py-notyet index 97ed847c..27f4ad36 100644 --- a/pytest/test_disasm.py-notyet +++ b/pytest/test_disasm.py-notyet @@ -1,7 +1,7 @@ import os.path import pytest -from uncompyle6.disas import disassemble_file +from uncompyle6.code_fns import disassemble_file def get_srcdir(): filename = os.path.normcase(os.path.dirname(__file__)) diff --git a/requirements.txt b/requirements.txt index 5b1e7b65..40a86898 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,8 @@ hypothesis==2.0.0 pytest -e . + +Click~=7.0 +xdis>=6.0.4 +configobj~=5.0.6 +setuptools~=65.3.0 diff --git a/setup.py b/setup.py index c2297b7e..495117b5 100755 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ #!/usr/bin/env python """Setup script for the 'uncompyle6' distribution.""" +import setuptools import sys SYS_VERSION = sys.version_info[0:2] @@ -37,9 +38,7 @@ from __pkginfo__ import ( zip_safe, ) -from setuptools import setup, find_packages - -setup( +setuptools.setup( author=author, author_email=author_email, classifiers=classifiers, @@ -50,7 +49,7 @@ setup( long_description=long_description, long_description_content_type="text/x-rst", name=modname, - packages=find_packages(), + packages=setuptools.find_packages(), py_modules=py_modules, test_suite="nose.collector", url=web, diff --git a/test/bytecode_3.7_run/10_extendedargifelse.pyc b/test/bytecode_3.7_run/10_extendedargifelse.pyc new file mode 100644 index 00000000..3dda1b31 Binary files /dev/null and b/test/bytecode_3.7_run/10_extendedargifelse.pyc differ diff --git a/test/simple_source/bug37/10_extendedargifelse.py b/test/simple_source/bug37/10_extendedargifelse.py new file mode 100644 index 00000000..d8c95091 --- /dev/null +++ b/test/simple_source/bug37/10_extendedargifelse.py @@ -0,0 +1,272 @@ +# This is RUNNABLE! + +"""This program is self-checking!""" + +# Bug was handling if which has EXTENDED_ARG +# See https://github.com/rocky/python-uncompyle6/pull/406 + +aa = 0 +ab = 0 +ac = 0 +ad = 0 +ae = 0 +af = 0 +ag = 0 +ah = 0 +ai = 0 +aj = 0 +ak = 0 +al = 0 +am = 0 +an = 0 +ao = 0 +ap = 0 +aq = 0 +ar = 0 +at = 0 +au = 0 +av = 0 +aw = 0 +ax = 0 +ay = 0 +az = 0 +ba = 0 +bb = 0 +bc = 0 +bd = 0 +be = 0 +bf = 0 +bg = 0 +bh = 0 +bi = 0 +bj = 0 +bk = 0 +bl = 0 +bm = 0 +bn = 0 +bo = 0 +bp = 0 +bq = 0 +br = 0 +bs = 0 +bt = 0 +bu = 0 +bv = 0 +bw = 0 +bx = 0 +by = 0 +bz = 0 +ca = 0 +cb = 0 +cc = 0 +cd = 0 +ce = 0 +cf = 0 +cg = 0 +ch = 0 +ci = 0 +cj = 0 +ck = 0 +cl = 0 +cm = 0 +cn = 0 +co = 0 +cp = 0 +cq = 0 +cr = 0 +cs = 0 +ct = 0 +cu = 0 +cv = 0 +cw = 0 +cx = 0 +cy = 0 +cz = 0 +da = 0 +db = 0 +dc = 0 +dd = 0 +de = 0 +df = 0 +dg = 0 +dh = 0 +di = 0 +dj = 0 +dk = 0 +dl = 0 +dm = 0 +dn = 0 +do = 0 +dp = 0 +dq = 0 +dr = 0 +ds = 0 +dt = 0 +du = 0 +dv = 0 +dw = 0 +dx = 0 +dy = 0 +dz = 0 +ea = 0 +eb = 0 +ec = 0 +ed = 0 +ee = 0 +ef = 0 +eg = 0 +eh = 0 +ei = 0 +ej = 0 +ek = 0 +el = 0 +em = 0 +en = 0 +eo = 0 +ep = 0 +eq = 0 +er = 0 +es = 0 +et = 0 +eu = 0 +ev = 0 +ew = 0 +ex = 0 +ey = 0 +ez = 0 +fa = 0 +fb = 0 +fc = 0 +fd = 0 +fe = 0 +ff = 0 +fg = 0 +fh = 0 +fi = 0 +fj = 0 +fk = 0 +fl = 0 +fm = 0 +fn = 0 +fo = 0 +fp = 0 +fq = 0 +fr = 0 +fs = 0 +ft = 0 +fu = 0 +fv = 0 +fw = 0 +fx = 0 +fy = 0 +fz = 0 +ga = 0 +gb = 0 +gc = 0 +gd = 0 +ge = 0 +gf = 0 +gg = 0 +gh = 0 +gi = 0 +gj = 0 +gk = 0 +gl = 0 +gm = 0 +gn = 0 +go = 0 +gp = 0 +gq = 0 +gr = 0 +gs = 0 +gt = 0 +gu = 0 +gv = 0 +gw = 0 +gx = 0 +gy = 0 +gz = 0 +ha = 0 +hb = 0 +hc = 0 +hd = 0 +he = 0 +hf = 0 +hg = 0 +hh = 0 +hi = 0 +hj = 0 +hk = 0 +hl = 0 +hm = 0 +hn = 0 +ho = 0 +hp = 0 +hq = 0 +hr = 0 +hs = 0 +ht = 0 +hu = 0 +hv = 0 +hw = 0 +hx = 0 +hy = 0 +hz = 0 +ia = 0 +ib = 0 +ic = 0 +id = 0 +ie = 0 +ig = 0 +ih = 0 +ii = 0 +ij = 0 +ik = 0 +il = 0 +im = 0 +io = 0 +ip = 0 +iq = 0 +ir = 0 +it = 0 +iu = 0 +iv = 0 +iw = 0 +ix = 0 +iy = 0 +iz = 0 +ja = 0 +jb = 0 +jc = 0 +jd = 0 +je = 0 +jf = 0 +jg = 0 +jh = 0 +ji = 0 +jj = 0 +jk = 0 +jl = 0 +jm = 0 +jn = 0 +jo = 0 +jp = 0 +jq = 0 +jr = 0 +js = 0 +jt = 0 +ju = 0 +jv = 0 +jw = 0 +jx = 0 +jy = 0 +jz = 0 +ka = 0 +kb = 0 +kc = 0 +var = True +if var: + aa = 1 +else: + aa = 2 +assert aa == 1 diff --git a/uncompyle6/bin/pydisassemble.py b/uncompyle6/bin/pydisassemble.py index 54413c55..92eac720 100755 --- a/uncompyle6/bin/pydisassemble.py +++ b/uncompyle6/bin/pydisassemble.py @@ -5,7 +5,7 @@ # import sys, os, getopt -from uncompyle6.disas import disassemble_file +from uncompyle6.code_fns import disassemble_file from uncompyle6.version import __version__ program, ext = os.path.splitext(os.path.basename(__file__)) diff --git a/uncompyle6/disas.py b/uncompyle6/code_fns.py similarity index 100% rename from uncompyle6/disas.py rename to uncompyle6/code_fns.py diff --git a/uncompyle6/main.py b/uncompyle6/main.py index 49174fa5..ff3f4a67 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -16,7 +16,7 @@ import datetime, os, py_compile, subprocess, sys from xdis import iscode from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE, version_tuple_to_str -from uncompyle6.disas import check_object_path +from uncompyle6.code_fns import check_object_path from uncompyle6.semantics import pysource from uncompyle6.semantics.pysource import PARSER_DEFAULT_DEBUG from uncompyle6.parser import ParserError diff --git a/uncompyle6/parsers/reducecheck/ifelsestmt.py b/uncompyle6/parsers/reducecheck/ifelsestmt.py index 40f747c5..133df853 100644 --- a/uncompyle6/parsers/reducecheck/ifelsestmt.py +++ b/uncompyle6/parsers/reducecheck/ifelsestmt.py @@ -136,6 +136,8 @@ def ifelsestmt(self, lhs, n, rule, tree, tokens, first, last): # print(tokens[t]) # print("=" * 30) + first_offset = tokens[first].off2int() + if rule not in IFELSE_STMT_RULES: # print("XXX", rule) return False @@ -151,7 +153,7 @@ def ifelsestmt(self, lhs, n, rule, tree, tokens, first, last): ): return True - # Make sure all of the "come froms" offset at the + # Make sure all the offsets from the "come froms" at the # end of the "if" come from somewhere inside the "if". # Since the come_froms are ordered so that lowest # offset COME_FROM is last, it is sufficient to test @@ -163,8 +165,8 @@ def ifelsestmt(self, lhs, n, rule, tree, tokens, first, last): end_come_froms = end_come_froms[0] if not isinstance(end_come_froms, Token): if len(end_come_froms): - return tokens[first].offset > end_come_froms[-1].attr - elif tokens[first].offset > end_come_froms.attr: + return first_offset > end_come_froms[-1].attr + elif first_offset > end_come_froms.attr: return True # FIXME: There is weirdness in the grammar we need to work around. @@ -173,7 +175,7 @@ def ifelsestmt(self, lhs, n, rule, tree, tokens, first, last): last_token = tree[-1] else: last_token = tokens[last] - if last_token == "COME_FROM" and tokens[first].offset > last_token.attr: + if last_token == "COME_FROM" and first_offset > last_token.attr: if self.version < (3, 0) and self.insts[self.offset2inst_index[last_token.attr]].opname != "SETUP_LOOP": return True @@ -237,7 +239,7 @@ def ifelsestmt(self, lhs, n, rule, tree, tokens, first, last): if jump_else_end[-1].off2int() != jmp_target: return True - if tokens[first].off2int() > jmp_target: + if first_offset > jmp_target: return True return (jmp_target > last_offset) and tokens[last] != "JUMP_FORWARD" diff --git a/uncompyle6/parsers/reducecheck/ifstmt2.py b/uncompyle6/parsers/reducecheck/ifstmt2.py index a8e616b9..3cf098a0 100644 --- a/uncompyle6/parsers/reducecheck/ifstmt2.py +++ b/uncompyle6/parsers/reducecheck/ifstmt2.py @@ -6,7 +6,6 @@ If statement reduction check for Python 2.6 (and older?) def ifstmt2(self, lhs, n, rule, ast, tokens, first, last): - # print("XXX", first, last) # for t in range(first, last): # print(tokens[t]) # print("=" * 30) @@ -61,16 +60,25 @@ def ifstmt2(self, lhs, n, rule, ast, tokens, first, last): if testexpr[0] in ("testtrue", "testfalse"): test = testexpr[0] - if len(test) > 1 and test[1].kind.startswith("jmp_"): - jmp_target = int(test[1][0].pattr) + jmp = test[1] + if len(test) > 1 and jmp.kind.startswith("jmp_"): + jmp_target = int(jmp[0].pattr) if last == len(tokens): last -= 1 + + if_end_offset = tokens[last].off2int(prefer_last=False) if ( tokens[first].off2int(prefer_last=True) <= jmp_target - < tokens[last].off2int(prefer_last=False) + < if_end_offset ): - return True + # In 2.6 (and before?) we need to check if the previous instruction + # is a jump to the last token. If so, testexpr is negated? and so + # jmp_target < if_end_offset. + previous_inst_index = self.offset2inst_index[jmp_target] - 1 + previous_inst = self.insts[previous_inst_index] + if previous_inst.opname != "JUMP_ABSOLUTE" and previous_inst.argval != if_end_offset: + return True # jmp_target less than tokens[first] is okay - is to a loop # jmp_target equal tokens[last] is also okay: normal non-optimized non-loop jump if jmp_target > tokens[last].off2int(): diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 373913ac..98ef5f9d 100644 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -137,9 +137,9 @@ class Scanner(object): assert count <= i if collection_type == "CONST_DICT": - # constant dictonaries work via BUILD_CONST_KEY_MAP and + # constant dictionaries work via BUILD_CONST_KEY_MAP and # handle the values() like sets and lists. - # However the keys() are an LOAD_CONST of the keys. + # However, the keys() are an LOAD_CONST of the keys. # adjust offset to account for this count += 1 diff --git a/uncompyle6/scanners/pypy37.py b/uncompyle6/scanners/pypy37.py index 6856c839..eac8b2e3 100644 --- a/uncompyle6/scanners/pypy37.py +++ b/uncompyle6/scanners/pypy37.py @@ -13,6 +13,7 @@ from xdis.opcodes import opcode_37pypy as opc # is this right? JUMP_OPs = opc.JUMP_OPS + # We base this off of 3.7 class ScannerPyPy37(scan.Scanner37): def __init__(self, show_asm): diff --git a/uncompyle6/scanners/pypy38.py b/uncompyle6/scanners/pypy38.py index f32c1bbd..8f22e397 100644 --- a/uncompyle6/scanners/pypy38.py +++ b/uncompyle6/scanners/pypy38.py @@ -13,6 +13,7 @@ from xdis.opcodes import opcode_38pypy as opc JUMP_OPs = opc.JUMP_OPS + # We base this off of 3.8 class ScannerPyPy38(scan.Scanner38): def __init__(self, show_asm): diff --git a/uncompyle6/scanners/scanner37.py b/uncompyle6/scanners/scanner37.py index 59d59049..ca7e5396 100644 --- a/uncompyle6/scanners/scanner37.py +++ b/uncompyle6/scanners/scanner37.py @@ -119,9 +119,7 @@ class Scanner37(Scanner37Base): ) return new_tokens - def ingest( - self, co, classname=None, code_objects={}, show_asm=None - ): + def ingest(self, bytecode, classname=None, code_objects={}, show_asm=None): """ Create "tokens" the bytecode of an Python code object. Largely these are the opcode name, but in some cases that has been modified to make parsing @@ -141,7 +139,7 @@ class Scanner37(Scanner37Base): cause specific rules for the specific number of arguments they take. """ tokens, customize = Scanner37Base.ingest( - self, co, classname, code_objects, show_asm + self, bytecode, classname, code_objects, show_asm ) new_tokens = [] for i, t in enumerate(tokens): diff --git a/uncompyle6/scanners/scanner37base.py b/uncompyle6/scanners/scanner37base.py index 6c8d69ae..15dc1cbe 100644 --- a/uncompyle6/scanners/scanner37base.py +++ b/uncompyle6/scanners/scanner37base.py @@ -45,14 +45,18 @@ import sys globals().update(op3.opmap) +CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT") + + class Scanner37Base(Scanner): def __init__(self, version, show_asm=None, debug="", is_pypy=False): super(Scanner37Base, self).__init__(version, show_asm, is_pypy) + self.offset2tok_index = None self.debug = debug self.is_pypy = is_pypy # Create opcode classification sets - # Note: super initilization above initializes self.opc + # Note: super initialization above initializes self.opc # Ops that start SETUP_ ... We will COME_FROM with these names # Some blocks and END_ statements. And they can start @@ -137,7 +141,7 @@ class Scanner37Base(Scanner): self.opc.POP_JUMP_IF_FALSE, ] ) - # Not really a set, but still clasification-like + # Not really a set, but still classification-like self.statement_opcode_sequences = [ (self.opc.POP_JUMP_IF_FALSE, self.opc.JUMP_FORWARD), (self.opc.POP_JUMP_IF_FALSE, self.opc.JUMP_ABSOLUTE), @@ -272,7 +276,7 @@ class Scanner37Base(Scanner): if inst.opname == "JUMP_FORWARD": jump_inst = self.insts[self.offset2inst_index[inst.argval]] if jump_inst.has_extended_arg and jump_inst.opname.startswith("JUMP"): - # Create comination of the jump-to instruction and + # Create a combination of the jump-to instruction and # this one. Keep the position information of this instruction, # but the operator and operand properties come from the other # instruction @@ -440,9 +444,9 @@ class Scanner37Base(Scanner): elif op == self.opc.JUMP_ABSOLUTE: # Refine JUMP_ABSOLUTE further in into: # - # * "JUMP_LOOP" - which are are used in loops. This is sometimes + # * "JUMP_LOOP" - which are used in loops. This is sometimes # found at the end of a looping construct - # * "BREAK_LOOP" - which are are used to break loops. + # * "BREAK_LOOP" - which are used to break loops. # * "CONTINUE" - jumps which may appear in a "continue" statement. # It is okay to confuse this with JUMP_LOOP. The # grammar should tolerate this. diff --git a/uncompyle6/scanners/scanner38.py b/uncompyle6/scanners/scanner38.py index 23909804..6fad6c77 100644 --- a/uncompyle6/scanners/scanner38.py +++ b/uncompyle6/scanners/scanner38.py @@ -41,9 +41,7 @@ class Scanner38(Scanner37): pass - def ingest( - self, co, classname=None, code_objects={}, show_asm=None - ): + def ingest(self, bytecode, classname=None, code_objects={}, show_asm=None): """ Create "tokens" the bytecode of an Python code object. Largely these are the opcode name, but in some cases that has been modified to make parsing @@ -63,7 +61,7 @@ class Scanner38(Scanner37): cause specific rules for the specific number of arguments they take. """ tokens, customize = super(Scanner38, self).ingest( - co, classname, code_objects, show_asm + bytecode, classname, code_objects, show_asm ) # Hacky way to detect loop ranges. diff --git a/uncompyle6/scanners/tok.py b/uncompyle6/scanners/tok.py index 51d04812..614743e0 100644 --- a/uncompyle6/scanners/tok.py +++ b/uncompyle6/scanners/tok.py @@ -15,7 +15,8 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -import re, sys +import re +import sys def off2int(offset, prefer_last=True): @@ -90,7 +91,7 @@ class Token: # Python 2.4 can't have empty () print("I don't know about Python version %s yet." % e) try: version_tuple = tuple(int(i) for i in str(e)[1:-1].split(".")) - except: + except Exception: pass else: if version_tuple > (3, 9):