diff --git a/uncompyle6/disas.py b/uncompyle6/disas.py index 57814438..6634cdd1 100644 --- a/uncompyle6/disas.py +++ b/uncompyle6/disas.py @@ -29,7 +29,7 @@ def disco(version, co, out=None): diassembles and deparses a given code block 'co' """ - assert inspect.iscode(co) + assert hasattr(co, 'co_name') # store final output stream for case of error real_out = out or sys.stdout diff --git a/uncompyle6/load.py b/uncompyle6/load.py index 24fd5bc9..f6d56fcf 100644 --- a/uncompyle6/load.py +++ b/uncompyle6/load.py @@ -34,22 +34,22 @@ def check_object_path(path): return path def load_file(filename): - ''' + """ load a Python source file and compile it to byte-code _load_file(filename: string): code_object filename: name of file containing Python source code (normally a .py) code_object: code_object compiled from this source code This function does NOT write any file! - ''' - fp = open(filename, 'rb') - source = fp.read().decode('utf-8') + '\n' - try: - co = compile(source, filename, 'exec', dont_inherit=True) - except SyntaxError: - print('>>Syntax error in %s\n' % filename, file= sys.stderr) - raise - fp.close() + """ + with open(filename, 'rb') as fp: + source = fp.read().decode('utf-8') + '\n' + try: + co = compile(source, filename, 'exec', dont_inherit=True) + except SyntaxError: + print('>>Syntax error in %s\n' % filename, file= sys.stderr) + raise + pass return co def load_module(filename, code_objects={}): diff --git a/uncompyle6/main.py b/uncompyle6/main.py index c0795b35..231feb6e 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -13,7 +13,7 @@ def uncompyle(version, co, out=None, showasm=False, showast=False, disassembles and deparses a given code block 'co' """ - assert inspect.iscode(co) + assert hasattr(co, 'co_name') # store final output stream for case of error real_out = out or sys.stdout @@ -34,6 +34,7 @@ def uncompyle(version, co, out=None, showasm=False, showast=False, print(e, file=real_out) raise + def uncompyle_file(filename, 
outstream=None, showasm=False, showast=False, showgrammar=False): """ diff --git a/uncompyle6/marsh.py b/uncompyle6/marsh.py index 387a659d..d3fdd546 100644 --- a/uncompyle6/marsh.py +++ b/uncompyle6/marsh.py @@ -18,6 +18,7 @@ from __future__ import print_function import sys, types from struct import unpack +import uncompyle6.scanners.scanner3 as scan3 from uncompyle6.magics import PYTHON_MAGIC_INT internStrings = [] @@ -60,23 +61,46 @@ def load_code_internal(fp, magic_int, bytes_for_s=False, code_objects={}): b1 = ord(fp.read(1)) if b1 & 0x80: - TypeError("Can't handle object references yet") + raise TypeError("Can't handle object references yet") + code = load_code_type(fp, magic_int, bytes_for_s, code_objects) + # FIXME: do something with reference? + return code marshalType = chr(b1) if marshalType == 'c': Code = types.CodeType - # FIXME If 'i' is deprecated, what would we use? - co_argcount = unpack('i', fp.read(4))[0] - co_nlocals = unpack('i', fp.read(4))[0] - co_stacksize = unpack('i', fp.read(4))[0] - co_flags = unpack('i', fp.read(4))[0] - # FIXME: somewhere between Python 2.7 and python 3.2 there's - # another 4 bytes before we get to the bytecode. What's going on? - # Again, because magic ints decreased between python 2.7 and 3.0 we need - # a range here. - if 3000 < magic_int < 20121: - fp.read(4) + # Python [1.3 .. 2.3) + # FIXME: find out what magics were for 1.3 + v13_to_23 = magic_int in (20121, 50428, 50823, 60202, 60717) + + # Python [1.5 .. 
2.3) + v15_to_23 = magic_int in (20121, 50428, 50823, 60202, 60717) + + if v13_to_23: + co_argcount = unpack('h', fp.read(2))[0] + else: + co_argcount = unpack('i', fp.read(4))[0] + + if 3020 < magic_int < 20121: + kwonlyargcount = unpack('i', fp.read(4))[0] + else: + kwonlyargcount = 0 + + if v13_to_23: + co_nlocals = unpack('h', fp.read(2))[0] + else: + co_nlocals = unpack('i', fp.read(4))[0] + + if v15_to_23: + co_stacksize = unpack('h', fp.read(2))[0] + else: + co_stacksize = unpack('i', fp.read(4))[0] + + if v13_to_23: + co_flags = unpack('h', fp.read(2))[0] + else: + co_flags = unpack('i', fp.read(4))[0] co_code = load_code_internal(fp, magic_int, bytes_for_s=True, code_objects=code_objects) @@ -92,24 +116,22 @@ def load_code_internal(fp, magic_int, bytes_for_s=False, code_objects={}): # The Python3 code object is different than Python2's which # we are reading if we get here. # Also various parameters which were strings are now - # bytes (which is probably more logical). + # bytes (which is probably more logical). if PYTHON3: if PYTHON_MAGIC_INT > 3020: # In later Python3 magic_ints, there is a # kwonlyargcount parameter which we set to 0. 
- code = Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags, - co_code, - co_consts, co_names, co_varnames, co_filename, co_name, + code = Code(co_argcount, kwonlyargcount, co_nlocals, co_stacksize, co_flags, + co_code, co_consts, co_names, co_varnames, co_filename, co_name, co_firstlineno, bytes(co_lnotab, encoding='utf-8'), co_freevars, co_cellvars) else: - code = Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags, - co_code, - co_consts, co_names, co_varnames, co_filename, co_name, + code = Code(co_argcount, kwonlyargcount, co_nlocals, co_stacksize, co_flags, + co_code, co_consts, co_names, co_varnames, co_filename, co_name, co_firstlineno, bytes(co_lnotab, encoding='utf-8'), co_freevars, co_cellvars) else: - if (3000 < magic_int < 20121): + if (3000 <= magic_int < 20121): # Python 3 encodes some fields as Unicode while Python2 # requires the corresponding field to have string values co_consts = tuple([str(s) if s else None for s in co_consts]) @@ -117,45 +139,57 @@ def load_code_internal(fp, magic_int, bytes_for_s=False, code_objects={}): co_varnames = tuple([str(s) if s else None for s in co_varnames]) co_filename = str(co_filename) co_name = str(co_name) - code = Code(co_argcount, co_nlocals, co_stacksize, co_flags, co_code, + if 3020 < magic_int < 20121: + code = scan3.Code3(co_argcount, kwonlyargcount, + co_nlocals, co_stacksize, co_flags, co_code, + co_consts, co_names, co_varnames, co_filename, co_name, + co_firstlineno, co_lnotab, co_freevars, co_cellvars) + else: + code = Code(co_argcount, co_nlocals, co_stacksize, co_flags, co_code, co_consts, co_names, co_varnames, co_filename, co_name, co_firstlineno, co_lnotab, co_freevars, co_cellvars) + code_objects[str(code)] = code return code elif marshalType == 'C': raise KeyError("New-style code not finished yet") # const type - elif marshalType == '.': - return Ellipsis elif marshalType == '0': - raise KeyError(marshalType) + # Null return None elif marshalType == 'N': return None - elif 
marshalType == 'T': - return True elif marshalType == 'F': return False + elif marshalType == 'T': + return True elif marshalType == 'S': return StopIteration - # number type + elif marshalType == '.': + return Ellipsis + elif marshalType == 'i': + # int + return int(unpack('i', fp.read(4))[0]) + elif marshalType == 'I': + # int64 + return unpack('q', fp.read(8))[0] elif marshalType == 'f': + # float n = fp.read(1) return float(unpack('d', fp.read(n))[0]) elif marshalType == 'g': + # binary float return float(unpack('d', fp.read(8))[0]) - elif marshalType == 'i': - return int(unpack('i', fp.read(4))[0]) - elif marshalType == 'I': - return unpack('q', fp.read(8))[0] elif marshalType == 'x': + # complex raise KeyError(marshalType) - return None elif marshalType == 'y': + # binary complex raise KeyError(marshalType) return None elif marshalType == 'l': + # long n = unpack('i', fp.read(4))[0] if n == 0: return long(0) @@ -167,26 +201,27 @@ def load_code_internal(fp, magic_int, bytes_for_s=False, code_objects={}): if n < 0: return long(d*-1) return d - # strings type - elif marshalType == 'R': - refnum = unpack('i', fp.read(4))[0] - return internStrings[refnum] elif marshalType == 's': + # string + # Note: could mean bytes in Python3 processing Python2 bytecode strsize = unpack('i', fp.read(4))[0] s = fp.read(strsize) if not bytes_for_s: s = compat_str(s) return s elif marshalType == 't': + # interned strsize = unpack('i', fp.read(4))[0] interned = compat_str(fp.read(strsize)) internStrings.append(interned) return interned - elif marshalType == 'u': - strsize = unpack('i', fp.read(4))[0] - unicodestring = fp.read(strsize) - return unicodestring.decode('utf-8') - # collection type + elif marshalType == 'R': + # string reference + refnum = unpack('i', fp.read(4))[0] + return internStrings[refnum] + elif marshalType == 'r': + # object reference - new in Python3 + raise KeyError("reference code not finished yet") elif marshalType == '(': tuplesize = unpack('i', 
fp.read(4))[0] ret = tuple() @@ -196,13 +231,61 @@ def load_code_internal(fp, magic_int, bytes_for_s=False, code_objects={}): return ret elif marshalType == '[': raise KeyError(marshalType) - return None + elif marshalType == '{': + # dictionary + raise KeyError(marshalType) elif marshalType == '{': raise KeyError(marshalType) - return None + elif marshalType == 'C': + # code type used in Python 1.0 - 1.2 + raise KeyError("C code is Python 1.0 - 1.2; can't handle yet") + elif marshalType == 'u': + strsize = unpack('i', fp.read(4))[0] + unicodestring = fp.read(strsize) + return unicodestring.decode('utf-8') elif marshalType in ['<', '>']: raise KeyError(marshalType) + elif marshalType == '?': + # unknown + raise KeyError(marshalType) + elif marshalType in ['<', '>']: + # set and frozenset + raise KeyError(marshalType) return None + elif marshalType == 'a': + # ascii + # FIXME check + strsize = unpack('i', fp.read(4))[0] + s = fp.read(strsize) + s = compat_str(s) + return s + elif marshalType == 'A': + # ascii interned + # FIXME: check + strsize = unpack('i', fp.read(4))[0] + interned = compat_str(fp.read(strsize)) + internStrings.append(interned) + return interned + elif marshalType == ')': + # small tuple + tuplesize = unpack('B', fp.read(1))[0] + ret = tuple() + while tuplesize > 0: + ret += load_code_internal(fp, magic_int, code_objects=code_objects), + tuplesize -= 1 + return ret + elif marshalType == 'z': + # short ascii + strsize = unpack('B', fp.read(1))[0] + return compat_str(fp.read(strsize)) + elif marshalType == 'Z': + # short ascii interned + # FIXME: check + strsize = unpack('B', fp.read(1))[0] + interned = compat_str(fp.read(strsize)) + internStrings.append(interned) + return interned else: - sys.stderr.write("Unknown type %i (hex %x)\n" % (ord(marshalType), ord(marshalType))) + sys.stderr.write("Unknown type %i (hex %x) %c\n" % + (ord(marshalType), ord(marshalType), ord(marshalType))) return diff --git a/uncompyle6/scanners/scanner3.py 
b/uncompyle6/scanners/scanner3.py index b937f441..b9c9e701 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -22,6 +22,29 @@ globals().update(dis.opmap) from uncompyle6.opcodes.opcode_33 import * import uncompyle6.scanner as scan +class Code3: + """Class for a Python3 code object used when a Python interpreter less than 3 is + working on Python3 bytecode + """ + def __init__(self, co_argcount, co_kwonlyargcount,co_nlocals, co_stacksize, co_flags, co_code, + co_consts, co_names, co_varnames, co_filename, co_name, + co_firstlineno, co_lnotab, co_freevars, co_cellvars): + self.co_argcount = co_argcount + self.co_kwonlyargcount = co_kwonlyargcount + self.co_nlocals = co_nlocals + self.co_stacksize = co_stacksize + self.co_flags = co_flags + self.co_code = co_code + self.co_consts = co_consts + self.co_names = co_names + self.co_varnames = co_varnames + self.co_filename = co_filename + self.co_name = co_name + self.co_firstlineno = co_firstlineno + self.co_lnotab = co_lnotab + self.co_freevars = co_freevars + self.co_cellvars = co_cellvars + class Scanner3(scan.Scanner): diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index 4f73941c..1bd91d0e 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -481,7 +481,7 @@ class Traverser(pysource.Walker, object): self.prec = 27 code = node[-5].attr - assert inspect.iscode(code) + assert hasattr(code, 'co_name') code = Code(code, self.scanner, self.currentclass) # assert isinstance(code, Code) @@ -524,7 +524,8 @@ class Traverser(pysource.Walker, object): self.prec = 27 code = node[code_index].attr - assert inspect.iscode(code) + assert hasattr(code, 'co_name') + ## Or Code3 code = Code(code, self.scanner, self.currentclass) # assert isinstance(code, Code) @@ -1244,7 +1245,7 @@ class Traverser(pysource.Walker, object): def deparse_code(version, co, out=StringIO(), showasm=False, showast=False, showgrammar=False): - assert 
inspect.iscode(co) + assert hasattr(co, 'co_name') # store final output stream for case of error scanner = get_scanner(version) diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 4841599c..8996ebbc 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -981,7 +981,7 @@ class Walker(GenericASTTraversal, object): self.prec = 27 code = node[code_index].attr - assert inspect.iscode(code) + assert hasattr(code, 'co_name') code = Code(code, self.scanner, self.currentclass) ast = self.build_ast(code._tokens, code._customize) @@ -1027,7 +1027,7 @@ class Walker(GenericASTTraversal, object): self.prec = 27 code = node[code_index].attr - assert inspect.iscode(code) + assert hasattr(code, 'co_name') code = Code(code, self.scanner, self.currentclass) # assert isinstance(code, Code) @@ -1446,7 +1446,7 @@ class Walker(GenericASTTraversal, object): defparams = node[:node[-1].attr] code = node[code_index].attr - assert inspect.iscode(code) + assert hasattr(code, 'co_name') code = Code(code, self.scanner, self.currentclass) # assert isinstance(code, Code) @@ -1513,7 +1513,7 @@ class Walker(GenericASTTraversal, object): def build_class(self, code): """Dump class definition, doc string and class body.""" - assert inspect.iscode(code) + assert hasattr(code, 'co_name') self.classes.append(self.currentclass) code = Code(code, self.scanner, self.currentclass) # assert isinstance(code, Code) @@ -1624,7 +1624,7 @@ def deparse_code(version, co, out=sys.stdout, showasm=False, showast=False, disassembles and deparses a given code block 'co' """ - assert inspect.iscode(co) + assert hasattr(co, 'co_name') # store final output stream for case of error scanner = get_scanner(version)