From ead41d7a966cedbaa68428af5b6c2e5abe34572d Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 29 May 2016 19:13:57 -0400 Subject: [PATCH] Bang again on Python 2.5 and 2.6 scanners --- uncompyle6/scanners/scanner2.py | 17 ++--- uncompyle6/scanners/scanner25.py | 118 +++++++------------------------ uncompyle6/scanners/scanner26.py | 117 +++++++++++++++--------------- 3 files changed, 92 insertions(+), 160 deletions(-) diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 3c12404c..0424dc46 100755 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -35,6 +35,7 @@ class Scanner2(scan.Scanner): def __init__(self, version): scan.Scanner.__init__(self, version) self.pop_jump_if = frozenset([self.opc.PJIF, self.opc.PJIT]) + self.jump_forward = frozenset([self.opc.JA, self.opc.JF]) def disassemble(self, co, classname=None, code_objects={}): """ @@ -465,7 +466,7 @@ class Scanner2(scan.Scanner): jump_back = self.last_instr(start, end, self.opc.JA, next_line_byte, False) - if jump_back and jump_back != self.prev[end] and code[jump_back+3] in (self.opc.JA, self.opc.JF): + if jump_back and jump_back != self.prev[end] and code[jump_back+3] in self.jump_forward: if code[self.prev[end]] == self.opc.RETURN_VALUE or \ (code[self.prev[end]] == self.opc.POP_BLOCK and code[self.prev[self.prev[end]]] == self.opc.RETURN_VALUE): jump_back = None @@ -483,8 +484,8 @@ class Scanner2(scan.Scanner): else: if self.get_target(jump_back) >= next_line_byte: jump_back = self.last_instr(start, end, self.opc.JA, start, False) - if end > jump_back+4 and code[end] in (self.opc.JF, self.opc.JA): - if code[jump_back+4] in (self.opc.JA, self.opc.JF): + if end > jump_back+4 and code[end] in self.jump_forward: + if code[jump_back+4] in self.jump_forward: if self.get_target(jump_back+4) == self.get_target(end): self.fixed_jumps[pos] = jump_back+4 end = jump_back+4 @@ -583,7 +584,7 @@ class Scanner2(scan.Scanner): match = self.remove_mid_line_ifs(match) if match: - if code[pre[rtarget]] in (self.opc.JF, self.opc.JA) \ + if code[pre[rtarget]] in self.jump_forward \ and pre[rtarget] not in self.stmts \ and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget: if code[pre[pre[rtarget]]] == self.opc.JA \ @@ -625,17 +626,17 @@ class Scanner2(scan.Scanner): next = self.next_stmt[pos] if pre[next] == pos: pass - elif code[next] in (self.opc.JF, self.opc.JA) and target == self.get_target(next): + elif code[next] in self.jump_forward and target == self.get_target(next): if code[pre[next]] == self.opc.PJIF: if code[next] == self.opc.JF or target != rtarget or code[pre[pre[rtarget]]] not in (self.opc.JA, self.opc.RETURN_VALUE): self.fixed_jumps[pos] = pre[next] return - elif code[next] == self.opc.JA and code[target] in (self.opc.JA, self.opc.JF): + elif code[next] == self.opc.JA and code[target] in self.jump_forward: next_target = self.get_target(next) if self.get_target(target) == next_target: self.fixed_jumps[pos] = pre[next] return - elif code[next_target] in (self.opc.JA, self.opc.JF) and self.get_target(next_target) == self.get_target(target): + elif code[next_target] in self.jump_forward and self.get_target(next_target) == self.get_target(target): self.fixed_jumps[pos] = pre[next] return @@ -655,7 +656,7 @@ class Scanner2(scan.Scanner): else: rtarget = pre[rtarget] # does the if jump just beyond a jump op, then this is probably an if statement - if code[pre[rtarget]] in (self.opc.JA, self.opc.JF): + if code[pre[rtarget]] in self.jump_forward: if_end = self.get_target(pre[rtarget]) # is this a loop not an if? diff --git a/uncompyle6/scanners/scanner25.py b/uncompyle6/scanners/scanner25.py index d6399827..3fc3e8d2 100755 --- a/uncompyle6/scanners/scanner25.py +++ b/uncompyle6/scanners/scanner25.py @@ -1,7 +1,7 @@ # Copyright (c) 2015-2016 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel -# +# Copyright (c) 1999 John Aycock """ Python 2.5 bytecode scanner/deparser @@ -10,11 +10,8 @@ Python 3 and other versions of Python. Also, we save token information for later use in deparsing. """ -from collections import namedtuple -from array import array - -import dis from uncompyle6.opcodes.opcode_25 import * +from xdis.bytecode import findlinestarts import uncompyle6.scanners.scanner2 as scan class Scanner25(scan.Scanner2): @@ -28,23 +25,19 @@ class Scanner25(scan.Scanner2): The main part of this procedure is modelled after dis.disassemble(). ''' - rv = [] + + # import dis; dis.disassemble(co) # DEBUG + + # Container for tokens + tokens = [] customize = {} Token = self.Token # shortcut - self.code = array('B', co.co_code) - for i in self.op_range(0, len(self.code)): - if self.code[i] in (RETURN_VALUE, END_FINALLY): - n = i + 1 - self.code = array('B', co.co_code[:n]) - # linestarts is a tuple of (offset, line number. - # Turn that in a has that we can index - self.linestarts = list(dis.findlinestarts(co)) - linestartoffsets = {} - for offset, lineno in self.linestarts: - linestartoffsets[offset] = lineno + n = self.setup_code(co) + self.build_lines_data(co, n) - self.prev = [0] + # linestarts contains block code adresses (addr,block) + self.linestarts = list(findlinestarts(co)) # class and names if classname: @@ -68,42 +61,25 @@ class Scanner25(scan.Scanner2): self.toChange = [] self.restructBytecode() codelen = len(self.code) - # mapping adresses of prev instru + + # mapping adresses of previous instruction + self.prev = [0] for i in self.op_range(0, codelen): op = self.code[i] self.prev.append(i) if self.op_hasArgument(op): self.prev.append(i) self.prev.append(i) - j = 0 - linestarts = self.linestarts - self.lines = [] - linetuple = namedtuple('linetuple', ['l_no', 'next']) - - # linestarts is a tuple of (offset, line number). - # Turn that in a has that we can index - linestartoffsets = {} - for offset, lineno in linestarts: - linestartoffsets[offset] = lineno - - (prev_start_byte, prev_line_no) = linestarts[0] - for (start_byte, line_no) in linestarts[1:]: - while j < start_byte: - self.lines.append(linetuple(prev_line_no, start_byte)) - j += 1 - prev_line_no = start_byte - while j < codelen: - self.lines.append(linetuple(prev_line_no, codelen)) - j+=1 self.load_asserts = set() for i in self.op_range(0, codelen): - if self.code[i] == PJIT and self.code[i+3] == LOAD_GLOBAL: + if self.code[i] == self.opc.PJIT and self.code[i + 3] == self.opc.LOAD_GLOBAL: if names[self.get_argument(i+3)] == 'AssertionError': self.load_asserts.add(i+3) # self.lines contains (block,addrLastInstr) - cf = self.find_jump_targets(self.code) + cf = self.find_jump_targets() + # contains (code, [addrRefToCode]) last_stmt = self.next_stmt[0] i = self.next_stmt[last_stmt] @@ -136,7 +112,7 @@ class Scanner25(scan.Scanner2): if offset in cf: k = 0 for j in cf[offset]: - rv.append(Token('COME_FROM', None, repr(j), + tokens.append(Token('COME_FROM', None, repr(j), offset="%s_%d" % (offset, k) )) k += 1 if self.op_hasArgument(op): @@ -219,17 +195,17 @@ class Scanner25(scan.Scanner2): if offset in self.return_end_ifs: op_name = 'RETURN_END_IF' - if offset in linestartoffsets: - linestart = linestartoffsets[offset] + if offset in self.linestartoffsets: + linestart = self.linestartoffsets[offset] else: linestart = None if offset not in replace: - rv.append(Token(op_name, oparg, pattr, offset, linestart)) + tokens.append(Token(op_name, oparg, pattr, offset, linestart)) else: - rv.append(Token(replace[offset], oparg, pattr, offset, linestart)) + tokens.append(Token(replace[offset], oparg, pattr, offset, linestart)) - return rv, customize + return tokens, customize def getOpcodeToDel(self, i): ''' @@ -866,49 +842,7 @@ class Scanner25(scan.Scanner2): 'start': start, 'end': rtarget}) self.return_end_ifs.add(pre[rtarget]) - - def find_jump_targets(self, code): - ''' - Detect all offsets in a byte code which are jump targets. - - Return the list of offsets. - - This procedure is modelled after dis.findlables(), but here - for each target the number of jumps are counted. - ''' - - n = len(code) - self.structs = [{'type': 'root', - 'start': 0, - 'end': n-1}] - self.loops = [] # All loop entry points - self.fixed_jumps = {} # Map fixed jumps to their real destination - self.ignore_if = set() - self.build_stmt_indices() - self.not_continue = set() - self.return_end_ifs = set() - - targets = {} - for i in self.op_range(0, n): - op = code[i] - - # Determine structures and fix jumps for 2.3+ - self.detect_structure(i, op) - - if self.op_hasArgument(op): - label = self.fixed_jumps.get(i) - oparg = self.get_argument(i) - if label is None: - if op in hasjrel and op != FOR_ITER: - label = i + 3 + oparg - # elif op in hasjabs: Pas de gestion des jump abslt - # if op in (PJIF, PJIT): Or pop a faire - # if (oparg > i): - # label = oparg - if label is not None and label != -1: - targets[label] = targets.get(label, []) + [i] - elif op == END_FINALLY and i in self.fixed_jumps: - label = self.fixed_jumps[i] - targets[label] = targets.get(label, []) + [i] - return targets + pass + pass + return pass diff --git a/uncompyle6/scanners/scanner26.py b/uncompyle6/scanners/scanner26.py index 40bc645a..dee8f97b 100755 --- a/uncompyle6/scanners/scanner26.py +++ b/uncompyle6/scanners/scanner26.py @@ -2,7 +2,6 @@ # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock - """ Python 2.6 bytecode scanner @@ -11,10 +10,6 @@ other versions of Python. Also, we save token information for later use in deparsing. """ -from collections import namedtuple -from array import array - -from uncompyle6.opcodes.opcode_26 import * from xdis.bytecode import findlinestarts import uncompyle6.scanners.scanner2 as scan @@ -87,7 +82,7 @@ class Scanner26(scan.Scanner2): # linestarts contains block code adresses (addr,block) self.linestarts = list(findlinestarts(co)) - self.prev = [0] + # class and names if classname: classname = '_' + classname.lstrip('_') + '__' @@ -110,7 +105,9 @@ class Scanner26(scan.Scanner2): self.toChange = [] self.restructBytecode() codelen = len(self.code) - # mapping adresses of prev instru + + # mapping adresses of prev instruction + self.prev = [0] for i in self.op_range(0, codelen): op = self.code[i] self.prev.append(i) @@ -279,38 +276,38 @@ class Scanner26(scan.Scanner2): self.opc.RETURN_VALUE): toDel = [] # del POP_TOP - if self.code[i+opsize] == POP_TOP: + if self.code[i+opsize] == self.opc.POP_TOP: if self.code[i+opsize] == self.code[i+opsize+1] and self.code[i+opsize] == self.code[i+opsize+2] \ - and opcode in (JF, JA) and self.code[i+opsize] != self.code[i+opsize+3]: + and opcode in self.jump_forward and self.code[i+opsize] != self.code[i+opsize+3]: pass else: toDel += [i+opsize] # conditional tuple (not optimal at all, no good solution...) - if self.code[i] == JA and self.code[i+opsize] == POP_TOP \ - and self.code[i+opsize+1] == JA and self.code[i+opsize+4] == POP_BLOCK: + if self.code[i] == self.opc.JA and self.code[i+opsize] == self.opc.POP_TOP \ + and self.code[i+opsize+1] == self.opc.JA and self.code[i+opsize+4] == self.opc.POP_BLOCK: jmpabs1target = self.get_target(i) jmpabs2target = self.get_target(i+opsize+1) - if jmpabs1target == jmpabs2target and self.code[jmpabs1target] == FOR_ITER \ - and self.code[jmpabs1target-1] != GET_ITER: + if jmpabs1target == jmpabs2target and self.code[jmpabs1target] == self.opc.FOR_ITER \ + and self.code[jmpabs1target-1] != self.opc.GET_ITER: destFor = self.get_target(jmpabs1target) if destFor == i+opsize+4: - setupLoop = self.last_instr(0, jmpabs1target, SETUP_LOOP) - standarFor = self.last_instr(setupLoop, jmpabs1target, GET_ITER) + setupLoop = self.last_instr(0, jmpabs1target, self.opc.SETUP_LOOP) + standarFor = self.last_instr(setupLoop, jmpabs1target, self.opc.GET_ITER) if standarFor is None: - self.restructJump(jmpabs1target, destFor+self.op_size(POP_BLOCK)) + self.restructJump(jmpabs1target, destFor+self.op_size(self.opc.POP_BLOCK)) toDel += [setupLoop, i+opsize+1, i+opsize+4] if len(toDel) > 0: return toDel return None # raise_varags not realy handle for the moment - if opcode == RAISE_VARARGS: - if self.code[i+opsize] == POP_TOP: + if opcode == self.opc.RAISE_VARARGS: + if self.code[i+opsize] == self.opc.POP_TOP: return [i+opsize] # modification of list structure - if opcode == BUILD_LIST: - if (self.code[i+opsize] == DUP_TOP and - self.code[i+opsize+1] in (STORE_NAME, STORE_FAST)): + if opcode == self.opc.BUILD_LIST: + if (self.code[i+opsize] == self.opc.DUP_TOP and + self.code[i+opsize+1] in (self.opc.STORE_NAME, self.opc.STORE_FAST)): # del DUP/STORE_NAME x toDel = [i+opsize, i+opsize+1] nameDel = self.get_argument(i+opsize+1) @@ -318,58 +315,58 @@ class Scanner26(scan.Scanner2): end = start # del LOAD_NAME x while end < len(self.code): - end = self.first_instr(end, len(self.code), (LOAD_NAME, LOAD_FAST)) + end = self.first_instr(end, len(self.code), (self.opc.LOAD_NAME, self.opc.LOAD_FAST)) if nameDel == self.get_argument(end): toDel += [end] break - if self.code[end] == LOAD_NAME: - end += self.op_size(LOAD_NAME) + if self.code[end] == self.opc.LOAD_NAME: + end += self.op_size(self.opc.LOAD_NAME) else: - end += self.op_size(LOAD_FAST) + end += self.op_size(self.opc.LOAD_FAST) # log JA/POP_TOP to del and update PJIF while start < end: - start = self.first_instr(start, end, (PJIF, PJIT)) + start = self.first_instr(start, end, self.pop_jump_if) if start is None: break target = self.get_target(start) - if self.code[target] == POP_TOP and self.code[target-3] == JA: + if self.code[target] == self.opc.POP_TOP and self.code[target-3] == self.opc.JA: toDel += [target, target-3] # update PJIF target = self.get_target(target-3) self.restructJump(start, target) - start += self.op_size(PJIF) + start += self.op_size(self.opc.PJIF) # del DELETE_NAME x start = end while end < len(self.code): end = self.first_instr(end, len(self.code), - (DELETE_NAME, DELETE_FAST)) + (self.opc.DELETE_NAME, self.opc.DELETE_FAST)) if nameDel == self.get_argument(end): toDel += [end] break - if self.code[end] == DELETE_NAME: - end += self.op_size(DELETE_NAME) + if self.code[end] == self.opc.DELETE_NAME: + end += self.op_size(self.opc.DELETE_NAME) else: - end += self.op_size(DELETE_FAST) + end += self.op_size(self.opc.DELETE_FAST) return toDel # for / while struct - if opcode == SETUP_LOOP: + if opcode == self.opc.SETUP_LOOP: # change join(for..) struct - if self.code[i+3] == LOAD_FAST and self.code[i+6] == FOR_ITER: - end = self.first_instr(i, len(self.code), RETURN_VALUE) - end = self.first_instr(i, end, YIELD_VALUE) - if end and self.code[end+1] == POP_TOP and self.code[end+2] == JA and self.code[end+5] == POP_BLOCK: + if self.code[i+3] == self.opc.LOAD_FAST and self.code[i+6] == self.opc.FOR_ITER: + end = self.first_instr(i, len(self.code), self.opc.RETURN_VALUE) + end = self.first_instr(i, end, self.opc.YIELD_VALUE) + if end and self.code[end+1] == self.opc.POP_TOP and self.code[end+2] == self.opc.JA and self.code[end+5] == self.opc.POP_BLOCK: return [i, end+5] # with stmt - if opcode == WITH_CLEANUP: - allRot = self.all_instr(0, i, (ROT_TWO)) + if opcode == self.opc.WITH_CLEANUP: + allRot = self.all_instr(0, i, (self.opc.ROT_TWO)) chckRot = -1 for rot in allRot: - if self.code[rot+1] == LOAD_ATTR and self.code[rot-3] == LOAD_ATTR \ - and self.code[rot-4] == DUP_TOP: + if self.code[rot+1] == self.opc.LOAD_ATTR and self.code[rot-3] == self.opc.LOAD_ATTR \ + and self.code[rot-4] == self.opc.DUP_TOP: chckRot = rot assert chckRot > 0 toDel = [chckRot-4, chckRot-3, chckRot] chckStp = -1 - allSetup = self.all_instr(chckRot+1, i, (SETUP_FINALLY)) + allSetup = self.all_instr(chckRot+1, i, (self.opc.SETUP_FINALLY)) for stp in allSetup: if i == self.get_target(stp): chckStp = stp @@ -379,16 +376,16 @@ class Scanner26(scan.Scanner2): while chckDel < chckStp-3: toDel += [chckDel] chckDel += self.op_size(self.code[chckDel]) - if (self.code[chckStp-3] in (STORE_NAME, STORE_FAST) and - self.code[chckStp+3] in (LOAD_NAME, LOAD_FAST) - and self.code[chckStp+6] in (DELETE_NAME, DELETE_FAST)): + if (self.code[chckStp-3] in (self.opc.STORE_NAME, self.opc.STORE_FAST) and + self.code[chckStp+3] in (self.opc.LOAD_NAME, self.opc.LOAD_FAST) + and self.code[chckStp+6] in (self.opc.DELETE_NAME, self.opc.DELETE_FAST)): toDel += [chckStp-3, chckStp+3, chckStp+6] # SETUP_WITH opcode dosen't exist in 2.6 but is necessary for the grammar - self.code[chckRot+1] = JUMP_ABSOLUTE # ugly hack + self.code[chckRot+1] = self.opc.JUMP_ABSOLUTE # ugly hack self.restructJump(chckRot+1, i) self.toChange.append(chckRot+1) return toDel - if opcode == NOP: + if opcode == self.opc.NOP: return [i] return None @@ -580,7 +577,7 @@ class Scanner26(scan.Scanner2): (line_no, next_line_byte) = self.lines[pos] jump_back = self.last_instr(start, end, self.opc.JA, next_line_byte, False) if (jump_back and jump_back != self.prev[end] - and code[jump_back + 3] in (self.opc.JA, self.opc.JF)): + and code[jump_back + 3] in self.jump_forward): if (code[self.prev[end]] == self.opc.RETURN_VALUE or code[self.prev[end]] == self.opc.POP_BLOCK and code[self.prev[self.prev[end]]] == self.opc.RETURN_VALUE): @@ -711,16 +708,16 @@ class Scanner26(scan.Scanner2): or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]]) and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], self.pop_jump_if, target)))): pass - elif code[pre[pre[rtarget]]] == RETURN_VALUE \ + elif code[pre[pre[rtarget]]] == self.opc.RETURN_VALUE \ and self.remove_mid_line_ifs([pos]) \ and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], - (PJIF, PJIT), target))) + self.pop_jump_if, target))) | set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], - (PJIF, PJIT, JA), pre[rtarget], True))))): + (self.opc.PJIF, self.opc.PJIT, self.opc.JA), pre[rtarget], True))))): pass else: fix = None - jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF) + jump_ifs = self.all_instr(start, self.next_stmt[pos], self.opc.PJIF) last_jump_good = True for j in jump_ifs: if target == self.get_target(j): @@ -731,7 +728,7 @@ class Scanner26(scan.Scanner2): last_jump_good = False self.fixed_jumps[pos] = fix or match[-1] return - elif pos < rtarget and code[target] == ROT_TWO: + elif pos < rtarget and code[target] == self.opc.ROT_TWO: self.fixed_jumps[pos] = target return else: @@ -739,19 +736,19 @@ class Scanner26(scan.Scanner2): return else: # op == PJIT if (pos+3) in self.load_asserts: - if code[pre[rtarget]] == RAISE_VARARGS: + if code[pre[rtarget]] == self.opc.RAISE_VARARGS: return self.load_asserts.remove(pos+3) next = self.next_stmt[pos] if pre[next] == pos: pass - elif code[next] in (JF, JA) and target == self.get_target(next): - if code[pre[next]] == PJIF: - if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE): + elif code[next] in self.jump_forward and target == self.get_target(next): + if code[pre[next]] == self.opc.PJIF: + if code[next] == self.opc.JF or target != rtarget or code[pre[pre[rtarget]]] not in (self.opc.JA, self.opc.RETURN_VALUE): self.fixed_jumps[pos] = pre[next] return - elif code[next] == JA and code[target] in (JA, JF) \ + elif code[next] == self.opc.JA and code[target] in self.opc.jump_foward \ and self.get_target(target) == self.get_target(next): self.fixed_jumps[pos] = pre[next] return @@ -759,9 +756,9 @@ class Scanner26(scan.Scanner2): if pos in self.ignore_if: return - if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \ + if code[pre[rtarget]] == self.opc.JA and pre[rtarget] in self.stmts \ and pre[rtarget] != pos and pre[pre[rtarget]] != pos \ - and not (code[rtarget] == JA and code[rtarget+3] == POP_BLOCK and code[pre[pre[rtarget]]] != JA): + and not (code[rtarget] == self.opc.JA and code[rtarget+3] == self.opc.POP_BLOCK and code[pre[pre[rtarget]]] != self.opc.JA): rtarget = pre[rtarget] # does the if jump just beyond a jump op, then this is probably an if statement if code[pre[rtarget]] in (self.opc.JA, self.opc.JF):