From dea17cd7f18f8e0916b085c53ea443a8c8a6223c Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 6 Nov 2017 00:38:22 -0500 Subject: [PATCH 1/2] xdis _disassemble->disassemble --- uncompyle6/scanners/scanner2.py | 2 +- uncompyle6/scanners/scanner26.py | 2 +- uncompyle6/scanners/scanner3.py | 2 +- uncompyle6/scanners/scanner36.py | 341 ++++++++++++++++++++++++++++++- 4 files changed, 343 insertions(+), 4 deletions(-) diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 538666d6..a006088e 100644 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -91,7 +91,7 @@ class Scanner2(Scanner): from xdis.bytecode import Bytecode bytecode = Bytecode(co, self.opc) for instr in bytecode.get_instructions(co): - print(instr._disassemble()) + print(instr.disassemble()) # list of tokens/instructions tokens = [] diff --git a/uncompyle6/scanners/scanner26.py b/uncompyle6/scanners/scanner26.py index 4936273c..b2d49b02 100755 --- a/uncompyle6/scanners/scanner26.py +++ b/uncompyle6/scanners/scanner26.py @@ -93,7 +93,7 @@ class Scanner26(scan.Scanner2): from xdis.bytecode import Bytecode bytecode = Bytecode(co, self.opc) for instr in bytecode.get_instructions(co): - print(instr._disassemble()) + print(instr.disassemble()) # Container for tokens tokens = [] diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index fb531367..e511a925 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -162,7 +162,7 @@ class Scanner3(Scanner): if show_asm in ('both', 'before'): bytecode = Bytecode(co, self.opc) for instr in bytecode.get_instructions(co): - print(instr._disassemble()) + print(instr.disassemble()) # list of tokens/instructions tokens = [] diff --git a/uncompyle6/scanners/scanner36.py b/uncompyle6/scanners/scanner36.py index 08746908..d3b5f862 100644 --- a/uncompyle6/scanners/scanner36.py +++ b/uncompyle6/scanners/scanner36.py @@ -13,6 +13,12 @@ from __future__ import print_function from uncompyle6.scanners.scanner3 import Scanner3 +from uncompyle6.scanner import Token, parse_fn_counts +from xdis.code import iscode +from xdis.bytecode import Bytecode +import xdis +from array import array + # bytecode verification, verify(), uses JUMP_OPS from here from xdis.opcodes import opcode_36 as opc JUMP_OPS = opc.JUMP_OPS @@ -24,7 +30,7 @@ class Scanner36(Scanner3): return def ingest(self, co, classname=None, code_objects={}, show_asm=None): - tokens, customize = Scanner3.ingest(self, co, classname, code_objects, show_asm) + tokens, customize = self.ingest_internal(co, classname, code_objects, show_asm) for t in tokens: # The lowest bit of flags indicates whether the # var-keyword argument is placed at the top of the stack @@ -40,6 +46,339 @@ class Scanner36(Scanner3): pass return tokens, customize + def ingest_internal(self, co, classname=None, code_objects={}, show_asm=None): + """ + Pick out tokens from an uncompyle6 code object, and transform them, + returning a list of uncompyle6 'Token's. + + The transformations are made to assist the deparsing grammar. + Specificially: + - various types of LOAD_CONST's are categorized in terms of what they load + - COME_FROM instructions are added to assist parsing control structures + - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments + + Also, when we encounter certain tokens, we add them to a set which will cause custom + grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST + cause specific rules for the specific number of arguments they take. + """ + + # FIXME: remove this when all subsidiary functions have been removed. + # We should be able to get everything from the self.insts list. + self.code = array('B', co.co_code) + + show_asm = self.show_asm if not show_asm else show_asm + # show_asm = 'both' + if show_asm in ('both', 'before'): + bytecode = Bytecode(co, self.opc) + for instr in bytecode.get_instructions(co): + print(instr.disassemble()) + + # list of tokens/instructions + tokens = [] + + # "customize" is a dict whose keys are nonterminals + # and the value is the argument stack entries for that + # nonterminal. The count is a little hoaky. It is mostly + # not used, but sometimes it is. + customize = {} + if self.is_pypy: + customize['PyPy'] = 0 + + self.build_lines_data(co) + self.build_prev_op() + + bytecode = Bytecode(co, self.opc) + + # FIXME: put as its own method? + # Scan for assertions. Later we will + # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'. + # 'LOAD_ASSERT' is used in assert statements. + self.load_asserts = set() + self.insts = list(bytecode) + n = len(self.insts) + for i, inst in enumerate(self.insts): + # We need to detect the difference between + # "raise AssertionError" and "assert" + # If we have a JUMP_FORWARD after the + # RAISE_VARARGS then we have a "raise" statement + # else we have an "assert" statement. + if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n: + next_inst = self.insts[i+1] + if (next_inst.opname == 'LOAD_GLOBAL' and + next_inst.argval == 'AssertionError'): + if (i + 2 < n and self.insts[i+2].opname.startswith('RAISE_VARARGS')): + self.load_asserts.add(next_inst.offset) + pass + pass + + # Get jump targets + # Format: {target offset: [jump offsets]} + jump_targets = self.find_jump_targets(show_asm) + # print("XXX2", jump_targets) + last_op_was_break = False + + for i, inst in enumerate(bytecode): + + argval = inst.argval + op = inst.opcode + if op == self.opc.EXTENDED_ARG: + continue + + if inst.offset in jump_targets: + jump_idx = 0 + # We want to process COME_FROMs to the same offset to be in *descending* + # offset order so we have the larger range or biggest instruction interval + # last. (I think they are sorted in increasing order, but for safety + # we sort them). That way, specific COME_FROM tags will match up + # properly. For example, a "loop" with an "if" nested in it should have the + # "loop" tag last so the grammar rule matches that properly. + for jump_offset in sorted(jump_targets[inst.offset], reverse=True): + come_from_name = 'COME_FROM' + opname = self.opname_for_offset(jump_offset) + if opname.startswith('SETUP_'): + come_from_type = opname[len('SETUP_'):] + come_from_name = 'COME_FROM_%s' % come_from_type + pass + elif inst.offset in self.except_targets: + come_from_name = 'COME_FROM_EXCEPT_CLAUSE' + tokens.append(Token(come_from_name, + None, repr(jump_offset), + offset='%s_%s' % (inst.offset, jump_idx), + has_arg = True, opc=self.opc)) + jump_idx += 1 + pass + pass + elif inst.offset in self.else_start: + end_offset = self.else_start[inst.offset] + tokens.append(Token('ELSE', + None, repr(end_offset), + offset='%s' % (inst.offset), + has_arg = True, opc=self.opc)) + + pass + + pattr = inst.argrepr + opname = inst.opname + + if opname in ['LOAD_CONST']: + const = argval + if iscode(const): + if const.co_name == '': + opname = 'LOAD_LAMBDA' + elif const.co_name == '': + opname = 'LOAD_GENEXPR' + elif const.co_name == '': + opname = 'LOAD_DICTCOMP' + elif const.co_name == '': + opname = 'LOAD_SETCOMP' + elif const.co_name == '': + opname = 'LOAD_LISTCOMP' + # verify() uses 'pattr' for comparison, since 'attr' + # now holds Code(const) and thus can not be used + # for comparison (todo: think about changing this) + # pattr = 'code_object @ 0x%x %s->%s' %\ + # (id(const), const.co_filename, const.co_name) + pattr = '' + else: + pattr = const + pass + elif opname in ('MAKE_FUNCTION', 'MAKE_CLOSURE'): + if self.version >= 3.6: + # 3.6+ doesn't have MAKE_CLOSURE, so opname == 'MAKE_FUNCTION' + flags = argval + opname = 'MAKE_FUNCTION_%d' % (flags) + attr = [] + for flag in self.MAKE_FUNCTION_FLAGS: + bit = flags & 1 + if bit: + if pattr: + pattr += ", " + flag + else: + pattr += flag + attr.append(bit) + flags >>= 1 + attr = attr[:4] # remove last value: attr[5] == False + else: + pos_args, name_pair_args, annotate_args = parse_fn_counts(inst.argval) + pattr = ("%d positional, %d keyword pair, %d annotated" % + (pos_args, name_pair_args, annotate_args)) + if name_pair_args > 0: + opname = '%s_N%d' % (opname, name_pair_args) + pass + if annotate_args > 0: + opname = '%s_A_%d' % (opname, annotate_args) + pass + opname = '%s_%d' % (opname, pos_args) + attr = (pos_args, name_pair_args, annotate_args) + tokens.append( + Token( + opname = opname, + attr = attr, + pattr = pattr, + offset = inst.offset, + linestart = inst.starts_line, + op = op, + has_arg = inst.has_arg, + opc = self.opc + ) + ) + continue + elif op in self.varargs_ops: + pos_args = argval + if self.is_pypy and not pos_args and opname == 'BUILD_MAP': + opname = 'BUILD_MAP_n' + else: + opname = '%s_%d' % (opname, pos_args) + elif self.is_pypy and opname in ('CALL_METHOD', 'JUMP_IF_NOT_DEBUG'): + # The value in the dict is in special cases in semantic actions, such + # as CALL_FUNCTION. The value is not used in these cases, so we put + # in arbitrary value 0. + customize[opname] = 0 + elif opname == 'UNPACK_EX': + # FIXME: try with scanner and parser by + # changing argval + before_args = argval & 0xFF + after_args = (argval >> 8) & 0xff + pattr = "%d before vararg, %d after" % (before_args, after_args) + argval = (before_args, after_args) + opname = '%s_%d+%d' % (opname, before_args, after_args) + + elif op == self.opc.JUMP_ABSOLUTE: + # Further classify JUMP_ABSOLUTE into backward jumps + # which are used in loops, and "CONTINUE" jumps which + # may appear in a "continue" statement. The loop-type + # and continue-type jumps will help us classify loop + # boundaries The continue-type jumps help us get + # "continue" statements with would otherwise be turned + # into a "pass" statement because JUMPs are sometimes + # ignored in rules as just boundary overhead. In + # comprehensions we might sometimes classify JUMP_BACK + # as CONTINUE, but that's okay since we add a grammar + # rule for that. + pattr = argval + # FIXME: 0 isn't always correct + target = self.get_target(inst.offset, 0) + if target <= inst.offset: + next_opname = self.opname[self.code[inst.offset+3]] + if (inst.offset in self.stmts and + (self.version != 3.0 or (hasattr(inst, 'linestart'))) and + (next_opname not in ('END_FINALLY', 'POP_BLOCK', + # Python 3.0 only uses POP_TOP + 'POP_TOP'))): + opname = 'CONTINUE' + else: + opname = 'JUMP_BACK' + # FIXME: this is a hack to catch stuff like: + # if x: continue + # the "continue" is not on a new line. + # There are other situations where we don't catch + # CONTINUE as well. + if tokens[-1].kind == 'JUMP_BACK' and tokens[-1].attr <= argval: + if tokens[-2].kind == 'BREAK_LOOP': + del tokens[-1] + else: + # intern is used because we are changing the *previous* token + tokens[-1].kind = intern('CONTINUE') + if last_op_was_break and opname == 'CONTINUE': + last_op_was_break = False + continue + elif op == self.opc.RETURN_VALUE: + if inst.offset in self.return_end_ifs: + opname = 'RETURN_END_IF' + elif inst.offset in self.load_asserts: + opname = 'LOAD_ASSERT' + + last_op_was_break = opname == 'BREAK_LOOP' + tokens.append( + Token( + opname = opname, + attr = argval, + pattr = pattr, + offset = inst.offset, + linestart = inst.starts_line, + op = op, + has_arg = inst.has_arg, + opc = self.opc + ) + ) + pass + + if show_asm in ('both', 'after'): + for t in tokens: + print(t) + print() + return tokens, customize + + def find_jump_targets(self, debug): + """ + Detect all offsets in a byte code which are jump targets + where we might insert a COME_FROM instruction. + + Return the list of offsets. + + Return the list of offsets. An instruction can be jumped + to in from multiple instructions. + """ + code = self.code + n = len(code) + self.structs = [{'type': 'root', + 'start': 0, + 'end': n-1}] + + # All loop entry points + self.loops = [] + + # Map fixed jumps to their real destination + self.fixed_jumps = {} + self.except_targets = {} + self.ignore_if = set() + self.build_statement_indices() + self.else_start = {} + + # Containers filled by detect_control_flow() + self.not_continue = set() + self.return_end_ifs = set() + self.setup_loop_targets = {} # target given setup_loop offset + self.setup_loops = {} # setup_loop offset given target + + targets = {} + extended_arg = 0 + for i, inst in enumerate(self.insts): + offset = inst.offset + op = inst.opcode + + self.detect_control_flow(offset, targets, extended_arg) + + if inst.has_arg: + label = self.fixed_jumps.get(offset) + oparg = inst.arg + next_offset = xdis.next_offset(op, self.opc, offset) + + if label is None: + if op in self.opc.hasjrel and op != self.opc.FOR_ITER: + label = next_offset + oparg + elif op in self.opc.hasjabs: + if op in self.jump_if_pop: + if oparg > offset: + label = oparg + + if label is not None and label != -1: + targets[label] = targets.get(label, []) + [offset] + elif op == self.opc.END_FINALLY and offset in self.fixed_jumps: + label = self.fixed_jumps[offset] + targets[label] = targets.get(label, []) + [offset] + pass + + extended_arg = 0 + pass # for loop + + # DEBUG: + if debug in ('both', 'after'): + import pprint as pp + pp.pprint(self.structs) + + return targets + pass if __name__ == "__main__": From 6dbdaedf7abda769daef2a19a0795c177d7b59a4 Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 6 Nov 2017 00:45:04 -0500 Subject: [PATCH 2/2] Revert change that should have been in a branch --- uncompyle6/scanners/scanner36.py | 341 +------------------------------ 1 file changed, 1 insertion(+), 340 deletions(-) diff --git a/uncompyle6/scanners/scanner36.py b/uncompyle6/scanners/scanner36.py index d3b5f862..08746908 100644 --- a/uncompyle6/scanners/scanner36.py +++ b/uncompyle6/scanners/scanner36.py @@ -13,12 +13,6 @@ from __future__ import print_function from uncompyle6.scanners.scanner3 import Scanner3 -from uncompyle6.scanner import Token, parse_fn_counts -from xdis.code import iscode -from xdis.bytecode import Bytecode -import xdis -from array import array - # bytecode verification, verify(), uses JUMP_OPS from here from xdis.opcodes import opcode_36 as opc JUMP_OPS = opc.JUMP_OPS @@ -30,7 +24,7 @@ class Scanner36(Scanner3): return def ingest(self, co, classname=None, code_objects={}, show_asm=None): - tokens, customize = self.ingest_internal(co, classname, code_objects, show_asm) + tokens, customize = Scanner3.ingest(self, co, classname, code_objects, show_asm) for t in tokens: # The lowest bit of flags indicates whether the # var-keyword argument is placed at the top of the stack @@ -46,339 +40,6 @@ class Scanner36(Scanner3): pass return tokens, customize - def ingest_internal(self, co, classname=None, code_objects={}, show_asm=None): - """ - Pick out tokens from an uncompyle6 code object, and transform them, - returning a list of uncompyle6 'Token's. - - The transformations are made to assist the deparsing grammar. - Specificially: - - various types of LOAD_CONST's are categorized in terms of what they load - - COME_FROM instructions are added to assist parsing control structures - - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments - - Also, when we encounter certain tokens, we add them to a set which will cause custom - grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST - cause specific rules for the specific number of arguments they take. - """ - - # FIXME: remove this when all subsidiary functions have been removed. - # We should be able to get everything from the self.insts list. - self.code = array('B', co.co_code) - - show_asm = self.show_asm if not show_asm else show_asm - # show_asm = 'both' - if show_asm in ('both', 'before'): - bytecode = Bytecode(co, self.opc) - for instr in bytecode.get_instructions(co): - print(instr.disassemble()) - - # list of tokens/instructions - tokens = [] - - # "customize" is a dict whose keys are nonterminals - # and the value is the argument stack entries for that - # nonterminal. The count is a little hoaky. It is mostly - # not used, but sometimes it is. - customize = {} - if self.is_pypy: - customize['PyPy'] = 0 - - self.build_lines_data(co) - self.build_prev_op() - - bytecode = Bytecode(co, self.opc) - - # FIXME: put as its own method? - # Scan for assertions. Later we will - # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'. - # 'LOAD_ASSERT' is used in assert statements. - self.load_asserts = set() - self.insts = list(bytecode) - n = len(self.insts) - for i, inst in enumerate(self.insts): - # We need to detect the difference between - # "raise AssertionError" and "assert" - # If we have a JUMP_FORWARD after the - # RAISE_VARARGS then we have a "raise" statement - # else we have an "assert" statement. - if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n: - next_inst = self.insts[i+1] - if (next_inst.opname == 'LOAD_GLOBAL' and - next_inst.argval == 'AssertionError'): - if (i + 2 < n and self.insts[i+2].opname.startswith('RAISE_VARARGS')): - self.load_asserts.add(next_inst.offset) - pass - pass - - # Get jump targets - # Format: {target offset: [jump offsets]} - jump_targets = self.find_jump_targets(show_asm) - # print("XXX2", jump_targets) - last_op_was_break = False - - for i, inst in enumerate(bytecode): - - argval = inst.argval - op = inst.opcode - if op == self.opc.EXTENDED_ARG: - continue - - if inst.offset in jump_targets: - jump_idx = 0 - # We want to process COME_FROMs to the same offset to be in *descending* - # offset order so we have the larger range or biggest instruction interval - # last. (I think they are sorted in increasing order, but for safety - # we sort them). That way, specific COME_FROM tags will match up - # properly. For example, a "loop" with an "if" nested in it should have the - # "loop" tag last so the grammar rule matches that properly. - for jump_offset in sorted(jump_targets[inst.offset], reverse=True): - come_from_name = 'COME_FROM' - opname = self.opname_for_offset(jump_offset) - if opname.startswith('SETUP_'): - come_from_type = opname[len('SETUP_'):] - come_from_name = 'COME_FROM_%s' % come_from_type - pass - elif inst.offset in self.except_targets: - come_from_name = 'COME_FROM_EXCEPT_CLAUSE' - tokens.append(Token(come_from_name, - None, repr(jump_offset), - offset='%s_%s' % (inst.offset, jump_idx), - has_arg = True, opc=self.opc)) - jump_idx += 1 - pass - pass - elif inst.offset in self.else_start: - end_offset = self.else_start[inst.offset] - tokens.append(Token('ELSE', - None, repr(end_offset), - offset='%s' % (inst.offset), - has_arg = True, opc=self.opc)) - - pass - - pattr = inst.argrepr - opname = inst.opname - - if opname in ['LOAD_CONST']: - const = argval - if iscode(const): - if const.co_name == '': - opname = 'LOAD_LAMBDA' - elif const.co_name == '': - opname = 'LOAD_GENEXPR' - elif const.co_name == '': - opname = 'LOAD_DICTCOMP' - elif const.co_name == '': - opname = 'LOAD_SETCOMP' - elif const.co_name == '': - opname = 'LOAD_LISTCOMP' - # verify() uses 'pattr' for comparison, since 'attr' - # now holds Code(const) and thus can not be used - # for comparison (todo: think about changing this) - # pattr = 'code_object @ 0x%x %s->%s' %\ - # (id(const), const.co_filename, const.co_name) - pattr = '' - else: - pattr = const - pass - elif opname in ('MAKE_FUNCTION', 'MAKE_CLOSURE'): - if self.version >= 3.6: - # 3.6+ doesn't have MAKE_CLOSURE, so opname == 'MAKE_FUNCTION' - flags = argval - opname = 'MAKE_FUNCTION_%d' % (flags) - attr = [] - for flag in self.MAKE_FUNCTION_FLAGS: - bit = flags & 1 - if bit: - if pattr: - pattr += ", " + flag - else: - pattr += flag - attr.append(bit) - flags >>= 1 - attr = attr[:4] # remove last value: attr[5] == False - else: - pos_args, name_pair_args, annotate_args = parse_fn_counts(inst.argval) - pattr = ("%d positional, %d keyword pair, %d annotated" % - (pos_args, name_pair_args, annotate_args)) - if name_pair_args > 0: - opname = '%s_N%d' % (opname, name_pair_args) - pass - if annotate_args > 0: - opname = '%s_A_%d' % (opname, annotate_args) - pass - opname = '%s_%d' % (opname, pos_args) - attr = (pos_args, name_pair_args, annotate_args) - tokens.append( - Token( - opname = opname, - attr = attr, - pattr = pattr, - offset = inst.offset, - linestart = inst.starts_line, - op = op, - has_arg = inst.has_arg, - opc = self.opc - ) - ) - continue - elif op in self.varargs_ops: - pos_args = argval - if self.is_pypy and not pos_args and opname == 'BUILD_MAP': - opname = 'BUILD_MAP_n' - else: - opname = '%s_%d' % (opname, pos_args) - elif self.is_pypy and opname in ('CALL_METHOD', 'JUMP_IF_NOT_DEBUG'): - # The value in the dict is in special cases in semantic actions, such - # as CALL_FUNCTION. The value is not used in these cases, so we put - # in arbitrary value 0. - customize[opname] = 0 - elif opname == 'UNPACK_EX': - # FIXME: try with scanner and parser by - # changing argval - before_args = argval & 0xFF - after_args = (argval >> 8) & 0xff - pattr = "%d before vararg, %d after" % (before_args, after_args) - argval = (before_args, after_args) - opname = '%s_%d+%d' % (opname, before_args, after_args) - - elif op == self.opc.JUMP_ABSOLUTE: - # Further classify JUMP_ABSOLUTE into backward jumps - # which are used in loops, and "CONTINUE" jumps which - # may appear in a "continue" statement. The loop-type - # and continue-type jumps will help us classify loop - # boundaries The continue-type jumps help us get - # "continue" statements with would otherwise be turned - # into a "pass" statement because JUMPs are sometimes - # ignored in rules as just boundary overhead. In - # comprehensions we might sometimes classify JUMP_BACK - # as CONTINUE, but that's okay since we add a grammar - # rule for that. - pattr = argval - # FIXME: 0 isn't always correct - target = self.get_target(inst.offset, 0) - if target <= inst.offset: - next_opname = self.opname[self.code[inst.offset+3]] - if (inst.offset in self.stmts and - (self.version != 3.0 or (hasattr(inst, 'linestart'))) and - (next_opname not in ('END_FINALLY', 'POP_BLOCK', - # Python 3.0 only uses POP_TOP - 'POP_TOP'))): - opname = 'CONTINUE' - else: - opname = 'JUMP_BACK' - # FIXME: this is a hack to catch stuff like: - # if x: continue - # the "continue" is not on a new line. - # There are other situations where we don't catch - # CONTINUE as well. - if tokens[-1].kind == 'JUMP_BACK' and tokens[-1].attr <= argval: - if tokens[-2].kind == 'BREAK_LOOP': - del tokens[-1] - else: - # intern is used because we are changing the *previous* token - tokens[-1].kind = intern('CONTINUE') - if last_op_was_break and opname == 'CONTINUE': - last_op_was_break = False - continue - elif op == self.opc.RETURN_VALUE: - if inst.offset in self.return_end_ifs: - opname = 'RETURN_END_IF' - elif inst.offset in self.load_asserts: - opname = 'LOAD_ASSERT' - - last_op_was_break = opname == 'BREAK_LOOP' - tokens.append( - Token( - opname = opname, - attr = argval, - pattr = pattr, - offset = inst.offset, - linestart = inst.starts_line, - op = op, - has_arg = inst.has_arg, - opc = self.opc - ) - ) - pass - - if show_asm in ('both', 'after'): - for t in tokens: - print(t) - print() - return tokens, customize - - def find_jump_targets(self, debug): - """ - Detect all offsets in a byte code which are jump targets - where we might insert a COME_FROM instruction. - - Return the list of offsets. - - Return the list of offsets. An instruction can be jumped - to in from multiple instructions. - """ - code = self.code - n = len(code) - self.structs = [{'type': 'root', - 'start': 0, - 'end': n-1}] - - # All loop entry points - self.loops = [] - - # Map fixed jumps to their real destination - self.fixed_jumps = {} - self.except_targets = {} - self.ignore_if = set() - self.build_statement_indices() - self.else_start = {} - - # Containers filled by detect_control_flow() - self.not_continue = set() - self.return_end_ifs = set() - self.setup_loop_targets = {} # target given setup_loop offset - self.setup_loops = {} # setup_loop offset given target - - targets = {} - extended_arg = 0 - for i, inst in enumerate(self.insts): - offset = inst.offset - op = inst.opcode - - self.detect_control_flow(offset, targets, extended_arg) - - if inst.has_arg: - label = self.fixed_jumps.get(offset) - oparg = inst.arg - next_offset = xdis.next_offset(op, self.opc, offset) - - if label is None: - if op in self.opc.hasjrel and op != self.opc.FOR_ITER: - label = next_offset + oparg - elif op in self.opc.hasjabs: - if op in self.jump_if_pop: - if oparg > offset: - label = oparg - - if label is not None and label != -1: - targets[label] = targets.get(label, []) + [offset] - elif op == self.opc.END_FINALLY and offset in self.fixed_jumps: - label = self.fixed_jumps[offset] - targets[label] = targets.get(label, []) + [offset] - pass - - extended_arg = 0 - pass # for loop - - # DEBUG: - if debug in ('both', 'after'): - import pprint as pp - pp.pprint(self.structs) - - return targets - pass if __name__ == "__main__":