diff --git a/test/bytecode_2.7/06-list-ifnot.pyc b/test/bytecode_2.7/06-list-ifnot.pyc new file mode 100644 index 00000000..2f00b7ff Binary files /dev/null and b/test/bytecode_2.7/06-list-ifnot.pyc differ diff --git a/test/bytecode_2.7/06_list_ifnot.pyc b/test/bytecode_2.7/06_list_ifnot.pyc new file mode 100644 index 00000000..a89eeb20 Binary files /dev/null and b/test/bytecode_2.7/06_list_ifnot.pyc differ diff --git a/test/bytecode_3.5/06_list_ifnot.pyc b/test/bytecode_3.5/06_list_ifnot.pyc new file mode 100644 index 00000000..8ca0449e Binary files /dev/null and b/test/bytecode_3.5/06_list_ifnot.pyc differ diff --git a/test/bytecode_3.5/10-list-ifnot.pyc b/test/bytecode_3.5/10-list-ifnot.pyc deleted file mode 100644 index bb0842df..00000000 Binary files a/test/bytecode_3.5/10-list-ifnot.pyc and /dev/null differ diff --git a/test/simple_source/comprehension/06_list_ifnot.py b/test/simple_source/comprehension/06_list_ifnot.py new file mode 100644 index 00000000..18b66b4f --- /dev/null +++ b/test/simple_source/comprehension/06_list_ifnot.py @@ -0,0 +1,5 @@ +# Test semantic handling of +# [x for x in names2 if not y] +# Bug seen in Python 3 +names2 = [] +names = [x for x in names2 if not len(x)] diff --git a/test/simple_source/comprehension/10-list-ifnot.py b/test/simple_source/comprehension/10-list-ifnot.py deleted file mode 100644 index 4cd32a02..00000000 --- a/test/simple_source/comprehension/10-list-ifnot.py +++ /dev/null @@ -1,17 +0,0 @@ -# Test semantic handling of -# [x for x in names if not y] -import os - -def bug(dirname, pattern): - if not dirname: - if isinstance(pattern, bytes): - dirname = bytes(os.curdir, 'ASCII') - else: - dirname = os.curdir - try: - names = os.listdir(dirname) - except os.error: - return [] - if not _ishidden(pattern): - names = [x for x in names if not _ishidden(x)] - return fnmatch.filter(names, pattern) diff --git a/uncompyle6/scanners/dis3.py b/uncompyle6/scanners/dis3.py deleted file mode 100644 index b0580ca8..00000000 --- a/uncompyle6/scanners/dis3.py +++ /dev/null @@ -1,405 +0,0 @@ -"""Disassembler of Python byte code into mnemonics. -Extracted from Python 3 dis module but generalized to -allow running on Python 2. -""" - -# This part is modified for cross Python compatability -from xdis.opcodes.opcode_3x import * - -from dis import findlinestarts -import types -import collections -import io - -from uncompyle6 import PYTHON3 - -if PYTHON3: - def code2num(code, i): - return code[i] -else: - def code2num(code, i): - return ord(code[i]) - -_have_code = (types.MethodType, types.FunctionType, types.CodeType, type) - -def _try_compile(source, name): - """Attempts to compile the given source, first as an expression and - then as a statement if the first approach fails. - - Utility function to accept strings in functions that otherwise - expect code objects - """ - try: - c = compile(source, name, 'eval') - except SyntaxError: - c = compile(source, name, 'exec') - return c - -def dis(x=None): - """Disassemble classes, methods, functions, generators, or code. - """ - if x is None: - distb() - return - if hasattr(x, '__func__'): # Method - x = x.__func__ - if hasattr(x, '__code__'): # Function - x = x.__code__ - if hasattr(x, 'gi_code'): # Generator - x = x.gi_code - if hasattr(x, '__dict__'): # Class or module - items = sorted(x.__dict__.items()) - for name, x1 in items: - if isinstance(x1, _have_code): - print("Disassembly of %s:" % name, file) - try: - dis(x1, file) - except TypeError as msg: - print("Sorry:", msg) - print(file) - elif isinstance(x, (bytes, bytearray)): # Raw bytecode - _disassemble_bytes(x, file) - else: - raise TypeError("don't know how to disassemble %s objects" % - type(x).__name__) - -# The inspect module interrogates this dictionary to build its -# list of CO_* constants. It is also used by pretty_flags to -# turn the co_flags field into a human readable list. -COMPILER_FLAG_NAMES = { - 1: "OPTIMIZED", - 2: "NEWLOCALS", - 4: "VARARGS", - 8: "VARKEYWORDS", - 16: "NESTED", - 32: "GENERATOR", - 64: "NOFREE", - 128: "COROUTINE", - 256: "ITERABLE_COROUTINE", -} - -def pretty_flags(flags): - """Return pretty representation of code flags.""" - names = [] - for i in range(32): - flag = 1<") - if hasattr(x, 'co_code'): # Code object - return x - raise TypeError("don't know how to disassemble %s objects" % - type(x).__name__) - -def code_info(x): - """Formatted details of methods, functions, or code.""" - return _format_code_info(_get_code_object(x)) - -def _format_code_info(co): - lines = [] - lines.append("Name: %s" % co.co_name) - lines.append("Filename: %s" % co.co_filename) - lines.append("Argument count: %s" % co.co_argcount) - lines.append("Kw-only arguments: %s" % co.co_kwonlyargcount) - lines.append("Number of locals: %s" % co.co_nlocals) - lines.append("Stack size: %s" % co.co_stacksize) - lines.append("Flags: %s" % pretty_flags(co.co_flags)) - if co.co_consts: - lines.append("Constants:") - for i_c in enumerate(co.co_consts): - lines.append("%4d: %r" % i_c) - if co.co_names: - lines.append("Names:") - for i_n in enumerate(co.co_names): - lines.append("%4d: %s" % i_n) - if co.co_varnames: - lines.append("Variable names:") - for i_n in enumerate(co.co_varnames): - lines.append("%4d: %s" % i_n) - if co.co_freevars: - lines.append("Free variables:") - for i_n in enumerate(co.co_freevars): - lines.append("%4d: %s" % i_n) - if co.co_cellvars: - lines.append("Cell variables:") - for i_n in enumerate(co.co_cellvars): - lines.append("%4d: %s" % i_n) - return "\n".join(lines) - -def show_code(co): - """Print details of methods, functions, or code to *file*. - - If *file* is not provided, the output is printed on stdout. - """ - print(code_info(co)) - -_Instruction = collections.namedtuple("_Instruction", - "opname opcode arg argval argrepr offset starts_line is_jump_target") - -class Instruction3(_Instruction): - """Details for a bytecode operation - - Defined fields: - opname - human readable name for operation - opcode - numeric code for operation - arg - numeric argument to operation (if any), otherwise None - argval - resolved arg value (if known), otherwise same as arg - argrepr - human readable description of operation argument - offset - start index of operation within bytecode sequence - starts_line - line started by this opcode (if any), otherwise None - is_jump_target - True if other code jumps to here, otherwise False - """ - - def _disassemble(self, lineno_width=3, mark_as_current=False): - """Format instruction details for inclusion in disassembly output - - *lineno_width* sets the width of the line number field (0 omits it) - *mark_as_current* inserts a '-->' marker arrow as part of the line - """ - fields = [] - # Column: Source code line number - if lineno_width: - if self.starts_line is not None: - lineno_fmt = "%%%dd" % lineno_width - fields.append(lineno_fmt % self.starts_line) - else: - fields.append(' ' * lineno_width) - # Column: Current instruction indicator - if mark_as_current: - fields.append('-->') - else: - fields.append(' ') - # Column: Jump target marker - if self.is_jump_target: - fields.append('>>') - else: - fields.append(' ') - # Column: Instruction offset from start of code sequence - fields.append(repr(self.offset).rjust(4)) - # Column: Opcode name - fields.append(opname.ljust(20)) - # Column: Opcode argument - if self.arg is not None: - fields.append(repr(self.arg).rjust(5)) - # Column: Opcode argument details - if self.argrepr: - fields.append('(' + self.argrepr + ')') - return ' '.join(fields).rstrip() - - ## FIXME: figure out how to do _disassemble passing in opnames - -def get_instructions(x, opnames, first_line=None): - """Iterator for the opcodes in methods, functions or code - - Generates a series of Instruction named tuples giving the details of - each operations in the supplied code. - - If *first_line* is not None, it indicates the line number that should - be reported for the first source line in the disassembled code. - Otherwise, the source line information (if any) is taken directly from - the disassembled code object. - """ - co = _get_code_object(x) - cell_names = co.co_cellvars + co.co_freevars - linestarts = dict(findlinestarts(co)) - if first_line is not None: - line_offset = first_line - co.co_firstlineno - else: - line_offset = 0 - return _get_instructions_bytes(co.co_code, opnames, co.co_varnames, co.co_names, - co.co_consts, cell_names, linestarts, - line_offset) - -def _get_const_info(const_index, const_list): - """Helper to get optional details about const references - - Returns the dereferenced constant and its repr if the constant - list is defined. - Otherwise returns the constant index and its repr(). - """ - argval = const_index - if const_list is not None: - argval = const_list[const_index] - - return argval, repr(argval) - -def _get_name_info(name_index, name_list): - """Helper to get optional details about named references - - Returns the dereferenced name as both value and repr if the name - list is defined. - Otherwise returns the name index and its repr(). - """ - argval = name_index - if name_list is not None: - argval = name_list[name_index] - argrepr = argval - else: - argrepr = repr(argval) - return argval, argrepr - -def _get_instructions_bytes(code, opnames, varnames=None, names=None, constants=None, - cells=None, linestarts=None, line_offset=0): - """Iterate over the instructions in a bytecode string. - - Generates a sequence of Instruction namedtuples giving the details of each - opcode. Additional information about the code's runtime environment - (e.g. variable names, constants) can be specified using optional - arguments. - - """ - labels = findlabels(code) - extended_arg = 0 - starts_line = None - # enumerate() is not an option, since we sometimes process - # multiple elements on a single pass through the loop - n = len(code) - i = 0 - while i < n: - op = code2num(code, i) - offset = i - if linestarts is not None: - starts_line = linestarts.get(i, None) - if starts_line is not None: - starts_line += line_offset - is_jump_target = i in labels - i = i+1 - arg = None - argval = None - argrepr = '' - if op >= HAVE_ARGUMENT: - arg = code2num(code, i) + code2num(code, i+1)*256 + extended_arg - extended_arg = 0 - i = i+2 - if op == EXTENDED_ARG: - extended_arg = arg*65536 - # Set argval to the dereferenced value of the argument when - # availabe, and argrepr to the string representation of argval. - # _disassemble_bytes needs the string repr of the - # raw name index for LOAD_GLOBAL, LOAD_CONST, etc. - argval = arg - if op in hasconst: - argval, argrepr = _get_const_info(arg, constants) - elif op in hasname: - argval, argrepr = _get_name_info(arg, names) - elif op in hasjrel: - argval = i + arg - argrepr = "to " + repr(argval) - elif op in haslocal: - argval, argrepr = _get_name_info(arg, varnames) - elif op in hascompare: - argval = cmp_op[arg] - argrepr = argval - elif op in hasfree: - argval, argrepr = _get_name_info(arg, cells) - elif op in hasnargs: - argrepr = ("%d positional, %d keyword pair" % - (code2num(code, i-2), code2num(code, i-1))) - opname = opnames[op] - yield Instruction3(opname, op, - arg, argval, argrepr, - offset, starts_line, is_jump_target) - -def findlabels(code): - """Detect all offsets in a byte code which are jump targets. - - Return the list of offsets. - - """ - labels = [] - # enumerate() is not an option, since we sometimes process - # multiple elements on a single pass through the loop - n = len(code) - i = 0 - while i < n: - op = code2num(code, i) - i = i+1 - if op >= HAVE_ARGUMENT: - arg = code2num(code, i) + code2num(code, i+1)*256 - i = i+2 - label = -1 - if op in hasjrel: - label = i+arg - elif op in hasjabs: - label = arg - if label >= 0: - if label not in labels: - labels.append(label) - return labels - -class Bytecode: - """The bytecode operations of a piece of code - - Instantiate this with a function, method, string of code, or a code object - (as returned by compile()). - - Iterating over this yields the bytecode operations as Instruction instances. - """ - def __init__(self, x, opnames, first_line=None, current_offset=None): - self.codeobj = co = _get_code_object(x) - if first_line is None: - self.first_line = co.co_firstlineno - self._line_offset = 0 - else: - self.first_line = first_line - self._line_offset = first_line - co.co_firstlineno - self._cell_names = co.co_cellvars + co.co_freevars - self._linestarts = dict(findlinestarts(co)) - self._original_object = x - self.opnames = opnames - self.current_offset = current_offset - - def __iter__(self): - co = self.codeobj - return _get_instructions_bytes(co.co_code, self.opnames, co.co_varnames, co.co_names, - co.co_consts, self._cell_names, - self._linestarts, - line_offset=self._line_offset) - - def __repr__(self): - return "{}({!r})".format(self.__class__.__name__, - self._original_object) - - @classmethod - def from_traceback(cls, tb): - """ Construct a Bytecode from the given traceback """ - while tb.tb_next: - tb = tb.tb_next - return cls(tb.tb_frame.f_code, current_offset=tb.tb_lasti) - - def info(self): - """Return formatted information about the code object.""" - return _format_code_info(self.codeobj) - - def dis(self): - """Return a formatted view of the bytecode operations.""" - co = self.codeobj - if self.current_offset is not None: - offset = self.current_offset - else: - offset = -1 - with io.StringIO() as output: - _disassemble_bytes(co.co_code, varnames=co.co_varnames, - names=co.co_names, constants=co.co_consts, - cells=self._cell_names, - linestarts=self._linestarts, - line_offset=self._line_offset, - file=output, - lasti=offset) - return output.getvalue() diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 4bfbb66c..0ac3c93a 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -23,13 +23,11 @@ Finally we save token information. from __future__ import print_function -import dis -import uncompyle6.scanners.dis3 as dis3 - from collections import namedtuple from array import array from xdis.code import iscode +from xdis.bytecode import Bytecode, findlinestarts from uncompyle6.scanner import Token from uncompyle6 import PYTHON3 @@ -46,10 +44,7 @@ import uncompyle6.scanner as scan class Scanner3(scan.Scanner): def __init__(self, version): - if PYTHON3: - super().__init__(version) - else: - super(Scanner3, self).__init__(version) + super(Scanner3, self).__init__(version) def disassemble3(self, co, classname=None, code_objects={}): """ @@ -72,7 +67,7 @@ class Scanner3(scan.Scanner): self.build_lines_data(co) self.build_prev_op() - bytecode = dis3.Bytecode(co, self.opname) + bytecode = Bytecode(co, self.opc) # Scan for assertions. Later we will # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those @@ -94,6 +89,7 @@ class Scanner3(scan.Scanner): jump_targets = self.find_jump_targets() for inst in bytecode: + if inst.offset in jump_targets: jump_idx = 0 for jump_offset in jump_targets[inst.offset]: @@ -192,7 +188,7 @@ class Scanner3(scan.Scanner): self.code = array('B', co.co_code) - bytecode = dis3.Bytecode(co, self.opname) + bytecode = Bytecode(co, self.opc) for inst in bytecode: pattr = inst.argrepr @@ -277,6 +273,7 @@ class Scanner3(scan.Scanner): extended_arg = 0 for offset in self.op_range(0, codelen): + # Add jump target tokens if offset in jump_targets: jump_idx = 0 @@ -421,7 +418,7 @@ class Scanner3(scan.Scanner): """ # Offset: lineno pairs, only for offsets which start line. # Locally we use list for more convenient iteration using indices - linestarts = list(dis.findlinestarts(code_obj)) + linestarts = list(findlinestarts(code_obj)) self.linestarts = dict(linestarts) # Plain set with offsets of first ops on line self.linestart_offsets = set(a for (a, _) in linestarts) @@ -464,7 +461,7 @@ class Scanner3(scan.Scanner): Return size of operator with its arguments for given opcode . """ - if op < dis.HAVE_ARGUMENT: + if op < self.opc.HAVE_ARGUMENT: return 1 else: return 3 diff --git a/uncompyle6/scanners/scanner35.py b/uncompyle6/scanners/scanner35.py index f5480e25..5358e059 100644 --- a/uncompyle6/scanners/scanner35.py +++ b/uncompyle6/scanners/scanner35.py @@ -12,11 +12,13 @@ from uncompyle6.scanners.scanner3 import Scanner3 # bytecode verification, verify(), uses JUMP_OPs from here from xdis.opcodes.opcode_35 import JUMP_OPs +import xdis class Scanner35(Scanner3): def __init__(self): - super(Scanner3, self).__init__(3.5) + super(Scanner35, self).__init__(3.5) + self.opc = xdis.opcodes.opcode_35 def disassemble(self, co, classname=None, code_objects={}): return self.disassemble3(co, classname, code_objects) diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 4aac5dbc..5fb5e498 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -975,7 +975,7 @@ class SourceWalker(GenericASTTraversal, object): self.prec = 27 n = node[-1] assert n == 'list_iter' - # find innerst node + # find innermost node while n == 'list_iter': n = n[0] # recurse one step if n == 'list_for': n = n[3] @@ -1059,31 +1059,38 @@ class SourceWalker(GenericASTTraversal, object): self.customize(code._customize) ast = ast[0][0][0][0][0] - try: - n = ast[iter_index] - except: - from trepan.api import debug; debug() + n = ast[iter_index] assert n == 'list_iter' + ## FIXME: I'm not totally sure this is right. + # find innermost node + designator = None + list_if_node = None while n == 'list_iter': n = n[0] # recurse one step if n == 'list_for': - designator = n[2] + if n[2] == 'designator': + designator = n[2] n = n[3] elif n in ['list_if', 'list_if_not']: - # FIXME: just a guess - designator = n[1] + list_if_node = n[0] + if n[1] == 'designator': + designator = n[1] n = n[2] pass pass assert n == 'lc_body', ast + assert designator, "Couldn't find designator in list comprehension" self.preorder(n[0]) self.write(' for ') self.preorder(designator) self.write(' in ') self.preorder(node[-3]) + if list_if_node: + self.write(' if ') + self.preorder(list_if_node) self.prec = p def listcomprehension_walk2(self, node):