diff --git a/test/bytecode_2.6/05_unicode_literals.pyc b/test/bytecode_2.6/05_unicode_literals.pyc new file mode 100644 index 00000000..6e843727 Binary files /dev/null and b/test/bytecode_2.6/05_unicode_literals.pyc differ diff --git a/test/bytecode_2.7/05_unicode_literals.pyc b/test/bytecode_2.7/05_unicode_literals.pyc new file mode 100644 index 00000000..0d8041f1 Binary files /dev/null and b/test/bytecode_2.7/05_unicode_literals.pyc differ diff --git a/test/bytecode_3.0/05_unicode_literals.pyc b/test/bytecode_3.0/05_unicode_literals.pyc new file mode 100644 index 00000000..a8dc2410 Binary files /dev/null and b/test/bytecode_3.0/05_unicode_literals.pyc differ diff --git a/test/bytecode_3.1/05_unicode_literals.pyc b/test/bytecode_3.1/05_unicode_literals.pyc new file mode 100644 index 00000000..f35c74e8 Binary files /dev/null and b/test/bytecode_3.1/05_unicode_literals.pyc differ diff --git a/test/simple_source/stmts/05_unicode_literals.py b/test/simple_source/stmts/05_unicode_literals.py new file mode 100644 index 00000000..a6215597 --- /dev/null +++ b/test/simple_source/stmts/05_unicode_literals.py @@ -0,0 +1,8 @@ +from __future__ import unicode_literals + +# __future__ unicode_literals changes the way we need to print +# the below +# In Python assembler code "a" is u"a" and b"a" is "a". +a = "a" +ba = b"a" +bb = b"b" diff --git a/test/stdlib/runtests.sh b/test/stdlib/runtests.sh index e0d58906..45f51132 100755 --- a/test/stdlib/runtests.sh +++ b/test/stdlib/runtests.sh @@ -55,15 +55,13 @@ case $PYVERSION in [test_codecs.py]=1 # need to fix tryelse [test_coercion.py]=1 # Control flow? [test_cookielib.py]=1 # Control flow? - [test_contextlib.py]=1 # decorators - [test_decorators.py]=1 # decorators + [test_decorators.py]=1 # Syntax Error - look at [test_enumerate.py]=1 # Control flow? [test_file.py]=1 # Control flow? [test_format.py]=1 # Control flow? [test_frozen.py]=1 # Control flow? [test_ftplib.py]=1 # Control flow? [test_funcattrs.py]=1 # Control flow? - [test_future4.py]=1 # Possible additional rule for future mechanism? [test_grp.py]=1 # Long test - might work Control flow? [test_pwd.py]=1 # Long test - might work? Control flow? [test_queue.py]=1 # Control flow? @@ -71,14 +69,11 @@ case $PYVERSION in ;; 2.7) SKIP_TESTS=( - [test_builtin.py]=1 - [test_contextlib.py]=1 # decorators - [test_decorators.py]=1 # decorators - [test_descr.py]=1 # syntax error look at + [test_builtin.py]=1 # Syntax error, look at [test_dis.py]=1 # We change line numbers - duh! - [test_future4.py]=1 # Possible additional rule for future mechanism? [test_grammar.py]=1 # Too many stmts. Handle large stmts - [test_importlib.py]=1 # Control flow? + [test_ioctl.py]=1 # Test takes too long to run + [test_itertools.py]=1 # Syntax error - look at! ) ;; *) @@ -108,7 +103,7 @@ if [[ -n $1 ]] ; then files=$1 SKIP_TESTS=() else - files=test_*.py + files=test_[m]*.py fi for file in $files; do [[ -v SKIP_TESTS[$file] ]] && continue diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index a6e3a2dc..84cd8629 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -214,6 +214,10 @@ class SourceWalker(GenericASTTraversal, object): self.line_number = 0 self.ast_errors = [] + # This is in Python 2.6 on. It changes the way + # strings get interpreted. See n_LOAD_CONST + self.FUTURE_UNICODE_LITERALS = False + # Sometimes we may want to continue decompiling when there are errors # and sometimes not self.tolerate_errors = tolerate_errors @@ -644,7 +648,8 @@ class SourceWalker(GenericASTTraversal, object): if self.pending_newlines: out = out[:-self.pending_newlines] - if isinstance(out, str) and not PYTHON3: + if (isinstance(out, str) and + not (PYTHON3 or self.FUTURE_UNICODE_LITERALS)): out = unicode(out, 'utf-8') self.f.write(out) @@ -843,6 +848,27 @@ class SourceWalker(GenericASTTraversal, object): self.write('None') elif isinstance(data, tuple): self.pp_tuple(data) + elif self.FUTURE_UNICODE_LITERALS: + # The FUTURE_UNICODE_LITERALS compiler flag + # in 2.6 on change the way + # strings are interpreted: + # u'xxx' -> 'xxx' + # xxx' -> b'xxx' + if not PYTHON3 and isinstance(data, unicode): + try: + try: + data = str(data) + except UnicodeEncodeError: + # Have to keep data as it is: in Unicode. + pass + self.write(repr(data)) + except: + from trepan.api import debug; debug() + self.write(repr(data)) + elif isinstance(data, str): + self.write('b'+repr(data)) + else: + self.write(repr(data)) else: self.write(repr(data)) # LOAD_CONST is a terminal, so stop processing/recursing early @@ -1592,7 +1618,7 @@ class SourceWalker(GenericASTTraversal, object): n_classdefdeco2 = n_classdef def print_super_classes(self, node): - if not (node == 'list'): + if not (node == 'tuple'): return n_subclasses = len(node[:-1]) @@ -2382,6 +2408,9 @@ def deparse_code(version, co, out=sys.stdout, showasm=None, showast=False, except: pass + deparsed.FUTURE_UNICODE_LITERALS = ( + COMPILER_FLAG_BIT['FUTURE_UNICODE_LITERALS'] & co.co_flags != 0) + # What we've been waiting for: Generate source from AST! deparsed.gen_source(deparsed.ast, co.co_name, customize)