From a5d2237435ee51e681c73db9e7ea379d56456205 Mon Sep 17 00:00:00 2001 From: rocky Date: Fri, 9 Dec 2016 21:10:10 -0500 Subject: [PATCH] Python 3.x else clause detection and.. - Strengthen verify check. - weak verification on Python 3.5 for now --- test/Makefile | 2 +- uncompyle6/parser.py | 3 +-- uncompyle6/parsers/parse3.py | 12 ++++++--- uncompyle6/scanners/scanner3.py | 45 ++++++++++++++++++++++++++------- uncompyle6/scanners/tok.py | 15 +++++++++++ uncompyle6/verify.py | 5 +++- 6 files changed, 66 insertions(+), 16 deletions(-) diff --git a/test/Makefile b/test/Makefile index 343d4c19..0a5e7df8 100644 --- a/test/Makefile +++ b/test/Makefile @@ -44,7 +44,7 @@ check-3.4: check-bytecode check-3.4-ok check-2.7-ok #: Run working tests from Python 3.5 check-3.5: check-bytecode - $(PYTHON) test_pythonlib.py --bytecode-3.5 --verify $(COMPILE) + $(PYTHON) test_pythonlib.py --bytecode-3.5 --weak-verify $(COMPILE) #: Run working tests from Python 3.6 check-3.6: check-bytecode diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index 05b71188..6c473f78 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -264,8 +264,7 @@ class PythonParser(GenericASTBuilder): # Zero or more COME_FROMs # loops can have this - _come_from ::= _come_from COME_FROM - _come_from ::= + _come_from ::= COME_FROM* # Zero or one COME_FROM # And/or expressions have this diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 5d58845c..dcdbdc06 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -138,9 +138,15 @@ class Python3Parser(PythonParser): iflaststmtl ::= testexpr c_stmts_opt JUMP_BACK iflaststmtl ::= testexpr c_stmts_opt JUMP_BACK COME_FROM_LOOP + # These are used to keep AST indices the same + jf_else ::= JUMP_FORWARD ELSE + ja_else ::= JUMP_ABSOLUTE ELSE + ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD else_suite COME_FROM + ifelsestmt ::= testexpr c_stmts_opt jf_else else_suite _come_from ifelsestmtc ::= testexpr c_stmts_opt JUMP_ABSOLUTE else_suitec + ifelsestmtc ::= testexpr c_stmts_opt ja_else else_suitec ifelsestmtr ::= testexpr return_if_stmts return_stmts @@ -367,14 +373,14 @@ class Python3Parser(PythonParser): ''' def p_expr3(self, args): - ''' + """ + conditional ::= expr jmp_false expr jf_else expr COME_FROM expr ::= LOAD_CLASSNAME # Python 3.4+ expr ::= LOAD_CLASSDEREF - # Python3 drops slice0..slice3 - ''' + """ @staticmethod def call_fn_name(token): diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index b6526cb8..651a5712 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -226,6 +226,14 @@ class Scanner3(Scanner): jump_idx += 1 pass pass + elif inst.offset in self.else_start: + end_offset = self.else_start[inst.offset] + tokens.append(Token('ELSE', + None, repr(end_offset), + offset='%s' % (inst.offset), + has_arg = True, opc=self.opc)) + + pass pattr = inst.argrepr opname = inst.opname @@ -425,6 +433,7 @@ class Scanner3(Scanner): self.fixed_jumps = {} self.ignore_if = set() self.build_statement_indices() + self.else_start = {} # Containers filled by detect_structure() self.not_continue = set() @@ -759,15 +768,28 @@ class Scanner3(Scanner): code[prev_op[prev_op[rtarget]]] != self.opc.JUMP_ABSOLUTE)): rtarget = prev_op[rtarget] - # Does the "if" jump just beyond a jump op, then this can be - # a block inside an "if" statement + # Does the "jump if" jump beyond a jump op? + # That is, we have something like: + # POP_JUMP_IF_FALSE HERE + # ... + # JUMP_FORWARD + # HERE: + # + # If so, this can be block inside an "if" statement + # or a conditional assignment like: + # x = 1 if x else 2 + # + # There are other contexts we may need to consider + # like whether the target is "END_FINALLY" + # or if the condition jump is to a forward location if self.is_jump_forward(prev_op[rtarget]): - if_end = self.get_target(prev_op[rtarget]) + rrtarget = prev_op[rtarget] + if_end = self.get_target(rrtarget) - # Is this a loop and not an "if" statement? - if ((if_end < prev_op[rtarget]) and + # If the jump target is back, we are looping + if (if_end < rrtarget and (code[prev_op[if_end]] == self.opc.SETUP_LOOP)): - if(if_end > start): + if (if_end > start): return end = self.restrict_to_parent(if_end, parent) @@ -777,10 +799,13 @@ class Scanner3(Scanner): 'end': prev_op[rtarget]}) self.not_continue.add(prev_op[rtarget]) - if rtarget < end: - self.structs.append({'type': 'if-else', + if rtarget < end and ( + code[rtarget] != self.opc.END_FINALLY + and code[prev_op[rrtarget]] != self.opc.POP_EXCEPT): + self.structs.append({'type': 'else', 'start': rtarget, 'end': end}) + self.else_start[rtarget] = end elif code[prev_op[rtarget]] == self.opc.RETURN_VALUE: self.structs.append({'type': 'if-then', 'start': start, @@ -870,7 +895,9 @@ class Scanner3(Scanner): op = self.code[i] if op == self.opc.END_FINALLY: if count_END_FINALLY == count_SETUP_: - assert self.code[self.prev_op[i]] in (JUMP_ABSOLUTE, JUMP_FORWARD, RETURN_VALUE) + assert self.code[self.prev_op[i]] in (JUMP_ABSOLUTE, + JUMP_FORWARD, + RETURN_VALUE) self.not_continue.add(self.prev_op[i]) return self.prev_op[i] count_END_FINALLY += 1 diff --git a/uncompyle6/scanners/tok.py b/uncompyle6/scanners/tok.py index a260a24c..debb967a 100644 --- a/uncompyle6/scanners/tok.py +++ b/uncompyle6/scanners/tok.py @@ -43,6 +43,21 @@ class Token: else: return self.type == o + def __cmp__(self, o): + t = self.type # shortcut + if t == 'BUILD_TUPLE_0' and o.type == 'LOAD_CONST' and o.pattr == (): + return 0 + if t == 'COME_FROM' == o.type: + return 0 + if t == 'PRINT_ITEM_CONT' and o.type == 'PRINT_ITEM': + return 0 + if t == 'RETURN_VALUE' and o.type == 'RETURN_END_IF': + return 0 + if t == 'JUMP_IF_FALSE_OR_POP' and o.type == 'POP_JUMP_IF_FALSE': + return 0 + return (t == o.type) or self.pattr == o.pattr + + def __repr__(self): return str(self.type) diff --git a/uncompyle6/verify.py b/uncompyle6/verify.py index 0063be7b..96521156 100755 --- a/uncompyle6/verify.py +++ b/uncompyle6/verify.py @@ -268,7 +268,7 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, raise CmpErrorCode(name, tokens1[idx1].offset, tokens1[idx1], tokens2[idx2], tokens1, tokens2) - if tokens1[i1].type != tokens2[i2].type: + if tokens1[i1] != tokens2[i2]: if tokens1[i1].type == 'LOAD_CONST' == tokens2[i2].type: i = 1 while tokens1[i1+i].type == 'LOAD_CONST': @@ -353,6 +353,9 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, if is_pypy: # For PYPY for now we don't care about PYPY_SOURCE_IS_UTF8: flags2 &= ~0x0100 # PYPY_SOURCE_IS_UTF8 + # We also don't care about COROUTINE or GENERATOR for now + flags1 &= ~0x000000a0 + flags2 &= ~0x000000a0 if flags1 != flags2: raise CmpErrorMember(name, 'co_flags', pretty_flags(flags1),