You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-03 00:45:53 +08:00
Some reorg and Lame attempt to disasmbiguate "and" and "if .. if"...
More work is needed.
This commit is contained in:
@@ -241,6 +241,7 @@ case $PYVERSION in
|
|||||||
SKIP_TESTS=(
|
SKIP_TESTS=(
|
||||||
[test_atexit.py]=1 #
|
[test_atexit.py]=1 #
|
||||||
[test_bdb.py]=1 #
|
[test_bdb.py]=1 #
|
||||||
|
[test_buffer.py]=1 # parse error
|
||||||
[test_builtin.py]=1 # Fails on its own
|
[test_builtin.py]=1 # Fails on its own
|
||||||
[test_compile.py]=1
|
[test_compile.py]=1
|
||||||
[test_contains.py]=1 # Code "while False: yield None" is optimized away in compilation
|
[test_contains.py]=1 # Code "while False: yield None" is optimized away in compilation
|
||||||
|
@@ -0,0 +1,11 @@
|
|||||||
|
"""Here we have parser grammars for the different Python versions.
|
||||||
|
Instead of full grammars, we have full grammars for certain Python versions
|
||||||
|
and the others indicate differences between a neighboring version.
|
||||||
|
|
||||||
|
For example Python 2.6, 2.7, 3.2, and 3.7 are largely "base" versions
|
||||||
|
which work off off parse2.py, parse3.py, and parse37base.py.
|
||||||
|
|
||||||
|
Some examples:
|
||||||
|
Python 3.3 diffs off of 3.2; 3.1 and 3.0 diff off of 3.2; Python 1.0..Python 2.5 diff off of
|
||||||
|
Python 2.6 and Python 3.8 diff off of 3.7
|
||||||
|
"""
|
||||||
|
@@ -1459,6 +1459,7 @@ class Python3Parser(PythonParser):
|
|||||||
self.check_reduce["while1stmt"] = "noAST"
|
self.check_reduce["while1stmt"] = "noAST"
|
||||||
self.check_reduce["while1elsestmt"] = "noAST"
|
self.check_reduce["while1elsestmt"] = "noAST"
|
||||||
self.check_reduce["ifelsestmt"] = "AST"
|
self.check_reduce["ifelsestmt"] = "AST"
|
||||||
|
self.check_reduce["ifstmt"] = "AST"
|
||||||
self.check_reduce["annotate_tuple"] = "noAST"
|
self.check_reduce["annotate_tuple"] = "noAST"
|
||||||
if not PYTHON3:
|
if not PYTHON3:
|
||||||
self.check_reduce["kwarg"] = "noAST"
|
self.check_reduce["kwarg"] = "noAST"
|
||||||
@@ -1479,50 +1480,24 @@ class Python3Parser(PythonParser):
|
|||||||
elif lhs == "kwarg":
|
elif lhs == "kwarg":
|
||||||
arg = tokens[first].attr
|
arg = tokens[first].attr
|
||||||
return not (isinstance(arg, str) or isinstance(arg, unicode))
|
return not (isinstance(arg, str) or isinstance(arg, unicode))
|
||||||
elif lhs == "while1elsestmt":
|
elif rule == ("ifstmt", ("testexpr", "_ifstmts_jump")):
|
||||||
|
condition_jump = ast[0].last_child()
|
||||||
n = len(tokens)
|
if condition_jump.kind.startswith("POP_JUMP_IF"):
|
||||||
if last == n:
|
condition_jump2 = tokens[min(last-1, len(tokens)-1)]
|
||||||
# Adjust for fuzziness in parsing
|
if condition_jump2.kind.startswith("POP_JUMP_IF"):
|
||||||
|
return condition_jump.attr == condition_jump2.attr
|
||||||
|
# if condition_jump.attr < condition_jump2.off2int():
|
||||||
|
# print("XXX", first, last)
|
||||||
|
# for t in range(first, last): print(tokens[t])
|
||||||
|
# from trepan.api import debug; debug()
|
||||||
|
# return condition_jump.attr < condition_jump2.off2int()
|
||||||
|
return False
|
||||||
|
elif lhs == "ifelsestmt" and rule[1][2] == "jump_forward_else":
|
||||||
|
last = min(last, len(tokens)-1)
|
||||||
|
if tokens[last].off2int() == -1:
|
||||||
last -= 1
|
last -= 1
|
||||||
|
jump_forward_else = ast[2]
|
||||||
if tokens[last] == "COME_FROM_LOOP":
|
return tokens[first].off2int() <= jump_forward_else[0].attr < tokens[last].off2int()
|
||||||
last -= 1
|
|
||||||
elif tokens[last - 1] == "COME_FROM_LOOP":
|
|
||||||
last -= 2
|
|
||||||
if tokens[last] in ("JUMP_BACK", "CONTINUE"):
|
|
||||||
# These indicate inside a loop, but token[last]
|
|
||||||
# should not be in a loop.
|
|
||||||
# FIXME: Not quite right: refine by using target
|
|
||||||
return True
|
|
||||||
|
|
||||||
# if SETUP_LOOP target spans the else part, then this is
|
|
||||||
# not while1else. Also do for whileTrue?
|
|
||||||
last += 1
|
|
||||||
while last < n and isinstance(tokens[last].offset, str):
|
|
||||||
last += 1
|
|
||||||
if last == n:
|
|
||||||
return False
|
|
||||||
# 3.8+ Doesn't have SETUP_LOOP
|
|
||||||
return self.version < 3.8 and tokens[first].attr > tokens[last].offset
|
|
||||||
|
|
||||||
elif rule == (
|
|
||||||
"try_except",
|
|
||||||
(
|
|
||||||
"SETUP_EXCEPT",
|
|
||||||
"suite_stmts_opt",
|
|
||||||
"POP_BLOCK",
|
|
||||||
"except_handler",
|
|
||||||
"opt_come_from_except",
|
|
||||||
),
|
|
||||||
):
|
|
||||||
come_from_except = ast[-1]
|
|
||||||
if come_from_except[0] == "COME_FROM":
|
|
||||||
# There should be at last two COME_FROMs, one from an
|
|
||||||
# exception handler and one from the try. Otherwise
|
|
||||||
# we have a try/else.
|
|
||||||
return True
|
|
||||||
pass
|
|
||||||
elif lhs == "while1stmt":
|
elif lhs == "while1stmt":
|
||||||
|
|
||||||
# If there is a fall through to the COME_FROM_LOOP, then this is
|
# If there is a fall through to the COME_FROM_LOOP, then this is
|
||||||
@@ -1571,6 +1546,50 @@ class Python3Parser(PythonParser):
|
|||||||
if offset != tokens[first].attr:
|
if offset != tokens[first].attr:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
elif lhs == "while1elsestmt":
|
||||||
|
|
||||||
|
n = len(tokens)
|
||||||
|
if last == n:
|
||||||
|
# Adjust for fuzziness in parsing
|
||||||
|
last -= 1
|
||||||
|
|
||||||
|
if tokens[last] == "COME_FROM_LOOP":
|
||||||
|
last -= 1
|
||||||
|
elif tokens[last - 1] == "COME_FROM_LOOP":
|
||||||
|
last -= 2
|
||||||
|
if tokens[last] in ("JUMP_BACK", "CONTINUE"):
|
||||||
|
# These indicate inside a loop, but token[last]
|
||||||
|
# should not be in a loop.
|
||||||
|
# FIXME: Not quite right: refine by using target
|
||||||
|
return True
|
||||||
|
|
||||||
|
# if SETUP_LOOP target spans the else part, then this is
|
||||||
|
# not while1else. Also do for whileTrue?
|
||||||
|
last += 1
|
||||||
|
while last < n and isinstance(tokens[last].offset, str):
|
||||||
|
last += 1
|
||||||
|
if last == n:
|
||||||
|
return False
|
||||||
|
# 3.8+ Doesn't have SETUP_LOOP
|
||||||
|
return self.version < 3.8 and tokens[first].attr > tokens[last].offset
|
||||||
|
|
||||||
|
elif rule == (
|
||||||
|
"try_except",
|
||||||
|
(
|
||||||
|
"SETUP_EXCEPT",
|
||||||
|
"suite_stmts_opt",
|
||||||
|
"POP_BLOCK",
|
||||||
|
"except_handler",
|
||||||
|
"opt_come_from_except",
|
||||||
|
),
|
||||||
|
):
|
||||||
|
come_from_except = ast[-1]
|
||||||
|
if come_from_except[0] == "COME_FROM":
|
||||||
|
# There should be at last two COME_FROMs, one from an
|
||||||
|
# exception handler and one from the try. Otherwise
|
||||||
|
# we have a try/else.
|
||||||
|
return True
|
||||||
|
pass
|
||||||
elif rule == (
|
elif rule == (
|
||||||
"ifelsestmt",
|
"ifelsestmt",
|
||||||
(
|
(
|
||||||
@@ -1586,12 +1605,6 @@ class Python3Parser(PythonParser):
|
|||||||
if not isinstance(come_froms, Token):
|
if not isinstance(come_froms, Token):
|
||||||
return tokens[first].offset > come_froms[-1].attr
|
return tokens[first].offset > come_froms[-1].attr
|
||||||
return False
|
return False
|
||||||
elif lhs == "ifelsestmt" and rule[1][2] == "jump_forward_else":
|
|
||||||
last = min(last, len(tokens)-1)
|
|
||||||
if tokens[last].off2int() == -1:
|
|
||||||
last -= 1
|
|
||||||
jump_forward_else = ast[2]
|
|
||||||
return tokens[first].off2int() <= jump_forward_else[0].attr < tokens[last].off2int()
|
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@@ -60,5 +60,13 @@ class SyntaxTree(spark_AST):
|
|||||||
child = self[0]
|
child = self[0]
|
||||||
if not isinstance(child, SyntaxTree):
|
if not isinstance(child, SyntaxTree):
|
||||||
return child
|
return child
|
||||||
return self[0].first_child()
|
return child.first_child()
|
||||||
|
return self
|
||||||
|
|
||||||
|
def last_child(self):
|
||||||
|
if len(self) > 0:
|
||||||
|
child = self[-1]
|
||||||
|
if not isinstance(child, SyntaxTree):
|
||||||
|
return child
|
||||||
|
return child.last_child()
|
||||||
return self
|
return self
|
||||||
|
@@ -0,0 +1,29 @@
|
|||||||
|
"""Here we have "scanners" for the different Python versions.
|
||||||
|
"scanner" is a compiler-centric term, but it is really a bit different from
|
||||||
|
a traditional compiler scanner/lexer.
|
||||||
|
|
||||||
|
Here we start out with text disasembly and change that to be more
|
||||||
|
ameanable to parsing in which we look only at the opcode name, and not
|
||||||
|
and instruction's operand.
|
||||||
|
|
||||||
|
In some cases this is done by changing the opcode name. For example
|
||||||
|
"LOAD_CONST" it customized based on the type of its operand into
|
||||||
|
"LOAD_ASSERT", "LOAD_CODE", "LOAD_STR".
|
||||||
|
|
||||||
|
instructions that take a variable number of arguments will have the argument count
|
||||||
|
suffixed to the opcode name. "CALL", "MAKE_FUNCTION", "BUILD_TUPLE", "BUILD_LIST",
|
||||||
|
work this way for example
|
||||||
|
|
||||||
|
We also add pseudo instructions like "COME_FROM" which have an operand
|
||||||
|
|
||||||
|
Instead of full grammars, we have full grammars for certain Python versions
|
||||||
|
and the others indicate differences between a neighboring version.
|
||||||
|
|
||||||
|
For example Python 2.6, 2.7, 3.2, and 3.7 are largely "base" versions
|
||||||
|
which work off of scanner2.py, scanner3.py, and scanner37base.py.
|
||||||
|
|
||||||
|
Some examples:
|
||||||
|
Python 3.3 diffs off of 3.2; 3.1 and 3.0 diff off of 3.2; Python 1.0..Python 2.5 diff off of
|
||||||
|
Python 2.6 and Python 3.8 diff off of 3.7
|
||||||
|
|
||||||
|
"""
|
||||||
|
Reference in New Issue
Block a user