Use "co_consts" in docstring detection.

Note: this is an upheaval because we need to pass "code", or at least
"code.co_consts", to the docstring detection routine.
This commit is contained in:
rocky
2020-07-21 10:31:07 -04:00
parent f62512dd65
commit a215ee2f00
15 changed files with 75 additions and 42 deletions

View File

@@ -4,7 +4,22 @@ about: Tell us about uncompyle6 bugs
---
<!-- __Note:__ Bugs are not for asking questions about a problem you are trying to solve that involve the use of uncompyle6 along the way, although I may be more tolerent of this if you sponsor the project. Also, the unless you are a sponsor of the project, it may take a while, maybe a week or so, before the bug report is noticed, let alone acted upon. To set expectations, some legitimate bugs can take years to fix, but they eventually do get fixed. Funding the project was added to address the problem that there are lots of people seeking help and reporting bugs, but few people who are willing or capable of providing help or fixing bugs. Have you read https://github.com/rocky/python-uncompyle6/blob/master/HOW-TO-REPORT-A-BUG.md ?
<!-- __Note:__ Bugs are not for asking questions about a problem you
are trying to solve that involves the use of uncompyle6 along the way,
although I may be more tolerant of this if you sponsor the project.
Also, unless you are a sponsor of the project, it may take a
while, maybe a week or so, before the bug report is noticed, let alone
acted upon.
To set expectations, some legitimate bugs can take years
to fix, but they eventually do get fixed. Funding the project was
added to address the problem that there are lots of people seeking
help and reporting bugs, but few people who are willing or capable of
providing help or fixing bugs.
Finally, have you read https://github.com/rocky/python-uncompyle6/blob/master/HOW-TO-REPORT-A-BUG.md
?
Please remove any of the optional sections if they are not applicable.

View File

@@ -216,7 +216,8 @@ valid bytecode before trying this tool. This program can't decompile
Microsoft Windows EXE files created by Py2EXE_, although we can
probably decompile the code after you extract the bytecode
properly. Handling pathologically long lists of expressions or
statements is slow. We don't handle Cython_ or MicroPython_ which don't use bytecode.
statements is slow. We don't handle Cython_ or MicroPython which don't
use bytecode.
There are numerous bugs in decompilation. And that's true for every
other CPython decompiler I have encountered, even the ones that
@@ -263,7 +264,6 @@ See Also
.. _Cython: https://en.wikipedia.org/wiki/Cython
.. _MicroPython: https://micropython.org
.. _trepan: https://pypi.python.org/pypi/trepan2g
.. _compiler: https://pypi.python.org/pypi/spark_parser
.. _HISTORY: https://github.com/rocky/python-uncompyle6/blob/master/HISTORY.md

View File

@@ -3,15 +3,22 @@
"""
import os, sys, py_compile
assert len(sys.argv) >= 2
assert (2 <= len(sys.argv) <= 4)
version = sys.version[0:3]
vers = sys.version_info[:2]
if sys.argv[1] in ("--run", "-r"):
suffix = "_run"
py_source = sys.argv[2:]
i = 2
else:
suffix = ""
py_source = sys.argv[1:]
i = 1
try:
optimize = int(sys.argv[-1])
py_source = sys.argv[i:-1]
except:
optimize = 2
for path in py_source:
short = os.path.basename(path)
@@ -20,7 +27,7 @@ for path in py_source:
else:
cfile = "bytecode_%s%s/%s" % (version, suffix, short) + "c"
print("byte-compiling %s to %s" % (path, cfile))
optimize = 2
optimize = optimize
if vers > (3, 1):
py_compile.compile(path, cfile, optimize=optimize)
else:

Binary file not shown.

View File

@@ -0,0 +1,9 @@
# From 2.7.17 test_bdb.py
# The problem was detecting a docstring at the beginning of the module.
# It must be detected and changed, or else the "from __future__" below
# is invalid.
# Note that this has to be compiled with optimization < 2 or else optimization
# will remove the docstring.
"""Rational, infinite-precision, real numbers."""
from __future__ import division

View File

@@ -623,7 +623,7 @@ class PythonParser(GenericASTBuilder):
"""
def parse(p, tokens, customize):
def parse(p, tokens, customize, code):
p.customize_grammar_rules(tokens, customize)
ast = p.parse(tokens)
# p.cleanup()
@@ -878,7 +878,7 @@ def python_parser(
# parser_debug = {'rules': True, 'transition': True, 'reduce' : True,
# 'showstack': 'full'}
p = get_python_parser(version, parser_debug)
return parse(p, tokens, customize)
return parse(p, tokens, customize, co)
if __name__ == "__main__":

View File

@@ -142,7 +142,7 @@ def code_deparse_align(co, out=sys.stderr, version=None, is_pypy=None,
is_pypy = is_pypy)
isTopLevel = co.co_name == '<module>'
deparsed.ast = deparsed.build_ast(tokens, customize, isTopLevel=isTopLevel)
deparsed.ast = deparsed.build_ast(tokens, customize, co, isTopLevel=isTopLevel)
assert deparsed.ast == 'stmts', 'Should have parsed grammar start'

View File

@@ -142,17 +142,12 @@ PASS = SyntaxTree(
)
ASSIGN_DOC_STRING = lambda doc_string, doc_load: SyntaxTree(
"stmt",
"assign",
[
SyntaxTree(
"assign",
[
SyntaxTree(
"expr", [Token(doc_load, pattr=doc_string, attr=doc_string)]
),
SyntaxTree("store", [Token("STORE_NAME", pattr="__doc__")]),
],
)
"expr", [Token(doc_load, pattr=doc_string, attr=doc_string)]
),
SyntaxTree("store", [Token("STORE_NAME", pattr="__doc__")]),
],
)

View File

@@ -90,7 +90,7 @@ def customize_for_version3(self, version):
code_obj = node[1].attr
assert iscode(code_obj)
code = Code(code_obj, self.scanner, self.currentclass)
ast = self.build_ast(code._tokens, code._customize)
ast = self.build_ast(code._tokens, code._customize, code)
self.customize(code._customize)
# skip over: sstmt, stmt, return, ret_expr

View File

@@ -681,7 +681,7 @@ class FragmentsWalker(pysource.SourceWalker, object):
assert iscode(cn.attr)
code = Code(cn.attr, self.scanner, self.currentclass)
ast = self.build_ast(code._tokens, code._customize)
ast = self.build_ast(code._tokens, code._customize, code)
self.customize(code._customize)
ast = ast[0][0][0]
@@ -728,7 +728,7 @@ class FragmentsWalker(pysource.SourceWalker, object):
code_name = code.co_name
code = Code(code, self.scanner, self.currentclass)
ast = self.build_ast(code._tokens, code._customize)
ast = self.build_ast(code._tokens, code._customize, code)
self.customize(code._customize)
if ast[0] == "sstmt":
@@ -850,7 +850,7 @@ class FragmentsWalker(pysource.SourceWalker, object):
self.prec = 27
code = Code(node[1].attr, self.scanner, self.currentclass)
ast = self.build_ast(code._tokens, code._customize)
ast = self.build_ast(code._tokens, code._customize, code)
self.customize(code._customize)
if node == "set_comp":
ast = ast[0][0][0]
@@ -992,7 +992,7 @@ class FragmentsWalker(pysource.SourceWalker, object):
self.prec = 27
code = Code(node[1].attr, self.scanner, self.currentclass)
ast = self.build_ast(code._tokens, code._customize)
ast = self.build_ast(code._tokens, code._customize, code)
self.customize(code._customize)
ast = ast[0][0][0]
store = ast[3]
@@ -1142,9 +1142,8 @@ class FragmentsWalker(pysource.SourceWalker, object):
self.name = old_name
self.return_none = rn
def build_ast(
self, tokens, customize, is_lambda=False, noneInNames=False, isTopLevel=False
):
def build_ast(self, tokens, customize, code, is_lambda=False,
noneInNames=False, isTopLevel=False):
# FIXME: DRY with pysource.py

View File

@@ -100,6 +100,7 @@ def make_function2(self, node, is_lambda, nested=1, code_node=None):
ast = self.build_ast(
code._tokens,
code._customize,
code,
is_lambda=is_lambda,
noneInNames=("None" in code.co_names),
)

View File

@@ -132,6 +132,7 @@ def make_function3_annotate(
ast = self.build_ast(
code._tokens,
code._customize,
code,
is_lambda=is_lambda,
noneInNames=("None" in code.co_names),
)
@@ -491,6 +492,7 @@ def make_function3(self, node, is_lambda, nested=1, code_node=None):
ast = self.build_ast(
scanner_code._tokens,
scanner_code._customize,
scanner_code,
is_lambda=is_lambda,
noneInNames=("None" in code.co_names),
)

View File

@@ -173,6 +173,7 @@ def make_function36(self, node, is_lambda, nested=1, code_node=None):
ast = self.build_ast(
scanner_code._tokens,
scanner_code._customize,
scanner_code,
is_lambda=is_lambda,
noneInNames=("None" in code.co_names),
)

View File

@@ -1119,7 +1119,7 @@ class SourceWalker(GenericASTTraversal, object):
assert iscode(cn.attr)
code = Code(cn.attr, self.scanner, self.currentclass)
ast = self.build_ast(code._tokens, code._customize)
ast = self.build_ast(code._tokens, code._customize, code)
self.customize(code._customize)
# Remove single reductions as in ("stmts", "sstmt"):
@@ -1203,7 +1203,7 @@ class SourceWalker(GenericASTTraversal, object):
assert iscode(code), node[code_index]
code = Code(code, self.scanner, self.currentclass)
ast = self.build_ast(code._tokens, code._customize)
ast = self.build_ast(code._tokens, code._customize, code)
self.customize(code._customize)
# skip over: sstmt, stmt, return, ret_expr
@@ -1398,7 +1398,7 @@ class SourceWalker(GenericASTTraversal, object):
self.prec = 27
code = Code(node[1].attr, self.scanner, self.currentclass)
ast = self.build_ast(code._tokens, code._customize)
ast = self.build_ast(code._tokens, code._customize, code)
self.customize(code._customize)
# Remove single reductions as in ("stmts", "sstmt"):
@@ -2312,7 +2312,7 @@ class SourceWalker(GenericASTTraversal, object):
indent = self.indent
# self.println(indent, '#flags:\t', int(code.co_flags))
ast = self.build_ast(code._tokens, code._customize)
ast = self.build_ast(code._tokens, code._customize, code)
code._tokens = None # save memory
assert ast == "stmts"
@@ -2387,10 +2387,10 @@ class SourceWalker(GenericASTTraversal, object):
# if docstring exists, dump it
if code.co_consts and code.co_consts[0] is not None and len(ast) > 0:
do_doc = False
if is_docstring(ast[0]):
if is_docstring(ast[0], self.version, code.co_consts):
i = 0
do_doc = True
elif len(ast) > 1 and is_docstring(ast[1]):
elif len(ast) > 1 and is_docstring(ast[1], self.version, code.co_consts):
i = 1
do_doc = True
if do_doc and self.hide_internal:
@@ -2466,7 +2466,7 @@ class SourceWalker(GenericASTTraversal, object):
self.return_none = rn
def build_ast(
self, tokens, customize, is_lambda=False, noneInNames=False, isTopLevel=False
self, tokens, customize, code, is_lambda=False, noneInNames=False, isTopLevel=False
):
# FIXME: DRY with fragments.py
@@ -2486,13 +2486,13 @@ class SourceWalker(GenericASTTraversal, object):
p_insts = self.p.insts
self.p.insts = self.scanner.insts
self.p.offset2inst_index = self.scanner.offset2inst_index
ast = python_parser.parse(self.p, tokens, customize)
ast = python_parser.parse(self.p, tokens, customize, code)
self.customize(customize)
self.p.insts = p_insts
except (python_parser.ParserError, AssertionError) as e:
raise ParserError(e, tokens, self.p.debug['reduce'])
transform_ast = self.treeTransform.transform(ast)
transform_ast = self.treeTransform.transform(ast, code)
self.maybe_show_tree(ast)
del ast # Save memory
return transform_ast
@@ -2524,7 +2524,7 @@ class SourceWalker(GenericASTTraversal, object):
self.p.insts = self.scanner.insts
self.p.offset2inst_index = self.scanner.offset2inst_index
self.p.opc = self.scanner.opc
ast = python_parser.parse(self.p, tokens, customize)
ast = python_parser.parse(self.p, tokens, customize, code)
self.p.insts = p_insts
except (python_parser.ParserError, AssertionError) as e:
raise ParserError(e, tokens, self.p.debug['reduce'])
@@ -2532,7 +2532,7 @@ class SourceWalker(GenericASTTraversal, object):
checker(ast, False, self.ast_errors)
self.customize(customize)
transform_ast = self.treeTransform.transform(ast)
transform_ast = self.treeTransform.transform(ast, code)
self.maybe_show_tree(ast)
@@ -2594,7 +2594,7 @@ def code_deparse(
)
isTopLevel = co.co_name == "<module>"
deparsed.ast = deparsed.build_ast(tokens, customize, isTopLevel=isTopLevel)
deparsed.ast = deparsed.build_ast(tokens, customize, co, isTopLevel=isTopLevel)
#### XXX workaround for profiling
if deparsed.ast is None:

View File

@@ -23,7 +23,7 @@ from uncompyle6.scanners.tok import NoneToken, Token
from uncompyle6.semantics.consts import RETURN_NONE, ASSIGN_DOC_STRING
def is_docstring(node):
def is_docstring(node, version, co_consts):
if node == "sstmt":
node = node[0]
# TODO: the test below on 2.7 succeeds for
@@ -48,7 +48,11 @@ def is_docstring(node):
# return node.kind == "assign" and node[1][0].pattr == "__doc__"
# except:
# return False
return node == ASSIGN_DOC_STRING
if version <= 2.7:
doc_load = "LOAD_CONST"
else:
doc_load = "LOAD_STR"
return node == ASSIGN_DOC_STRING(co_consts[0], doc_load)
def is_not_docstring(call_stmt_node):
@@ -435,7 +439,7 @@ class TreeTransform(GenericASTTraversal, object):
node = self.preorder(node)
return node
def transform(self, ast):
def transform(self, ast, code):
self.maybe_show_tree(ast)
self.ast = copy(ast)
self.ast = self.traverse(self.ast, is_lambda=False)
@@ -456,9 +460,9 @@ class TreeTransform(GenericASTTraversal, object):
for i in range(len(self.ast)):
sstmt = ast[i]
if len(sstmt) == 1 and sstmt == "sstmt":
ast[i] = ast[i][0]
self.ast[i] = self.ast[i][0]
if is_docstring(self.ast[i]):
if is_docstring(self.ast[i], self.version, code.co_consts):
load_const = self.ast[i].first_child()
docstring_ast = SyntaxTree(
"docstring",