Skip to content

CI: Make pep8 validation work with multiple files simultaneously #57914

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions scripts/tests/test_validate_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,6 @@ def test_validate_all_ignore_errors(self, monkeypatch):
assert exit_status == 2 * 2 - 1



class TestApiItems:
@property
def api_doc(self):
Expand Down Expand Up @@ -374,9 +373,10 @@ class TestPandasDocstringClass:
)
def test_encode_content_write_to_file(self, name) -> None:
# GH25466
docstr = validate_docstrings.PandasDocstring(name).validate_pep8()
docstr = validate_docstrings.PandasDocstring(name)
errors = validate_docstrings.validate_pep8(docstr)[docstr]
# the list of pep8 errors should be empty
assert not list(docstr)
assert not list(errors)


class TestMainFunction:
Expand Down
158 changes: 101 additions & 57 deletions scripts/validate_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import doctest
import importlib
import json
import os
import pathlib
import subprocess
import sys
Expand Down Expand Up @@ -153,6 +152,100 @@ def get_api_items(api_doc_fd):
previous_line = line_stripped


def validate_pep8(docs: list[PandasDocstring] | PandasDocstring
                  ) -> dict[PandasDocstring, list[tuple]]:
    """
    Run the pep8 validation once for the examples of several docstrings.

    The examples of every docstring are written to their own temporary file
    and all files are checked by a single flake8 invocation.

    Parameters
    ----------
    docs : list[PandasDocstring] or PandasDocstring
        Docstrings to validate. A single docstring is handled like a
        one-element list.

    Returns
    -------
    dict[PandasDocstring, list[tuple]]
        Dict mapping each docstring in ``docs`` to the pep8 errors found in
        its examples (an empty list if it has no examples or no errors).
        The error messages are of the form
        (error_code, message, line_number, col_number).
        An empty dict is returned if none of the docstrings has examples.

    Raises
    ------
    ValueError
        If a line printed by flake8 cannot be parsed or cannot be attributed
        to one of the validated docstrings.
    """
    if isinstance(docs, PandasDocstring):
        docs = [docs]

    # F401 is needed to not generate flake8 errors in examples
    # that do not use numpy or pandas. Two spaces precede the inline
    # comments so that the preamble itself does not trigger E261.
    preamble = (
        "import numpy as np  # noqa: F401\n"
        "import pandas as pd  # noqa: F401\n"
    )

    with tempfile.TemporaryDirectory() as temp_dir:
        doc_to_filename = {}
        for doc in docs:
            if not doc.examples or doc in doc_to_filename:
                continue

            # Closing the file before flake8 runs guarantees that the content
            # is fully written; the files are removed together with the
            # temporary directory.
            with tempfile.NamedTemporaryFile(
                mode="w",
                dir=temp_dir,
                encoding="utf-8",
                delete=False,
            ) as temp_file:
                temp_file.write(preamble + "".join(doc.examples_source_code))
            doc_to_filename[doc] = temp_file.name

        # No docs with examples to process
        if not doc_to_filename:
            return {}

        cmd = [
            sys.executable,
            "-m",
            "flake8",
            # The path is reported first so that each error can be mapped
            # back to the docstring whose examples produced it.
            "--format=%(path)s\t%(row)d\t%(col)d\t%(code)s\t%(text)s",
            "--max-line-length=88",
            "--ignore=E203,E3,W503,W504,E402,E731,E128,E124,E704",
            *doc_to_filename.values(),
        ]
        response = subprocess.run(cmd, capture_output=True, check=False,
                                  text=True)

    # Temporary file names are unique inside the directory, so the base name
    # identifies the docstring regardless of how flake8 prints the path.
    filename_to_doc = {
        pathlib.Path(file_name).name: doc
        for doc, file_name in doc_to_filename.items()
    }

    all_docs_error_messages = {doc: [] for doc in docs}
    for output in (response.stdout, response.stderr):
        for raw_error_message in output.strip("\n").splitlines():
            parts = raw_error_message.split("\t", maxsplit=4)
            doc = filename_to_doc.get(pathlib.Path(parts[0]).name)
            if len(parts) != 5 or doc is None:
                raise ValueError(
                    f"Unexpected flake8 output: {raw_error_message!r}"
                )
            _, line_num, col_num, err_code, msg = parts
            # Note: we subtract 2 from the line number because
            # 'import numpy as np\nimport pandas as pd\n'
            # is prepended to the docstrings.
            all_docs_error_messages[doc].append(
                (err_code, msg, int(line_num) - 2, int(col_num))
            )

    return all_docs_error_messages


class PandasDocstring(Validator):
def __init__(self, func_name: str, doc_obj=None) -> None:
self.func_name = func_name
Expand All @@ -173,55 +266,6 @@ def examples_source_code(self):
lines = doctest.DocTestParser().get_examples(self.raw_doc)
return [line.source for line in lines]

def validate_pep8(self):
    """
    Run flake8 on the docstring examples and yield the errors found.

    Yields
    ------
    tuple
        (error_code, message, line_number, col_number) for every line that
        flake8 prints. Line numbers are relative to the examples source
        code. Nothing is yielded if the docstring has no examples.
    """
    if not self.examples:
        return

    # F401 is needed to not generate flake8 errors in examples
    # that do not use numpy or pandas. Two spaces precede the inline
    # comments so that the preamble itself does not trigger E261.
    content = "".join(
        (
            "import numpy as np  # noqa: F401\n",
            "import pandas as pd  # noqa: F401\n",
            *self.examples_source_code,
        )
    )

    error_messages = []

    file = tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", delete=False)
    try:
        # Close the file before flake8 reads it so the content is complete.
        with file:
            file.write(content)
        cmd = [
            sys.executable,
            "-m",
            "flake8",
            "--format=%(row)d\t%(col)d\t%(code)s\t%(text)s",
            "--max-line-length=88",
            "--ignore=E203,E3,W503,W504,E402,E731,E128,E124,E704",
            file.name,
        ]
        response = subprocess.run(cmd, capture_output=True, check=False, text=True)
        for output in ("stdout", "stderr"):
            out = getattr(response, output)
            out = out.replace(file.name, "")
            error_messages.extend(out.strip("\n").splitlines())
    finally:
        # delete=False means the file has to be removed explicitly.
        os.unlink(file.name)

    for error_message in error_messages:
        line_number, col_number, error_code, message = error_message.split(
            "\t", maxsplit=3
        )
        # Note: we subtract 2 from the line number because
        # 'import numpy as np\nimport pandas as pd\n'
        # is prepended to the docstrings.
        yield error_code, message, int(line_number) - 2, int(col_number)

def non_hyphenated_array_like(self):
    """Return whether the raw docstring contains the text ``array_like``."""
    raw_text = self.raw_doc
    return "array_like" in raw_text

Expand Down Expand Up @@ -264,14 +308,14 @@ def pandas_validate(func_name: str):

result["examples_errs"] = ""
if doc.examples:
for error_code, error_message, line_number, col_number in doc.validate_pep8():
for err_code, err_message, line_num, col_num in validate_pep8(doc)[doc]:
result["errors"].append(
pandas_error(
"EX03",
error_code=error_code,
error_message=error_message,
line_number=line_number,
col_number=col_number,
error_code=err_code,
error_message=err_message,
line_number=line_num,
col_number=col_num,
)
)
examples_source_code = "".join(doc.examples_source_code)
Expand Down Expand Up @@ -346,7 +390,7 @@ def print_validate_all_results(
output_format: str,
prefix: str | None,
ignore_deprecated: bool,
ignore_errors: dict[str, set[str]],
ignore_errors: dict[str | None, set[str]],
):
if output_format not in ("default", "json", "actions"):
raise ValueError(f'Unknown output_format "{output_format}"')
Expand Down Expand Up @@ -384,7 +428,7 @@ def print_validate_all_results(


def print_validate_one_results(func_name: str,
ignore_errors: dict[str, set[str]]) -> int:
ignore_errors: dict[str | None, set[str]]) -> int:
def header(title, width=80, char="#") -> str:
full_line = char * width
side_len = (width - len(title) - 2) // 2
Expand Down