Skip to content

[CI] refine check_api_label_cn #7256

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 113 additions & 72 deletions ci_scripts/check_api_label_cn.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
from __future__ import annotations

import argparse
import logging
import os
import re
import sys
from pathlib import Path

# Precompiled regex patterns used by the label checks in this script.
# Matches a reST label definition such as ``.. _cn_api_paddle_abs:``. The dots
# are escaped so that only a literal ``..`` introduces a label.
CN_API_LABEL_PATTERN = re.compile(r"\.\. _([a-zA-Z0-9_]+):")
# Captures the body of a ``:ref:`...``` role.
REF_PATTERN = re.compile(r":ref:`([^`]+)`")
# Pulls ``label`` out of the ``title <label>`` form of a reference body.
API_LABEL_EXTRACT_PATTERN = re.compile(r".+?<(?P<api_label>.+?)>")
# Only labels carrying this prefix are validated.
CN_API_PREFIX = "cn_api_paddle"

logger = logging.getLogger()
if logger.handlers:
# we assume the first handler is the one we want to configure
Expand All @@ -20,87 +27,123 @@
logger.setLevel(logging.INFO)


# check file's api_label
def check_api_label(file_path: Path, doc_root: Path) -> bool:
    """Check whether the first line of a file is its expected api_label.

    Args:
        file_path: Path of the file whose first line is read.
        doc_root: Root directory handed to ``generate_cn_label`` to derive
            the expected label.

    Returns:
        True when the stripped first line equals the generated label.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        first_line = f.readline().strip()
    return first_line == generate_cn_label(file_path, doc_root)


# path -> api_label (the first line's style)
def generate_en_label_by_path(file):
    """Build the expected first-line label for a ``*_cn.rst`` path string.

    The ``_cn.rst`` suffix is dropped, the remaining path components are
    joined with underscores, and the result is wrapped as ``.. _cn_<...>:``.

    Args:
        file: Path string of the document.

    Returns:
        The label line expected at the top of the document.
    """
    trimmed = file.removesuffix("_cn.rst")
    joined = "_".join(Path(trimmed).parts)
    return f".. _cn_{joined}:"
def generate_cn_label(file_path: Path, doc_root: Path) -> str:
    """Generate the expected api_label line from a file path.

    Args:
        file_path: Path to the document; must be located under ``doc_root``.
        doc_root: Directory the label components are computed relative to.

    Returns:
        ``.. _cn_<parts>:`` where ``<parts>`` are the relative path components
        (with a trailing ``_cn`` removed from the file stem) joined by ``_``.

    Raises:
        ValueError: If ``file_path`` is not relative to ``doc_root``.
    """
    relative_path = file_path.relative_to(doc_root)
    # Drop the extension and the "_cn" marker from the final component only.
    stem = relative_path.stem.removesuffix("_cn")
    parts = relative_path.with_name(stem).parts
    label = "_".join(parts)
    return f".. _cn_{label}:"


# traverse doc/api to append api_label in list
def find_all_api_labels_in_dir(rootdir):
    """Walk ``rootdir + API`` and gather the labels of every checked file.

    ``API`` is a module-level name that must be set before this is called.
    Each file is filtered with ``should_test`` using its path with the
    ``rootdir`` prefix removed.

    Args:
        rootdir: Documentation root as a string.

    Returns:
        A list of the labels found by ``find_api_labels_in_one_file``.
    """
    all_api_labels = []
    for root, dirs, files in os.walk(rootdir + API):
        for file in files:
            real_path = Path(root) / file
            path = str(real_path).removeprefix(rootdir)
            if not should_test(path):
                continue
            all_api_labels.extend(find_api_labels_in_one_file(real_path))
    return all_api_labels
def collect_api_labels(api_root: Path) -> set[str]:
    """Collect the api labels defined in the checked files under ``api_root``.

    Every ``*.rst`` path found recursively is filtered through ``need_check``
    and its labels are gathered with ``extract_api_labels``.

    Args:
        api_root: Directory searched recursively.

    Returns:
        The union of the labels extracted from all qualifying files.
    """
    labels = set()
    for rst_file in api_root.rglob("*.rst"):
        # Skip anything matching the glob that is not a regular file.
        if not rst_file.is_file():
            continue
        if not need_check(rst_file, api_root):
            continue
        labels.update(extract_api_labels(rst_file))
    return labels


# api_labels in a file
def extract_api_labels(file_path: Path) -> set[str]:
    """Return the labels defined in one file that start with ``CN_API_PREFIX``.

    Each line is searched with ``CN_API_LABEL_PATTERN``; labels that do not
    carry the prefix are ignored.

    Args:
        file_path: File to scan, read as UTF-8.

    Returns:
        The set of matching label names (without the ``.. _`` / ``:`` wrapper).
    """
    labels = set()
    with open(file_path, "r", encoding="utf-8") as f:
        # Iterate the handle directly instead of materialising all lines.
        for line in f:
            match = CN_API_LABEL_PATTERN.search(line)
            if not match:
                continue
            label = match.group(1)
            if not label.startswith(CN_API_PREFIX):
                continue
            labels.add(label)
    return labels


# api doc for checking
def need_check(file_path: Path, api_root: Path) -> bool:
    """Tell whether a file is an API document subject to label checking.

    A file qualifies when its name ends with ``_cn.rst``, it is neither
    ``Overview_cn.rst`` nor ``index_cn.rst``, and it lies under ``api_root``.

    Args:
        file_path: Candidate file.
        api_root: Directory the file must be relative to.

    Returns:
        True if all three conditions hold.
    """
    return (
        file_path.name.endswith("_cn.rst")
        and file_path.name not in {"Overview_cn.rst", "index_cn.rst"}
        and file_path.is_relative_to(api_root)
    )


def validate_api_label_references(
    files: list[Path], valid_api_labels: set[str]
) -> list[str]:
    """Find ``:ref:`` usages that point at unknown api labels.

    Args:
        files: Paths to scan. Entries whose suffix is not ``.rst`` are
            skipped so that non-reST (possibly binary) files are never
            decoded as UTF-8 text.
        valid_api_labels: Labels considered defined.

    Returns:
        One message per reference whose label starts with ``CN_API_PREFIX``
        but is missing from ``valid_api_labels``. Files are visited in sorted
        order so the messages come out in a stable order.
    """
    errors = []
    for file in sorted(files):
        if file.suffix != ".rst":
            continue
        with open(file, "r", encoding="utf-8") as f:
            content = f.read()
        for ref_body in REF_PATTERN.findall(content):
            api_label = ref_body
            # A reference may be written as "title <label>"; keep the label.
            if api_label_match := API_LABEL_EXTRACT_PATTERN.match(ref_body):
                api_label = api_label_match.group("api_label")
            if not api_label.startswith(CN_API_PREFIX):
                continue
            if api_label in valid_api_labels:
                continue
            errors.append(f"api label `{api_label}` in `{file}`")
    return errors


def get_custom_files_for_checking_usage(api_root: Path) -> set[Path]:
    """Collect every ``*.rst`` file under a directory for reference checking.

    Args:
        api_root: Directory searched recursively.

    Returns:
        All regular ``*.rst`` files found, except those named
        ``set_global_initializer_cn.rst``.
    """
    # TODO: add more dir for checking
    custom_files = set()
    for rst_file in api_root.rglob("*.rst"):
        if not rst_file.is_file():
            continue
        if rst_file.name in {"set_global_initializer_cn.rst"}:
            # TODO: how to deal with `api_paddle_Tensor_create_tensor`?
            continue
        custom_files.add(rst_file)
    return custom_files


def run_cn_api_label_checking(
    doc_root: Path, api_root: Path, files: list[Path]
) -> None:
    """Run the first-line label check and the label-reference check.

    Args:
        doc_root: Documentation root; changed files are resolved against it.
        api_root: Root of the API documents whose labels count as valid.
        files: Changed files, given relative to ``doc_root``.

    Raises:
        SystemExit: With status 1 when a checked file has an unexpected first
            line or when any reference to an unknown api label is found.
    """
    # get real path for changed files
    real_path_files_set = {Path(doc_root) / file for file in files}

    # check the api_label in the first line for changed files
    for file_path in real_path_files_set:
        if need_check(file_path, api_root) and not check_api_label(
            file_path, doc_root
        ):
            logger.error(
                f"The first line in {file_path} is not available, please re-check it!"
            )
            sys.exit(1)

    # collect all api_labels in api_root
    valid_api_labels = collect_api_labels(api_root)

    # check the usage of api_label in the changed files plus every rst file
    # found under doc_root
    api_label_usage_file_set = (
        real_path_files_set | get_custom_files_for_checking_usage(doc_root)
    )

    if errors := validate_api_label_references(
        api_label_usage_file_set, valid_api_labels
    ):
        # These entries are references to labels that do NOT exist.
        logger.error("Found invalid api labels usage as follows:")
        for i, error in enumerate(errors, 1):
            logger.error(f"{i}: {error}")
        sys.exit(1)

    print("All api_label check success in PR !")


Expand All @@ -110,29 +153,27 @@ def parse_args():
"""
parser = argparse.ArgumentParser(description="cn api_label checking")
parser.add_argument(
"rootdir",
"doc_root",
type=Path,
help="the dir DOCROOT",
type=str,
default="/FluidDoc/docs/",
default=Path("/FluidDoc/docs"),
)

parser.add_argument(
"apiroot",
type=str,
help="the dir APIROOT",
default="/FluidDoc/docs/api/",
"api_root",
type=Path,
help="the dir api_root",
default=Path("/FluidDoc/docs/api"),
)
parser.add_argument(
"all_git_files",
type=str,
type=Path,
nargs="*",
help="files need to check",
)
args = parser.parse_args()
return args
return parser.parse_args()


if __name__ == "__main__":
    # Parse the CLI arguments and run both label checks on the given files.
    args = parse_args()
    run_cn_api_label_checking(args.doc_root, args.api_root, args.all_git_files)
4 changes: 2 additions & 2 deletions ci_scripts/check_api_label_cn.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ set -x

FLUIDDOCDIR=${FLUIDDOCDIR:=/FluidDoc}

DOCROOT=${FLUIDDOCDIR}/docs/
APIROOT=${DOCROOT}/api/
DOCROOT=${FLUIDDOCDIR}/docs
APIROOT=${DOCROOT}/api

SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
source ${SCRIPT_DIR}/utils.sh
Expand Down