Skip to content

Commit 7fb004b

Browse files
committed
fix: improve searching file with index, ignoring image
1 parent 84cd36f commit 7fb004b

File tree

4 files changed

+119
-80
lines changed

4 files changed

+119
-80
lines changed

mkdocs_embed_file_plugins/plugin.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@
1313
import logging
1414

1515
from mkdocs_embed_file_plugins.src.links_correction import (
16-
convert_links_if_markdown,
16+
MULTIMEDIA_EXTENSIONS, convert_links_if_markdown,
1717
mini_ez_links,
18-
)
18+
)
1919
from mkdocs_embed_file_plugins.src.search_quote import (
2020
search_file_in_documentation,
2121
search_in_file,
2222
)
23-
from mkdocs_embed_file_plugins.src.utils import create_link, strip_comments
23+
from mkdocs_embed_file_plugins.src.utils import add_not_found_class, create_link, strip_comments
2424

2525

2626
def cite(
@@ -115,7 +115,7 @@ def cite(
115115
def tooltip_not_found(link, soup, msg) -> BeautifulSoup:
116116
tooltip_template = (
117117
"<div class='citation'> <a class='link_citation'><i class='fas fa-link'></i> </a>"
118-
+ '<p style="text-align: center; display: block"><i class="not_found">'
118+
+ f'<p style="text-align: center; display: block"><i class="not_found" src={link["src"]}>'
119119
+ str(link["alt"])
120120
+ f"</i> {msg}</p>"
121121
+ "</div>"
@@ -146,7 +146,7 @@ def on_post_page(self, output_content, page, config) -> str:
146146
"img",
147147
src=lambda src: src is not None
148148
and "favicon" not in src
149-
and not src.endswith(("png", "jpg", "jpeg", "gif", "svg"))
149+
and not any(src.lower().endswith(ext) for ext in MULTIMEDIA_EXTENSIONS)
150150
and "www" not in src
151151
and "http" not in src
152152
and "://" not in src,
@@ -223,4 +223,4 @@ def on_post_page(self, output_content, page, config) -> str:
223223
self.config["custom-attributes"],
224224
language_message,
225225
)
226-
return str(soup)
226+
return add_not_found_class(str(soup))

mkdocs_embed_file_plugins/src/links_correction.py

Lines changed: 62 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -6,26 +6,47 @@
66

77
from mkdocs_embed_file_plugins.src.search_quote import search_file_in_documentation
88

9+
import re
10+
from pathlib import Path
11+
# File extensions that are handled as multimedia/binary assets instead of
# Markdown notes. Callers compare them with ``str.endswith`` against the
# lower-cased link, so every entry is lower-case and keeps its leading dot.
MULTIMEDIA_EXTENSIONS = (
    ".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg",  # Images
    ".mp4", ".avi", ".mov", ".mkv",  # Videos
    ".mp3", ".wav", ".flac",  # Audio
    ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",  # Documents
)
917

1018
def mini_ez_links(link, base, end, url_whitespace, url_case):
    """Build the URL for a link target found inside an embedded note.

    Args:
        link: Indexable match data; only ``link[2]`` (the link target) is read.
        base: Triple ``(base_data, url_blog, md_link_path)``. ``url_blog`` is
            the site URL, ``md_link_path`` is the path the target is resolved
            against, and ``base_data`` is forwarded to the documentation
            search (presumably the docs directory; confirm against caller).
        end: Not read by this function; kept for call compatibility.
        url_whitespace: Not read by this function; kept for call compatibility.
        url_case: Not read by this function; kept for call compatibility.

    Returns:
        The URL produced by ``create_url`` when the target can be located.
        A multimedia target that does not exist on disk is returned
        unchanged. A Markdown target that cannot be located at all is
        returned as ``"notfound::<url>"`` so a later pass can restyle it.

    Raises:
        Exception: If the site URL is empty or contains no path segment.
    """
    base_data, url_blog, md_link_path = base

    # Same explicit failure as convert_links_if_markdown, instead of an
    # IndexError/AttributeError on an empty or missing site URL.
    segments = [part for part in url_blog.split("/") if part] if url_blog else []
    if not segments:
        raise Exception("site_url is not defined in mkdocs.yml")
    url_blog_path = segments[-1]

    target = link[2]
    internal_link = Path(md_link_path, target).resolve()

    # A target that exists at the resolved path is linked directly, whatever
    # its type.
    if internal_link.is_file():
        return create_url(internal_link, target, base, url_blog_path, True)

    # Missing multimedia files keep their raw path and are never flagged.
    if target.lower().endswith(tuple(MULTIMEDIA_EXTENSIONS)):
        return target

    # create_url falls back to a search through the documentation tree, so
    # the link is only "not found" when that search fails as well.
    located = search_file_in_documentation(target, md_link_path.parent, base_data)
    url = create_url(internal_link, target, base, url_blog_path, True)
    if located:
        return url
    return f"notfound::{url}"
1739

1840
def convert_links_if_markdown(quote_str, base):
1941
"""Convert links if the file is a markdown file."""
20-
# search for links
42+
# Search for links
2143
links = re.findall(r"\[([^\]]*)\]\(([^\)]*)\)", quote_str)
2244
base_data, url_blog, md_link_path = base
2345
if not url_blog:
24-
# generate a fake url for the links
2546
raise Exception("site_url is not defined in mkdocs.yml")
2647

27-
url_blog_path = [x for x in url_blog.split("/") if len(x) > 0]
28-
url_blog_path = url_blog_path[len(url_blog_path) - 1]
48+
url_blog_path = [x for x in url_blog.split("/") if x]
49+
url_blog_path = url_blog_path[-1]
2950
for link in links:
3051
if not link[1].startswith("http"):
3152
internal_link = Path(md_link_path, link[1]).resolve()
@@ -34,48 +55,40 @@ def convert_links_if_markdown(quote_str, base):
3455
return quote_str
3556

3657

37-
def create_url(internal_link, link, base, url_blog_path, wikilinks=False):
    """Turn a link found in an embedded note into a site URL.

    Args:
        internal_link: Candidate filesystem path of the link target.
        link: The link target as written in the note.
        base: Triple ``(docs_root, url_blog, md_link_path)``. ``docs_root`` is
            the path prefix removed from resolved files, ``url_blog`` is the
            site URL, and the parent of ``md_link_path`` is the directory
            searched when ``internal_link`` is not an existing file.
        url_blog_path: Not read by this function; kept for call compatibility.
        wikilinks: Not read by this function; kept for call compatibility.

    Returns:
        For multimedia targets, the site URL joined with the link (a link
        starting with "/" is returned on its own). For every other target,
        the site URL joined with the percent-quoted path of the Markdown
        page, without its ``.md`` suffix, prefixed with ``https://`` when the
        result does not start with ``http`` and terminated by "/" unless the
        last segment contains a dot.
    """
    from pathlib import PurePosixPath

    docs_root, url_blog, md_link_path = base
    site_root = re.sub(r"/+$", "", str(url_blog))

    # Multimedia files are linked as-is, never rewritten as page URLs.
    if link.lower().endswith(tuple(MULTIMEDIA_EXTENSIONS)):
        # Only the link goes through pathlib: normalising the whole URL as a
        # path would collapse the "//" that follows the scheme.
        asset = str(PurePosixPath(link.replace("\\", "/")))
        if asset.startswith("/"):
            return asset
        return f"{site_root}/{asset}"

    internal_path = Path(internal_link)
    if internal_path.is_file():
        relative = str(internal_path).replace(str(docs_root), "")
    else:
        search_root = md_link_path.parent
        resolved = search_file_in_documentation(link, search_root, docs_root)

        # Explicit fallback: "<name>" may designate the folder "<name>/index.md".
        if not resolved and not link.endswith("index.md"):
            folder_name = os.path.splitext(link)[0]
            resolved = search_file_in_documentation(
                f"{folder_name}/index.md", search_root, docs_root
            )

        # Truthiness covers both "not found" sentinels (0 and None).
        if resolved:
            relative = str(resolved).replace(str(docs_root), "")
        else:
            relative = (
                str(link).replace("../", "").replace("./", "").replace(".md", "")
            )

    # Leading slashes are dropped so the join below cannot produce "//".
    filepath = relative.replace("\\", "/").replace(".md", "").lstrip("/")
    url = f"{site_root}/{quote(filepath)}"

    # Add the scheme when it is missing.
    if not url.startswith("http"):
        url = "https://" + url

    # Only the last segment is inspected: dots in the host name must not be
    # mistaken for a file extension.
    last_segment = url.rsplit("/", 1)[-1]
    if last_segment and "." not in last_segment:
        url += "/"

    return url

mkdocs_embed_file_plugins/src/search_quote.py

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -41,34 +41,40 @@ def search_in_file(citation_part: str, contents: str) -> str:
4141
citation_part = citation_part.replace("#", "")
4242
for i in data:
4343
if re.search(re.escape(citation_part) + "$", i):
44-
print("found!", i.replace(citation_part, ""))
4544
return i.replace(citation_part, "")
4645
return ""
4746

4847

49-
def search_file_in_documentation(link: Union[Path, str], config_dir: Path, base: Path) -> Union[Path, int]:
    """Locate the Markdown file a link refers to somewhere under *config_dir*.

    The search runs in three steps:

    1. Files whose name ends with the link's file name (``.md`` is appended
       when missing). A file with exactly that name located in the directory
       named by the link is preferred, then any file with exactly that name,
       then the first remaining match.
    2. ``<config_dir>/<name>/index.md``.
    3. Any deeper ``<name>/index.md`` below *config_dir*.

    Args:
        link: Link target; its last component is the file name searched for.
        config_dir: Directory that is searched recursively.
        base: Not read by this function; kept for call compatibility.

    Returns:
        The path of the matching file, or ``0`` when nothing matches.
    """
    file_name = os.path.basename(link)

    # Nothing to look for when the link has no final component or ends with
    # a dot or a path separator. No filtering by extension happens here.
    if not file_name or file_name[-1] in "./\\":
        return 0

    # Append ".md" when missing (an upper-case ".MD" counts as present).
    if not file_name.lower().endswith(".md"):
        file_name += ".md"

    # Directory named by the link itself, used to tell apart files that
    # share a common name such as "index.md".
    parent_name = os.path.basename(os.path.dirname(link))
    if parent_name in (".", ".."):
        parent_name = ""

    def _rank(candidate: Path) -> tuple:
        exact = candidate.name == file_name
        same_dir = exact and bool(parent_name) and candidate.parent.name == parent_name
        return (not same_dir, not exact)

    # min() keeps the first of equally ranked candidates, so the first match
    # is still returned when no better one exists.
    best = min(config_dir.rglob(f"*{file_name}"), key=_rank, default=None)
    if best is not None:
        return best

    # Look for a folder named like the file, holding an index page.
    folder_name = os.path.splitext(file_name)[0]
    direct = config_dir / folder_name / "index.md"
    if direct.is_file():
        return direct

    # Same lookup in every sub-folder; only index pages are visited.
    for index_file in config_dir.rglob("index.md"):
        folder = index_file.parent
        if folder != config_dir and folder.name == folder_name and index_file.is_file():
            return index_file

    # No file found.
    return 0

mkdocs_embed_file_plugins/src/utils.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import re
2-
2+
from bs4 import BeautifulSoup
33

44
def strip_comments(markdown):
55
file_content = markdown.split("\n")
@@ -20,3 +20,23 @@ def create_link(link):
2020
return link[:-1] + ".md"
2121
else:
2222
return link + ".md"
23+
24+
25+
def add_not_found_class(html):
    """Turn anchors flagged with the ``notfound::`` marker into styled spans.

    Every ``<a>`` whose ``href`` starts with ``notfound::`` becomes a
    ``<span>`` that keeps the anchor's content and other attributes, gains
    the ``ezlinks_not_found`` class, loses its ``href`` and carries the
    cleaned target in a ``src`` attribute.

    Args:
        html: HTML markup as a string.

    Returns:
        The markup with flagged anchors converted. Markup that does not
        contain the marker is returned unchanged, without being re-parsed.
    """
    marker = "notfound::"

    # Most pages hold no flagged link; skip the HTML parse for them.
    if marker not in html:
        return html

    soup = BeautifulSoup(html, "html.parser")

    for a_tag in soup.find_all("a"):
        href = a_tag.get("href", "")
        if not href.startswith(marker):
            continue

        # Remove the leading marker only, not later occurrences in the target.
        clean_href = href[len(marker):]

        classes = a_tag.get("class", [])
        if isinstance(classes, str):
            classes = classes.split()
        a_tag["class"] = list(classes) + ["ezlinks_not_found"]

        # The tag is converted in place so that nested markup inside the
        # anchor is kept rather than reduced to a single string.
        del a_tag["href"]
        a_tag["src"] = clean_href
        a_tag.name = "span"

    return str(soup)

0 commit comments

Comments
 (0)