fix: improve searching file with index, ignoring image

Mara-Li · Mara-Li · commit 7fb004b07f9a · 2024-12-23T01:39:17.000+01:00
diff --git a/mkdocs_embed_file_plugins/plugin.py b/mkdocs_embed_file_plugins/plugin.py
@@ -13,14 +13,14 @@
 import logging
 
 from mkdocs_embed_file_plugins.src.links_correction import (
-    convert_links_if_markdown,
+    MULTIMEDIA_EXTENSIONS, convert_links_if_markdown,
     mini_ez_links,
-)
+    )
 from mkdocs_embed_file_plugins.src.search_quote import (
     search_file_in_documentation,
     search_in_file,
 )
-from mkdocs_embed_file_plugins.src.utils import create_link, strip_comments
+from mkdocs_embed_file_plugins.src.utils import add_not_found_class, create_link, strip_comments
 
 
 def cite(
@@ -115,7 +115,7 @@ def cite(
 def tooltip_not_found(link, soup, msg) -> BeautifulSoup:
     tooltip_template = (
         "<div class='citation'> <a class='link_citation'><i class='fas fa-link'></i> </a>"
-        + '<p style="text-align: center; display: block"><i class="not_found">'
+        + f'<p style="text-align: center; display: block"><i class="not_found" src={link["src"]}>'
         + str(link["alt"])
         + f"</i> {msg}</p>"
         + "</div>"
@@ -146,7 +146,7 @@ def on_post_page(self, output_content, page, config) -> str:
             "img",
             src=lambda src: src is not None
             and "favicon" not in src
-            and not src.endswith(("png", "jpg", "jpeg", "gif", "svg"))
+            and not any(src.lower().endswith(ext) for ext in MULTIMEDIA_EXTENSIONS)
             and "www" not in src
             and "http" not in src
             and "://" not in src,
@@ -223,4 +223,4 @@ def on_post_page(self, output_content, page, config) -> str:
                                 self.config["custom-attributes"],
                                 language_message,
                             )
-        return str(soup)
+        return add_not_found_class(str(soup))
diff --git a/mkdocs_embed_file_plugins/src/links_correction.py b/mkdocs_embed_file_plugins/src/links_correction.py
@@ -6,26 +6,47 @@
 
 from mkdocs_embed_file_plugins.src.search_quote import search_file_in_documentation
 
+import re
+from pathlib import Path
+MULTIMEDIA_EXTENSIONS = (
+    ".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg",  # Images
+    ".mp4", ".avi", ".mov", ".mkv",  # Videos
+    ".mp3", ".wav", ".flac",  # Audio
+    ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",  # Documents
+)
 
 def mini_ez_links(link, base, end, url_whitespace, url_case):
     base_data, url_blog, md_link_path = base
-    url_blog_path = [x for x in url_blog.split("/") if len(x) > 0]
-    url_blog_path = url_blog_path[len(url_blog_path) - 1]
+    url_blog_path = [x for x in url_blog.split("/") if x]
+    url_blog_path = url_blog_path[-1]
+
+    # Multimedia targets (see MULTIMEDIA_EXTENSIONS) never get the "notfound::" marker.
+    if any(link[2].lower().endswith(ext) for ext in MULTIMEDIA_EXTENSIONS) :
+        internal_link = Path(md_link_path, link[2]).resolve()
+        if internal_link.is_file() :
+            return create_url(internal_link, link[2], base, url_blog_path, True)
+        else :
+            # A multimedia file that does not exist is returned as its raw path.
+            return link[2]
+
+    # Resolve the internal path for Markdown files.
     internal_link = Path(md_link_path, link[2]).resolve()
-    return create_url(internal_link, link[2], base, url_blog_path, True)
+    if internal_link.is_file():
+        return create_url(internal_link, link[2], base, url_blog_path, True)
 
+    # When the Markdown file is not found, prefix the generated URL with "notfound::".
+    return f"notfound::{create_url(internal_link, link[2], base, url_blog_path, True)}"
 
 def convert_links_if_markdown(quote_str, base):
     """Convert links if the file is a markdown file."""
-    # search for links
+    # Search for links
     links = re.findall(r"\[([^\]]*)\]\(([^\)]*)\)", quote_str)
     base_data, url_blog, md_link_path = base
     if not url_blog:
-        # generate a fake url for the links
         raise Exception("site_url is not defined in mkdocs.yml")
 
-    url_blog_path = [x for x in url_blog.split("/") if len(x) > 0]
-    url_blog_path = url_blog_path[len(url_blog_path) - 1]
+    url_blog_path = [x for x in url_blog.split("/") if x]
+    url_blog_path = url_blog_path[-1]
     for link in links:
         if not link[1].startswith("http"):
             internal_link = Path(md_link_path, link[1]).resolve()
@@ -34,48 +55,40 @@ def convert_links_if_markdown(quote_str, base):
     return quote_str
 
 
-def create_url(internal_link, link, base, url_blog_path, wikilinks=False):
+def create_url(internal_link, link, base, url_blog_path, wikilinks=False) :
+    """Return the URL of ``link`` under the site URL, searching the docs if it is not a file."""
     base, url_blog, md_link_path = base
-    if os.path.isfile(internal_link):
-        internal_link = str(internal_link).replace(base, "")
-    else:
-        if link.endswith(".md"):
-            if wikilinks:
-                internal_link = str(
-                    search_file_in_documentation(
-                        Path(link).resolve(), md_link_path.parent, base
-                    )
-                )
-            else:
-                internal_link = str(
-                    search_file_in_documentation(link, md_link_path.parent, base)
-                )
-        if not os.path.isfile(internal_link):
-            file_name = link.replace("index", "")
-            file_name = file_name.replace("../", "")
-            file_name = file_name.replace("./", "")
-            file_name = file_name.replace(".md", "")
-            all_docs = [
-                re.sub(
-                    rf"(.*)({url_blog_path})?/docs/*", "", x.replace("\\", "/")
-                ).replace(".md", "")
-                for x in iglob(str(base) + os.sep + "**", recursive=True)
-                if os.path.isfile(x)
-            ]
-            file_found = [
-                "/" + x
-                for x in all_docs
-                if os.path.basename(x) == file_name or x == file_name
-            ]
-            if file_found:
-                internal_link = file_found[0]
-            else:
-                internal_link = file_name
-    filepath = internal_link.replace(base, "")
-    url = filepath.replace("\\", "/").replace(".md", "")
-    url = re.sub(r"\/$", "", str(url_blog)) + "/" + quote(url)
-    if not url.startswith("http"):
+    internal_path = Path(internal_link)
+    # Media links skip Markdown resolution and are appended to the site URL.
+    # Join as plain strings: Path(url_blog) would collapse "https://" into "https:/".
+    if any(link.lower().endswith(ext) for ext in MULTIMEDIA_EXTENSIONS) :
+        media_path = Path(link.replace("\\", "/")).as_posix().lstrip("/")
+        return str(url_blog).rstrip("/") + "/" + media_path
+
+    # Use the resolved path when it is an existing file; otherwise search the docs tree.
+    if internal_path.is_file() :
+        internal_link = str(internal_path).replace(str(base), "")
+    else :
+        resolved = search_file_in_documentation(link, md_link_path.parent, base)
+
+        # Explicit fallback to "<name>/index.md" (NOTE(review): the callee matches on basename only).
+        if resolved == 0 and not link.endswith("index.md") :
+            folder_name = os.path.splitext(link)[0]
+            resolved = search_file_in_documentation(f"{folder_name}/index.md", md_link_path.parent, base)
+
+        if resolved == 0 :
+            internal_link = str(link).replace("../", "").replace("./", "").replace(".md", "")
+        else :
+            internal_link = str(resolved).replace(str(base), "")
+
+    # Normalize the final path for Markdown files.
+    filepath = internal_link.replace("\\", "/").replace(".md", "")
+    url = re.sub(r"/+$", "", str(url_blog)) + "/" + quote(filepath)
+
+    # Add the protocol when it is missing.
+    if not url.startswith("http") :
         url = "https://" + url
-    if not url.endswith("/") and not re.search(r"\.(.*)$", url):
-        url = url + "/"
+    # Trailing slash for directory-style URLs only: skip it when the last segment has an extension.
+    if not url.endswith("/") and not re.search(r"\.[^/]+$", url) :
+        url += "/"
     return url
diff --git a/mkdocs_embed_file_plugins/src/search_quote.py b/mkdocs_embed_file_plugins/src/search_quote.py
@@ -41,34 +41,40 @@ def search_in_file(citation_part: str, contents: str) -> str:
         citation_part = citation_part.replace("#", "")
         for i in data:
             if re.search(re.escape(citation_part) + "$", i):
-                print("found!", i.replace(citation_part, ""))
                 return i.replace(citation_part, "")
     return ""
 
 
-def search_file_in_documentation(
-    link: Union[Path, str],
-    config_dir: Path,
-    base: any,  # type: ignore
-) -> Union[Path, int]:
+def search_file_in_documentation(link: Union[Path, str], config_dir: Path, base: Path) -> Union[Path, int]:
+    """Search ``config_dir`` recursively for the Markdown file that ``link`` names.
+
+    Returns the first matching path, or 0 when nothing matches. ``base`` is not used here.
+    """
     file_name = os.path.basename(link)
+    # Skip links that are not relevant (e.g. images, scripts, etc.).
+    # NOTE(review): `[^./\\]+$` also matches the tail of names like "photo.png" — confirm intent.
+    if not re.search(r"(\.md$|[^./\\]+$)", file_name, re.IGNORECASE):
+        return 0
+    # Append ".md" when it is missing.
     if not file_name.endswith(".md"):
-        file_name = file_name + ".md"
-    if not file_name.startswith("index"):
-        for p in config_dir.rglob(f"*{file_name}"):
-            return p
-    else:
-        baseParent = Path(base).parents
-        linksParent = Path(link).parents
-        linksBaseEquals = [i for i in linksParent if i in baseParent]
-        if (
-            (len(baseParent) == 0)
-            or (len(linksParent) == 0)
-            or (len(linksBaseEquals) == 0)
-        ):
-            return 0
-        linksBaseEquals = linksBaseEquals[0]
-        relative = Path(str(link).replace(str(linksBaseEquals), ""))
-        for p in Path(base).rglob(f"**{relative}"):
-            return p
+        file_name += ".md"
+
+    # Direct search for the file anywhere below config_dir; the first hit wins.
+    # NOTE(review): only the basename is matched, so "folder/index.md" can hit any "*index.md".
+    for p in config_dir.rglob(f"*{file_name}"):
+        return p
+
+    # Look for a folder named after the file (without extension) that holds an index.md.
+    folder_name = os.path.splitext(file_name)[0]
+    folder_path = config_dir / folder_name / "index.md"
+    if folder_path.is_file():
+        return folder_path
+
+    # Wider search: try the same "<folder>/index.md" under every entry below config_dir.
+    for parent in config_dir.rglob("*"):
+        potential_path = parent / folder_name / "index.md"
+        if potential_path.is_file():
+            return potential_path
+
+    # Nothing was found.
     return 0
diff --git a/mkdocs_embed_file_plugins/src/utils.py b/mkdocs_embed_file_plugins/src/utils.py
@@ -1,5 +1,5 @@
 import re
-
+from bs4 import BeautifulSoup
 
 def strip_comments(markdown):
     file_content = markdown.split("\n")
@@ -20,3 +20,23 @@ def create_link(link):
         return link[:-1] + ".md"
     else:
         return link + ".md"
+
+
+def add_not_found_class(html) :
+    """Replace each ``<a href="notfound::…">`` in ``html`` with a ``<span>`` and return the HTML.
+
+    The span gets the ``ezlinks_not_found`` class and the cleaned target in ``src``.
+    """
+    marker = "notfound::"
+    soup = BeautifulSoup(html, "html.parser")
+    for a_tag in soup.find_all("a") :
+        href = a_tag.get("href", "")
+        if not href.startswith(marker) :
+            continue
+        # Convert the tag in place so nested markup inside the link is preserved.
+        a_tag.name = "span"
+        del a_tag["href"]
+        a_tag["class"] = a_tag.get("class", []) + ["ezlinks_not_found"]
+        a_tag["src"] = href[len(marker):]
+
+    return str(soup)