Exclude pages recursively

Erik Thorsell · Erik Thorsell · commit 11f8aa66a36a · 2021-06-09T07:56:01.000+02:00
Add get_excluded_pages(), which recursively expands user-excluded paths and returns a list of all pages to be excluded. The previous implementation only looked at the exact paths provided in the mkdocs.yaml, whereas this implementation works for excluding both files and directories. Possible improvement: get_excluded_pages() is called once per is_excluded(), which in turn is called once per on_post_page(). Since self._options.exclude_pages will not change between on_post_page() calls, this is inefficient and could be an issue in very large projects. Suggestions for how to make get_excluded_pages() run only once per .pdf generation are appreciated! Closes #58
diff --git a/mkdocs_with_pdf/generator.py b/mkdocs_with_pdf/generator.py
@@ -3,6 +3,7 @@
 import re
 from importlib import import_module
 from importlib.util import module_from_spec, spec_from_file_location
+from typing import List
 
 from bs4 import BeautifulSoup, PageElement
 from weasyprint import HTML, urls
@@ -43,8 +44,31 @@ def on_nav(self, nav):
     def on_post_page(self, output_content: str, page, pdf_path: str) -> str:
         """ on_post_page """
 
+        def get_excluded_pages(e_paths: List[str]) -> List[str]:  # expand directory entries of e_paths into per-file page paths
+
+            def get_files_in_dir(path: str) -> List[str]:  # recursively collect every file found under path
+                files = list()
+                for f in os.listdir(path):
+                    sub_path = os.path.join(path, f)
+                    if os.path.isdir(sub_path):
+                        files += get_files_in_dir(sub_path)
+                    else:
+                        files.append(os.path.splitext(sub_path)[0] + '/')  # file extension replaced by a trailing '/'
+                return files
+
+            excluded_pages = list()
+            cwd = os.getcwd()  # remembered so the working directory can be restored below
+            os.chdir("docs")  # entries are resolved relative to "docs"; NOTE(review): hard-coded name - confirm it matches the configured docs directory
+            for path in e_paths:
+                if os.path.isdir(path):
+                    excluded_pages += get_files_in_dir(path)
+                else:
+                    excluded_pages.append(path)  # not a directory: entry is kept verbatim
+            os.chdir(cwd)  # NOTE(review): not reached if an exception is raised above (no try/finally), leaving the process in "docs"
+            return excluded_pages
+
         def is_excluded(url: str) -> bool:
-            return url in self._options.exclude_pages
+            return url in get_excluded_pages(self._options.exclude_pages)  # NOTE: the exclusion list is re-expanded from disk on every call
 
         if is_excluded(page.url):
             self.logger.info(f'Page skipped: [{page.title}]({page.url})')