|
1 | 1 | import os
|
2 | 2 | import xml.etree.ElementTree as ET
|
3 |
| -from datetime import datetime |
4 |
| -from email.utils import format_datetime, parsedate_to_datetime |
| 3 | +from datetime import datetime, timezone |
| 4 | +from email.utils import parsedate_to_datetime |
5 | 5 |
|
6 | 6 | # Path to your docs folder
|
7 | 7 | docs_folder = 'docs'
|
8 |
| -rss_file = os.path.join(docs_folder, 'rss.xml') |
| 8 | +rss_file = 'docs/rss.xml' |
9 | 9 |
|
10 |
| -# Get a list of all PDF files in docs and subdirectories |
| 10 | +# Get a list of all PDF files in docs and its subdirectories |
11 | 11 | pdf_files = []
|
12 |
| -for root_dir, dirs, files in os.walk(docs_folder): |
| 12 | +for root, dirs, files in os.walk(docs_folder): |
13 | 13 | for file in files:
|
14 | 14 | if file.endswith(".pdf"):
|
15 |
| - pdf_files.append(os.path.join(root_dir, file)) |
| 15 | + pdf_files.append(os.path.join(root, file)) |
16 | 16 |
|
17 | 17 | print(f"Found {len(pdf_files)} PDFs in the 'docs' folder.")
|
18 | 18 |
|
|
28 | 28 | ET.SubElement(channel, "link").text = "https://github.com/damirlj/modern_cpp_tutorials"
|
29 | 29 | ET.SubElement(channel, "description").text = "New articles and updates in the docs/ folder"
|
30 | 30 |
|
31 |
| -# Create a map of current items to remove duplicates |
32 |
| -existing_items = {item.find("guid").text: item for item in channel.findall("item") if item.find("guid") is not None} |
| 31 | +# Create a dictionary of existing items by GUID (commit URL) |
| 32 | +existing_items = {item.find("guid").text: item for item in channel.findall("item")} |
33 | 33 |
|
34 |
| -# Get current date in RFC 2822 format (for RSS) |
35 |
| -current_date = format_datetime(datetime.utcnow()) |
| 34 | +# Get current UTC date with tzinfo |
| 35 | +current_date = datetime.now(timezone.utc).strftime("%a, %d %b %Y %H:%M:%S GMT") |
36 | 36 |
|
37 |
| -# Add or update RSS items |
| 37 | +# Add or update items |
38 | 38 | for pdf in pdf_files:
|
39 | 39 | relative_path = os.path.relpath(pdf, docs_folder)
|
40 | 40 | commit_url = f"https://github.com/damirlj/modern_cpp_tutorials/blob/main/{relative_path}"
|
41 | 41 |
|
42 |
| - # Remove old item if it exists |
| 42 | + # If item exists, update pubDate; otherwise, create new |
43 | 43 | if commit_url in existing_items:
|
44 |
| - channel.remove(existing_items[commit_url]) |
| 44 | + existing_items[commit_url].find("pubDate").text = current_date |
| 45 | + else: |
| 46 | + item = ET.Element("item") |
| 47 | + ET.SubElement(item, "title").text = relative_path |
| 48 | + ET.SubElement(item, "link").text = commit_url |
| 49 | + ET.SubElement(item, "guid").text = commit_url |
| 50 | + ET.SubElement(item, "pubDate").text = current_date |
| 51 | + channel.append(item) |
45 | 52 |
|
46 |
| - # Create and add the new item |
47 |
| - item = ET.Element("item") |
48 |
| - ET.SubElement(item, "title").text = relative_path |
49 |
| - ET.SubElement(item, "link").text = commit_url |
50 |
| - ET.SubElement(item, "guid").text = commit_url |
51 |
| - ET.SubElement(item, "pubDate").text = current_date |
52 |
| - channel.append(item) |
53 |
| - |
54 |
| -# Sort items by pubDate descending |
55 |
| -items = channel.findall("item") |
56 |
| - |
57 |
| -# Parse pubDate strings to datetime objects for sorting |
| 53 | +# Sort all items by pubDate descending |
58 | 54 | def get_pub_date(item):
|
59 |
| - pub_date = item.find("pubDate").text |
60 |
| - return parsedate_to_datetime(pub_date) |
| 55 | + pub_date_text = item.find("pubDate").text |
| 56 | + dt = parsedate_to_datetime(pub_date_text) |
| 57 | + return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc) |
61 | 58 |
|
| 59 | +items = channel.findall("item") |
62 | 60 | items.sort(key=get_pub_date, reverse=True)
|
63 | 61 |
|
64 |
| -# Optional: Keep only the latest N entries (e.g., 20) |
65 |
| -#MAX_ENTRIES = 20 |
66 |
| -#for item in channel.findall("item"): |
67 |
| -# channel.remove(item) |
68 |
| -#for item in items[:MAX_ENTRIES]: |
69 |
| -# channel.append(item) |
| 62 | +# Clear old items and re-append in sorted order |
| 63 | +for item in channel.findall("item"): |
| 64 | + channel.remove(item) |
| 65 | +for item in items: |
| 66 | + channel.append(item) |
70 | 67 |
|
71 |
| -# Save updated RSS feed |
| 68 | +# Save the updated RSS feed |
72 | 69 | tree = ET.ElementTree(root)
|
73 | 70 | tree.write(rss_file, encoding="UTF-8", xml_declaration=True)
|
74 | 71 |
|
75 |
| -print(f"Generated RSS feed with {min(len(items), MAX_ENTRIES)} articles.") |
| 72 | +print(f"Generated RSS feed with {len(items)} article(s).") |
0 commit comments