|
1 | 1 | import os
|
2 | 2 | import xml.etree.ElementTree as ET
|
3 |
| -from datetime import datetime, timezone |
| 3 | +from datetime import datetime |
4 | 4 | from email.utils import parsedate_to_datetime
|
5 | 5 |
|
6 | 6 | # Path to your docs folder
|
|
ET.SubElement(channel, "link").text = "https://github.com/damirlj/modern_cpp_tutorials"
ET.SubElement(channel, "description").text = "New articles and updates in the docs/ folder"

# Imported here because the file-level import only brings in `datetime`.
# Timezone-aware UTC values are required so that preserved and freshly
# created publication dates can be compared when sorting.
from datetime import timezone

# RFC 822 style timestamp used for <pubDate>; values are always rendered in UTC.
PUB_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S GMT"


def _parse_pub_date(item):
    """Return the <pubDate> of *item* as an aware UTC datetime.

    Returns None when the element is missing, empty, or cannot be parsed,
    so the caller can fall back to the current time instead of crashing.
    """
    text = item.findtext("pubDate")
    if not text or not text.strip():
        return None
    try:
        parsed = parsedate_to_datetime(text.strip())
    except (TypeError, ValueError):
        # Older Python versions raise TypeError, newer ones ValueError.
        return None
    if parsed.tzinfo is None:
        # A naive result (e.g. a "-0000" zone) is treated as UTC.
        return parsed.replace(tzinfo=timezone.utc)
    # Convert to UTC so the literal "GMT" suffix in PUB_DATE_FORMAT is truthful.
    return parsed.astimezone(timezone.utc)


# Map from GUID to <item> for quick lookup; items without a GUID are ignored.
existing_items = {}
for item in channel.findall("item"):
    guid = item.findtext("guid")
    if guid:
        existing_items[guid] = item

# One timestamp for the whole run: every article discovered in this run gets
# the same pubDate, so the stable sort keeps them in pdf_files order both now
# and on later runs (when the stored value has only second resolution).
run_timestamp = datetime.now(timezone.utc)

# Track updated list of items as (pubDate, element) pairs
new_items = []

# Generate items from current PDF list
for pdf in pdf_files:
    relative_path = os.path.relpath(pdf, docs_folder)
    commit_url = f"https://github.com/damirlj/modern_cpp_tutorials/blob/main/{relative_path}"

    # If this article already exists, preserve its pubDate; otherwise (or if
    # the stored date is unusable) stamp it with the time of this run.
    old_item = existing_items.get(commit_url)
    pub_datetime = _parse_pub_date(old_item) if old_item is not None else None
    if pub_datetime is None:
        pub_datetime = run_timestamp

    item = ET.Element("item")
    ET.SubElement(item, "title").text = relative_path
    ET.SubElement(item, "link").text = commit_url
    ET.SubElement(item, "guid").text = commit_url
    ET.SubElement(item, "pubDate").text = pub_datetime.strftime(PUB_DATE_FORMAT)

    new_items.append((pub_datetime, item))

# Sort items newest-first by pubDate (all keys are aware UTC datetimes)
new_items.sort(key=lambda entry: entry[0], reverse=True)

# Clear old items and re-append in sorted order
for old_item in channel.findall("item"):
    channel.remove(old_item)
for _, item in new_items:
    channel.append(item)

# Save the updated RSS feed
tree = ET.ElementTree(root)
tree.write(rss_file, encoding="UTF-8", xml_declaration=True)

print(f"Generated RSS feed with {len(new_items)} articles.")
0 commit comments