From 9c7c9203eec45a25708e7b3e1e860f9ab9b87d5c Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 26 Nov 2024 17:15:43 -0500 Subject: [PATCH 01/57] Add list endpoint to get sorted list of URLs in collection Response is sorted desc by page count match and includes an array containing page_id, ts, and status for each snapshot with that URL. --- backend/btrixcloud/colls.py | 109 +++++++++++++++++++++++++++++++++++ backend/btrixcloud/models.py | 27 ++++++++- 2 files changed, 135 insertions(+), 1 deletion(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 411e659ac9..f0035d803a 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -33,6 +33,9 @@ OrgPublicCollections, PublicOrgDetails, CollAccessType, + PageUrlCount, + PageIdTimestamp, + PaginatedPageUrlCountResponse, ) from .utils import dt_now @@ -60,6 +63,7 @@ def __init__(self, mdb, storage_ops, orgs, event_webhook_ops): self.collections = mdb["collections"] self.crawls = mdb["crawls"] self.crawl_configs = mdb["crawl_configs"] + self.pages = mdb["pages"] self.crawl_ops = cast(CrawlOps, None) self.orgs = orgs @@ -312,6 +316,17 @@ async def get_collection_search_values(self, org: Organization): names = [name for name in names if name] return {"names": names} + async def get_collection_crawl_ids(self, coll_id: UUID) -> List[str]: + """Return list of crawl ids in collection""" + crawl_ids = [] + async for crawl_raw in self.crawls.find( + {"collectionIds": coll_id}, projection=["_id"] + ): + crawl_id = crawl_raw.get("_id") + if crawl_id: + crawl_ids.append(crawl_id) + return crawl_ids + async def delete_collection(self, coll_id: UUID, org: Organization): """Delete collection and remove from associated crawls.""" await self.crawl_ops.remove_collection_from_all_crawls(coll_id) @@ -422,6 +437,78 @@ async def get_org_public_collections(self, org_slug: str): return OrgPublicCollections(org=public_org_details, collections=collections) + async def list_urls_in_collection( + self, + coll_id: UUID, + oid: UUID, + url_prefix: Optional[str] = None, + page_size: int = DEFAULT_PAGE_SIZE, + page: int = 1, + ) -> Tuple[List[PageUrlCount], int]: + """List all URLs in collection sorted desc by snapshot count""" + # pylint: disable=duplicate-code, too-many-locals, too-many-branches, too-many-statements + # Zero-index page for query + page = page - 1 + skip = page_size * page + + crawl_ids = await self.get_collection_crawl_ids(coll_id) + + match_query: dict[str, object] = {"oid": oid, "crawl_id": {"$in": crawl_ids}} + + if url_prefix: + regex_pattern = f"^{url_prefix}" + match_query["url"] = {"$regex": regex_pattern, "$options": "i"} + + aggregate = [{"$match": match_query}] + + aggregate.extend( + [ + { + "$group": { + "_id": "$url", + "pages": {"$push": "$$ROOT"}, + "count": {"$sum": 1}, + }, + }, + {"$sort": {"count": -1}}, + {"$set": {"url": "$_id"}}, + { + "$facet": { + "items": [ + {"$skip": skip}, + {"$limit": page_size}, + ], + "total": [{"$count": "count"}], + } + }, + ] + ) + + # Get total + cursor = self.pages.aggregate(aggregate) + results = await cursor.to_list(length=1) + result = results[0] + items = result["items"] + + try: + total = int(result["total"][0]["count"]) + except (IndexError, ValueError): + total = 0 + + return [ + PageUrlCount( + url=data.get("url", ""), + count=data.get("count", 0), + snapshots=[ + PageIdTimestamp( + pageId=p["_id"], ts=p.get("ts"), status=p.get("status", 200) + ) + for p in data.get("pages", []) + ], + ) + for data in items + ], total + # 
============================================================================ # pylint: disable=too-many-locals @@ -617,4 +704,26 @@ async def download_collection( async def get_org_public_collections(org_slug: str): return await colls.get_org_public_collections(org_slug) + @app.get( + "/orgs/{oid}/collections/{coll_id}/urls", + tags=["collections"], + response_model=PaginatedPageUrlCountResponse, + ) + async def get_collection_url_list( + coll_id: UUID, + oid: UUID, + urlPrefix: Optional[str] = None, + pageSize: int = DEFAULT_PAGE_SIZE, + page: int = 1, + ): + """Retrieve paginated list of urls in collection sorted by snapshot count""" + pages, total = await ops.list_urls_in_collection( + coll_id=coll_id, + oid=oid, + url_prefix=urlPrefix, + page_size=pageSize, + page=page, + ) + return paginated_format(pages, total, page, pageSize) + return colls diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 93e708c4d0..126b67928f 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -2240,7 +2240,7 @@ class PageWithAllQA(Page): class PageOut(Page): """Model for pages output, no QA""" - status: Optional[int] = 200 + status: int = 200 # ============================================================================ @@ -2266,6 +2266,24 @@ class PageNoteUpdatedResponse(BaseModel): data: PageNote +# ============================================================================ +class PageIdTimestamp(BaseModel): + """Simplified model for page info to include in PageUrlCount""" + + pageId: UUID + ts: Optional[datetime] = None + status: int = 200 + + +# ============================================================================ +class PageUrlCount(BaseModel): + """Model for counting pages by URL""" + + url: AnyHttpUrl + count: int = 0 + snapshots: List[PageIdTimestamp] = [] + + # ============================================================================ ### GENERIC RESPONSE MODELS ### @@ -2512,3 +2530,10 @@ class PaginatedUserEmailsResponse(PaginatedResponse): """Response model for user emails with org info""" items: List[UserEmailWithOrgInfo] + + +# ============================================================================ +class PaginatedPageUrlCountResponse(PaginatedResponse): + """Response model for page count by url""" + + items: List[PageUrlCount] From 2041be5553f472eb6e732489619b7c8c9d7788bc Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 27 Nov 2024 11:00:04 -0500 Subject: [PATCH 02/57] Add endpoint to set or update collection home url --- backend/btrixcloud/colls.py | 37 ++++++++++++++++++++++++++++++++++++ backend/btrixcloud/main.py | 2 ++ backend/btrixcloud/models.py | 11 +++++++++++ backend/btrixcloud/ops.py | 2 ++ 4 files changed, 52 insertions(+) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index f0035d803a..1d3fbac26b 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -36,6 +36,7 @@ PageUrlCount, PageIdTimestamp, PaginatedPageUrlCountResponse, + UpdateCollHomeUrl, ) from .utils import dt_now @@ -74,6 +75,10 @@ def set_crawl_ops(self, ops): """set crawl ops""" self.crawl_ops = ops + def set_page_ops(self, ops): + """set page ops""" + self.page_ops = ops + async def init_index(self): """init lookup index""" await self.collections.create_index( @@ -509,6 +514,26 @@ async def list_urls_in_collection( for data in items ], total + async def set_home_url( + self, coll_id: UUID, update: UpdateCollHomeUrl, org: Organization + ) -> Dict[str, bool]: + """Set home URL for collection and save 
thumbnail to database""" + page = await self.page_ops.get_page(update.pageId, org.id) + + update_query = { + "homeUrl": page.url, + "homeUrlTs": page.ts, + "homeUrlPageId": page.id, + } + + await self.collections.find_one_and_update( + {"_id": coll_id, "oid": org.id}, + {"$set": query}, + return_document=pymongo.ReturnDocument.AFTER, + ) + + return {"success": True} + # ============================================================================ # pylint: disable=too-many-locals @@ -726,4 +751,16 @@ async def get_collection_url_list( ) return paginated_format(pages, total, page, pageSize) + @app.post( + "/orgs/{oid}/collections/{coll_id}/home-url", + tags=["collections"], + response_model=SuccessResponse, + ) + async def set_collection_home_url( + update: UpdateCollHomeUrl, + coll_id: UUID, + org: Organization = Depends(org_crawl_dep), + ): + return await colls.set_home_url(coll_id, update, org) + return colls diff --git a/backend/btrixcloud/main.py b/backend/btrixcloud/main.py index f6b678cb82..0abd384316 100644 --- a/backend/btrixcloud/main.py +++ b/backend/btrixcloud/main.py @@ -260,6 +260,8 @@ def main() -> None: crawl_config_ops.set_coll_ops(coll_ops) + coll_ops.set_page_ops(page_ops) + # run only in first worker if run_once_lock("btrix-init-db"): asyncio.create_task( diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 126b67928f..51be8c43a1 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -1082,6 +1082,10 @@ class Collection(BaseMongoModel): access: CollAccessType = CollAccessType.PRIVATE + homeUrl: Optional[AnyHttpUrl] = None + homeUrlTs: Optional[datetime] = None + homeUrlPageId: Optional[UUID] = None + # ============================================================================ class CollIn(BaseModel): @@ -1110,6 +1114,13 @@ class UpdateColl(BaseModel): access: Optional[CollAccessType] = None +# ============================================================================ +class UpdateCollHomeUrl(BaseModel): + """Update home url for collection""" + + pageId: UUID + + # ============================================================================ class AddRemoveCrawlList(BaseModel): """Collections to add or remove from collection""" diff --git a/backend/btrixcloud/ops.py b/backend/btrixcloud/ops.py index 23629de2aa..32e5e5fee1 100644 --- a/backend/btrixcloud/ops.py +++ b/backend/btrixcloud/ops.py @@ -109,6 +109,8 @@ def init_ops() -> Tuple[ crawl_config_ops.set_coll_ops(coll_ops) + coll_ops.set_page_ops(page_ops) + return ( org_ops, crawl_config_ops, From 02a7d1e6a436a368f4e9594523e9e3978786bfab Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 27 Nov 2024 16:15:57 -0500 Subject: [PATCH 03/57] Fixups --- backend/btrixcloud/colls.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 1d3fbac26b..e65b3c85a0 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -4,7 +4,7 @@ from collections import Counter from uuid import UUID, uuid4 -from typing import Optional, List, TYPE_CHECKING, cast, Dict +from typing import Optional, List, TYPE_CHECKING, cast, Dict, Tuple import asyncio import pymongo @@ -53,7 +53,7 @@ class CollectionOps: """ops for working with named collections of crawls""" - # pylint: disable=too-many-arguments + # pylint: disable=too-many-arguments, too-many-instance-attributes, too-many-public-methods orgs: OrgOps storage_ops: StorageOps @@ -77,6 +77,7 @@ def set_crawl_ops(self, ops): def 
set_page_ops(self, ops): """set page ops""" + # pylint: disable=attribute-defined-outside-init self.page_ops = ops async def init_index(self): @@ -528,8 +529,7 @@ async def set_home_url( await self.collections.find_one_and_update( {"_id": coll_id, "oid": org.id}, - {"$set": query}, - return_document=pymongo.ReturnDocument.AFTER, + {"$set": update_query}, ) return {"success": True} @@ -742,7 +742,7 @@ async def get_collection_url_list( page: int = 1, ): """Retrieve paginated list of urls in collection sorted by snapshot count""" - pages, total = await ops.list_urls_in_collection( + pages, total = await colls.list_urls_in_collection( coll_id=coll_id, oid=oid, url_prefix=urlPrefix, From d75c4af6f4f89bb068add5993b804d9d6be4aebc Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 27 Nov 2024 17:03:23 -0500 Subject: [PATCH 04/57] Use updated response for /home-urls endpoint --- backend/btrixcloud/colls.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index e65b3c85a0..ff23f68f32 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -532,7 +532,7 @@ async def set_home_url( {"$set": update_query}, ) - return {"success": True} + return {"updated": True} # ============================================================================ @@ -754,7 +754,7 @@ async def get_collection_url_list( @app.post( "/orgs/{oid}/collections/{coll_id}/home-url", tags=["collections"], - response_model=SuccessResponse, + response_model=UpdatedResponse, ) async def set_collection_home_url( update: UpdateCollHomeUrl, From 867fc7aab79d8b47a6ad619d1b1aaf9848e164bf Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 27 Nov 2024 17:03:33 -0500 Subject: [PATCH 05/57] Add tests --- backend/test/test_collections.py | 67 ++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 4faf42540d..f76d842bef 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -844,6 +844,73 @@ def test_list_public_collections_no_colls(non_default_org_id, admin_auth_headers assert data["collections"] == [] +def test_set_collection_home_url( + crawler_auth_headers, default_org_id, crawler_crawl_id +): + # Get a page id from crawler_crawl_id + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + assert data["total"] >= 1 + + page = data["items"][0] + assert page + + page_id = page["id"] + assert page_id + + page_url = page["url"] + page_ts = page["ts"] + + # Set page as home url + r = requests.post( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}/home-url", + headers=crawler_auth_headers, + json={"pageId": page_id}, + ) + assert r.status_code == 200 + assert r.json()["updated"] + + # Check that fields were set in collection as expected + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + assert data["homeUrl"] == page_url + assert data["homeUrlTs"] == page_ts + assert data["homeUrlPageId"] == page_id + + +def test_collection_url_list(crawler_auth_headers, default_org_id): + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}/urls", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + + assert 
data["total"] >= 1 + urls = data["items"] + assert urls + + for url in urls: + assert url["url"] + assert url["count"] >= 1 + + snapshots = url["snapshots"] + assert snapshots + + for snapshot in snapshots: + assert snapshot["pageId"] + assert snapshot["ts"] + assert snapshot["status"] + + def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id): # Delete second collection r = requests.delete( From 52404c6c3f88d0998aff024354f7256550ebc186 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Nov 2024 15:09:49 -0500 Subject: [PATCH 06/57] WIP: Add collection thumbnail upload Also reorganizes some utility classes and collection get methods. Caveats: - Generates a new presigned url for the thumbnail image each time CollOut is created (may want to do something similar to crawl files and track expiration date) --- backend/btrixcloud/basecrawls.py | 19 +--- backend/btrixcloud/colls.py | 135 ++++++++++++++++++---- backend/btrixcloud/main.py | 4 +- backend/btrixcloud/models.py | 190 ++++++++++++++++++++++++++++++- backend/btrixcloud/uploads.py | 43 +------ 5 files changed, 313 insertions(+), 78 deletions(-) diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index d913d3362b..5106487610 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -1,6 +1,5 @@ """ base crawl type """ -import os from datetime import timedelta from typing import Optional, List, Union, Dict, Any, Type, TYPE_CHECKING, cast, Tuple from uuid import UUID @@ -29,6 +28,7 @@ UpdatedResponse, DeletedResponseQuota, CrawlSearchValuesResponse, + PRESIGN_DURATION_SECONDS, ) from .pagination import paginated_format, DEFAULT_PAGE_SIZE from .utils import dt_now, date_to_str @@ -47,11 +47,6 @@ CrawlConfigOps = UserManager = OrgOps = CollectionOps = PageOps = object StorageOps = EventWebhookOps = BackgroundJobOps = object -# Presign duration must be less than 604800 seconds (one week), -# so set this one minute short of a week. 
-PRESIGN_MINUTES_MAX = 10079 -PRESIGN_MINUTES_DEFAULT = PRESIGN_MINUTES_MAX - # ============================================================================ # pylint: disable=too-many-instance-attributes, too-many-public-methods, too-many-lines @@ -93,16 +88,8 @@ def __init__( self.background_job_ops = background_job_ops self.page_ops = cast(PageOps, None) - presign_duration_minutes = int( - os.environ.get("PRESIGN_DURATION_MINUTES") or PRESIGN_MINUTES_DEFAULT - ) - - self.presign_duration_seconds = ( - min(presign_duration_minutes, PRESIGN_MINUTES_MAX) * 60 - ) - # renew when <25% of time remaining - self.expire_at_duration_seconds = int(self.presign_duration_seconds * 0.75) + self.expire_at_duration_seconds = int(PRESIGN_DURATION_SECONDS * 0.75) def set_page_ops(self, page_ops): """set page ops reference""" @@ -474,7 +461,7 @@ async def resolve_signed_urls( ): exp = now + delta presigned_url = await self.storage_ops.get_presigned_url( - org, file_, self.presign_duration_seconds + org, file_, PRESIGN_DURATION_SECONDS ) prefix = "files" diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index ff23f68f32..f9a64df2cd 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -4,12 +4,14 @@ from collections import Counter from uuid import UUID, uuid4 -from typing import Optional, List, TYPE_CHECKING, cast, Dict, Tuple +from typing import Optional, List, TYPE_CHECKING, cast, Dict, Tuple, Any +import os import asyncio import pymongo from fastapi import Depends, HTTPException, Response from fastapi.responses import StreamingResponse +from starlette.requests import Request from .pagination import DEFAULT_PAGE_SIZE, paginated_format from .models import ( @@ -29,6 +31,7 @@ EmptyResponse, UpdatedResponse, SuccessResponse, + AddedResponse, CollectionSearchValuesResponse, OrgPublicCollections, PublicOrgDetails, @@ -37,6 +40,10 @@ PageIdTimestamp, PaginatedPageUrlCountResponse, UpdateCollHomeUrl, + User, + ImageFile, + ImageFilePreparer, + MIN_UPLOAD_PART_SIZE, ) from .utils import dt_now @@ -170,7 +177,7 @@ async def add_crawls_to_collection( ) ) - return await self.get_collection(coll_id, org) + return await self.get_collection_out(coll_id, org) async def remove_crawls_from_collection( self, coll_id: UUID, crawl_ids: List[str], org: Organization @@ -194,12 +201,12 @@ async def remove_crawls_from_collection( ) ) - return await self.get_collection(coll_id, org) + return await self.get_collection_out(coll_id, org) - async def get_collection( - self, coll_id: UUID, org: Organization, resources=False, public_only=False - ) -> CollOut: - """Get collection by id""" + async def get_collection_raw( + self, coll_id: UUID, public_only: bool = False + ) -> Dict[str, Any]: + """Get collection by id as dict from database""" query: dict[str, object] = {"_id": coll_id} if public_only: query["access"] = {"$in": ["public", "unlisted"]} @@ -208,10 +215,31 @@ async def get_collection( if not result: raise HTTPException(status_code=404, detail="collection_not_found") + return result + + async def get_collection( + self, coll_id: UUID, public_only: bool = False + ) -> Collection: + """Get collection by id""" + result = await self.get_collection_raw(coll_id, public_only) + return Collection.from_dict(result) + + async def get_collection_out( + self, coll_id: UUID, org: Organization, resources=False, public_only=False + ) -> CollOut: + """Get CollOut by id""" + result = await self.get_collection_raw(coll_id, public_only) + if resources: - result["resources"] = await 
self.get_collection_crawl_resources( - coll_id, org + result["resources"] = await self.get_collection_crawl_resources(coll_id) + + thumbnail = result.get("thumbnail") + if thumbnail: + image_file = ImageFile(**thumbnail) + result["thumbnail"] = await image_file.get_image_file_out( + org, self.storage_ops ) + return CollOut.from_dict(result) async def list_collections( @@ -283,11 +311,10 @@ async def list_collections( return collections, total - async def get_collection_crawl_resources(self, coll_id: UUID, org: Organization): + async def get_collection_crawl_resources(self, coll_id: UUID): """Return pre-signed resources for all collection crawl files.""" - coll = await self.get_collection(coll_id, org) - if not coll: - raise HTTPException(status_code=404, detail="collection_not_found") + # Ensure collection exists + _ = await self.get_collection_raw(coll_id) all_files = [] @@ -349,7 +376,7 @@ async def delete_collection(self, coll_id: UUID, org: Organization): async def download_collection(self, coll_id: UUID, org: Organization): """Download all WACZs in collection as streaming nested WACZ""" - coll = await self.get_collection(coll_id, org, resources=True) + coll = await self.get_collection_out(coll_id, org, resources=True) metadata = { "type": "collection", @@ -534,10 +561,64 @@ async def set_home_url( return {"updated": True} + async def upload_thumbnail_stream( + self, stream, filename: str, coll_id: UUID, org: Organization, user: User + ) -> Dict[str, bool]: + """Upload file as stream to use as collection thumbnail""" + coll = await self.get_collection(coll_id) + + _, extension = os.path.splitext(filename) + + image_filename = f"thumbnail-{str(coll_id)}{extension}" + + prefix = org.storage.get_storage_extra_path(str(org.id)) + "images/" + + file_prep = ImageFilePreparer( + prefix, + image_filename, + original_filename=filename, + user=user, + created=dt_now(), + ) + + async def stream_iter(): + """iterate over each chunk and compute and digest + total size""" + async for chunk in stream: + file_prep.add_chunk(chunk) + yield chunk + + print("Collection Thumbnail Stream Upload Start", flush=True) + + if not await self.storage_ops.do_upload_multipart( + org, + file_prep.upload_name, + stream_iter(), + MIN_UPLOAD_PART_SIZE, + ): + print("Collection Thumbnail Stream Upload Failed", flush=True) + raise HTTPException(status_code=400, detail="upload_failed") + + print("Collection Thumbnail Stream Upload Complete", flush=True) + + thumbnail_file = file_prep.get_image_file(org.storage) + + if coll.thumbnail: + if not await self.storage_ops.delete_crawl_file_object(org, coll.thumbnail): + print( + f"Unable to delete previous collection thumbnail: {coll.thumbnail.filename}" + ) + + await self.collections.find_one_and_update( + {"_id": coll_id, "oid": org.id}, + {"$set": {"thumbnail": dict(thumbnail_file)}}, + ) + + return {"added": True} + # ============================================================================ # pylint: disable=too-many-locals -def init_collections_api(app, mdb, orgs, storage_ops, event_webhook_ops): +def init_collections_api(app, mdb, orgs, storage_ops, event_webhook_ops, user_dep): """init collections api""" # pylint: disable=invalid-name, unused-argument, too-many-arguments @@ -595,7 +676,7 @@ async def get_collection_all(org: Organization = Depends(org_viewer_dep)): all_collections, _ = await colls.list_collections(org.id, page_size=10_000) for collection in all_collections: results[collection.name] = await colls.get_collection_crawl_resources( - collection.id, org + 
collection.id ) except Exception as exc: # pylint: disable=raise-missing-from @@ -623,7 +704,7 @@ async def get_collection_search_values( async def get_collection( coll_id: UUID, org: Organization = Depends(org_viewer_dep) ): - return await colls.get_collection(coll_id, org) + return await colls.get_collection_out(coll_id, org) @app.get( "/orgs/{oid}/collections/{coll_id}/replay.json", @@ -633,7 +714,7 @@ async def get_collection( async def get_collection_replay( coll_id: UUID, org: Organization = Depends(org_viewer_dep) ): - return await colls.get_collection(coll_id, org, resources=True) + return await colls.get_collection_out(coll_id, org, resources=True) @app.get( "/orgs/{oid}/collections/{coll_id}/public/replay.json", @@ -645,7 +726,7 @@ async def get_collection_public_replay( coll_id: UUID, org: Organization = Depends(org_public), ): - coll = await colls.get_collection( + coll = await colls.get_collection_out( coll_id, org, resources=True, public_only=True ) response.headers["Access-Control-Allow-Origin"] = "*" @@ -763,4 +844,20 @@ async def set_collection_home_url( ): return await colls.set_home_url(coll_id, update, org) + @app.put( + "/orgs/{oid}/collections/{coll_id}/upload/thumbnail", + tags=["collections"], + response_model=AddedResponse, + ) + async def upload_stream( + request: Request, + filename: str, + coll_id: UUID, + org: Organization = Depends(org_crawl_dep), + user: User = Depends(user_dep), + ): + return await colls.upload_thumbnail_stream( + request.stream(), filename, coll_id, org, user + ) + return colls diff --git a/backend/btrixcloud/main.py b/backend/btrixcloud/main.py index 0abd384316..7119214fc5 100644 --- a/backend/btrixcloud/main.py +++ b/backend/btrixcloud/main.py @@ -223,7 +223,9 @@ def main() -> None: profiles, ) - coll_ops = init_collections_api(app, mdb, org_ops, storage_ops, event_webhook_ops) + coll_ops = init_collections_api( + app, mdb, org_ops, storage_ops, event_webhook_ops, current_active_user + ) base_crawl_init = ( app, diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 51be8c43a1..a7b1c13e8a 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -5,6 +5,9 @@ from datetime import datetime from enum import Enum, IntEnum from uuid import UUID +import base64 +import hashlib +import mimetypes import os from typing import Optional, List, Dict, Union, Literal, Any, get_args @@ -21,6 +24,7 @@ BeforeValidator, TypeAdapter, ) +from pathvalidate import sanitize_filename # from fastapi_users import models as fastapi_users_models @@ -29,6 +33,20 @@ # crawl scale for constraint MAX_CRAWL_SCALE = int(os.environ.get("MAX_CRAWL_SCALE", 3)) +# Presign duration must be less than 604800 seconds (one week), +# so set this one minute short of a week +PRESIGN_MINUTES_MAX = 10079 +PRESIGN_MINUTES_DEFAULT = PRESIGN_MINUTES_MAX + +# Expire duration seconds for presigned urls +PRESIGN_DURATION_MINUTES = int( + os.environ.get("PRESIGN_DURATION_MINUTES") or PRESIGN_MINUTES_DEFAULT +) +PRESIGN_DURATION_SECONDS = min(PRESIGN_DURATION_MINUTES, PRESIGN_MINUTES_MAX) * 60 + +# Minimum part size for file uploads +MIN_UPLOAD_PART_SIZE = 10000000 + # annotated types # ============================================================================ @@ -1050,6 +1068,155 @@ class UpdateUpload(UpdateCrawl): """Update modal that also includes name""" +# ============================================================================ +class FilePreparer: + """wrapper to compute digest / name for streaming upload""" + + def __init__(self, 
prefix, filename): + self.upload_size = 0 + self.upload_hasher = hashlib.sha256() + self.upload_name = prefix + self.prepare_filename(filename) + + def add_chunk(self, chunk): + """add chunk for file""" + self.upload_size += len(chunk) + self.upload_hasher.update(chunk) + + def get_crawl_file(self, storage: StorageRef): + """get crawl file""" + return CrawlFile( + filename=self.upload_name, + hash=self.upload_hasher.hexdigest(), + size=self.upload_size, + storage=storage, + ) + + def prepare_filename(self, filename): + """prepare filename by sanitizing and adding extra string + to avoid duplicates""" + name = sanitize_filename(filename.rsplit("/", 1)[-1]) + parts = name.split(".") + randstr = base64.b32encode(os.urandom(5)).lower() + parts[0] += "-" + randstr.decode("utf-8") + return ".".join(parts) + + +# ============================================================================ + +### USER-UPLOADED IMAGES ### + + +# ============================================================================ +class ImageFileOut(BaseModel): + """output for user-upload imaged file (conformance to Data Resource Spec)""" + + name: str + path: str + hash: str + size: int + + originalFilename: str + mime: str + userid: UUID + userName: str + created: datetime + + +# ============================================================================ +# class PublicImageFileOut(BaseModel): +# """public output for user-upload imaged file (conformance to Data Resource Spec)""" + +# name: str +# path: str +# hash: str +# size: int + +# mime: str + + +# ============================================================================ +class ImageFile(BaseFile): + """User-uploaded image file""" + + originalFilename: str + mime: str + userid: UUID + userName: str + created: datetime + + async def get_image_file_out(self, org, storage_ops) -> ImageFileOut: + """Get ImageFileOut with new presigned url""" + presigned_url = await storage_ops.get_presigned_url( + org, self, PRESIGN_DURATION_SECONDS + ) + + return ImageFileOut( + name=self.filename, + path=presigned_url or "", + hash=self.hash, + size=self.size, + originalFilename=self.originalFilename, + mime=self.mime, + userid=self.userid, + userName=self.userName, + created=self.created, + ) + + # async def get_public_image_file_out(self, org, storage_ops) -> PublicImageFileOut: + # """Get PublicImageFileOut with new presigned url""" + # presigned_url = await storage_ops.get_presigned_url( + # org, self, PRESIGN_DURATION_SECONDS + # ) + + # return PublicImageFileOut( + # name=self.filename, + # path=presigned_url or "", + # hash=self.hash, + # size=self.size, + # mime=self.mime, + # ) + + +# ============================================================================ +class ImageFilePreparer(FilePreparer): + """Wrapper for user image streaming uploads""" + + # pylint: disable=too-many-arguments, too-many-function-args + + def __init__( + self, + prefix, + filename, + original_filename: str, + user: User, + created: datetime, + ): + super().__init__(prefix, filename) + + self.original_filename = original_filename + self.mime, _ = mimetypes.guess_type(original_filename) or ("image/jpeg", None) + self.userid = user.id + self.user_name = user.name + self.created = created + + def get_image_file( + self, + storage: StorageRef, + ) -> ImageFile: + """get user-uploaded image file""" + return ImageFile( + filename=self.upload_name, + hash=self.upload_hasher.hexdigest(), + size=self.upload_size, + storage=storage, + originalFilename=self.original_filename, + mime=self.mime, + 
userid=self.userid, + userName=self.user_name, + created=self.created, + ) + + # ============================================================================ ### COLLECTIONS ### @@ -1086,6 +1253,8 @@ class Collection(BaseMongoModel): homeUrlTs: Optional[datetime] = None homeUrlPageId: Optional[UUID] = None + thumbnail: Optional[ImageFile] = None + # ============================================================================ class CollIn(BaseModel): @@ -1099,10 +1268,29 @@ class CollIn(BaseModel): # ============================================================================ -class CollOut(Collection): +class CollOut(BaseMongoModel): """Collection output model with annotations.""" + name: str + oid: UUID + description: Optional[str] = None + modified: Optional[datetime] = None + + crawlCount: Optional[int] = 0 + pageCount: Optional[int] = 0 + totalSize: Optional[int] = 0 + + # Sorted by count, descending + tags: Optional[List[str]] = [] + + access: CollAccessType = CollAccessType.PRIVATE + + homeUrl: Optional[AnyHttpUrl] = None + homeUrlTs: Optional[datetime] = None + homeUrlPageId: Optional[UUID] = None + resources: List[CrawlFileOut] = [] + thumbnail: Optional[ImageFileOut] = None # ============================================================================ diff --git a/backend/btrixcloud/uploads.py b/backend/btrixcloud/uploads.py index ded0630719..7257c875e6 100644 --- a/backend/btrixcloud/uploads.py +++ b/backend/btrixcloud/uploads.py @@ -1,9 +1,6 @@ """ handle user uploads into browsertrix """ import uuid -import hashlib -import os -import base64 from urllib.parse import unquote from uuid import UUID @@ -13,7 +10,6 @@ from fastapi import Depends, UploadFile, File from fastapi import HTTPException from starlette.requests import Request -from pathvalidate import sanitize_filename from .basecrawls import BaseCrawlOps from .storages import CHUNK_SIZE @@ -27,18 +23,16 @@ Organization, PaginatedCrawlOutResponse, User, - StorageRef, UpdatedResponse, DeletedResponseQuota, AddedResponseIdQuota, + FilePreparer, + MIN_UPLOAD_PART_SIZE, ) from .pagination import paginated_format, DEFAULT_PAGE_SIZE from .utils import dt_now -MIN_UPLOAD_PART_SIZE = 10000000 - - # ============================================================================ class UploadOps(BaseCrawlOps): """upload ops""" @@ -224,39 +218,6 @@ async def delete_uploads( return {"deleted": True, "storageQuotaReached": quota_reached} -# ============================================================================ -class FilePreparer: - """wrapper to compute digest / name for streaming upload""" - - def __init__(self, prefix, filename): - self.upload_size = 0 - self.upload_hasher = hashlib.sha256() - self.upload_name = prefix + self.prepare_filename(filename) - - def add_chunk(self, chunk): - """add chunk for file""" - self.upload_size += len(chunk) - self.upload_hasher.update(chunk) - - def get_crawl_file(self, storage: StorageRef): - """get crawl file""" - return CrawlFile( - filename=self.upload_name, - hash=self.upload_hasher.hexdigest(), - size=self.upload_size, - storage=storage, - ) - - def prepare_filename(self, filename): - """prepare filename by sanitizing and adding extra string - to avoid duplicates""" - name = sanitize_filename(filename.rsplit("/", 1)[-1]) - parts = name.split(".") - randstr = base64.b32encode(os.urandom(5)).lower() - parts[0] += "-" + randstr.decode("utf-8") - return ".".join(parts) - - # ============================================================================ class UploadFileReader(BufferedReader): 
"""Compute digest on file upload""" From 130ee51ed2bd206dcceafd47a9785c8cf6516f13 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Nov 2024 15:55:23 -0500 Subject: [PATCH 07/57] Add collection thumbnail upload test --- backend/btrixcloud/colls.py | 8 +- backend/test/data/thumbnail.jpg | 738 +++++++++++++++++++++++++++++++ backend/test/test_collections.py | 29 ++ 3 files changed, 773 insertions(+), 2 deletions(-) create mode 100644 backend/test/data/thumbnail.jpg diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index f9a64df2cd..22ad1e8d3d 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -608,9 +608,13 @@ async def stream_iter(): f"Unable to delete previous collection thumbnail: {coll.thumbnail.filename}" ) + coll.thumbnail = thumbnail_file + + # Update entire document to avoid bson.errors.InvalidDocument error + # with thumbnail await self.collections.find_one_and_update( {"_id": coll_id, "oid": org.id}, - {"$set": {"thumbnail": dict(thumbnail_file)}}, + {"$set": coll.to_dict()}, ) return {"added": True} @@ -845,7 +849,7 @@ async def set_collection_home_url( return await colls.set_home_url(coll_id, update, org) @app.put( - "/orgs/{oid}/collections/{coll_id}/upload/thumbnail", + "/orgs/{oid}/collections/{coll_id}/stream/thumbnail", tags=["collections"], response_model=AddedResponse, ) diff --git a/backend/test/data/thumbnail.jpg b/backend/test/data/thumbnail.jpg new file mode 100644 index 0000000000..e746e341f2 --- /dev/null +++ b/backend/test/data/thumbnail.jpg @@ -0,0 +1,738 @@ + + + + + + + + + + + + + + + + + + + ReplayWeb.page Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ [HTML markup of the ReplayWeb.page Docs "404 - Not found" page used as the thumbnail.jpg test fixture; tags not reproduced]
+ 404 - Not found
+ + + + + + + + + + \ No newline at end of file diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index f76d842bef..f2a41c00e0 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -911,6 +911,35 @@ def test_collection_url_list(crawler_auth_headers, default_org_id): assert snapshot["status"] +def test_upload_collection_thumbnail(crawler_auth_headers, default_org_id): + with open(os.path.join(curr_dir, "data", "thumbnail.jpg"), "rb") as fh: + r = requests.put( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}/stream/thumbnail?filename=thumbnail.jpg", + headers=crawler_auth_headers, + data=read_in_chunks(fh), + ) + assert r.status_code == 200 + assert r.json()["added"] + + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + thumbnail = r.json()["thumbnail"] + + assert thumbnail["name"] + assert thumbnail["path"] + assert thumbnail["hash"] + assert thumbnail["size"] > 0 + + assert thumbnail["originalFilename"] == "thumbnail.jpg" + assert thumbnail["mime"] == "image/jpeg" + assert thumbnail["userid"] + assert thumbnail["userName"] + assert thumbnail["created"] + + def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id): # Delete second collection r = requests.delete( From 3b26828396f0b6c687cf3fc0e5b232faeac1b0b8 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Nov 2024 16:50:53 -0500 Subject: [PATCH 08/57] WIP: Only share subset of coll and image data for public endpoint TODO: - Add data ranges to collections (maybe precompute on base model) - Add caption for public collections? --- backend/btrixcloud/colls.py | 66 +++++++++++++++++++++++++------ backend/btrixcloud/models.py | 58 +++++++++++++++++---------- backend/test/test_collections.py | 67 +++++++++++++++++++++++++++++--- 3 files changed, 153 insertions(+), 38 deletions(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 22ad1e8d3d..cdaece5ce3 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -4,7 +4,7 @@ from collections import Counter from uuid import UUID, uuid4 -from typing import Optional, List, TYPE_CHECKING, cast, Dict, Tuple, Any +from typing import Optional, List, TYPE_CHECKING, cast, Dict, Tuple, Any, Union import os import asyncio @@ -44,6 +44,7 @@ ImageFile, ImageFilePreparer, MIN_UPLOAD_PART_SIZE, + PublicCollOut, ) from .utils import dt_now @@ -244,7 +245,8 @@ async def get_collection_out( async def list_collections( self, - oid: UUID, + org: Organization, + public_colls_out: bool = False, page_size: int = DEFAULT_PAGE_SIZE, page: int = 1, sort_by: Optional[str] = None, @@ -259,16 +261,17 @@ async def list_collections( page = page - 1 skip = page * page_size - match_query: dict[str, object] = {"oid": oid} + match_query: dict[str, object] = {"oid": org.id} if name: match_query["name"] = name - elif name_prefix: regex_pattern = f"^{name_prefix}" match_query["name"] = {"$regex": regex_pattern, "$options": "i"} - if access: + if public_colls_out: + match_query["access"] = CollAccessType.PUBLIC + elif access: match_query["access"] = access aggregate = [{"$match": match_query}] @@ -307,7 +310,22 @@ async def list_collections( except (IndexError, ValueError): total = 0 - collections = [CollOut.from_dict(res) for res in items] + collections: List[Union[CollOut, PublicCollOut]] = [] + + for res in items: + if public_colls_out: + res["resources"] = await 
self.get_collection_crawl_resources(res["_id"]) + + thumbnail = res.get("thumbnail") + if thumbnail: + image_file = ImageFile(**thumbnail) + res["thumbnail"] = await image_file.get_public_image_file_out( + org, self.storage_ops + ) + + collections.append(PublicCollOut.from_dict(res)) + else: + collections.append(CollOut.from_dict(res)) return collections, total @@ -446,7 +464,14 @@ async def add_successful_crawl_to_collections(self, crawl_id: str, cid: UUID): ) await self.update_crawl_collections(crawl_id) - async def get_org_public_collections(self, org_slug: str): + async def get_org_public_collections( + self, + org_slug: str, + page_size: int = DEFAULT_PAGE_SIZE, + page: int = 1, + sort_by: Optional[str] = None, + sort_direction: int = 1, + ): """List public collections for org""" try: org = await self.orgs.get_org_by_slug(org_slug) @@ -459,7 +484,12 @@ async def get_org_public_collections(self, org_slug: str): raise HTTPException(status_code=404, detail="public_profile_not_found") collections, _ = await self.list_collections( - org.id, access=CollAccessType.PUBLIC + org, + page_size=page_size, + page=page, + sort_by=sort_by, + sort_direction=sort_direction, + public_colls_out=True, ) public_org_details = PublicOrgDetails( @@ -658,7 +688,7 @@ async def list_collection_all( access: Optional[str] = None, ): collections, total = await colls.list_collections( - org.id, + org, page_size=pageSize, page=page, sort_by=sortBy, @@ -677,7 +707,7 @@ async def list_collection_all( async def get_collection_all(org: Organization = Depends(org_viewer_dep)): results = {} try: - all_collections, _ = await colls.list_collections(org.id, page_size=10_000) + all_collections, _ = await colls.list_collections(org, page_size=10_000) for collection in all_collections: results[collection.name] = await colls.get_collection_crawl_resources( collection.id @@ -811,8 +841,20 @@ async def download_collection( tags=["collections"], response_model=OrgPublicCollections, ) - async def get_org_public_collections(org_slug: str): - return await colls.get_org_public_collections(org_slug) + async def get_org_public_collections( + org_slug: str, + pageSize: int = DEFAULT_PAGE_SIZE, + page: int = 1, + sortBy: Optional[str] = None, + sortDirection: int = 1, + ): + return await colls.get_org_public_collections( + org_slug, + page_size=pageSize, + page=page, + sort_by=sortBy, + sort_direction=sortDirection, + ) @app.get( "/orgs/{oid}/collections/{coll_id}/urls", diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index a7b1c13e8a..ec5fed6f8c 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -1123,15 +1123,15 @@ class ImageFileOut(BaseModel): # ============================================================================ -# class PublicImageFileOut(BaseModel): -# """public output for user-upload imaged file (conformance to Data Resource Spec)""" +class PublicImageFileOut(BaseModel): + """public output for user-upload imaged file (conformance to Data Resource Spec)""" -# name: str -# path: str -# hash: str -# size: int + name: str + path: str + hash: str + size: int -# mime: str + mime: str # ============================================================================ @@ -1162,19 +1162,19 @@ async def get_image_file_out(self, org, storage_ops) -> ImageFileOut: created=self.created, ) - # async def get_public_image_file_out(self, org, storage_ops) -> PublicImageFileOut: - # """Get PublicImageFileOut with new presigned url""" - # presigned_url = await storage_ops.get_presigned_url( - # 
org, self, PRESIGN_DURATION_SECONDS - # ) + async def get_public_image_file_out(self, org, storage_ops) -> PublicImageFileOut: + """Get PublicImageFileOut with new presigned url""" + presigned_url = await storage_ops.get_presigned_url( + org, self, PRESIGN_DURATION_SECONDS + ) - # return PublicImageFileOut( - # name=self.filename, - # path=presigned_url or "", - # hash=self.hash, - # size=self.size, - # mime=self.mime, - # ) + return PublicImageFileOut( + name=self.filename, + path=presigned_url or "", + hash=self.hash, + size=self.size, + mime=self.mime, + ) # ============================================================================ @@ -1293,6 +1293,24 @@ class CollOut(BaseMongoModel): thumbnail: Optional[ImageFileOut] = None +# ============================================================================ +class PublicCollOut(BaseMongoModel): + """Collection output model with annotations.""" + + name: str + description: Optional[str] = None + # caption: Optional[str] = None + + # earliestDate: Optional[datetime] = None + # latestDate: Optional[datetime] = None + + homeUrl: Optional[AnyHttpUrl] = None + homeUrlTs: Optional[datetime] = None + + resources: List[CrawlFileOut] = [] + thumbnail: Optional[PublicImageFileOut] = None + + # ============================================================================ class UpdateColl(BaseModel): """Update collection""" @@ -1366,7 +1384,7 @@ class OrgPublicCollections(BaseModel): org: PublicOrgDetails - collections: List[CollOut] = [] + collections: List[PublicCollOut] = [] # ============================================================================ diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index f2a41c00e0..5f3c5d2628 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -16,8 +16,10 @@ _coll_id = None _second_coll_id = None _public_coll_id = None +_second_public_coll_id = None upload_id = None modified = None +default_org_slug = None curr_dir = os.path.dirname(os.path.realpath(__file__)) @@ -742,11 +744,14 @@ def test_list_public_collections( json={ "crawlIds": [crawler_crawl_id], "name": "Second public collection", + "description": "Lorem ipsum", "access": "public", }, ) assert r.status_code == 200 - second_public_coll_id = r.json()["id"] + + global _second_public_coll_id + _second_public_coll_id = r.json()["id"] # Get default org slug r = requests.get( @@ -755,7 +760,10 @@ def test_list_public_collections( ) assert r.status_code == 200 data = r.json() - org_slug = data["slug"] + + global default_org_slug + default_org_slug = data["slug"] + org_name = data["name"] # Verify that public profile isn't enabled @@ -764,7 +772,7 @@ def test_list_public_collections( assert data["publicUrl"] == "" # Try listing public collections without org public profile enabled - r = requests.get(f"{API_PREFIX}/public-collections/{org_slug}") + r = requests.get(f"{API_PREFIX}/public-collections/{default_org_slug}") assert r.status_code == 404 assert r.json()["detail"] == "public_profile_not_found" @@ -795,7 +803,7 @@ def test_list_public_collections( assert data["publicUrl"] == public_url # List public collections with no auth (no public profile) - r = requests.get(f"{API_PREFIX}/public-collections/{org_slug}") + r = requests.get(f"{API_PREFIX}/public-collections/{default_org_slug}") assert r.status_code == 200 data = r.json() @@ -807,8 +815,8 @@ def test_list_public_collections( collections = data["collections"] assert len(collections) == 2 for collection in collections: - assert 
collection["id"] in (_public_coll_id, second_public_coll_id) - assert collection["access"] == "public" + assert collection["id"] in (_public_coll_id, _second_public_coll_id) + assert collection["name"] # Test non-existing slug - it should return a 404 but not reveal # whether or not an org exists with that slug @@ -940,6 +948,53 @@ def test_upload_collection_thumbnail(crawler_auth_headers, default_org_id): assert thumbnail["created"] +def test_list_public_colls_home_url_thumbnail(): + # Check we get expected data for each public collection + # and nothing we don't expect + non_public_fields = ( + "oid", + "modified", + "crawlCount", + "pageCount", + "totalSize", + "tags", + "access", + "homeUrlPageId", + ) + non_public_image_fields = ("originalFilename", "userid", "userName", "created") + + r = requests.get(f"{API_PREFIX}/public-collections/{default_org_slug}") + assert r.status_code == 200 + collections = r.json()["collections"] + assert len(collections) == 2 + + for coll in collections: + assert coll["id"] in (_public_coll_id, _second_public_coll_id) + assert coll["name"] + assert coll["resources"] + + for field in non_public_fields: + assert field not in coll + + if coll["id"] == _public_coll_id: + assert coll["homeUrl"] + assert coll["homeUrlTs"] + + if coll["id"] == _second_public_coll_id: + assert coll["description"] + thumbnail = coll["thumbnail"] + assert thumbnail + + assert thumbnail["name"] + assert thumbnail["path"] + assert thumbnail["hash"] + assert thumbnail["size"] + assert thumbnail["mime"] + + for field in non_public_image_fields: + assert field not in thumbnail + + def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id): # Delete second collection r = requests.delete( From ff64933751ce9b5d53633a6353598ce8b14ebf0a Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Nov 2024 18:15:05 -0500 Subject: [PATCH 09/57] Fix test that was checking wrong collection --- backend/test/test_collections.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 5f3c5d2628..716856f766 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -980,8 +980,6 @@ def test_list_public_colls_home_url_thumbnail(): assert coll["homeUrl"] assert coll["homeUrlTs"] - if coll["id"] == _second_public_coll_id: - assert coll["description"] thumbnail = coll["thumbnail"] assert thumbnail @@ -994,6 +992,9 @@ def test_list_public_colls_home_url_thumbnail(): for field in non_public_image_fields: assert field not in thumbnail + if coll["id"] == _second_public_coll_id: + assert coll["description"] + def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id): # Delete second collection From 0ee13d84162360daa096713cfc21cd25b27ffc86 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 2 Dec 2024 10:43:25 -0500 Subject: [PATCH 10/57] Change path for thumbnail endpoint --- backend/btrixcloud/colls.py | 4 ++-- backend/test/test_collections.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index cdaece5ce3..8d8d27de0c 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -891,11 +891,11 @@ async def set_collection_home_url( return await colls.set_home_url(coll_id, update, org) @app.put( - "/orgs/{oid}/collections/{coll_id}/stream/thumbnail", + "/orgs/{oid}/collections/{coll_id}/thumbnail", tags=["collections"], response_model=AddedResponse, ) 
- async def upload_stream( + async def upload_thumbnail_stream( request: Request, filename: str, coll_id: UUID, diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 716856f766..ff38c46770 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -922,7 +922,7 @@ def test_collection_url_list(crawler_auth_headers, default_org_id): def test_upload_collection_thumbnail(crawler_auth_headers, default_org_id): with open(os.path.join(curr_dir, "data", "thumbnail.jpg"), "rb") as fh: r = requests.put( - f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}/stream/thumbnail?filename=thumbnail.jpg", + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}/thumbnail?filename=thumbnail.jpg", headers=crawler_auth_headers, data=read_in_chunks(fh), ) From ce52e87c403bad0eb88a917d3f62991d76b92b52 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 2 Dec 2024 10:53:02 -0500 Subject: [PATCH 11/57] Add endpoint to delete collection thumbnail Also renames StorageOps.delete_crawl_file_object to simply delete_file_object, since we use it across all types of files we store in s3. --- backend/btrixcloud/basecrawls.py | 2 +- backend/btrixcloud/colls.py | 33 +++++++++++++++++++++++++++++++- backend/btrixcloud/crawls.py | 2 +- backend/btrixcloud/profiles.py | 2 +- backend/btrixcloud/storages.py | 4 +--- backend/test/test_collections.py | 23 ++++++++++++++++++++++ 6 files changed, 59 insertions(+), 7 deletions(-) diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index 5106487610..063ecb9dec 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -369,7 +369,7 @@ async def _delete_crawl_files( size = 0 for file_ in crawl.files: size += file_.size - if not await self.storage_ops.delete_crawl_file_object(org, file_): + if not await self.storage_ops.delete_file_object(org, file_): raise HTTPException(status_code=400, detail="file_deletion_error") # Not replicating QA run WACZs yet if not isinstance(crawl, QARun): diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 8d8d27de0c..6704081c55 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -32,6 +32,7 @@ UpdatedResponse, SuccessResponse, AddedResponse, + DeletedResponse, CollectionSearchValuesResponse, OrgPublicCollections, PublicOrgDetails, @@ -633,7 +634,7 @@ async def stream_iter(): thumbnail_file = file_prep.get_image_file(org.storage) if coll.thumbnail: - if not await self.storage_ops.delete_crawl_file_object(org, coll.thumbnail): + if not await self.storage_ops.delete_file_object(org, coll.thumbnail): print( f"Unable to delete previous collection thumbnail: {coll.thumbnail.filename}" ) @@ -649,6 +650,25 @@ async def stream_iter(): return {"added": True} + async def delete_thumbnail(self, coll_id: UUID, org: Organization): + """Delete collection thumbnail""" + coll = await self.get_collection(coll_id) + + if not coll.thumbnail: + raise HTTPException(status_code=404, detail="thumbnail_not_found") + + if not await self.storage_ops.delete_file_object(org, coll.thumbnail): + print(f"Unable to delete collection thumbnail: {coll.thumbnail.filename}") + raise HTTPException(status_code=400, detail="file_deletion_error") + + # Delete from database + await self.collections.find_one_and_update( + {"_id": coll_id, "oid": org.id}, + {"$set": {"thumbnail": None}}, + ) + + return {"deleted": True} + # ============================================================================ # 
pylint: disable=too-many-locals @@ -906,4 +926,15 @@ async def upload_thumbnail_stream( request.stream(), filename, coll_id, org, user ) + @app.delete( + "/orgs/{oid}/collections/{coll_id}/thumbnail", + tags=["collections"], + response_model=DeletedResponse, + ) + async def delete_thumbnail_stream( + coll_id: UUID, + org: Organization = Depends(org_crawl_dep), + ): + return await colls.delete_thumbnail(coll_id, org) + return colls diff --git a/backend/btrixcloud/crawls.py b/backend/btrixcloud/crawls.py index 5a0994fe70..539c408ee6 100644 --- a/backend/btrixcloud/crawls.py +++ b/backend/btrixcloud/crawls.py @@ -918,7 +918,7 @@ async def delete_crawl_qa_run_files( """delete crawl qa wacz files""" qa_run = await self.get_qa_run(crawl_id, qa_run_id, org) for file_ in qa_run.files: - if not await self.storage_ops.delete_crawl_file_object(org, file_): + if not await self.storage_ops.delete_file_object(org, file_): raise HTTPException(status_code=400, detail="file_deletion_error") # Not replicating QA run WACZs yet # await self.background_job_ops.create_delete_replica_jobs( diff --git a/backend/btrixcloud/profiles.py b/backend/btrixcloud/profiles.py index ab72422472..9b8ae8da8f 100644 --- a/backend/btrixcloud/profiles.py +++ b/backend/btrixcloud/profiles.py @@ -426,7 +426,7 @@ async def delete_profile( # Delete file from storage if profile.resource: - await self.storage_ops.delete_crawl_file_object(org, profile.resource) + await self.storage_ops.delete_file_object(org, profile.resource) await self.orgs.inc_org_bytes_stored( org.id, -profile.resource.size, "profile" ) diff --git a/backend/btrixcloud/storages.py b/backend/btrixcloud/storages.py index 50b9557a92..43b6fabcd5 100644 --- a/backend/btrixcloud/storages.py +++ b/backend/btrixcloud/storages.py @@ -476,9 +476,7 @@ async def get_presigned_url( return presigned_url - async def delete_crawl_file_object( - self, org: Organization, crawlfile: BaseFile - ) -> bool: + async def delete_file_object(self, org: Organization, crawlfile: BaseFile) -> bool: """delete crawl file from storage.""" return await self._delete_file(org, crawlfile.filename, crawlfile.storage) diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index ff38c46770..6705c3c192 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -996,6 +996,29 @@ def test_list_public_colls_home_url_thumbnail(): assert coll["description"] +def test_delete_thumbnail(crawler_auth_headers, default_org_id): + r = requests.delete( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}/thumbnail", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + assert r.json()["deleted"] + + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + assert r.json().get("thumbnail") is None + + r = requests.delete( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_second_public_coll_id}/thumbnail", + headers=crawler_auth_headers, + ) + assert r.status_code == 404 + assert r.json()["detail"] == "thumbnail_not_found" + + def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id): # Delete second collection r = requests.delete( From 5e880d90ad508c1296a06b2e351eeb8b075ddcf5 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 2 Dec 2024 11:03:13 -0500 Subject: [PATCH 12/57] Add caption field to collections --- backend/btrixcloud/colls.py | 1 + backend/btrixcloud/models.py | 6 +++++- 
backend/test/test_collections.py | 30 +++++++++++++++++++++++++++++- 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 6704081c55..b250e68669 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -110,6 +110,7 @@ async def add_collection(self, oid: UUID, coll_in: CollIn): oid=oid, name=coll_in.name, description=coll_in.description, + caption=coll_in.caption, modified=modified, access=coll_in.access, ) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index ec5fed6f8c..5e0bebe4f8 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -1238,6 +1238,7 @@ class Collection(BaseMongoModel): name: str = Field(..., min_length=1) oid: UUID description: Optional[str] = None + caption: Optional[str] = None modified: Optional[datetime] = None crawlCount: Optional[int] = 0 @@ -1262,6 +1263,7 @@ class CollIn(BaseModel): name: str = Field(..., min_length=1) description: Optional[str] = None + caption: Optional[str] = None crawlIds: Optional[List[str]] = [] access: CollAccessType = CollAccessType.PRIVATE @@ -1274,6 +1276,7 @@ class CollOut(BaseMongoModel): name: str oid: UUID description: Optional[str] = None + caption: Optional[str] = None modified: Optional[datetime] = None crawlCount: Optional[int] = 0 @@ -1299,7 +1302,7 @@ class PublicCollOut(BaseMongoModel): name: str description: Optional[str] = None - # caption: Optional[str] = None + caption: Optional[str] = None # earliestDate: Optional[datetime] = None # latestDate: Optional[datetime] = None @@ -1317,6 +1320,7 @@ class UpdateColl(BaseModel): name: Optional[str] = None description: Optional[str] = None + caption: Optional[str] = None access: Optional[CollAccessType] = None diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 6705c3c192..a1202d72ba 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -12,6 +12,8 @@ UPDATED_NAME = "Updated tést cöllection" SECOND_COLLECTION_NAME = "second-collection" DESCRIPTION = "Test description" +CAPTION = "Short caption" +UPDATED_CAPTION = "Updated caption" _coll_id = None _second_coll_id = None @@ -33,6 +35,7 @@ def test_create_collection( json={ "crawlIds": [crawler_crawl_id], "name": COLLECTION_NAME, + "caption": CAPTION, }, ) assert r.status_code == 200 @@ -51,6 +54,23 @@ def test_create_collection( assert _coll_id in r.json()["collectionIds"] assert r.json()["collections"] == [{"name": COLLECTION_NAME, "id": _coll_id}] + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + + assert data["id"] == _coll_id + assert data["name"] == COLLECTION_NAME + assert data["caption"] == CAPTION + assert data["crawlCount"] == 1 + assert data["pageCount"] > 0 + assert data["totalSize"] > 0 + modified = data["modified"] + assert modified + assert modified.endswith("Z") + def test_create_public_collection( crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id @@ -61,6 +81,7 @@ def test_create_public_collection( json={ "crawlIds": [crawler_crawl_id], "name": PUBLIC_COLLECTION_NAME, + "caption": CAPTION, "access": "public", }, ) @@ -117,6 +138,7 @@ def test_update_collection( headers=crawler_auth_headers, json={ "description": DESCRIPTION, + "caption": UPDATED_CAPTION, }, ) assert r.status_code == 200 @@ -132,6 +154,7 @@ def test_update_collection( assert data["id"] == _coll_id 
assert data["name"] == COLLECTION_NAME assert data["description"] == DESCRIPTION + assert data["caption"] == UPDATED_CAPTION assert data["crawlCount"] == 1 assert data["pageCount"] > 0 assert data["totalSize"] > 0 @@ -276,6 +299,7 @@ def test_get_collection(crawler_auth_headers, default_org_id): assert data["name"] == UPDATED_NAME assert data["oid"] == default_org_id assert data["description"] == DESCRIPTION + assert data["caption"] == UPDATED_CAPTION assert data["crawlCount"] == 2 assert data["pageCount"] > 0 assert data["totalSize"] > 0 @@ -294,6 +318,7 @@ def test_get_collection_replay(crawler_auth_headers, default_org_id): assert data["name"] == UPDATED_NAME assert data["oid"] == default_org_id assert data["description"] == DESCRIPTION + assert data["caption"] == UPDATED_CAPTION assert data["crawlCount"] == 2 assert data["pageCount"] > 0 assert data["totalSize"] > 0 @@ -461,10 +486,11 @@ def test_list_collections( assert len(items) == 3 first_coll = [coll for coll in items if coll["name"] == UPDATED_NAME][0] - assert first_coll["id"] + assert first_coll["id"] == _coll_id assert first_coll["name"] == UPDATED_NAME assert first_coll["oid"] == default_org_id assert first_coll["description"] == DESCRIPTION + assert first_coll["caption"] == UPDATED_CAPTION assert first_coll["crawlCount"] == 3 assert first_coll["pageCount"] > 0 assert first_coll["totalSize"] > 0 @@ -977,6 +1003,8 @@ def test_list_public_colls_home_url_thumbnail(): assert field not in coll if coll["id"] == _public_coll_id: + assert coll["caption"] == CAPTION + assert coll["homeUrl"] assert coll["homeUrlTs"] From 355c7f44953bc49c2f3c1a968a78d5aa01bb90e7 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 2 Dec 2024 11:35:43 -0500 Subject: [PATCH 13/57] Calculate and store earliest and latest dates in collection - Based on page timestamps - Not all archived items have pages written to the db yet, so this won't count pages in older crawls and in uploads yet - Might want to move to asyncio task or background job --- backend/btrixcloud/colls.py | 43 ++++++++++++++++++++++++++++++++ backend/btrixcloud/models.py | 10 ++++++-- backend/test/test_collections.py | 25 +++++++++++++++++++ 3 files changed, 76 insertions(+), 2 deletions(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index b250e68669..0485a1218c 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -120,6 +120,7 @@ async def add_collection(self, oid: UUID, coll_in: CollIn): if crawl_ids: await self.crawl_ops.add_to_collection(crawl_ids, coll_id, org) await self.update_collection_counts_and_tags(coll_id) + await self.update_collection_dates(coll_id) asyncio.create_task( self.event_webhook_ops.create_added_to_collection_notification( crawl_ids, coll_id, org @@ -173,6 +174,7 @@ async def add_crawls_to_collection( raise HTTPException(status_code=404, detail="collection_not_found") await self.update_collection_counts_and_tags(coll_id) + await self.update_collection_dates(coll_id) asyncio.create_task( self.event_webhook_ops.create_added_to_collection_notification( @@ -197,6 +199,7 @@ async def remove_crawls_from_collection( raise HTTPException(status_code=404, detail="collection_not_found") await self.update_collection_counts_and_tags(coll_id) + await self.update_collection_dates(coll_id) asyncio.create_task( self.event_webhook_ops.create_removed_from_collection_notification( @@ -448,6 +451,46 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): }, ) + async def update_collection_dates(self, coll_id: UUID): 
+ """Update collection earliest and latest dates from page timestamps""" + coll = await self.get_collection(coll_id) + crawl_ids = await self.get_collection_crawl_ids(coll_id) + + earliest_ts = None + latest_ts = None + + match_query = { + "oid": coll.oid, + "crawl_id": {"$in": crawl_ids}, + "ts": {"$ne": None}, + } + + cursor = self.pages.find(match_query).sort("ts", 1).limit(1) + pages = await cursor.to_list(length=1) + try: + earliest_page = pages[0] + earliest_ts = earliest_page.get("ts") + except IndexError: + pass + + cursor = self.pages.find(match_query).sort("ts", -1).limit(1) + pages = await cursor.to_list(length=1) + try: + latest_page = pages[0] + latest_ts = latest_page.get("ts") + except IndexError: + pass + + await self.collections.find_one_and_update( + {"_id": coll_id}, + { + "$set": { + "dateEarliest": earliest_ts, + "dateLatest": latest_ts, + } + }, + ) + async def update_crawl_collections(self, crawl_id: str): """Update counts and tags for all collections in crawl""" crawl = await self.crawls.find_one({"_id": crawl_id}) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 5e0bebe4f8..307c40c803 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -1245,6 +1245,9 @@ class Collection(BaseMongoModel): pageCount: Optional[int] = 0 totalSize: Optional[int] = 0 + dateEarliest: Optional[datetime] = None + dateLatest: Optional[datetime] = None + # Sorted by count, descending tags: Optional[List[str]] = [] @@ -1283,6 +1286,9 @@ class CollOut(BaseMongoModel): pageCount: Optional[int] = 0 totalSize: Optional[int] = 0 + dateEarliest: Optional[datetime] = None + dateLatest: Optional[datetime] = None + # Sorted by count, descending tags: Optional[List[str]] = [] @@ -1304,8 +1310,8 @@ class PublicCollOut(BaseMongoModel): description: Optional[str] = None caption: Optional[str] = None - # earliestDate: Optional[datetime] = None - # latestDate: Optional[datetime] = None + dateEarliest: Optional[datetime] = None + dateLatest: Optional[datetime] = None homeUrl: Optional[AnyHttpUrl] = None homeUrlTs: Optional[datetime] = None diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index a1202d72ba..388d4e39cd 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -71,6 +71,9 @@ def test_create_collection( assert modified assert modified.endswith("Z") + assert data["dateEarliest"] + assert data["dateLatest"] + def test_create_public_collection( crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id @@ -162,6 +165,8 @@ def test_update_collection( modified = data["modified"] assert modified assert modified.endswith("Z") + assert data["dateEarliest"] + assert data["dateLatest"] def test_rename_collection( @@ -236,6 +241,8 @@ def test_add_remove_crawl_from_collection( assert data["totalSize"] > 0 assert data["modified"] >= modified assert data["tags"] == ["wr-test-2", "wr-test-1"] + assert data["dateEarliest"] + assert data["dateLatest"] # Verify it was added r = requests.get( @@ -258,6 +265,8 @@ def test_add_remove_crawl_from_collection( assert data["totalSize"] == 0 assert data["modified"] >= modified assert data.get("tags", []) == [] + assert data["dateEarliest"] + assert data["dateLatest"] # Verify they were removed r = requests.get( @@ -286,6 +295,8 @@ def test_add_remove_crawl_from_collection( assert data["totalSize"] > 0 assert data["modified"] >= modified assert data["tags"] == ["wr-test-2", "wr-test-1"] + assert data["dateEarliest"] + assert data["dateLatest"] 
def test_get_collection(crawler_auth_headers, default_org_id): @@ -305,6 +316,8 @@ def test_get_collection(crawler_auth_headers, default_org_id): assert data["totalSize"] > 0 assert data["modified"] >= modified assert data["tags"] == ["wr-test-2", "wr-test-1"] + assert data["dateEarliest"] + assert data["dateLatest"] def test_get_collection_replay(crawler_auth_headers, default_org_id): @@ -324,6 +337,8 @@ def test_get_collection_replay(crawler_auth_headers, default_org_id): assert data["totalSize"] > 0 assert data["modified"] >= modified assert data["tags"] == ["wr-test-2", "wr-test-1"] + assert data["dateEarliest"] + assert data["dateLatest"] resources = data["resources"] assert resources @@ -440,6 +455,8 @@ def test_add_upload_to_collection(crawler_auth_headers, default_org_id): assert data["totalSize"] > 0 assert data["modified"] assert data["tags"] == ["wr-test-2", "wr-test-1"] + assert data["dateEarliest"] + assert data["dateLatest"] # Verify it was added r = requests.get( @@ -497,6 +514,8 @@ def test_list_collections( assert first_coll["modified"] assert first_coll["tags"] == ["wr-test-2", "wr-test-1"] assert first_coll["access"] == "private" + assert first_coll["dateEarliest"] + assert first_coll["dateLatest"] second_coll = [coll for coll in items if coll["name"] == SECOND_COLLECTION_NAME][0] assert second_coll["id"] @@ -509,6 +528,8 @@ def test_list_collections( assert second_coll["modified"] assert second_coll["tags"] == ["wr-test-2"] assert second_coll["access"] == "private" + assert second_coll["dateEarliest"] + assert second_coll["dateLatest"] def test_remove_upload_from_collection(crawler_auth_headers, default_org_id): @@ -843,6 +864,8 @@ def test_list_public_collections( for collection in collections: assert collection["id"] in (_public_coll_id, _second_public_coll_id) assert collection["name"] + assert collection["dateEarliest"] + assert collection["dateLatest"] # Test non-existing slug - it should return a 404 but not reveal # whether or not an org exists with that slug @@ -998,6 +1021,8 @@ def test_list_public_colls_home_url_thumbnail(): assert coll["id"] in (_public_coll_id, _second_public_coll_id) assert coll["name"] assert coll["resources"] + assert coll["dateEarliest"] + assert coll["dateLatest"] for field in non_public_fields: assert field not in coll From 571c3f75842ce2849e3088341bd107c4524deeb0 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 2 Dec 2024 13:19:38 -0500 Subject: [PATCH 14/57] Add comment to get CI to run --- backend/btrixcloud/colls.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 0485a1218c..f5fc671c8f 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -465,6 +465,7 @@ async def update_collection_dates(self, coll_id: UUID): "ts": {"$ne": None}, } + # Note: Pages for uploads are not currently in the db cursor = self.pages.find(match_query).sort("ts", 1).limit(1) pages = await cursor.to_list(length=1) try: From d904dc06d8c4a10f73f3f0dbb409c146e134186e Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 2 Dec 2024 13:44:09 -0500 Subject: [PATCH 15/57] Fix test --- backend/test/test_collections.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 388d4e39cd..a5f0c6610a 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -265,8 +265,8 @@ def test_add_remove_crawl_from_collection( assert data["totalSize"] == 0 assert 
data["modified"] >= modified assert data.get("tags", []) == [] - assert data["dateEarliest"] - assert data["dateLatest"] + assert data.get("dateEarliest") is None + assert data.get("dateLatest") is None # Verify they were removed r = requests.get( From 99229e061af7021191e7dec332a50c8c49de2c13 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 2 Dec 2024 11:55:10 -0500 Subject: [PATCH 16/57] Add pages to database for uploads Note: - Page endpoints have crawls in path, not uploads or all-crawls, but will now work with uploads - To add pages from older uploads, the pages endpoints to re-add pages for a particular crawl or all crawls will now work for uploads as well --- backend/btrixcloud/main.py | 5 ++-- backend/btrixcloud/main_bg.py | 2 +- backend/btrixcloud/main_op.py | 1 + backend/btrixcloud/models.py | 6 ++--- backend/btrixcloud/ops.py | 23 +++++++--------- backend/btrixcloud/pages.py | 11 +++----- backend/btrixcloud/uploads.py | 4 +++ backend/test/test_uploads.py | 51 +++++++++++++++++++++++++++++++++++ 8 files changed, 76 insertions(+), 27 deletions(-) diff --git a/backend/btrixcloud/main.py b/backend/btrixcloud/main.py index 7119214fc5..a9fe790730 100644 --- a/backend/btrixcloud/main.py +++ b/backend/btrixcloud/main.py @@ -245,14 +245,15 @@ def main() -> None: crawls = init_crawls_api(crawl_manager, *base_crawl_init) + upload_ops = init_uploads_api(*base_crawl_init) + page_ops = init_pages_api( app, mdb, crawls, org_ops, storage_ops, current_active_user ) base_crawl_ops.set_page_ops(page_ops) crawls.set_page_ops(page_ops) - - init_uploads_api(*base_crawl_init) + upload_ops.set_page_ops(page_ops) org_ops.set_ops(base_crawl_ops, profiles, coll_ops, background_job_ops) diff --git a/backend/btrixcloud/main_bg.py b/backend/btrixcloud/main_bg.py index 2fba05e53f..6ce4264126 100644 --- a/backend/btrixcloud/main_bg.py +++ b/backend/btrixcloud/main_bg.py @@ -27,7 +27,7 @@ async def main(): ) return 1 - (org_ops, _, _, _, _, _, _, _, _, _, user_manager) = init_ops() + (org_ops, _, _, _, _, _, _, _, _, _, _, user_manager) = init_ops() if not oid: print("Org id missing, quitting") diff --git a/backend/btrixcloud/main_op.py b/backend/btrixcloud/main_op.py index a6f6654be3..af7a2d0956 100644 --- a/backend/btrixcloud/main_op.py +++ b/backend/btrixcloud/main_op.py @@ -31,6 +31,7 @@ def main(): crawl_config_ops, _, crawl_ops, + _, page_ops, coll_ops, _, diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 307c40c803..660b107ee5 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -797,6 +797,9 @@ class BaseCrawl(CoreCrawlable, BaseMongoModel): reviewStatus: ReviewStatus = None + filePageCount: Optional[int] = 0 + errorPageCount: Optional[int] = 0 + # ============================================================================ class CollIdName(BaseModel): @@ -1013,9 +1016,6 @@ class Crawl(BaseCrawl, CrawlConfigCore): qa: Optional[QARun] = None qaFinished: Optional[Dict[str, QARun]] = {} - filePageCount: Optional[int] = 0 - errorPageCount: Optional[int] = 0 - # ============================================================================ class CrawlCompleteIn(BaseModel): diff --git a/backend/btrixcloud/ops.py b/backend/btrixcloud/ops.py index 32e5e5fee1..2a282b8e09 100644 --- a/backend/btrixcloud/ops.py +++ b/backend/btrixcloud/ops.py @@ -16,6 +16,7 @@ from .pages import PageOps from .profiles import ProfileOps from .storages import StorageOps +from .uploads import UploadOps from .users import UserManager from .webhooks import EventWebhookOps @@ 
-26,6 +27,7 @@ def init_ops() -> Tuple[ CrawlConfigOps, BaseCrawlOps, CrawlOps, + UploadOps, PageOps, CollectionOps, ProfileOps, @@ -70,7 +72,7 @@ def init_ops() -> Tuple[ coll_ops = CollectionOps(mdb, crawl_manager, org_ops, event_webhook_ops) - base_crawl_ops = BaseCrawlOps( + base_crawl_init = ( mdb, user_manager, org_ops, @@ -81,23 +83,17 @@ def init_ops() -> Tuple[ background_job_ops, ) - crawl_ops = CrawlOps( - crawl_manager, - mdb, - user_manager, - org_ops, - crawl_config_ops, - coll_ops, - storage_ops, - event_webhook_ops, - background_job_ops, - ) + base_crawl_ops = BaseCrawlOps(*base_crawl_init) + + crawl_ops = CrawlOps(crawl_manager, *base_crawl_init) + + upload_ops = UploadOps(*base_crawl_init) page_ops = PageOps(mdb, crawl_ops, org_ops, storage_ops) base_crawl_ops.set_page_ops(page_ops) - crawl_ops.set_page_ops(page_ops) + upload_ops.set_page_ops(page_ops) background_job_ops.set_ops(crawl_ops, profile_ops) @@ -116,6 +112,7 @@ def init_ops() -> Tuple[ crawl_config_ops, base_crawl_ops, crawl_ops, + upload_ops, page_ops, coll_ops, profile_ops, diff --git a/backend/btrixcloud/pages.py b/backend/btrixcloud/pages.py index a980567c49..6f83409c8f 100644 --- a/backend/btrixcloud/pages.py +++ b/backend/btrixcloud/pages.py @@ -199,10 +199,7 @@ async def update_crawl_file_and_error_counts( inc_query["errorPageCount"] = error_count await self.crawls.find_one_and_update( - { - "_id": crawl_id, - "type": "crawl", - }, + {"_id": crawl_id}, {"$inc": inc_query}, ) @@ -555,10 +552,8 @@ async def re_add_crawl_pages(self, crawl_id: str, oid: UUID): await self.add_crawl_pages_to_db_from_wacz(crawl_id) async def re_add_all_crawl_pages(self, oid: UUID): - """Re-add pages for all crawls in org""" - crawl_ids = await self.crawls.distinct( - "_id", {"type": "crawl", "finished": {"$ne": None}} - ) + """Re-add pages for all crawls and uploads in org""" + crawl_ids = await self.crawls.distinct("_id", {"finished": {"$ne": None}}) for crawl_id in crawl_ids: await self.re_add_crawl_pages(crawl_id, oid) diff --git a/backend/btrixcloud/uploads.py b/backend/btrixcloud/uploads.py index 7257c875e6..0473e1cf58 100644 --- a/backend/btrixcloud/uploads.py +++ b/backend/btrixcloud/uploads.py @@ -189,6 +189,8 @@ async def _create_upload( self.event_webhook_ops.create_upload_finished_notification(crawl_id, org.id) ) + asyncio.create_task(self.page_ops.add_crawl_pages_to_db_from_wacz(crawl_id)) + await self.orgs.inc_org_bytes_stored(org.id, file_size, "upload") quota_reached = self.orgs.storage_quota_reached(org) @@ -407,3 +409,5 @@ async def delete_uploads( org: Organization = Depends(org_crawl_dep), ): return await ops.delete_uploads(delete_list, org, user) + + return ops diff --git a/backend/test/test_uploads.py b/backend/test/test_uploads.py index fb7543d0a2..0049e53269 100644 --- a/backend/test/test_uploads.py +++ b/backend/test/test_uploads.py @@ -132,6 +132,57 @@ def test_get_stream_upload( assert r.status_code == 200 +def test_get_upload_pages(admin_auth_headers, default_org_id, upload_id): + time.sleep(10) + + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/crawls/{upload_id}/pages", + headers=admin_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + + assert data["total"] > 0 + + pages = data["items"] + for page in pages: + assert page["id"] + assert page["oid"] + assert page["crawl_id"] == upload_id + assert page["url"] + assert page["ts"] + assert page.get("title") or page.get("title") is None + assert page["loadState"] + assert page["status"] + assert page["mime"] + assert 
page["isError"] in (True, False) + assert page["isFile"] in (True, False) + + page_id = pages[0]["id"] + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/crawls/{upload_id}/pages/{page_id}", + headers=admin_auth_headers, + ) + assert r.status_code == 200 + page = r.json() + + assert page["id"] == page_id + assert page["oid"] + assert page["crawl_id"] + assert page["url"] + assert page["ts"] + assert page.get("title") or page.get("title") is None + assert page["loadState"] + assert page["mime"] + assert page["isError"] in (True, False) + assert page["isFile"] in (True, False) + + assert page["notes"] == [] + assert page.get("userid") is None + assert page.get("modified") is None + assert page.get("approved") is None + + def test_list_uploads( admin_auth_headers, default_org_id, uploads_collection_id, upload_id_2 ): From af9265a9ab4ee5b394462570c82515493b8ec7ce Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 2 Dec 2024 13:09:14 -0500 Subject: [PATCH 17/57] WIP: Add all-crawls/ and uploads/ versions of GET pages endpoints - QA-related endpoints are still restricted to crawls only for now - Do we want to enforce the collection types or are these multiple paths per endpoint method sufficient? tbd --- backend/btrixcloud/pages.py | 48 +++++++++++++++++++++++++++++++++--- backend/test/test_uploads.py | 4 +-- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/backend/btrixcloud/pages.py b/backend/btrixcloud/pages.py index 6f83409c8f..74383d9d71 100644 --- a/backend/btrixcloud/pages.py +++ b/backend/btrixcloud/pages.py @@ -638,7 +638,17 @@ def init_pages_api(app, mdb, crawl_ops, org_ops, storage_ops, user_dep): @app.post( "/orgs/{oid}/crawls/all/pages/reAdd", - tags=["pages"], + tags=["pages", "crawls"], + response_model=StartedResponseBool, + ) + @app.post( + "/orgs/{oid}/uploads/all/pages/reAdd", + tags=["pages", "uploads"], + response_model=StartedResponseBool, + ) + @app.post( + "/orgs/{oid}/all-crawls/all/pages/reAdd", + tags=["pages", "all-crawls"], response_model=StartedResponseBool, ) async def re_add_all_crawl_pages( @@ -653,7 +663,17 @@ async def re_add_all_crawl_pages( @app.post( "/orgs/{oid}/crawls/{crawl_id}/pages/reAdd", - tags=["pages"], + tags=["pages", "crawls"], + response_model=StartedResponseBool, + ) + @app.post( + "/orgs/{oid}/uploads/{crawl_id}/pages/reAdd", + tags=["pages", "uploads"], + response_model=StartedResponseBool, + ) + @app.post( + "/orgs/{oid}/all-crawls/{crawl_id}/pages/reAdd", + tags=["pages", "all-crawls"], response_model=StartedResponseBool, ) async def re_add_crawl_pages( @@ -665,7 +685,17 @@ async def re_add_crawl_pages( @app.get( "/orgs/{oid}/crawls/{crawl_id}/pages/{page_id}", - tags=["pages"], + tags=["pages", "crawls"], + response_model=PageOut, + ) + @app.get( + "/orgs/{oid}/uploads/{crawl_id}/pages/{page_id}", + tags=["pages", "uploads"], + response_model=PageOut, + ) + @app.get( + "/orgs/{oid}/all-crawls/{crawl_id}/pages/{page_id}", + tags=["pages", "all-crawls"], response_model=PageOut, ) async def get_page( @@ -753,7 +783,17 @@ async def delete_page_notes( @app.get( "/orgs/{oid}/crawls/{crawl_id}/pages", - tags=["pages"], + tags=["pages", "crawls"], + response_model=PaginatedPageOutResponse, + ) + @app.get( + "/orgs/{oid}/uploads/{crawl_id}/pages", + tags=["pages", "uploads"], + response_model=PaginatedPageOutResponse, + ) + @app.get( + "/orgs/{oid}/all-crawls/{crawl_id}/pages", + tags=["pages", "all-crawls"], response_model=PaginatedPageOutResponse, ) async def get_pages_list( diff --git a/backend/test/test_uploads.py 
b/backend/test/test_uploads.py index 0049e53269..86d1abb854 100644 --- a/backend/test/test_uploads.py +++ b/backend/test/test_uploads.py @@ -136,7 +136,7 @@ def test_get_upload_pages(admin_auth_headers, default_org_id, upload_id): time.sleep(10) r = requests.get( - f"{API_PREFIX}/orgs/{default_org_id}/crawls/{upload_id}/pages", + f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}/pages", headers=admin_auth_headers, ) assert r.status_code == 200 @@ -160,7 +160,7 @@ def test_get_upload_pages(admin_auth_headers, default_org_id, upload_id): page_id = pages[0]["id"] r = requests.get( - f"{API_PREFIX}/orgs/{default_org_id}/crawls/{upload_id}/pages/{page_id}", + f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}/pages/{page_id}", headers=admin_auth_headers, ) assert r.status_code == 200 From 1ad1b271c98ef8d6d084e1e9a42fc4443e1f5055 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 2 Dec 2024 16:56:20 -0500 Subject: [PATCH 18/57] Add fallbacks if pages have no id or a non-UUID id --- backend/btrixcloud/pages.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/backend/btrixcloud/pages.py b/backend/btrixcloud/pages.py index 74383d9d71..37982b8d7c 100644 --- a/backend/btrixcloud/pages.py +++ b/backend/btrixcloud/pages.py @@ -94,9 +94,19 @@ def _get_page_from_dict( self, page_dict: Dict[str, Any], crawl_id: str, oid: UUID ) -> Page: """Return Page object from dict""" - page_id = page_dict.get("id") + page_id = page_dict.get("id", "") if not page_id: print(f'Page {page_dict.get("url")} has no id - assigning UUID', flush=True) + page_id = uuid4() + + try: + UUID(page_id) + except ValueError: + print( + f'Page {page_dict.get("url")} is not a valid UUID - assigning UUID', + flush=True, + ) + page_id = uuid4() status = page_dict.get("status") if not status and page_dict.get("loadState"): From 34ebaed6509e580963c0f4725683c2a944615596 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 2 Dec 2024 17:09:37 -0500 Subject: [PATCH 19/57] Move pages test later to give it more time --- backend/test/test_uploads.py | 102 +++++++++++++++++------------------ 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/backend/test/test_uploads.py b/backend/test/test_uploads.py index 86d1abb854..73c57cf8c2 100644 --- a/backend/test/test_uploads.py +++ b/backend/test/test_uploads.py @@ -132,57 +132,6 @@ def test_get_stream_upload( assert r.status_code == 200 -def test_get_upload_pages(admin_auth_headers, default_org_id, upload_id): - time.sleep(10) - - r = requests.get( - f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}/pages", - headers=admin_auth_headers, - ) - assert r.status_code == 200 - data = r.json() - - assert data["total"] > 0 - - pages = data["items"] - for page in pages: - assert page["id"] - assert page["oid"] - assert page["crawl_id"] == upload_id - assert page["url"] - assert page["ts"] - assert page.get("title") or page.get("title") is None - assert page["loadState"] - assert page["status"] - assert page["mime"] - assert page["isError"] in (True, False) - assert page["isFile"] in (True, False) - - page_id = pages[0]["id"] - r = requests.get( - f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}/pages/{page_id}", - headers=admin_auth_headers, - ) - assert r.status_code == 200 - page = r.json() - - assert page["id"] == page_id - assert page["oid"] - assert page["crawl_id"] - assert page["url"] - assert page["ts"] - assert page.get("title") or page.get("title") is None - assert page["loadState"] - assert page["mime"] - assert page["isError"] in 
(True, False) - assert page["isFile"] in (True, False) - - assert page["notes"] == [] - assert page.get("userid") is None - assert page.get("modified") is None - assert page.get("approved") is None - - def test_list_uploads( admin_auth_headers, default_org_id, uploads_collection_id, upload_id_2 ): @@ -283,6 +232,57 @@ def test_get_upload_replay_json_admin( assert "files" not in data +def test_get_upload_pages(admin_auth_headers, default_org_id, upload_id): + time.sleep(10) + + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}/pages", + headers=admin_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + + assert data["total"] > 0 + + pages = data["items"] + for page in pages: + assert page["id"] + assert page["oid"] + assert page["crawl_id"] == upload_id + assert page["url"] + assert page["ts"] + assert page.get("title") or page.get("title") is None + assert page["loadState"] + assert page["status"] + assert page["mime"] + assert page["isError"] in (True, False) + assert page["isFile"] in (True, False) + + page_id = pages[0]["id"] + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}/pages/{page_id}", + headers=admin_auth_headers, + ) + assert r.status_code == 200 + page = r.json() + + assert page["id"] == page_id + assert page["oid"] + assert page["crawl_id"] + assert page["url"] + assert page["ts"] + assert page.get("title") or page.get("title") is None + assert page["loadState"] + assert page["mime"] + assert page["isError"] in (True, False) + assert page["isFile"] in (True, False) + + assert page["notes"] == [] + assert page.get("userid") is None + assert page.get("modified") is None + assert page.get("approved") is None + + def test_replace_upload( admin_auth_headers, default_org_id, uploads_collection_id, upload_id ): From e14c807c7fc44c93026bd08302593f5de6bf5d46 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 2 Dec 2024 17:11:05 -0500 Subject: [PATCH 20/57] Delete upload pages when deleted or replaced --- backend/btrixcloud/basecrawls.py | 3 ++- backend/btrixcloud/uploads.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index 063ecb9dec..01e700f8b5 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -323,8 +323,9 @@ async def delete_crawls( status_code=400, detail=f"Error Stopping Crawl: {exc}" ) + await self.page_ops.delete_crawl_pages(crawl_id, org.id) + if type_ == "crawl": - await self.page_ops.delete_crawl_pages(crawl_id, org.id) await self.delete_all_crawl_qa_files(crawl_id, org) crawl_size = await self._delete_crawl_files(crawl, org) diff --git a/backend/btrixcloud/uploads.py b/backend/btrixcloud/uploads.py index 0473e1cf58..e95b30427f 100644 --- a/backend/btrixcloud/uploads.py +++ b/backend/btrixcloud/uploads.py @@ -99,9 +99,10 @@ async def stream_iter(): if prev_upload: try: await self._delete_crawl_files(prev_upload, org) + await self.page_ops.delete_crawl_pages(prev_upload.id, org.id) # pylint: disable=broad-exception-caught except Exception as exc: - print("replace file deletion failed", exc) + print(f"Error handling previous upload: {exc}", flush=True) return await self._create_upload( files, name, description, collections, tags, id_, org, user From 594deddbc515ea0add54be8d30d21a0c98b1bf63 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 2 Dec 2024 17:26:32 -0500 Subject: [PATCH 21/57] Remove asserts for upload pages for optional fields --- backend/test/test_uploads.py 
| 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/backend/test/test_uploads.py b/backend/test/test_uploads.py index 73c57cf8c2..3fb1c1c44b 100644 --- a/backend/test/test_uploads.py +++ b/backend/test/test_uploads.py @@ -233,6 +233,7 @@ def test_get_upload_replay_json_admin( def test_get_upload_pages(admin_auth_headers, default_org_id, upload_id): + # Give time for pages to finish being uploaded time.sleep(10) r = requests.get( @@ -252,11 +253,6 @@ def test_get_upload_pages(admin_auth_headers, default_org_id, upload_id): assert page["url"] assert page["ts"] assert page.get("title") or page.get("title") is None - assert page["loadState"] - assert page["status"] - assert page["mime"] - assert page["isError"] in (True, False) - assert page["isFile"] in (True, False) page_id = pages[0]["id"] r = requests.get( @@ -272,10 +268,6 @@ def test_get_upload_pages(admin_auth_headers, default_org_id, upload_id): assert page["url"] assert page["ts"] assert page.get("title") or page.get("title") is None - assert page["loadState"] - assert page["mime"] - assert page["isError"] in (True, False) - assert page["isFile"] in (True, False) assert page["notes"] == [] assert page.get("userid") is None From f00cc9fc99a744ec0b66a7fd3e5c413bf9c534ed Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 2 Dec 2024 17:27:29 -0500 Subject: [PATCH 22/57] Remove outdated comment --- backend/btrixcloud/colls.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index f5fc671c8f..0485a1218c 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -465,7 +465,6 @@ async def update_collection_dates(self, coll_id: UUID): "ts": {"$ne": None}, } - # Note: Pages for uploads are not currently in the db cursor = self.pages.find(match_query).sort("ts", 1).limit(1) pages = await cursor.to_list(length=1) try: From 19cfca6ad13e673e82576d12e501b8509eb3aca0 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 3 Dec 2024 11:25:28 -0500 Subject: [PATCH 23/57] Filter (re-)add all pages endpoint by crawl type in path If a user POSTS to /crawls/all/ or /uploads/all/, filter the crawl objects in the database by that type before adding pages. This enables us to easily add pages to the database for all uploads in an organization, for example. GET endpoints and endpoints that specify just a single crawl have been kept more permissive as long as the crawl_id is found. 
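For illustration, a minimal sketch of how the type-filtered endpoint might be exercised, in the style of the existing backend tests (API_PREFIX, default_org_id, and admin_auth_headers refer to fixtures already used in backend/test; this snippet is an example only and is not part of this patch):

    import requests

    # Queue a re-add of pages for uploads only (superuser required).
    # POSTing to /crawls/all/pages/reAdd instead limits the re-add to crawls,
    # and /all-crawls/all/pages/reAdd covers both crawls and uploads.
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/uploads/all/pages/reAdd",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
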
--- backend/btrixcloud/pages.py | 38 +++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/backend/btrixcloud/pages.py b/backend/btrixcloud/pages.py index 37982b8d7c..567ff4f7e5 100644 --- a/backend/btrixcloud/pages.py +++ b/backend/btrixcloud/pages.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Optional, Tuple, List, Dict, Any, Union from uuid import UUID, uuid4 -from fastapi import Depends, HTTPException +from fastapi import Depends, HTTPException, Request import pymongo from .models import ( @@ -561,9 +561,15 @@ async def re_add_crawl_pages(self, crawl_id: str, oid: UUID): print(f"Deleted pages for crawl {crawl_id}", flush=True) await self.add_crawl_pages_to_db_from_wacz(crawl_id) - async def re_add_all_crawl_pages(self, oid: UUID): + async def re_add_all_crawl_pages( + self, oid: UUID, type_filter: Optional[str] = None + ): """Re-add pages for all crawls and uploads in org""" - crawl_ids = await self.crawls.distinct("_id", {"finished": {"$ne": None}}) + match_query: Dict[str, object] = {"finished": {"$ne": None}} + if type_filter: + match_query["type"] = type_filter + + crawl_ids = await self.crawls.distinct("_id", match_query) for crawl_id in crawl_ids: await self.re_add_crawl_pages(crawl_id, oid) @@ -662,13 +668,28 @@ def init_pages_api(app, mdb, crawl_ops, org_ops, storage_ops, user_dep): response_model=StartedResponseBool, ) async def re_add_all_crawl_pages( - org: Organization = Depends(org_crawl_dep), user: User = Depends(user_dep) + request: Request, + org: Organization = Depends(org_crawl_dep), + user: User = Depends(user_dep), ): """Re-add pages for all crawls in org (superuser only)""" if not user.is_superuser: raise HTTPException(status_code=403, detail="Not Allowed") - asyncio.create_task(ops.re_add_all_crawl_pages(org.id)) + type_filter = None + + try: + route_path = request.scope["route"].path + type_path = route_path.split("/")[4] + + if type_path == "uploads": + type_filter = "upload" + if type_path == "crawls": + type_filter = "crawl" + except (IndexError, AttributeError): + pass + + asyncio.create_task(ops.re_add_all_crawl_pages(org.id, type_filter=type_filter)) return {"started": True} @app.post( @@ -687,7 +708,8 @@ async def re_add_all_crawl_pages( response_model=StartedResponseBool, ) async def re_add_crawl_pages( - crawl_id: str, org: Organization = Depends(org_crawl_dep) + crawl_id: str, + org: Organization = Depends(org_crawl_dep), ): """Re-add pages for crawl""" asyncio.create_task(ops.re_add_crawl_pages(crawl_id, org.id)) @@ -727,7 +749,7 @@ async def get_page_with_qa( page_id: UUID, org: Organization = Depends(org_crawl_dep), ): - """GET single page""" + """GET single page with QA details""" return await ops.get_page_out(page_id, org.id, crawl_id, qa_run_id=qa_run_id) @app.patch( @@ -788,7 +810,7 @@ async def delete_page_notes( delete: PageNoteDelete, org: Organization = Depends(org_crawl_dep), ): - """Edit page note""" + """Delete page note""" return await ops.delete_page_notes(page_id, org.id, delete, crawl_id) @app.get( From 4fd6e489d1e48e44fd7d298383eea72d809b5528 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 3 Dec 2024 12:26:02 -0500 Subject: [PATCH 24/57] Move re-adding all pages in org to background job --- backend/btrixcloud/background_jobs.py | 68 ++++++++++++++++++++++--- backend/btrixcloud/crawlmanager.py | 38 +++++++++----- backend/btrixcloud/main.py | 2 +- backend/btrixcloud/main_bg.py | 12 ++++- backend/btrixcloud/models.py | 10 ++++ backend/btrixcloud/ops.py | 2 +- 
backend/btrixcloud/pages.py | 40 ++++++++++----- chart/app-templates/background_job.yaml | 5 +- 8 files changed, 139 insertions(+), 38 deletions(-) diff --git a/backend/btrixcloud/background_jobs.py b/backend/btrixcloud/background_jobs.py index b9667078e0..6ebb43ad01 100644 --- a/backend/btrixcloud/background_jobs.py +++ b/backend/btrixcloud/background_jobs.py @@ -1,7 +1,6 @@ """k8s background jobs""" import asyncio -import os from datetime import datetime from typing import Optional, Tuple, Union, List, Dict, TYPE_CHECKING, cast from uuid import UUID @@ -22,6 +21,7 @@ DeleteReplicaJob, DeleteOrgJob, RecalculateOrgStatsJob, + ReAddOrgPagesJob, PaginatedBackgroundJobResponse, AnyJob, StorageRef, @@ -286,8 +286,6 @@ async def create_delete_org_job( try: job_id = await self.crawl_manager.run_delete_org_job( oid=str(org.id), - backend_image=os.environ.get("BACKEND_IMAGE", ""), - pull_policy=os.environ.get("BACKEND_IMAGE_PULL_POLICY", ""), existing_job_id=existing_job_id, ) if existing_job_id: @@ -331,8 +329,6 @@ async def create_recalculate_org_stats_job( try: job_id = await self.crawl_manager.run_recalculate_org_stats_job( oid=str(org.id), - backend_image=os.environ.get("BACKEND_IMAGE", ""), - pull_policy=os.environ.get("BACKEND_IMAGE_PULL_POLICY", ""), existing_job_id=existing_job_id, ) if existing_job_id: @@ -366,6 +362,52 @@ async def create_recalculate_org_stats_job( print(f"warning: recalculate org stats job could not be started: {exc}") return None + async def create_re_add_org_pages_job( + self, + oid: UUID, + type_filter: Optional[str] = None, + existing_job_id: Optional[str] = None, + ): + """Create job to (re)add all pages in an org, optionally filtered by crawl type""" + + try: + job_id = await self.crawl_manager.run_re_add_org_pages_job( + oid=str(oid), + type_filter=type_filter, + existing_job_id=existing_job_id, + ) + if existing_job_id: + readd_pages_job = await self.get_background_job(existing_job_id, oid) + previous_attempt = { + "started": readd_pages_job.started, + "finished": readd_pages_job.finished, + } + if readd_pages_job.previousAttempts: + readd_pages_job.previousAttempts.append(previous_attempt) + else: + readd_pages_job.previousAttempts = [previous_attempt] + readd_pages_job.started = dt_now() + readd_pages_job.finished = None + readd_pages_job.success = None + else: + readd_pages_job = ReAddOrgPagesJob( + id=job_id, + oid=oid, + type_filter=type_filter, + started=dt_now(), + ) + + await self.jobs.find_one_and_update( + {"_id": job_id}, {"$set": readd_pages_job.to_dict()}, upsert=True + ) + + return job_id + # pylint: disable=broad-exception-caught + except Exception as exc: + # pylint: disable=raise-missing-from + print(f"warning: re-add org pages job could not be started: {exc}") + return None + async def job_finished( self, job_id: str, @@ -411,7 +453,11 @@ async def job_finished( async def get_background_job( self, job_id: str, oid: Optional[UUID] = None ) -> Union[ - CreateReplicaJob, DeleteReplicaJob, DeleteOrgJob, RecalculateOrgStatsJob + CreateReplicaJob, + DeleteReplicaJob, + DeleteOrgJob, + RecalculateOrgStatsJob, + ReAddOrgPagesJob, ]: """Get background job""" query: dict[str, object] = {"_id": job_id} @@ -435,6 +481,9 @@ def _get_job_by_type_from_data(self, data: dict[str, object]): if data["type"] == BgJobType.RECALCULATE_ORG_STATS: return RecalculateOrgStatsJob.from_dict(data) + if data["type"] == BgJobType.READD_ORG_PAGES: + return ReAddOrgPagesJob.from_dict(data) + return DeleteOrgJob.from_dict(data) async def list_background_jobs( @@ -575,6 +624,13 
@@ async def retry_background_job( existing_job_id=job_id, ) + if job.type == BgJobType.READD_ORG_PAGES: + await self.create_re_add_org_pages_job( + org.id, + job.type_filter, + existing_job_id=job_id, + ) + return {"success": True} async def retry_failed_background_jobs(
diff --git a/backend/btrixcloud/crawlmanager.py b/backend/btrixcloud/crawlmanager.py index 7921ca4856..55ed6c3072 100644 --- a/backend/btrixcloud/crawlmanager.py +++ b/backend/btrixcloud/crawlmanager.py
@@ -115,8 +115,6 @@ async def run_replica_job( async def run_delete_org_job( self, oid: str, - backend_image: str, - pull_policy: str, existing_job_id: Optional[str] = None, ) -> str: """run job to delete org and all of its data"""
@@ -127,14 +125,12 @@ async def run_delete_org_job( job_id = f"delete-org-{oid}-{secrets.token_hex(5)}" return await self._run_bg_job_with_ops_classes( - oid, backend_image, pull_policy, job_id, job_type=BgJobType.DELETE_ORG.value + oid, job_id, job_type=BgJobType.DELETE_ORG.value ) async def run_recalculate_org_stats_job( self, oid: str, - backend_image: str, - pull_policy: str, existing_job_id: Optional[str] = None, ) -> str: """run job to recalculate storage stats for the org"""
@@ -146,19 +142,32 @@ async def run_recalculate_org_stats_job( return await self._run_bg_job_with_ops_classes( oid, - backend_image, - pull_policy, job_id, job_type=BgJobType.RECALCULATE_ORG_STATS.value, ) - async def _run_bg_job_with_ops_classes( + async def run_re_add_org_pages_job( self, oid: str, - backend_image: str, - pull_policy: str, - job_id: str, - job_type: str, + type_filter: Optional[str] = None, + existing_job_id: Optional[str] = None, + ) -> str: + """run job to (re)add all pages in the org, optionally filtered by crawl type""" + + if existing_job_id: + job_id = existing_job_id + else: + job_id = f"org-pages-{oid}-{secrets.token_hex(5)}" + + return await self._run_bg_job_with_ops_classes( + oid, + job_id, + job_type=BgJobType.READD_ORG_PAGES.value, + type_filter=type_filter, + ) + + async def _run_bg_job_with_ops_classes( + self, oid: str, job_id: str, job_type: str, **kwargs ) -> str: """run background job with access to ops classes"""
@@ -166,8 +175,9 @@ async def _run_bg_job_with_ops_classes( "id": job_id, "oid": oid, "job_type": job_type, - "backend_image": backend_image, - "pull_policy": pull_policy, + "backend_image": os.environ.get("BACKEND_IMAGE", ""), + "pull_policy": os.environ.get("BACKEND_IMAGE_PULL_POLICY", ""), **kwargs, } data = self.templates.env.get_template("background_job.yaml").render(params)
diff --git a/backend/btrixcloud/main.py b/backend/btrixcloud/main.py index a9fe790730..75c7a32cdd 100644 --- a/backend/btrixcloud/main.py +++ b/backend/btrixcloud/main.py @@ -248,7 +248,7 @@ def main() -> None: upload_ops = init_uploads_api(*base_crawl_init) page_ops = init_pages_api( - app, mdb, crawls, org_ops, storage_ops, current_active_user + app, mdb, crawls, org_ops, storage_ops, background_job_ops, current_active_user ) base_crawl_ops.set_page_ops(page_ops)
diff --git a/backend/btrixcloud/main_bg.py b/backend/btrixcloud/main_bg.py index 6ce4264126..7c77dcba78 100644 --- a/backend/btrixcloud/main_bg.py +++ b/backend/btrixcloud/main_bg.py @@ -12,6 +12,7 @@ job_type = os.environ.get("BG_JOB_TYPE") oid = os.environ.get("OID") +type_filter = os.environ.get("TYPE_FILTER") # ============================================================================ @@ -27,7 +28,7 @@ async def main(): ) return 1 - (org_ops, _, _, _, _, _, _, _, _, _, _, user_manager) = init_ops() + (org_ops, _, _, _, _, page_ops, _, _, _, _, _, 
user_manager) = init_ops() if not oid: print("Org id missing, quitting") @@ -57,6 +58,15 @@ async def main(): traceback.print_exc() return 1 + if job_type == BgJobType.READD_ORG_PAGES: + try: + await page_ops.re_add_all_crawl_pages(org, type_filter=type_filter) + return 0 + # pylint: disable=broad-exception-caught + except Exception: + traceback.print_exc() + return 1 + print(f"Provided job type {job_type} not currently supported") return 1 diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 660b107ee5..8ed1b52d74 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -2294,6 +2294,7 @@ class BgJobType(str, Enum): DELETE_REPLICA = "delete-replica" DELETE_ORG = "delete-org" RECALCULATE_ORG_STATS = "recalculate-org-stats" + READD_ORG_PAGES = "readd-org-pages" # ============================================================================ @@ -2346,6 +2347,14 @@ class RecalculateOrgStatsJob(BackgroundJob): type: Literal[BgJobType.RECALCULATE_ORG_STATS] = BgJobType.RECALCULATE_ORG_STATS +# ============================================================================ +class ReAddOrgPagesJob(BackgroundJob): + """Model for tracking jobs to readd an org's pages""" + + type: Literal[BgJobType.READD_ORG_PAGES] = BgJobType.READD_ORG_PAGES + type_filter: Optional[str] = None + + # ============================================================================ # Union of all job types, for response model @@ -2356,6 +2365,7 @@ class RecalculateOrgStatsJob(BackgroundJob): BackgroundJob, DeleteOrgJob, RecalculateOrgStatsJob, + ReAddOrgPagesJob, ] ] diff --git a/backend/btrixcloud/ops.py b/backend/btrixcloud/ops.py index 2a282b8e09..bee24d00c5 100644 --- a/backend/btrixcloud/ops.py +++ b/backend/btrixcloud/ops.py @@ -89,7 +89,7 @@ def init_ops() -> Tuple[ upload_ops = UploadOps(*base_crawl_init) - page_ops = PageOps(mdb, crawl_ops, org_ops, storage_ops) + page_ops = PageOps(mdb, crawl_ops, org_ops, storage_ops, background_job_ops) base_crawl_ops.set_page_ops(page_ops) crawl_ops.set_page_ops(page_ops) diff --git a/backend/btrixcloud/pages.py b/backend/btrixcloud/pages.py index 567ff4f7e5..afc16ab7d0 100644 --- a/backend/btrixcloud/pages.py +++ b/backend/btrixcloud/pages.py @@ -24,6 +24,7 @@ PageNoteEdit, PageNoteDelete, QARunBucketStats, + StartedResponse, StartedResponseBool, UpdatedResponse, DeletedResponse, @@ -34,11 +35,12 @@ from .utils import str_to_date, str_list_to_bools, dt_now if TYPE_CHECKING: + from .background_jobs import BackgroundJobOps from .crawls import CrawlOps from .orgs import OrgOps from .storages import StorageOps else: - CrawlOps = StorageOps = OrgOps = object + CrawlOps = StorageOps = OrgOps = BackgroundJobOps = object # ============================================================================ @@ -49,18 +51,24 @@ class PageOps: crawl_ops: CrawlOps org_ops: OrgOps storage_ops: StorageOps + background_job_ops: BackgroundJobOps - def __init__(self, mdb, crawl_ops, org_ops, storage_ops): + def __init__(self, mdb, crawl_ops, org_ops, storage_ops, background_job_ops): self.pages = mdb["pages"] self.crawls = mdb["crawls"] self.crawl_ops = crawl_ops self.org_ops = org_ops self.storage_ops = storage_ops + self.background_job_ops = background_job_ops async def init_index(self): """init index for pages db collection""" await self.pages.create_index([("crawl_id", pymongo.HASHED)]) + async def set_ops(self, background_job_ops: BackgroundJobOps): + """Set ops classes as needed""" + self.background_job_ops = background_job_ops + async def 
add_crawl_pages_to_db_from_wacz(self, crawl_id: str, batch_size=100): """Add pages to database from WACZ files""" pages_buffer: List[Page] = [] @@ -562,16 +570,16 @@ async def re_add_crawl_pages(self, crawl_id: str, oid: UUID): await self.add_crawl_pages_to_db_from_wacz(crawl_id) async def re_add_all_crawl_pages( - self, oid: UUID, type_filter: Optional[str] = None + self, org: Organization, type_filter: Optional[str] = None ): """Re-add pages for all crawls and uploads in org""" match_query: Dict[str, object] = {"finished": {"$ne": None}} - if type_filter: + if type_filter in ("crawl", "upload"): match_query["type"] = type_filter crawl_ids = await self.crawls.distinct("_id", match_query) for crawl_id in crawl_ids: - await self.re_add_crawl_pages(crawl_id, oid) + await self.re_add_crawl_pages(crawl_id, org.id) async def get_qa_run_aggregate_counts( self, @@ -644,35 +652,37 @@ async def get_qa_run_aggregate_counts( # ============================================================================ # pylint: disable=too-many-arguments, too-many-locals, invalid-name, fixme -def init_pages_api(app, mdb, crawl_ops, org_ops, storage_ops, user_dep): +def init_pages_api( + app, mdb, crawl_ops, org_ops, storage_ops, background_job_ops, user_dep +): """init pages API""" # pylint: disable=invalid-name - ops = PageOps(mdb, crawl_ops, org_ops, storage_ops) + ops = PageOps(mdb, crawl_ops, org_ops, storage_ops, background_job_ops) org_crawl_dep = org_ops.org_crawl_dep @app.post( "/orgs/{oid}/crawls/all/pages/reAdd", tags=["pages", "crawls"], - response_model=StartedResponseBool, + response_model=StartedResponse, ) @app.post( "/orgs/{oid}/uploads/all/pages/reAdd", tags=["pages", "uploads"], - response_model=StartedResponseBool, + response_model=StartedResponse, ) @app.post( "/orgs/{oid}/all-crawls/all/pages/reAdd", tags=["pages", "all-crawls"], - response_model=StartedResponseBool, + response_model=StartedResponse, ) async def re_add_all_crawl_pages( request: Request, org: Organization = Depends(org_crawl_dep), user: User = Depends(user_dep), ): - """Re-add pages for all crawls in org (superuser only)""" + """Re-add pages for all crawls in org (superuser only, may delete page QA data!)""" if not user.is_superuser: raise HTTPException(status_code=403, detail="Not Allowed") @@ -689,8 +699,10 @@ async def re_add_all_crawl_pages( except (IndexError, AttributeError): pass - asyncio.create_task(ops.re_add_all_crawl_pages(org.id, type_filter=type_filter)) - return {"started": True} + job_id = await ops.background_job_ops.create_re_add_org_pages_job( + org.id, type_filter=type_filter + ) + return {"started": job_id or ""} @app.post( "/orgs/{oid}/crawls/{crawl_id}/pages/reAdd", @@ -711,7 +723,7 @@ async def re_add_crawl_pages( crawl_id: str, org: Organization = Depends(org_crawl_dep), ): - """Re-add pages for crawl""" + """Re-add pages for crawl (may delete page QA data!)""" asyncio.create_task(ops.re_add_crawl_pages(crawl_id, org.id)) return {"started": True} diff --git a/chart/app-templates/background_job.yaml b/chart/app-templates/background_job.yaml index 132d3bf8fe..b6301457df 100644 --- a/chart/app-templates/background_job.yaml +++ b/chart/app-templates/background_job.yaml @@ -8,7 +8,7 @@ metadata: btrix.org: {{ oid }} spec: - ttlSecondsAfterFinished: 0 + ttlSecondsAfterFinished: 90 backoffLimit: 3 template: spec: @@ -38,6 +38,9 @@ spec: - name: OID value: {{ oid }} + - name: TYPE_FILTER + value: {{ type_filter }} + envFrom: - configMapRef: name: backend-env-config From 7271dbf798de0aa8b8b1d1c70496ea8775874efc 
Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 3 Dec 2024 12:40:34 -0500 Subject: [PATCH 25/57] Add migration to add upload files for all orgs in bg jobs --- backend/btrixcloud/db.py | 11 ++++-- backend/btrixcloud/main.py | 1 + .../migrations/migration_0037_upload_pages.py | 37 +++++++++++++++++++ backend/btrixcloud/pages.py | 2 +- 4 files changed, 46 insertions(+), 5 deletions(-) create mode 100644 backend/btrixcloud/migrations/migration_0037_upload_pages.py diff --git a/backend/btrixcloud/db.py b/backend/btrixcloud/db.py index f453442191..460f1a1082 100644 --- a/backend/btrixcloud/db.py +++ b/backend/btrixcloud/db.py @@ -17,7 +17,7 @@ from .migrations import BaseMigration -CURR_DB_VERSION = "0036" +CURR_DB_VERSION = "0037" # ============================================================================ @@ -82,6 +82,7 @@ async def update_and_prepare_db( invite_ops, storage_ops, page_ops, + background_job_ops, db_inited, ): """Prepare database for application. @@ -94,7 +95,7 @@ async def update_and_prepare_db( """ await ping_db(mdb) print("Database setup started", flush=True) - if await run_db_migrations(mdb, user_manager, page_ops): + if await run_db_migrations(mdb, user_manager, background_job_ops): await drop_indexes(mdb) await create_indexes( org_ops, @@ -113,7 +114,7 @@ async def update_and_prepare_db( # ============================================================================ -async def run_db_migrations(mdb, user_manager, page_ops): +async def run_db_migrations(mdb, user_manager, background_job_ops): """Run database migrations.""" # if first run, just set version and exit @@ -145,7 +146,9 @@ async def run_db_migrations(mdb, user_manager, page_ops): assert spec.loader migration_module = importlib.util.module_from_spec(spec) spec.loader.exec_module(migration_module) - migration = migration_module.Migration(mdb, page_ops=page_ops) + migration = migration_module.Migration( + mdb, background_job_ops=background_job_ops + ) if await migration.run(): migrations_run = True except ImportError as err: diff --git a/backend/btrixcloud/main.py b/backend/btrixcloud/main.py index 75c7a32cdd..927a03dcb8 100644 --- a/backend/btrixcloud/main.py +++ b/backend/btrixcloud/main.py @@ -278,6 +278,7 @@ def main() -> None: invites, storage_ops, page_ops, + background_job_ops, db_inited, ) ) diff --git a/backend/btrixcloud/migrations/migration_0037_upload_pages.py b/backend/btrixcloud/migrations/migration_0037_upload_pages.py new file mode 100644 index 0000000000..e228782556 --- /dev/null +++ b/backend/btrixcloud/migrations/migration_0037_upload_pages.py @@ -0,0 +1,37 @@ +""" +Migration 0037 -- upload pages +""" + +from btrixcloud.migrations import BaseMigration + + +MIGRATION_VERSION = "0037" + + +class Migration(BaseMigration): + """Migration class.""" + + # pylint: disable=unused-argument + def __init__(self, mdb, **kwargs): + super().__init__(mdb, migration_version=MIGRATION_VERSION) + + self.background_job_ops = kwargs.get("background_job_ops") + + async def migrate_up(self): + """Perform migration up. 
+ + Start background jobs to parse uploads and add their pages to db + """ + mdb_orgs = self.mdb["organizations"] + async for org in mdb_orgs.find(): + oid = org["_id"] + try: + await self.background_job_ops.create_re_add_org_pages_job( + oid, type_filter="upload" + ) + # pylint: disable=broad-exception-caught + except Exception as err: + print( + f"Error starting background job to add upload pages to org {oid}: {err}", + flush=True, + )
diff --git a/backend/btrixcloud/pages.py b/backend/btrixcloud/pages.py index afc16ab7d0..1e62a9130f 100644 --- a/backend/btrixcloud/pages.py +++ b/backend/btrixcloud/pages.py @@ -111,7 +111,7 @@ def _get_page_from_dict( UUID(page_id) except ValueError: print( - f'Page {page_dict.get("url")} is not a valid UUID - assigning UUID', + f'Page {page_dict.get("url")} id "{page_id}" is not a valid UUID - assigning UUID', flush=True, ) page_id = uuid4()
From 0ddbcd019d5778f7da866051cc14d97769edd260 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 3 Dec 2024 14:29:47 -0500 Subject: [PATCH 26/57] Add API endpoint to GET single public collection --- backend/btrixcloud/colls.py | 58 ++++++++++++-- .../migrations/migration_0037_upload_pages.py | 6 ++ backend/test/test_collections.py | 75 ++++++++++++++++++- 3 files changed, 130 insertions(+), 9 deletions(-)
diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 0485a1218c..a61857e416 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -2,6 +2,8 @@ Collections API """ +# pylint: disable=too-many-lines + from collections import Counter from uuid import UUID, uuid4 from typing import Optional, List, TYPE_CHECKING, cast, Dict, Tuple, Any, Union
@@ -210,11 +212,11 @@ async def remove_crawls_from_collection( return await self.get_collection_out(coll_id, org) async def get_collection_raw( - self, coll_id: UUID, public_only: bool = False + self, coll_id: UUID, public_or_unlisted_only: bool = False ) -> Dict[str, Any]: """Get collection by id as dict from database""" query: dict[str, object] = {"_id": coll_id} - if public_only: + if public_or_unlisted_only: query["access"] = {"$in": ["public", "unlisted"]} result = await self.collections.find_one(query)
@@ -224,17 +226,21 @@ async def get_collection_raw( return result async def get_collection( - self, coll_id: UUID, public_only: bool = False + self, coll_id: UUID, public_or_unlisted_only: bool = False ) -> Collection: """Get collection by id""" - result = await self.get_collection_raw(coll_id, public_only) + result = await self.get_collection_raw(coll_id, public_or_unlisted_only) return Collection.from_dict(result) async def get_collection_out( - self, coll_id: UUID, org: Organization, resources=False, public_only=False + self, + coll_id: UUID, + org: Organization, + resources=False, + public_or_unlisted_only=False, ) -> CollOut: """Get CollOut by id""" - result = await self.get_collection_raw(coll_id, public_only) + result = await self.get_collection_raw(coll_id, public_or_unlisted_only) if resources: result["resources"] = await self.get_collection_crawl_resources(coll_id)
@@ -248,6 +254,26 @@ async def get_collection_out( return CollOut.from_dict(result) + async def get_public_collection_out( + self, coll_id: UUID, org: Organization + ) -> PublicCollOut: + """Get PublicCollOut by id""" + result = await self.get_collection_raw(coll_id) + + if result.get("access") != "public": + raise HTTPException(status_code=404, detail="collection_not_found") + + result["resources"] = await self.get_collection_crawl_resources(coll_id) + + 
thumbnail = result.get("thumbnail") + if thumbnail: + image_file = ImageFile(**thumbnail) + result["thumbnail"] = await image_file.get_public_image_file_out( + org, self.storage_ops + ) + + return PublicCollOut.from_dict(result) + async def list_collections( self, org: Organization, @@ -825,7 +851,7 @@ async def get_collection_public_replay( org: Organization = Depends(org_public), ): coll = await colls.get_collection_out( - coll_id, org, resources=True, public_only=True + coll_id, org, resources=True, public_or_unlisted_only=True ) response.headers["Access-Control-Allow-Origin"] = "*" response.headers["Access-Control-Allow-Headers"] = "*" @@ -920,6 +946,24 @@ async def get_org_public_collections( sort_direction=sortDirection, ) + @app.get( + "/public-collections/{org_slug}/collections/{coll_id}", + tags=["collections"], + response_model=PublicCollOut, + ) + async def get_public_collection( + org_slug: str, + coll_id: UUID, + ): + try: + org = await colls.orgs.get_org_by_slug(org_slug) + # pylint: disable=broad-exception-caught + except Exception: + # pylint: disable=raise-missing-from + raise HTTPException(status_code=404, detail="collection_not_found") + + return await colls.get_public_collection_out(coll_id, org) + @app.get( "/orgs/{oid}/collections/{coll_id}/urls", tags=["collections"], diff --git a/backend/btrixcloud/migrations/migration_0037_upload_pages.py b/backend/btrixcloud/migrations/migration_0037_upload_pages.py index e228782556..5ae79c51aa 100644 --- a/backend/btrixcloud/migrations/migration_0037_upload_pages.py +++ b/backend/btrixcloud/migrations/migration_0037_upload_pages.py @@ -22,6 +22,12 @@ async def migrate_up(self): Start background jobs to parse uploads and add their pages to db """ + if self.background_job_ops is None: + print( + "Unable to start background job, missing background_job_ops", flush=True + ) + return + mdb_orgs = self.mdb["organizations"] async for org in mdb_orgs.find(): oid = org["_id"] diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index a5f0c6610a..f48955642b 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -1,5 +1,6 @@ import requests import os +from uuid import uuid4 from zipfile import ZipFile, ZIP_STORED from tempfile import TemporaryFile @@ -15,6 +16,19 @@ CAPTION = "Short caption" UPDATED_CAPTION = "Updated caption" +NON_PUBLIC_COLL_FIELDS = ( + "oid", + "modified", + "crawlCount", + "pageCount", + "totalSize", + "tags", + "access", + "homeUrlPageId", +) +NON_PUBLIC_IMAGE_FIELDS = ("originalFilename", "userid", "userName", "created") + + _coll_id = None _second_coll_id = None _public_coll_id = None @@ -1024,7 +1038,7 @@ def test_list_public_colls_home_url_thumbnail(): assert coll["dateEarliest"] assert coll["dateLatest"] - for field in non_public_fields: + for field in NON_PUBLIC_COLL_FIELDS: assert field not in coll if coll["id"] == _public_coll_id: @@ -1042,13 +1056,70 @@ def test_list_public_colls_home_url_thumbnail(): assert thumbnail["size"] assert thumbnail["mime"] - for field in non_public_image_fields: + for field in NON_PUBLIC_IMAGE_FIELDS: assert field not in thumbnail if coll["id"] == _second_public_coll_id: assert coll["description"] +def test_get_public_collection(): + r = requests.get( + f"{API_PREFIX}/public-collections/{default_org_slug}/collections/{_public_coll_id}" + ) + assert r.status_code == 200 + coll = r.json() + + assert coll["id"] == _public_coll_id + assert coll["name"] + assert coll["resources"] + assert coll["dateEarliest"] + assert 
coll["dateLatest"] + + for field in NON_PUBLIC_COLL_FIELDS: + assert field not in coll + + assert coll["caption"] == CAPTION + + assert coll["homeUrl"] + assert coll["homeUrlTs"] + + thumbnail = coll["thumbnail"] + assert thumbnail + + assert thumbnail["name"] + assert thumbnail["path"] + assert thumbnail["hash"] + assert thumbnail["size"] + assert thumbnail["mime"] + + for field in NON_PUBLIC_IMAGE_FIELDS: + assert field not in thumbnail + + # Invalid org slug - don't reveal whether org exists or not, use + # same exception as if collection doesn't exist + r = requests.get( + f"{API_PREFIX}/public-collections/doesntexist/collections/{_public_coll_id}" + ) + assert r.status_code == 404 + assert r.json()["detail"] == "collection_not_found" + + # Invalid collection id + random_uuid = uuid4() + r = requests.get( + f"{API_PREFIX}/public-collections/{default_org_slug}/collections/{random_uuid}" + ) + assert r.status_code == 404 + assert r.json()["detail"] == "collection_not_found" + + # Collection isn't public + r = requests.get( + f"{API_PREFIX}/public-collections/{default_org_slug}/collections/{ _coll_id}" + ) + assert r.status_code == 404 + assert r.json()["detail"] == "collection_not_found" + + def test_delete_thumbnail(crawler_auth_headers, default_org_id): r = requests.delete( f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}/thumbnail", From 34ea14395e4b7d912e7f52f89e28b200418568da Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 3 Dec 2024 14:45:21 -0500 Subject: [PATCH 27/57] Recalculate collection dates after adding upload pages --- backend/btrixcloud/colls.py | 9 +++++++++ backend/btrixcloud/main_bg.py | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index a61857e416..4199b0e1f2 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -477,6 +477,15 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): }, ) + async def recalculate_org_collection_dates(self, org: Organization): + """Recalculate earliest and latest dates for collections in org""" + collections, _ = await self.list_collections( + org, + page_size=100_000, + ) + for coll in collections: + await self.update_collection_dates(coll.id) + async def update_collection_dates(self, coll_id: UUID): """Update collection earliest and latest dates from page timestamps""" coll = await self.get_collection(coll_id) diff --git a/backend/btrixcloud/main_bg.py b/backend/btrixcloud/main_bg.py index 7c77dcba78..a742e4c408 100644 --- a/backend/btrixcloud/main_bg.py +++ b/backend/btrixcloud/main_bg.py @@ -28,7 +28,7 @@ async def main(): ) return 1 - (org_ops, _, _, _, _, page_ops, _, _, _, _, _, user_manager) = init_ops() + (org_ops, _, _, _, _, page_ops, coll_ops, _, _, _, _, user_manager) = init_ops() if not oid: print("Org id missing, quitting") @@ -61,6 +61,7 @@ async def main(): if job_type == BgJobType.READD_ORG_PAGES: try: await page_ops.re_add_all_crawl_pages(org, type_filter=type_filter) + await coll_ops.recalculate_org_collection_dates(org) return 0 # pylint: disable=broad-exception-caught except Exception: From 60522c15b1641724537d7423bb2a6cc7231ac7e4 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 3 Dec 2024 14:56:21 -0500 Subject: [PATCH 28/57] Rename type_filter to crawl_type --- backend/btrixcloud/background_jobs.py | 8 ++-- backend/btrixcloud/crawlmanager.py | 4 +- backend/btrixcloud/main_bg.py | 4 +- .../migrations/migration_0037_upload_pages.py | 2 +- backend/btrixcloud/models.py 
| 2 +- backend/btrixcloud/pages.py | 39 +++++++++++-------- chart/app-templates/background_job.yaml | 4 +- 7 files changed, 34 insertions(+), 29 deletions(-) diff --git a/backend/btrixcloud/background_jobs.py b/backend/btrixcloud/background_jobs.py index 6ebb43ad01..35a2e75d59 100644 --- a/backend/btrixcloud/background_jobs.py +++ b/backend/btrixcloud/background_jobs.py @@ -365,7 +365,7 @@ async def create_recalculate_org_stats_job( async def create_re_add_org_pages_job( self, oid: UUID, - type_filter: Optional[str] = None, + crawl_type: Optional[str] = None, existing_job_id: Optional[str] = None, ): """Create job to (re)add all pages in an org, optionally filtered by crawl type""" @@ -373,7 +373,7 @@ async def create_re_add_org_pages_job( try: job_id = await self.crawl_manager.run_re_add_org_pages_job( oid=str(oid), - type_filter=type_filter, + crawl_type=crawl_type, existing_job_id=existing_job_id, ) if existing_job_id: @@ -393,7 +393,7 @@ async def create_re_add_org_pages_job( readd_pages_job = ReAddOrgPagesJob( id=job_id, oid=oid, - type_filter=type_filter, + crawl_type=crawl_type, started=dt_now(), ) @@ -627,7 +627,7 @@ async def retry_background_job( if job.type == BgJobType.READD_ORG_PAGES: await self.create_re_add_org_pages_job( org.id, - job.type_filter, + job.crawl_type, existing_job_id=job_id, ) diff --git a/backend/btrixcloud/crawlmanager.py b/backend/btrixcloud/crawlmanager.py index 55ed6c3072..6810929f51 100644 --- a/backend/btrixcloud/crawlmanager.py +++ b/backend/btrixcloud/crawlmanager.py @@ -149,7 +149,7 @@ async def run_recalculate_org_stats_job( async def run_re_add_org_pages_job( self, oid: str, - type_filter: Optional[str] = None, + crawl_type: Optional[str] = None, existing_job_id: Optional[str] = None, ) -> str: """run job to recalculate storage stats for the org""" @@ -163,7 +163,7 @@ async def run_re_add_org_pages_job( oid, job_id, job_type=BgJobType.READD_ORG_PAGES.value, - type_filter=type_filter, + crawl_type=crawl_type, ) async def _run_bg_job_with_ops_classes( diff --git a/backend/btrixcloud/main_bg.py b/backend/btrixcloud/main_bg.py index a742e4c408..e798f78ec3 100644 --- a/backend/btrixcloud/main_bg.py +++ b/backend/btrixcloud/main_bg.py @@ -12,7 +12,7 @@ job_type = os.environ.get("BG_JOB_TYPE") oid = os.environ.get("OID") -type_filter = os.environ.get("TYPE_FILTER") +crawl_type = os.environ.get("CRAWL_TYPE") # ============================================================================ @@ -60,7 +60,7 @@ async def main(): if job_type == BgJobType.READD_ORG_PAGES: try: - await page_ops.re_add_all_crawl_pages(org, type_filter=type_filter) + await page_ops.re_add_all_crawl_pages(org, crawl_type=crawl_type) await coll_ops.recalculate_org_collection_dates(org) return 0 # pylint: disable=broad-exception-caught diff --git a/backend/btrixcloud/migrations/migration_0037_upload_pages.py b/backend/btrixcloud/migrations/migration_0037_upload_pages.py index 5ae79c51aa..9bb4408a0d 100644 --- a/backend/btrixcloud/migrations/migration_0037_upload_pages.py +++ b/backend/btrixcloud/migrations/migration_0037_upload_pages.py @@ -33,7 +33,7 @@ async def migrate_up(self): oid = org["_id"] try: await self.background_job_ops.create_re_add_org_pages_job( - oid, type_filter="upload" + oid, crawl_type="upload" ) # pylint: disable=broad-exception-caught except Exception as err: diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 8ed1b52d74..e799aaa72d 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -2352,7 +2352,7 @@ class 
ReAddOrgPagesJob(BackgroundJob): """Model for tracking jobs to readd an org's pages""" type: Literal[BgJobType.READD_ORG_PAGES] = BgJobType.READD_ORG_PAGES - type_filter: Optional[str] = None + crawl_type: Optional[str] = None # ============================================================================ diff --git a/backend/btrixcloud/pages.py b/backend/btrixcloud/pages.py index 1e62a9130f..d3f4c9dc05 100644 --- a/backend/btrixcloud/pages.py +++ b/backend/btrixcloud/pages.py @@ -570,12 +570,12 @@ async def re_add_crawl_pages(self, crawl_id: str, oid: UUID): await self.add_crawl_pages_to_db_from_wacz(crawl_id) async def re_add_all_crawl_pages( - self, org: Organization, type_filter: Optional[str] = None + self, org: Organization, crawl_type: Optional[str] = None ): """Re-add pages for all crawls and uploads in org""" match_query: Dict[str, object] = {"finished": {"$ne": None}} - if type_filter in ("crawl", "upload"): - match_query["type"] = type_filter + if crawl_type in ("crawl", "upload"): + match_query["type"] = crawl_type crawl_ids = await self.crawls.distinct("_id", match_query) for crawl_id in crawl_ids: @@ -649,6 +649,23 @@ async def get_qa_run_aggregate_counts( return sorted(return_data, key=lambda bucket: bucket.lowerBoundary) + def get_crawl_type_from_pages_route(self, request: Request): + """Get crawl type to filter on from request route""" + crawl_type = None + + try: + route_path = request.scope["route"].path + type_path = route_path.split("/")[4] + + if type_path == "uploads": + crawl_type = "upload" + if type_path == "crawls": + crawl_type = "crawl" + except (IndexError, AttributeError): + pass + + return crawl_type + # ============================================================================ # pylint: disable=too-many-arguments, too-many-locals, invalid-name, fixme @@ -686,21 +703,9 @@ async def re_add_all_crawl_pages( if not user.is_superuser: raise HTTPException(status_code=403, detail="Not Allowed") - type_filter = None - - try: - route_path = request.scope["route"].path - type_path = route_path.split("/")[4] - - if type_path == "uploads": - type_filter = "upload" - if type_path == "crawls": - type_filter = "crawl" - except (IndexError, AttributeError): - pass - + crawl_type = ops.get_crawl_type_from_pages_route(request) job_id = await ops.background_job_ops.create_re_add_org_pages_job( - org.id, type_filter=type_filter + org.id, crawl_type=crawl_type ) return {"started": job_id or ""} diff --git a/chart/app-templates/background_job.yaml b/chart/app-templates/background_job.yaml index b6301457df..f47dd2acfd 100644 --- a/chart/app-templates/background_job.yaml +++ b/chart/app-templates/background_job.yaml @@ -38,8 +38,8 @@ spec: - name: OID value: {{ oid }} - - name: TYPE_FILTER - value: {{ type_filter }} + - name: CRAWL_TYPE + value: {{ crawl_type }} envFrom: - configMapRef: From 67b52af8005679eb92671abf88f3b97cd1dc1560 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 3 Dec 2024 15:00:39 -0500 Subject: [PATCH 29/57] Recalculate collection stats after adding upload pages --- backend/btrixcloud/colls.py | 9 +++++++++ backend/btrixcloud/main_bg.py | 1 + 2 files changed, 10 insertions(+) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 4199b0e1f2..787c4ac783 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -443,6 +443,15 @@ async def download_collection(self, coll_id: UUID, org: Organization): resp, headers=headers, media_type="application/wacz+zip" ) + async def recalculate_org_collection_counts_tags(self, 
org: Organization): + """Recalculate counts and tags for collections in org""" + collections, _ = await self.list_collections( + org, + page_size=100_000, + ) + for coll in collections: + await self.update_collection_counts_and_tags(coll.id) + async def update_collection_counts_and_tags(self, collection_id: UUID): """Set current crawl info in config when crawl begins""" crawl_count = 0 diff --git a/backend/btrixcloud/main_bg.py b/backend/btrixcloud/main_bg.py index e798f78ec3..709139d8d2 100644 --- a/backend/btrixcloud/main_bg.py +++ b/backend/btrixcloud/main_bg.py @@ -62,6 +62,7 @@ async def main(): try: await page_ops.re_add_all_crawl_pages(org, crawl_type=crawl_type) await coll_ops.recalculate_org_collection_dates(org) + await coll_ops.recalculate_org_collection_counts_tags(org) return 0 # pylint: disable=broad-exception-caught except Exception: From 2b5ba8ecaefaee8ba95a13daa544b603caef8f59 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 3 Dec 2024 15:20:50 -0500 Subject: [PATCH 30/57] Reduce per-page print logging --- backend/btrixcloud/pages.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/backend/btrixcloud/pages.py b/backend/btrixcloud/pages.py index d3f4c9dc05..251d959be1 100644 --- a/backend/btrixcloud/pages.py +++ b/backend/btrixcloud/pages.py @@ -104,16 +104,11 @@ def _get_page_from_dict( """Return Page object from dict""" page_id = page_dict.get("id", "") if not page_id: - print(f'Page {page_dict.get("url")} has no id - assigning UUID', flush=True) page_id = uuid4() try: UUID(page_id) except ValueError: - print( - f'Page {page_dict.get("url")} id "{page_id}" is not a valid UUID - assigning UUID', - flush=True, - ) page_id = uuid4() status = page_dict.get("status") From 2e72abe58e5136e74aedd4cf15bf9b83d764faa3 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 3 Dec 2024 16:06:17 -0500 Subject: [PATCH 31/57] Include upload pages in collection pageCount --- backend/btrixcloud/colls.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 787c4ac783..c46a8a511e 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -467,8 +467,17 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): files = crawl.files or [] for file in files: total_size += file.size - if crawl.stats: - page_count += crawl.stats.done + + try: + org = await self.orgs.get_org_by_id(crawl.oid) + _, crawl_pages = await self.page_ops.list_pages( + crawl.id, org, page_size=1_000_000 + ) + page_count += crawl_pages + # pylint: disable=broad-exception-caught + except Exception: + pass + if crawl.tags: tags.extend(crawl.tags) From b96346b7501a0689217b05bd9775eed02f2ef24d Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 3 Dec 2024 16:12:42 -0500 Subject: [PATCH 32/57] Return 404 for GET public collection if org isn't public --- backend/btrixcloud/colls.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index c46a8a511e..ffcf397767 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -989,6 +989,9 @@ async def get_public_collection( # pylint: disable=raise-missing-from raise HTTPException(status_code=404, detail="collection_not_found") + if not org.enablePublicProfile: + raise HTTPException(status_code=404, detail="collection_not_found") + return await colls.get_public_collection_out(coll_id, org) @app.get( From b260386e30c522852f25c9713cb4495438dfe829 Mon Sep 17 00:00:00 2001 From: 
Tessa Walsh Date: Tue, 3 Dec 2024 16:59:26 -0500 Subject: [PATCH 33/57] Add oid to PublicCollOut model --- backend/btrixcloud/models.py | 1 + backend/test/test_collections.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index e799aaa72d..36ae904877 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -1307,6 +1307,7 @@ class PublicCollOut(BaseMongoModel): """Collection output model with annotations.""" name: str + oid: UUID description: Optional[str] = None caption: Optional[str] = None diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index f48955642b..2d1ea98614 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -17,7 +17,6 @@ UPDATED_CAPTION = "Updated caption" NON_PUBLIC_COLL_FIELDS = ( - "oid", "modified", "crawlCount", "pageCount", @@ -877,6 +876,7 @@ def test_list_public_collections( assert len(collections) == 2 for collection in collections: assert collection["id"] in (_public_coll_id, _second_public_coll_id) + assert collection["oid"] assert collection["name"] assert collection["dateEarliest"] assert collection["dateLatest"] @@ -1033,6 +1033,7 @@ def test_list_public_colls_home_url_thumbnail(): for coll in collections: assert coll["id"] in (_public_coll_id, _second_public_coll_id) + assert coll["oid"] assert coll["name"] assert coll["resources"] assert coll["dateEarliest"] @@ -1063,7 +1064,7 @@ def test_list_public_colls_home_url_thumbnail(): assert coll["description"] -def test_get_public_collection(): +def test_get_public_collection(default_org_id): r = requests.get( f"{API_PREFIX}/public-collections/{default_org_slug}/collections/{_public_coll_id}" ) @@ -1071,6 +1072,7 @@ def test_get_public_collection(): coll = r.json() assert coll["id"] == _public_coll_id + assert coll["oid"] == default_org_id assert coll["name"] assert coll["resources"] assert coll["dateEarliest"] From de9907106e99ca76fe89e10b891dd04159982d7c Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 3 Dec 2024 17:08:09 -0500 Subject: [PATCH 34/57] Modify public collections endpoint paths Use public namespace, with form: - /public/orgs/slug/collections - /public/orgs/slug/collections/collection_id --- backend/btrixcloud/colls.py | 8 ++++---- backend/test/test_collections.py | 20 ++++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index ffcf397767..a60f0f074b 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -954,8 +954,8 @@ async def download_collection( return await colls.download_collection(coll_id, org) @app.get( - "/public-collections/{org_slug}", - tags=["collections"], + "/public/orgs/{org_slug}/collections", + tags=["collections", "public"], response_model=OrgPublicCollections, ) async def get_org_public_collections( @@ -974,8 +974,8 @@ async def get_org_public_collections( ) @app.get( - "/public-collections/{org_slug}/collections/{coll_id}", - tags=["collections"], + "/public/orgs/{org_slug}/collections/{coll_id}", + tags=["collections", "public"], response_model=PublicCollOut, ) async def get_public_collection( diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 2d1ea98614..a150a40cf0 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -832,7 +832,7 @@ def test_list_public_collections( assert data["publicUrl"] == "" # Try listing public 
collections without org public profile enabled - r = requests.get(f"{API_PREFIX}/public-collections/{default_org_slug}") + r = requests.get(f"{API_PREFIX}/public/org/{default_org_slug}/collections") assert r.status_code == 404 assert r.json()["detail"] == "public_profile_not_found" @@ -863,7 +863,7 @@ def test_list_public_collections( assert data["publicUrl"] == public_url # List public collections with no auth (no public profile) - r = requests.get(f"{API_PREFIX}/public-collections/{default_org_slug}") + r = requests.get(f"{API_PREFIX}/public/orgs/{default_org_slug}/collections") assert r.status_code == 200 data = r.json() @@ -883,7 +883,7 @@ def test_list_public_collections( # Test non-existing slug - it should return a 404 but not reveal # whether or not an org exists with that slug - r = requests.get(f"{API_PREFIX}/public-collections/nonexistentslug") + r = requests.get(f"{API_PREFIX}/public/orgs/nonexistentslug/collections") assert r.status_code == 404 assert r.json()["detail"] == "public_profile_not_found" @@ -891,7 +891,7 @@ def test_list_public_collections( def test_list_public_collections_no_colls(non_default_org_id, admin_auth_headers): # Test existing org that's not public - should return same 404 as # if org doesn't exist - r = requests.get(f"{API_PREFIX}/public-collections/{NON_DEFAULT_ORG_SLUG}") + r = requests.get(f"{API_PREFIX}/public/orgs/{NON_DEFAULT_ORG_SLUG}/collections") assert r.status_code == 404 assert r.json()["detail"] == "public_profile_not_found" @@ -908,7 +908,7 @@ def test_list_public_collections_no_colls(non_default_org_id, admin_auth_headers # List public collections with no auth - should still get profile even # with no public collections - r = requests.get(f"{API_PREFIX}/public-collections/{NON_DEFAULT_ORG_SLUG}") + r = requests.get(f"{API_PREFIX}/public/orgs/{NON_DEFAULT_ORG_SLUG}/collections") assert r.status_code == 200 data = r.json() assert data["org"]["name"] == NON_DEFAULT_ORG_NAME @@ -1026,7 +1026,7 @@ def test_list_public_colls_home_url_thumbnail(): ) non_public_image_fields = ("originalFilename", "userid", "userName", "created") - r = requests.get(f"{API_PREFIX}/public-collections/{default_org_slug}") + r = requests.get(f"{API_PREFIX}/public/orgs/{default_org_slug}/collections") assert r.status_code == 200 collections = r.json()["collections"] assert len(collections) == 2 @@ -1066,7 +1066,7 @@ def test_list_public_colls_home_url_thumbnail(): def test_get_public_collection(default_org_id): r = requests.get( - f"{API_PREFIX}/public-collections/{default_org_slug}/collections/{_public_coll_id}" + f"{API_PREFIX}/public/orgs/{default_org_slug}/collections/{_public_coll_id}" ) assert r.status_code == 200 coll = r.json() @@ -1101,7 +1101,7 @@ def test_get_public_collection(default_org_id): # Invalid org slug - don't reveal whether org exists or not, use # same exception as if collection doesn't exist r = requests.get( - f"{API_PREFIX}/public-collections/doesntexist/collections/{_public_coll_id}" + f"{API_PREFIX}/public/orgs/doesntexist/collections/{_public_coll_id}" ) assert r.status_code == 404 assert r.json()["detail"] == "collection_not_found" @@ -1109,14 +1109,14 @@ def test_get_public_collection(default_org_id): # Invalid collection id random_uuid = uuid4() r = requests.get( - f"{API_PREFIX}/public-collections/{default_org_slug}/collections/{random_uuid}" + f"{API_PREFIX}/public/orgs/{default_org_slug}/collections/{random_uuid}" ) assert r.status_code == 404 assert r.json()["detail"] == "collection_not_found" # Collection isn't public r = 
requests.get( - f"{API_PREFIX}/public-collections/{default_org_slug}/collections/{ _coll_id}" + f"{API_PREFIX}/public/orgs/{default_org_slug}/collections/{ _coll_id}" ) assert r.status_code == 404 assert r.json()["detail"] == "collection_not_found" From cabed6d5d8353b14a56e510e9a263bdc4ef09940 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 3 Dec 2024 17:35:24 -0500 Subject: [PATCH 35/57] Fix endpoint path in test --- backend/test/test_collections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index a150a40cf0..7a0b723903 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -832,7 +832,7 @@ def test_list_public_collections( assert data["publicUrl"] == "" # Try listing public collections without org public profile enabled - r = requests.get(f"{API_PREFIX}/public/org/{default_org_slug}/collections") + r = requests.get(f"{API_PREFIX}/public/orgs/{default_org_slug}/collections") assert r.status_code == 404 assert r.json()["detail"] == "public_profile_not_found" From 5fbf3f37d7a309195876ecdc8063824ae605baee Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 4 Dec 2024 09:59:49 -0500 Subject: [PATCH 36/57] Only fetch org once, not per-crawl --- backend/btrixcloud/colls.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index a60f0f074b..b9b533ecef 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -459,6 +459,9 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): total_size = 0 tags = [] + coll = await self.get_collection(collection_id) + org = await self.orgs.get_org_by_id(coll.oid) + async for crawl_raw in self.crawls.find({"collectionIds": collection_id}): crawl = BaseCrawl.from_dict(crawl_raw) if crawl.state not in SUCCESSFUL_STATES: @@ -469,7 +472,6 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): total_size += file.size try: - org = await self.orgs.get_org_by_id(crawl.oid) _, crawl_pages = await self.page_ops.list_pages( crawl.id, org, page_size=1_000_000 ) From ea86db4e2c76414936f00f74bb2f775c3cc161b6 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 4 Dec 2024 10:03:08 -0500 Subject: [PATCH 37/57] Add counts and size to PublicCollOut --- backend/btrixcloud/models.py | 4 ++++ backend/test/test_collections.py | 12 +++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 36ae904877..430ca14a22 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -1311,6 +1311,10 @@ class PublicCollOut(BaseMongoModel): description: Optional[str] = None caption: Optional[str] = None + crawlCount: Optional[int] = 0 + pageCount: Optional[int] = 0 + totalSize: Optional[int] = 0 + dateEarliest: Optional[datetime] = None dateLatest: Optional[datetime] = None diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 7a0b723903..854c2c430b 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -18,9 +18,6 @@ NON_PUBLIC_COLL_FIELDS = ( "modified", - "crawlCount", - "pageCount", - "totalSize", "tags", "access", "homeUrlPageId", @@ -880,6 +877,9 @@ def test_list_public_collections( assert collection["name"] assert collection["dateEarliest"] assert collection["dateLatest"] + assert collection["crawlCount"] > 0 + assert collection["pageCount"] > 0 + assert 
collection["totalSize"] > 0 # Test non-existing slug - it should return a 404 but not reveal # whether or not an org exists with that slug @@ -1038,6 +1038,9 @@ def test_list_public_colls_home_url_thumbnail(): assert coll["resources"] assert coll["dateEarliest"] assert coll["dateLatest"] + assert coll["crawlCount"] > 0 + assert coll["pageCount"] > 0 + assert coll["totalSize"] > 0 for field in NON_PUBLIC_COLL_FIELDS: assert field not in coll @@ -1077,6 +1080,9 @@ def test_get_public_collection(default_org_id): assert coll["resources"] assert coll["dateEarliest"] assert coll["dateLatest"] + assert coll["crawlCount"] > 0 + assert coll["pageCount"] > 0 + assert coll["totalSize"] > 0 for field in NON_PUBLIC_COLL_FIELDS: assert field not in coll From 90052bc7f5ecbc00d1c2e82b13d80a0db827cb32 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 4 Dec 2024 11:22:50 -0500 Subject: [PATCH 38/57] Enforce max thumbnail file size of 2MB --- backend/btrixcloud/colls.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index b9b533ecef..7e60544309 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -60,6 +60,9 @@ OrgOps = StorageOps = EventWebhookOps = CrawlOps = object +THUMBNAIL_MAX_SIZE = 2_000_000 + + # ============================================================================ class CollectionOps: """ops for working with named collections of crawls""" @@ -717,7 +720,7 @@ async def stream_iter(): file_prep.add_chunk(chunk) yield chunk - print("Collection Thumbnail Stream Upload Start", flush=True) + print("Collection thumbnail stream upload starting", flush=True) if not await self.storage_ops.do_upload_multipart( org, @@ -725,13 +728,21 @@ async def stream_iter(): stream_iter(), MIN_UPLOAD_PART_SIZE, ): - print("Collection Thumbnail Stream Upload Failed", flush=True) + print("Collection thumbnail stream upload failed", flush=True) raise HTTPException(status_code=400, detail="upload_failed") - print("Collection Thumbnail Stream Upload Complete", flush=True) + print("Collection thumbnail stream upload complete", flush=True) thumbnail_file = file_prep.get_image_file(org.storage) + if thumbnail_file.size > THUMBNAIL_MAX_SIZE: + print( + "Collection thumbnail stream upload failed: max size (2 MB) exceeded", + flush=True, + ) + await self.storage_ops.delete_file_object(org, thumbnail_file) + raise HTTPException(status_code=400, detail="upload_failed") + if coll.thumbnail: if not await self.storage_ops.delete_file_object(org, coll.thumbnail): print( @@ -740,8 +751,7 @@ async def stream_iter(): coll.thumbnail = thumbnail_file - # Update entire document to avoid bson.errors.InvalidDocument error - # with thumbnail + # Update entire document to avoid bson.errors.InvalidDocument exception await self.collections.find_one_and_update( {"_id": coll_id, "oid": org.id}, {"$set": coll.to_dict()}, From 921c6bb5ffb6900c906df82ea1f69f3f0f56ed8b Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 4 Dec 2024 11:38:09 -0500 Subject: [PATCH 39/57] Use proper thumbnail as test data --- backend/test/data/thumbnail.jpg | Bin 27462 -> 27636 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/backend/test/data/thumbnail.jpg b/backend/test/data/thumbnail.jpg index e746e341f2c778c4ac69de34063000d624169e89..133cfae2b048546ea31f8eb79ff6dd4517cbee50 100644 GIT binary patch literal 27636 zcmb5Vby%BC(>NN6gyNLoL5h1R8XBY&r^Vf&KyeLP+}%QfLLoSTP@uRL*HYZAxE6P5 
z;qW}~`+etJzw7*Q&i--V$?WdT?99r@-Vd`63jiqq76v93CI%K3CKeEgg^fdmgY)z$ z4v2sdkBA&ZK|u~8Bcq~WrlX=}1e1}`bI~)hu(EToQ_^wsaSG-*@gp}+o}gi% zJ;A`l{FgDbC&cK_o@0=_lvHCl=5q<^zyz69jxi?XuAaPqm{|s zKnQU;1)fO^SV{MG_V%_@^acXZO-Y-qmoRwj>QnB|6Bs>7M{GHVbZYsA-amQ!wT$^_ z>&J(`NbhTQ`>vr6n-zji>LwgnhFl-LfJ+5*HEFa5oUa;x`J*|UZ^k3~Bp2J?C5xV@ zK<`CNE)&w=NTu1`ubM0r0S?ds-wVR)f`F*gQv-Do*v4lw&xIG+mmgG`N=@Iw>V(eq zzby5B7@}#NrTQJZF2cAJEG{b-;Jl}4!qXcnr65CX%i|7}1W4#KcjTH>>MuA1ZRx$c zNN$eLa}`edWejk`MP^p$8nh87+0=JA6xb7O_A!qnEi}o|I(nw-4RuhUTLN4=Q)#M% zcAD1XTVxQV!Xm@`V}_32(5{`bSix2x$GV)%IEUPsRt=#Eej`e9yQfW$UFxwnP)@jbE%0`6@ISCpp{w~%{ z0m|H@cc!YX5QMzSzEE4_?^)KWa723bfgb?h2|6r%puR>IYWw>%Mi&|jkKhNuWbu!I zOOL-uKTRK~sL_RrV})1W@Ez<;6*T<1c`vKWdvop)n6*ylCOUP$n*WGZd9{`27Fn<2 z$_2YQ?{quWxl4U)ncqNho%K)4wc!oN{4Et!+hVM*av#c*o37;uh4>}cNahlPu$1+c z*SSl&%({>wI`NzmYw}4-KxBT}&Sv&$bc$&$SrqPB*1S9w>#q?x3iM5MrQS3qdIV{S zbv^Q};)vMLMZ@}&GE)VaH@pE)$6KTf?&0fT<{jaqMOAsmnTN#2+g!zqV9{-*C1`l` z4;x;uZZ+{YoJc+Z&|EIE^7PmpwLp9_#+Jc(ik@V(1up#Q+k+1Pt#>r}v&c_9VE2Qh zOqMr0?e=CpIgmC8eHvP&M0D%$+rzc};~bin89KRn%TKQ=;muHUfMCLNjGNN#RrFQV3q z9ssY7a{r{f6FBlRXJ+nm77f6-tP)%F2jM6C8-W^}TVhY=MDWwZY`*8y2&-dhOsSx- z{1v9;Wh7snK3zkibL37QepZf*glB7s@#dltOTIo;)hGh_PD)$KdbDp8suLO;fuzA_ zot+s;L$HFvPaV&%B71e$x0a76IJq)~{o=J)&2}3H^jg&`t!@7n37=FqX9$1Z`~d4* zjW?zb#umpknY|bmqK4_;Nrkq~=#JgJO7~jdO~neY`E#sNMCLuhbu=+#Mcr8EA3>58 z|0h*Y%g&|Sd5E4NXm<+4X~ebDEu(r(+j4z=5LL z{Dd`{jhl_}FWg%ld$;dW+@DdH`G%eliH!pJM+2DiCUv45eugjEo7*~(q7%zqiduLk z8XI23F1<}nt(Cd;LZw?(@~U_jQ~PCjxcjsaZ28vD za_SrR(WH`#9SM?Nu*Rbb4lO(bA2*WT)8o_5VX()q8{Y>&m5R8n0w5?s6!UAVk%418 zBrjp!Zttl*X#Gn<)UoxRrg8aDedKC1zlMnj1CFz&XSx{mnx>uuK_OhRX--C_*Jv|2 z$7$9oL~gCt)5%bKxmfpWXVBTy^iK|+6zGTuwfVkqN3LTK{XuNj$yy0Dt?4r@FUk|3 z`A@Q0zLVJ=Q?EaKmP&z;rH} zAfur;okD-mwuW*tnqn-vbJGf19ncP{Doy+mG@v3K_u8L*4Vf2@LcMJDIDZH0e1ylv zi!kIWYoNj~NJhCDBucFEdL;-3SqS&^cIx|1dmXs@RE6SEMP%bjFPx{-AeLuJ^%>Xs# zd%z$;LB|>$Pv_xzUzh60hE7YURlX!`o0eu)>B8TlCTxc(*8wF_xAk!O<884&=n(Nk z<#o|^$VSuLd10{sU2FAdTdT8gdZf=VIv`v0nHZ@%)%!lx86i6v+>m(rL)q<^n480j z@LI!79qXFO<0hD0nY)A^l$zex|BQa#o7X5)U>_*#hKnUWb#Pi&GP=NNdFj23d(D)P zOwF0;&mC#cBg*Pac3s}7#BZK*=$0ByE;4}=L4vDnfsS_Hu$Mw*+@rdvVwIEV0$d#x z*a%)DoH~2}tckO)x*LXB*-r1-nydfz(uV*V~H5dNAqy%*gZ{@OT}U`{{H z<_lw%hy3DwZmO-TOqs%= zt)D^g8LW`TRR}K242s)Rv2T3*Eh7@Gr|e~e3)VVh;WI16cA|&g#Js~qTf8lm4NXLB z6TQm!6TgpzVyI;$C<`*TC|X;fvckfYEF?D2Uuq}1SwT-l7i?2-M78+jH3Zl-=}z;i z%&~A%VBW8$yfoSdQ)&?Cfwl^;NKR=&r{S%%D`w~HeqWbC+3mPynv^@sfzS17x{r~O z1tr_KLNvU2$mmP*jXuzW$)2Fc&1;#C8Cg0ZWc#gyyOp?$(em%-mBBB^+Yywsa+VFw z1Q^7L9^%#_uax}Wmw=7#=Ey;@d}J;HWDsH3#r2q|!p7}Hk^So2DUYZP**3MOD%}TKer2AZK$n=F=YjyYa;t@)j_!7$~xh!ijPz zABf+T5XdMxBgJK(!P>`3b^U96MzD)M!mHOD!I&bJ#MW9(meDB zKOOq*`PZxFx@4@vCj;xK3U+8%--J&Sn8UD15egotq)oa{O_P z1U_EeWfVENp*?b7URhKz3^CFPY3oR)rX~ey%da3vbbchJn8K5?4U)vH11)~qN zgH3}8GrH>M_fv0K#>dAyXZ5>i!Q*RN2-9RZEGYAuYfnQ;aeV$mABh4+IDaS@08{}y z`G?jT>8k|oMaMKbwO=MA&r5x|!vgUDYgkw!XmLL}QLVpGw%)86+v>Bkd~3|HNg*NM zmL2o+c7NXqe|7I|GVsr}C(``6+WL&4*;jX5}esZ7j?^P%eR(-_lF;Se$_{_Tac%3SSb6LbY6xWp0zXj1wp+? 
zf8WxsYIu$dy_&y)&Xeqb{;^_Dnom$_36VF9Oa~t&Dy(aDU z>h=NPtSUOC{s1sxt+>K4Az|NYQxGTeN6Gg4M{1%E=EZU(LpXNcBn^>Fp|g+VWtwNu z`pZXZtRXwzpsA`B_ujk~x0Hlz4k<`5mN0kkdctYP?7co36N{^Ryodx73g;9zzF&89E@zT3BWi2v1Lyb8!mup9Ih z&O%4j_k@3Y$ZU|{Yrd%F#pPZ8t3C}v=d+!yOsh#A5lhc4FIe3@4W}~|FBYJ^zgP4L zGI!ZYa76FootFFB0<0)w#;w;tMS>j|0~T`KA$Vm@u~RPcvuJJbMJHUIa-dh#wN}{I z#|RA^)AkG31};|-Jhjx;#}+X20Px~jr0b2-u4wW98XljK@xr>i_>(7S<&T9cYe%C_ z%~Ilm?q1W4Sl(}Sn3}|0+yg+yu1u)0xODCNG7~}LB&720u4do4OZSz-H`XC{4JA@2 z4c93jTl>g{!Kp+8tSHLA!~wtpG(DTF%)VsC!X za_+QNN#k7F3~j6y=uuW_GvO3legMR^wfNp}mz6JLHj(pXii{+lC@{vGFt!*5tYB7% zT=%V?Jpk~X0`TkvOH!{C#NHK^Zwj4FN@U-b(NMj@tV4hAeOR)}xFph!vwi40ebQ+z zNXF5@2obqZ$8rg(3dDA#PI$Rijwi4k0d>`%Fc6}F%{Uz#LDycd#;NG-6^ga{sag*147+# zE(c!qwAmHk5&IEt1*7sMFan|*sGbA@;U4|JK~$)OO7gY*dDP3}zH^C;#?F_mphQt& zjXIO$ulko63Dx9lhzYezhFek?dY-Ad+db$Mm}M{Cjivqs!Rox(KNos6tF#eLXg7ivC<+&@!`0Fmc_bLBaj_cQ+z z*wL8h5brjLZlQWvM}h{maE~z_6ZfY$JJUE2r28PzccvBiG7yR0@lTEa;|Rt!h9@mUiIJunHn|_mYSfTrcACH zB#!3>6369S`f&rGE;*N0%3%>+()_IR<`ru&LOz2iGTK^_ajQh#7ep&he^|1;gA)D5 zo3js#Em2c|Y+q=KXlvDbaZRdw?)kl`gH=LfQA?EzCz@xp#j8qhga&==Wn07=JD~^F;>bhgG=T4UL1)CTwjC zlF$#woqYp=Cx(0{nKKJnC!gHY7wrk-wX1d$Y;*eNAJ;T@e*Fj++LY~K+q#=*t9WKB z{de)~>v_voAtjP@Myc7=U~CMcZ`78xRjOwS)G;D1&yK~=`VE9mmv=yLSKeP>w-LeJ z!FZF;rw#zihSkl%G0o)&U#q*xqq{y!$W8X4MyYyXI=u@Xkq6~4C_HdQVI$V2_fI3K z0Tq=nI_+~MK{l*wamrr4a$#x$J;)CvH5{h|@myd$2a-9eGN8F=<*4@~PzV6qEA90q zQIf7PhgS@}M(@xBYEGE;4=ZbiqG@6+IL1j0<|Pse?4gtdkl3hqEMd_Zk?bK#(kD_q z{r$&_w-%)_9S?waJ}R}BE|SjXez!Q68`nazsoC9v<{mK%G|FH01LUUXSYj^zu4Rzd z^pCN6^a?x$OD=d4(4`;i|E}4;+AWO7{+T=owg)d>VI`DkP|rQuN{Bg%)x@ntNm}U_ z=@Rnay_HIQ2*)oxt_0i4_YAm_Lf~B_D%9dC^nT=pzXbhCD~Es7?0d==W)yx(($E-} zcD^aV!@NT%VI2DQ@oAtgKPJuDIA-Ma*fgsf@Oaw3G6*vkLm>1ZCOj(Gt2C}2#%gaD z!sh4PYURq$#f-D-jA$uU^sUxchs9Y^6NnMg_{R;)xWyp-;2^qXRY%CLY zQUXzK8ikE#X4^kJj&EbAbGBpcTd41i4~6{HWF*ULls+JG$~_p0Q`HT7x53h6fh&Cc zAxT@`e^dUMb`%oi8I@HH+os9=tz`DTRVd``j$+I{7S@lw-eLXrHD$ecT66RYu^HwH% z_|Re-BPGnL^lBL`>gy%-(_UyZUT^owJ}j=j^XK^AN&`ou&xV4vYjKzJ9QtV!LRbU8 zTGb(5QXc40i-wJOB$=%G9tmoUw53Y0DKemJ3vZSR=u>ywk2bvfttoz97z z!^p*{%JzU%&kWfhKP|~M-q1g~?OfYam_wWuFXCN%aJz1rcR(jC`Eg?0@HcZ(9wN8) zFz~g{W!x;4*VVGCC}`Jzc*INec0V)t3_4@mR^nrxH%1N>hN&2J>|aUP%xxZsN>s1j znO3izsm&AGwvGAun2mekUea3Km2KWZKsa;(*H zP@4E#U?>ndp>@Ua+048|3HN%VIO(U|HSWv9z^8*%-mc2oq5Bt`kbcf?HywwIdhyYf zas6sV71J`5DDTOSZ3v{MF?R?i+l`<3>f9j9bG}v!X(*NNmYM~B02G@LrOHwT;ki8d z#q~^Xu0w<#FYbq1GgJYaChuK&x&w$jGu0YDpP9n+E;Esm3GC~F9$6&u~E zi4ArMnerp}nCbheVJFQ&D1Wj}N$}mk1?F`85-0*s@LBi?qJ=U2hU0uIx5ieVTzi&B zUh}~`n-U{)siswId;axSJ5ft4Z<0ES`O^vk(d+BV9drf#1~26i-R;4Q%om6Rr`zb% z1B(na3hmC!f#CBE!sv5+*nAdgL^YL>omC;ZJ_oX!A$D*PPAe}0 zfza0kzZ_Cd@sZ0*s#}~~^}5Kf_u+m3)Zh}NPn`wqL~2W(>>&rvgpJc-e~8~O4yy4 z5b4oCVHx#!L`jYeTY0G2n-V$Aar)vN6bC$mxK&s)k-tqwM@BJ2C_#%FJzh~UAF@#u z2~B*e38mxO)byN%X+i_GD%F(N;e~xb){NxX(^Jg`7}V4s7i4=uI#y>OXz7;j5>@ar z|Ed5gnB$ck7MAbeRK44zX!B@{rz2VatdEeiA;jN40|?5Ta&&!9{q?5)GQrIvHz=!&!V`H$3d z^sUd2@p)RDqI&ynblOrXM-ECJ40kOs!nfsMp&eMuqQ;ikLk~43Yt^MLS33 zc6zK^|Mi^XDbB}V_sVzMr`wg54tW6J+}xe(j*agPemRlOfOKySrwrFMyk}Se zhDgVYSJ2}AE@8A!YJ>|`m zeC~0=$0yq_hdcvEo>D?B2uaTkr{~#!=ayx}XDIf|G8FqNxy91ryHJ8hUXUhn*{2Hs z6Czl|HJrf<7R46r{2nK+-l$=XZ>QiGelmZUQ$d&>j-%gjjTkUGjI9qbZ{;5x!DT&9 zsX$g=uIKWQR>0WVR>N%&(`jUtwFFEnjvlx>(SGDb?0esfvX$r7i-D2yKiE2e-40dr zlLo~o4g)ncIVEl>U32>?=^0UhGixR*Agje3a$vcDrLo!rsZr#%6~=GGiLa6M3!9Dt zb45S^2=G;aw4?c1$vs8pAp@J^#$>0-y%GTC}KIN_BZ}Gx8Q>+;j1KBg5F5(4BT0=IbE-aT-<|@>s?|M+d-;W;v+Itxu{dzm{ zb0%b^KDH|;5GWXh5Z|K=3?rnngVH$UR@DaeCBcHP%e(+gcKdlRTSbL}A}Y6|Tfoc3$}zA)pX`fsm&RCP~AM3USNrA(H=5ztexfL$PGC75S=P4Qho;6t~9X5jJ{Hz 
z@27TX(9|HJhUyQP5N`oI_(ZiocpbS2H{2YZKjs1oehs%BwcSNdoM%4(3R@rl=so6h zYW?U8>~qR9*zK%sY%N2#gokjAx16$~FyeYW)QoKEbta|6+*{#A*2?B^Yih>Nuxz(P z`DMS2X|r;rE0t&AV7gV45NPxq)g zbsz;P3)WRj<;i+X>f#OS-WSi`FQ&yi6mGjHF3VTHA94A5jc_-fDrNYt&P)7tJoo&h znSYXGduQ38Hs2;6iCXwrJa$36@OgV_L2)k^I_SJSN8qq}^>op)C2bO>?=Z2*Se_9K zSKK+KRK2F!rHt2gp`;l2J>`w{b$?rL!XwLN`jyb;2b8-nez+pE+Yx4NjSuFgr>Pp1 zefjfDTTi|3#hl%_)^;t`+k=@JJwfHKD~H7k+~_;`HXQ)L0@Fm?Z)ntTXT@82{Im$w zOQTYEvc_+F+&%{HkfkRuIpWDRK}zzVG!!))(vdoKn#*$2V)%?D*|?IEdBn zI}3EQ{DY3ZsWz1Tj26pt$e&a*bxL&Z`!tuf?7!}YlrRL0I~E}nh|y664~R3wmDh7& zvtpV|@j&ACxjtKYWh)hOx=$o*9ZP6{adA3nbR{G{-9%wx7hgoln6qS`>~kr4L+E~f ztSB9L-F&Mqlw*EydC@W` zSq04fOnyz6$Ab^H_npLDMo?6s=A=Uj%M_ zvwp+A6P1fyNb-hQktLKNlfha79?Q|h*4ae#c}@`|&4#1t#S+Sf7?Fl*FLNF!2X7J~0%sI_wGT zaX)_m^x-td#+{60UvY#uFZ2JTRis#?Y43f7>T9mUHc@UNreJay>4S0vURAb|(6%w_ zI?gga0GPi}g~*v4Y1LR|i`%|l^do&u;H6uh1hCWua-FFI6UMX}Z`kJI{YaHg)v>&@ zQAzb$RDqk{wCFX0^Izj=hIR^=fkHbuKw)CU7%$lVJ_r68w0Z#i%6b33E@8Wva?cnl3axd0I`}hd&U%1i!?T@|se-V4qi?>! zX=o;)EsfS5X^^lmevw+1Kkk9@Mw$D`NMBokPBJO6(V^$)i~+oj)$=mFGh+O;vgyTj zqmxklV#2l63yPcdYQigS`45*nMuM1lxz*0G$E!omtTVtZGCJFg){zpo12I&G zNP~`+=F6dY$WrRM=&4BNI&Ep1u(;P|_TG$@|HWn<;L z)dm7Q$GHc9#aH8*3->Q-HYi>D3!V~RSe=c0RJ$xhzAK6X$O?D8&HQ%7+*G9$0ONTp zbk1$^m9nTz8+Bs?-TBR8;Y71d=!WYR)993dpAsilQS44oo;qS1bfzIkQXp73OF$nK zxahi^F2Vf6WAXLqd;k;LGJpC5pptWPfAZ6$8|Y+gq6@<^PV^dwOsWvS*y2cI@7vYDlaVrK5fW-V`WTYiZusM4}McBb(iug za3PUt2_b+EG)KJAh?%rZ^>zo6AEPcjXqcImS|By@w}~@QT4M<B5$5Sz1DS~ z{r9h$8nDaVoG0J)Rs+!%nrVcVpNjAg;+-`5mk6(G2do^}SNrSKU=_7aY3LQ(x=mRZ zxG9-6iaW9-QdjpgLT@j$rtJ1^2RGg8-|8|oQL&N@@oy2=)32-#(qAltQ+^Fy#6_16 zSk&|xj=_k8=&WEy2$p)kllb(l#xgmj{IIoE6Wgh(IY(coqdD|UeG6cKr};<@#GoZz znah##=xobhoOS%Nni6$B^wPOuv1~Zw#Lm4uw~7q=U8e7Aw4*0L=6XX`^R(u|vv)&+ ztHbGI%I49zLmep%7~6e7`|=YXFTSHbzCX=op0UL;Y)v)6#ESHxotX^S@7Se@vOsy( zyr18(OH!ylMwmhw*d?(GWr_aT>p?OQNo?fQn>R=4sGX>0LF1P=udNzq>S$7_%z9AQP!^_Xjnb>a1X5F9D)#T$o4T_2A~ zZtzws{~4K5b$(UuNo>Co3*gONR$DJIN#|zoZ*-J3rGa)D>;K^)6mLXU(Vpz?L^q02n$l_|}Qw z(>F(9D4mQhj@0Yz>l9rYPin4u26WE_YC2Q}ViRk6KfYdcsQn2@V&&8>as;!;1R zK0$8(?xkSppUxo}M77UY;WOC*b2r!83iELAzrKX7o3@<;Uavl4w|Ik)8`)7zBAupH_{nt<*jM^9b0-lA&FNt zh(|2Pgya*81+-iz&umqE>U?K$9iC5{8l91xFlUn!61%xHX_hHBg6{r~>=wcRXJcny zi#LgjjVdF&063lL$!D1rm5Ygeh&L1P$iVtRAQcAXBOJ5u$1ptv<^6-JW=7JyR$~e$ z!^Z)n1=vvRrveiup{a>ySC6o#`e_$LF;I@zIBD-JhuFBp*~z{umgpi zJ}=-HO|y{fleCN^F>ek7Lr&!9*V57$(yyg&@&Y4+RHbhpUm;^?BzYXv3=Dy}ieYM) zZ;DZ>a$1;*!I2@Sn#n0@8zpSf${;!!)**FgSPEO=Lh|F3^ycUnw^a9krq~mi6BOSv zY3c%ZCJa=U!~qi;8(!j)2_PZMNqZ$s*_difnS?#qv)LqLu9ccrtkmXXCP<0|THCSB z{^YQaFG6q2jz^|zUpw9%t!YU%$8t2d0ueXr)3;`Rz<8;o-wBwi=y#)-^juByn5wqs zma5cTH2~DPx(Gckw-ine5j&b}Vju~sX1s&Xcx~N>>M>eDMd|b)UovQjjxZS zZISW2Z=X6sB%aD=~jpy^JT-B!QN_~b-eLan_a`-VVZD4Lasw)5&R$>x&V}Ap6$B5zaFQ# zy{{Y-(Npg$59))F&52j2JEuapkdg?^3V~f9lSpB?vom86mFG$OUP<@^z&^t(FTN~X z!Ig@Ck}Q@E{Ro3<;DXyiT`;=@IYMk+^|v7qi0*Jnzdh5TBa~kfbr`wuw$RipIb4n` zUg$)C(32?>;!3>^`LV0T9rqQsI9*%rJs3&B1L?hI6X~zTH%IN7`4wqLZQ!r3L+liO zrzZHM_i;CtCvO!Jf!JlxEvebxaWGn_M`A|?RWUGNN9ILFqOc>Q85o{0(1&hCMzRnS z$%06kWr>Nri#Zg7gW)BNJ(`-dlA7vTtRj#lZC}2UzTn7$Sa;2p^>i)MXC=Lgk#%w_ z-eC$GDvycL%Guj6h0V4X%rRta^dt@EKhVC>(W8bRO|qPgxcj$RaqQG!Hyu;j-DMGu zc&@{8x&G1~L3nBJ-J{c+QH+O4&)#|I&?_Sfp@wOF{Hqy=LbG%Wk9E}|e4;^%a?V#n zlixsslc|e~Kt-V9yiZSuK+@`gVIcZX?-)3rv`)yFyzthd&O9A{V!-$%^{B4zFDoIh zyf%u<R2$vS`yQCka-<8WNKcJvQQhloj@{5C@2@8btQ3- zQC=#{P65|-7zfPsJhygnwoYHc%S^=bwm^^9(;O%|Q2%kmRKx?zpnx$}2L1w@i=*QT zx0T#wUk)kXmmUTz2nuSnn01-v2BY9!6ngs`jrmL`(9oJ^DuJw9urW&HK`h*x5-OJj4LPqqc%NgU|M-)?nNQu~nNIMazGsXm zN%MNic51SC;yJz zeJDbCMjP29(;E4sSloDc;zp9X>sdw5+MW-+??ZCGORBKs0gnaU$$4$}^+{-~RYPwp 
zngZ;n#Qv)M%Y$u&S7(Cq0%NKkCHnl$+t|>@+;~9DXXv45BYT>P0W-l7drZuiPD2Y9 z&%VCQ?QF7}t*X0!QA|DxZ}(AnTF{>8Qs$=8=F4c)!^7veVr#skD=R|c+;j<1+W8Z8 z{fjg-(DVp$z?|V&Bai|}F(`>%MCRo8uln=bP>3*tV}395(6h({AKr?MoOL&FbNwRAFl*z6o@g+OJI6`kszN z1P`YuEFS$Vjom2eQ)?>8CKTxOw(-T|2t3gUMe|sodKvbdSZ1^U14mN>8+0xMQYR*s zR`y|kts?z{{`sRpBTaRiDWf6mwbCJK7Mqv}ko8+U-_Ax&%Got_QBx~9hjaHnsty1o zTZlVLO0L??9NkK7crsCm0n@$jQn=C&Sgk)bNZ%dg5vz^QYpBYUQ!yUB|08e0I2h%K zWiZ%H)96@?m@{ThI#1)-7dJkRfX2Mv20I}#(ZO}%oKE`!+3q3N4u3xR;Il5-JOIL% z2cXE>y9Moe+@zwYs>KkbeYuN|Q75i^uMjN~nCiV>Qk2tJRW_2kN*q?^9F@-+n)sTD z*0*oW;cs~I;kN+%Hh6+r!QEH7E~kyh2*yXTieK}d^QFuMDEr!=v!9H1F#d1TwcSaX z?ldlYxplv5oO}BZA5UW!_~T{Yy4hvq=ylo1w{gGe>}#bz$+I@%zk$bJZaA03vVYB6 zsTzBs8@MNkLU^G5m$M7zU9nkLyG<1r4qZ%5>)nQ9J!y%!;of-mVq^>d>A7&j`VTI` zMagDR7FWQBcR@L+k#mQKpeyUP(ju0@QAN2FuVV7>L@sdlzgi`Xfe zi@6kmxr+y^Zo&NL(acUg;!#h!5|5G<`yUB?e-zAz)Cuqj$`BcD{yaqVV4^+g|Y(6auLN~NjekI=osKWanXQwJ~|-Hjdi zUyfJJxSFF=!)&Yo3BQ;!u^ywEWi#z52JGeErp7;fQzE3`L-6vxI*9Ao0LuIq3(J47 zg#07swj;UP$37mR21hEU&JA+l&~?u=vlczEmkqlUJvx}Ebpk0KUBW&}zQ(=}fi6ii zw7O>l1|$EY=luT_Hf-+iL4=Hg9%!5@`i#lZE)Wn~1Lu{Mlgvh?uOHf=Ox@q5Je%B* z?IV)!@0|rV!K?mv)->Z0OCh#cMnG7HEqJ~6DuyH^+uQwtePxQe5_Xo{CED}pgOaBkQM0t;g{eSuW&o5f}NHj@} z{yc29Nbj8BEHxtaFhvG70h_$|I_2m ze};ww%EMMg=Fx}Y3%WejI7pszm9^ z>NCLf?B_^uRAoVV^p^is)uRx2|B)1~RQ?gEumih-d$SBaQ?=k}W0}8x|8-WUg2Yas z%8*^NR)$xNmQ}S9lZwxJ?B`z;YvZe_5wv>L-luyl3v$}ia|nzxT$-G^{~O!FXcXVh0(ffZa!bcq>5JDjigx{7*na?PY3H* z)53M!=YBOHr-?M>Xav6LsJ)E)>c~&c?BUwm{G}DVP0>N)N*-csgsbCIQu4K~3zO^d zQzS(NOUxDv$1MnckQnx#aN+lY8naM?{JO=rM$qn0kKZp{e@PsF&1Ao|y`AYr+LwUX zbWeMXAs^0ft_&t{k1U^N-1&Woj2Yjk7ai-MgxgS31BD?gm`co=q@VT?sn%OzH>*B0 ziO$Z>es508mx@bsK*CUs`93Y#J0mrpZ(}6iFPodNyt{#vmDH!c{!do+p z*HC~%mI(9bL#$WwC!4|KA6o(YBf3j9#>m32HOlu}HhMHlFPBn?Pt3!*`e2VYwXbe> zws`a_R41qTyt%gEhy~mjI=3>W?rrp722=Whkb1J$BRtI6Ukl)3!Ul`|?|p;5T?MXu z6FV?-7Q-gs44ej$`^+Md)srJu`E@!%S&8F#inIB1-4=nk5m>@=5zvh=?&M9E-c5JP zi~xPyDVUA@ZTYg6@t@DToZJ;i5&-`p1PbYWOvU@AIheOl-B=Zr7P*rSe?}MN&jdr1 zy^Q+^qauJnhYpMs=lY6T7*>jB@Ot+Tq`t zw!M1p5ndp@yRl~P(mfG;)!0$y_|YP%O$Je;|0}p|y~vKCh730ufJlp7El&v* zDPzrHGT#b`A8(pp{TN>-r6Wn>M+`%8pYb=x1VV!KgkW)%-K&*bfaea}RKxXJ+MP?R zjs$gb^rW6t;|A0&CvqgRr=qTF*K!#r4)|uxwq;VT%}e{FAdjMn0a3RJ3n80D59`ft zKJZ&*AiWOi(!r~OoFcc$;6q(Z4DIv$R9SI(pQOXIV*ro@*6-6`z5Lu`YcSlh{Ds8G z@B4rN_Hb|-&BpnLw4qklWn+1I&iOjWdbG$PDW9i6rTo4?GbN9Qa61i<@3pb_+zSVC z&O=mqUr9~@R!!=JrpC9v&f}+eFa;4H2KxG5ZCgR6Cl0yJOa$m;3oiSm?`Ae6T z`P!?()unH`U!10E(yokmc4d6>iVkh*f``cxjj6lQzYMZ6QtmZ3$mOa|o0RP8>8a^S zy2W+HcBv4$cTcqKXLm~_1d9H7vsc2wP+An7m$iEQWAqZ@j$z;8zgtv(K5a)+_4onA zAa&s<@sD*UB42%eW@_$JlLx*Aics`#X$jl?9Zyiu(&IYd)6tsz!QECvJivudv(dsx z-%P+9=iCdc=RS!yxG8U3U3b~vX;GeAh;_cB5u5MP=8rTHcvd?M>w;8KxKy5r7&n@> zsF@sqt~d?9A8@U6Gm#}qcGv|i5=mlsg(1m0h)FU2F=U7&HHJX+Y+7uR*kO-Rc-ulu zVg(uXFf|qFUeGEI_Ey>36G9npy)SDwOqAC7Zk=(=Aa?4UFt-2G8WE%_pSV3!nKqce zKUC1ibGmzBLtt|o*lL?vRW#&`N>#AThjXO)$dF`WL6fqMZGJN3DJCPp5P+9UCB>aV*0=A$Jz@?@so{2IP#e3Auy_Pn zX1HjA7hy){gueb1(mY^q7R-`-JFw~Pn=m5UaD9!3*%X?(&p&CN8BOcz8Je}i%J zymDZE^{%IurtA;gKPgAiSQE|h9L0nBZzDx3kH9P7Od{8o%aW9;+PTBrSQm@!$m2{? 
z!k6)=lUo(N0Uzg+t2CJB-o1a@>i$~icKk`tR!S^rO8w0)l@WDi_e9*xFTc27rL}|~ zv~ja0)3k@s>=PKVl*vVc9ssB>A_9gd{GtWkYc(VCi3q@z(g{3Yy<(i)S~ zVPokG)pTXhSF|$gc7Q3MNZ0ViyH@kTPg4`Ln{uR`!Flik61@GF%@Oa>_C&F_L}wyr ze&15_d~MJ$>Q2%6COOR#R9)X-scWb2;dloLHBr6WdM8g7Fu zMmmA=D6YLe5A}lAj`=6tw=}PCXf)Kg+_mjUigymiJsq71K7G$Qc5VQGf9dtkv7%Dn zidOSAEcK=R#w~ob4ZOahQoZo_es0&t7dDtbZAUAdb!rYfPqW}xT`H?wWVfYZv|jyd z^cH>dnK-syY-2Z=Tr?am+YT7(Pwq50XU0=`Ci{bT8$W}=43Nz*{?%a56xBGYp}%!$ zP(bub*VZOPU-~P-pgf~j)kIH%r}M8;ems|ZvZ{>{i8Vih7<83I{P`Zo1~Tj=EfsK2 z;D$hBPSSm&Ely@4>e((hokS;%ZiosnBSV zCXF273o-}`doVaR zhv;59ws>PC;D%CHI=H%&OinlpI`A3|~hX zghRJi*=3wI?&C#e+hXIbS>M#^GBSQ) z5-q^uXVxmD)5Et)J1TO&3B27<@Q$`Rt-WDt%2bu$I|*5Z zjI#={$L4Z-M`!_9DMRoE|Oy*!yr6u4JHksZ#%qh7+@tf*3{ou$GdQm|V zOmf7){+!r7i2-R1M)0hQrnJG}>+MxJI=v)KPoL$_1#%biszbLWF3MhPE;S|8^g{R0 z5tGJK*{H~@2D<(5FzOAV^^p3(*#>au@5xww3&Rb)=7I!L6)*?f1XV>db}|%`+p8(P zxA*kdtNOa5G~r(L_cdD$&l|^~0&s^yqz;RWeScn2-aB<`NVpRXp{21YS=XdXbF~G? zLp;~fr{*t!a3?Ey+733$Wm*9w5zUqgVRn@RB3wu2otw7s9(Gw_jo&5`th zDVy#T>@b4*SlW6$2@h`lj`95%7no}6N53Pwi&0|=eWNn9W5)bZ%efogEzNlae3Am& zay*|$-u`vii%*!a zoTY0UI~G+<7bR(%-IL}1=n-8A8TNT=QkB?Q&}O(|{qQ$X6LD<_ZXr6} zYL*-~MwaUD!20%DdRe57=oH(jJFerR_5vh+#j9OGP7`z8ZqW66LlmN82M87dgwu($ z#Ut%kT_er6xt6Cys#JqxF{S>TG2GnkP1PRJE~7r?zI=OQwM^m(y9b32S5o4Jvr>Sa ztE`_)vPAY)s|6tH6la2dx_i-;V+a6G!E^vqiv-Yxuz>T7PaBd`KUZi~W0F7MZ=Eqe zYw4=muB6$;$WA!PEN~-(xy&r3oAjMW*US?uIORoW-FmqqyfP=n9vXPyg=q*G3ffGH zAG(b+vMG4a!Um&OCbbiN{h}2pG-pC*4@6Illo6boX+}>lP_p4m8+3FgOm=LoOCwc* zUEeS6mv>~}qEe!w5-0VaOd8+eDRJ*Ft*^1wmn>{bnqxmAAdC6rP~ps=m{-U(mh8hP zP@Szeu156kHPdj5Z<~vE>_h^xaN1@u>;sd(n>Ms+-rr2MZp73#Z~V3H1r_QMszx6r zy61U&=yrPxh}+q@mhV;gzZD-mQ9(zYqG|UJS&>7RH%>$RvOtnO?@s?B^{K&Ok=9X+ zqGj*v@XQ=K$8Z8ooriaBJ#KTPR{Nh45dHC@9aGF%Nn_m{Pj)+W!=7$v+He_A<@xleTl2Y_w^v%DL62_Y#C1t-?X264;`kVS5{=I7pBxbt zqJthlFJ&QNaq}T%l(ffYYK^SHuRK=2DM&|GVzSF6?vKT#4@vEwn3-y>MtP!a=Q~$3 z-DPoRQzqB8;Q(h9#v!?ogey@h0V98}wZ~bEXR&|OF?$x32V1Yix`s_*j(3avqWR;& zt1WQ3soxW}R+8V;w{f4NJFy#mN%juNw^#aTFL~1yGS}PS*15RAwFVd4j@>!0@@Dh* zR8O@3z+8Ny)+_v+u4U662LuriQF{_`T$bWEo;=)Bt|mu*8M`;_kf=M{J!QgOHJ;2W z`>-6MBuucQI!6;Sl#sy6?`!KPdE~q~ko@Y=&!9cr0itA5v20kwz$v8vYqlTjq3}KM zY)q8}KGk2rKwnfmIUqdq4#upfNY2~dkj(IOc@8|(wi<c2! 
zMYkuoYF|xcGh!VW|KokhkWiCU6|*SItfnqgZ=k}J5-yYX+X<)0TKfICSY_?akkgyz zziOYe(X}H7zxB-cAkCj#eF#hY^XXmAjOP9&(}A_WC*2beP&qJT6?o3>dSpJk1^gMb$Up))`%zlv7gj)kd^F!NdgU zOoIVFi}|#&(SI1Pbh*)l*U1%g!3u68Gs<*2RQ0mCa>A9ldKa}9`eURlKJsbhf?e*3yNvO7gCMG1BNm7NU zAO#fkU=|-&Lr+8zoRarTx$J404|ZVh+yV3P!F{{xD;lemIs*u6Hq^tf=y&LdI494Y5MnNh=J zXc?ZzGicx78_cOOw~C>|3>oo5t znFvV;negVU6{#Sw>pr4JzwTMxfc22gh&!Hr0=i*`+3lk5<)R`ZQ}xX-y&n*D592Vxzo{%O zeGt~ArWPVU)6}`TswMNCD6zG#zbjHWxtS_GCU4p%f12>!MGn}sNZbr`sdr**@_tsK z^+c!!hw>2*=!RgIogG`Gz#wKvKQW7UTU!N7BOZ47sjIJg!3s8W-Al~r&*=wY147e9 zB6>bbU`SC$xbL&K(gOJ(qf&l_Uhgis)(_3_1^kf7$IUbBc+Tb)au`U;lAxUL?JYr^ z)x$j*s9M}Ot$c0_uzeG%MedUqo9)RAbSsPL$xXym55LC0!1rDZs~we0xt}>y?8$yX zO8W4^A}v|;bh~JTyx2pjm!n(;uYb8;?jOkP@##`3`xTbPt(jzyFzI7nvcG>~z;Xjt zR{U84On;^`F|N=HtK)kALuFtNtSp`GMf+oK z+*kOLR?)1K31l$fmf?&ouO5tad?ZP0M7Fln)zno$*m0z*|heJDy{d%ags{?iF?&Qn#lyQ`qLTeDGREVT%BS7ySM#%~pAgx1Xq zo99=bp*gvUv@d=Iv%Hzu|4sXiN`>?zrkM{Zn}u|_*1vS``y>_SeYm7CG?Twyef9*E zaZLL;`z9+#IvD94wYtVIiRK;b(?g4UHr*(y8}U~b`c^*E(s6oxX|4RKKMAe?s zI!6UJzhL0{8G`zr>b)$Ucw`{;vaCU8tFQyZEn2DfLV;`UV=$G~^F2WGV^_v^bpslH zHDs`qW6q$Aj2H+m#+Q+m$j!npse5|Q%_q_~df`M-%Wyg`9ut@J^foh#v~95EaxF7@ zlXi?|>HZP!-9WQSQ1f7KzJmV3zR_JWU$D#EUO%u@ek48&-Jqv#q$9`o5CgVlSwiZM zwT)~bY{MdA*jF*PmVC{!iYtH47HJRbYnv2s+PiV+`v%d(_5&?zrTru%I!P6S-%ik< z_fBCKI0`8!ts+)~1r8pphEXZc0guJaq|_gz?ELc2b;G1`ZnWBMUOfKx7r>`uS7Ds* z`?AEHz9cAZ*aW)JCI71Pe*K8%Z6eADLUPw^r_-4%4U+?1{0p8Ba=_Fh`zSS5?vy?1 z%y2CIG}!eo;2dZ1$NpzRrk!fWg!mnuM$ZQ(rtM4T&K>Mmp2`~rFRFy9#(w%ls9rqI zXCymuD;4xyrj;%9`%f1NG_3aU#p@jEmprGh12Qyeh#t}9=~9>P$Ufbw&=*zil%Wsr zR%BF6($vlGWlvP-%}C;(_1Y-cRK>)x6V5>lpB0fXL3miUZLJ0?}leJ%Z(fsssM<&46W7z8Z2+$B$yYh84o zMkb_c7ny!*S4}Z&Ap0IJ@6d4@!FnwC@YCGg$k-Tb;D_bdEEyG<@ zpTBu?WQ#n2V{0d6hH|h`9ei@F0@8;*^wkQ(4aaLDn1IUdmzM%GV7+dktAZilXJKJ9 zYto^&wtXW`{s1!6Ksd)tl6v=l*QXg}L#q{mbQhA5HDu=2NHwE*@)?!Zi!aU6 zO$-80Ao@lRjDOZ-wI1PAKA=xebw5uZnToDfe(@ifwEc?0tyAcuJl%$J3!bVlw~;n3 ziBAz1CYN8oDi(-8$Tx2vD8;GCnWMvB6&>-JXgHxzXkIUF{?JEcQ*(X-gaH3r^}E#; zKF<|XBeD@QrE(_KuYRcGZ-*j5DRaJ&K-zJ0I+QPdr;o|Kmi#(;K{7F{JKpoUQz8x> zLD)`Mo(dq9CPqRT)LhVZQ4DJ{%BWrSgyS@Se zHNl;J^==zDB3;W!SW(PjR^aGIet7ifWJy=zj>I$P1kCMa`{*m0U&Xk`B4RS)s#e+Y z=sr3Tkz*AbH|I(~<0|(dxnT1Yd@C=zC{|rVio!)GCL-H1q*8$u!UmCaon_S46X+}x zoiiKI&~~%F3?Kk?CvDN43g(7_p$i5;vTO!}7Q#;3H zagy4m%$302_nn%yFO2HvHawhrl}9N-DkH-H3y<>g(I0IEiY%8aGRqrw&oE5{b_2u+ zdyJ5?w1Nqygz0<2ZQVfET0-C^Wuwal|C(H>e+h#bvom}Hng6T-5xpj+6tyJNh!l4A zjyf@FkKy?92SY(+nm2TXMm;*|^nG@9cWxE* z#s+9&mRhh?aLGf)r52T|cQ2CeE6?k2F4}lrveY;{cJL1z z{co3_Dj$Db4f*dM{$E$O9*2hq{m)bB|3ef=wDF5B^?gyTPMP~^odti&eE9BEcGM`# zs<0%%xou3$kha|YCHE@oQ!#4tk=5}e*O$D(Ve&W!1Gyb6u?ck6U4VVHN6&Py_XGV4 zUb$&8>{Y>?s3mNm0j%Iw4BEylGVWz+Vfo`MEC(0f_Bd6TCpy7D^W(^`|Np=A-}uLW zf)zB>)c@=G`C;i%KKx11IyaW5`t(68@e-St1%lnP*e-Z^LwM)q;FO^lUq3?%Hzuxh zpOG)~t|!masJOL8|23S;tn=Nkz#5(M=Rp-f$v?bhyMF=C^@^*jC$5NJOS$s)sYu7H z6v8VHS2s##J}sp+(nS@@eP8v9S;LaXJLI&y#}@78lOF2C>dmUP9N<%aBToNUL-)djLA$PAejszpje;t}^q z2~z1rMFl4y;ewR3E?L;g)B8UOq41L2Tobb$!M*Y>V3gCY zSKz7@_ovt{J$K5cj3nx2@os$V!oJ%zIFPbS8uLlVYRt``FKr_-T(t|HmU!H_8nW+7 z*a{^@1U2dl5PJLxH_gnfn1A^Bp!V9V!b*ck57BmPZS6l=wn;NE*hSrTa1AQgo!*x$ zf$}?6XdCEv$Z+xSUqJTJRQ;TUu`ZH4`3~dIXf65t%p!j_Z<^dfbaek0ftV3^jXCRGw!D}gFVS>GbLN6=^Z9220&r20N%4g=PD@`lnj6gEinP4r zy{FCkvz2g)!}Dk}r(~Reo|&W2A+GqhRrcMGeoI$pPlEOiGiKkH9Q=5I@qe!> zDgQO+_=$h{k1+M`nLP_0wl4Z-1o%ivOC5!^r_6{SyB6|ccrpO{FFEi|O{`dNm!PY1OneH)R7IeLg(1r^DkCPo zh|QG$U4wLB*JlG`zkB+bhaSp`CeHPRgISRSKD8qgN{iOm3$xc|?Q?ah;t~(oQvn1k zL$4g`c`=hv=t^w!@M|w-7sO5Fhu;`5?)|yPp>h1gLSg#nHw;lGQqW?YK^MGwEApyg zbo-0xjNLx zwog0dZhrxDA;iY9IdA$g4CW=@kdpT3cBhxI=CjfG_(DEMf8CH3NVSaVbhIg2fA*?$ 
z;Q<5T!gX21eDl*PYpLX9(JgVlvtCJb_}-Ya=35)i=;%4Qp?mee9f%xanVybvyEi?W zf9Lt84z&Z%9y_Cdts}R)GV(J@!%tXr{nD92>MtPk_JhG+CpuJlI7vKoat>zGFd*|A zEiYM)=oq1}21i{76Z*w_GV^6Qj;ozqIaqo5xs{Lo&}r4>1qEf-J#T2w^b6eOj1H&V z@ZdyRx!>oVxxzLSxeo*unr($LhwxG#>)%#9Nr$CpTVE-g z=}i@TfO(g{dLc87BTTzge&3P@FcYL&Z_1L7;dNo*@fiVng_^DGoPxaN1D~@85pyK7CX&0eYhw4eIAm3`t$l+cWNumOq-|6 zW1~7_5binn!g)}?d3t7mYoMYA+2IkNgVMKP)|6@%#|IEavp#vQE_8*Vp{%G+L#H2L zC=G~zb}h6UTd0uuvL-hpBU4hygQnXRBOPUF<8$j4$Ld*4W9En_6EE*4hmqz?>}{;P z9fy5LTnAE|+EfBTr6NUSx=vC%s&_Q};E67WTCbcLg|HeUC_`XnbxA5PK&$%~FUhwN zA_b+q%C|b9Z1(S1n#Cj*{bSv_>yL^>T0=vv@&(MsUHjdJ&U4?|5(Y6h|B~`aO7m`> zu@6l#7u3}y$<^FpgMKc*Kpr}3u9NiKuZg^Ymf*%`AMd;AF)(QzMcU=Uvp?Ou3yv*r zE#P+@GmxWPnY-MQ6Mf~N{tN)@>-7I2xlS^DpB}U4)TDiP1j6NvH zaBr&41aB`c2H5a(ZUP9}74KB006hFI(kATbGw);k9cDn5ixcwS`q{%^Gi6QsCwFL zY0FK>^4fi?o@z++6F%0Qi<`^;rrY=N9J?U7-K(I zogS-$raG&$k5rZ>F0C;tf|+CQn{uNQDf8j8GS*L$q99&@%Z{c5Yg%EEos!M{6Dr%h zrtK`!qED=>1npg7nN(P_WC2lMw`lB0imN4gvXQEo?(en{|02c28Cx8f_g72fSy3At-hM6R@6EopaBSlq8EDrX| zq55f4$1&PA6vM75?nE=&>eG9Rqgqc2T9AX*m}OQEXYq?QQyl!DX6esD{EF@%uG(!d zA5PwGO0LcX=`vXV)fLVM*E8YNZZ_oUKT=Rh2w4I{Fr=nl*6f3d0euN<8gzTn9Th6) z1nHF>--g1D43@F>Mcf~&khDSGLBe-WuGqU30jc{>Pu>KSV?PwJe&T}Y{)P-2N_b7P z@No((!@bh@=CsC2Wk>ejV<+5=BeQAbRrc@bW3pZTc%MCVs_sAOf!j2PWK?@H)TALv z6C~=cAlDNcT~4``vpieSaGJI`R&T`4>V~F4cdzFl^p_6=Y=B5^#+a9k$_5x!gd*pKCZ@=d?uK0jzU;PLY!r;z)}1cw2(2 zcUAVwXYWzTXF)Dn1E!>XYei4%xZipncG7p&xl24hs@U2FVsb{3=+WrK{Nhoa0X>bj zQDyVN<6HgvTYmuqJy8eac7;+RK?dp%g6hf#N-dqSQ9!-b#+wBQ>%kD;sC2n?KgCTTi?%zM`-#d-N((UAg=(Kz`p6^DEichWk?OKEBj_%cNtw;`BO6_N!~! zE)X$_o7wSDUI?vEI1HM+ahVIgEXmNlf6as_8{PrQy_pG{@I_=$2JYtH5*s)`kW%cs zw#g$sHspFyIxr78^W#f4-OdU%kBipw?u{YPr>6-4TYV%qPnQ}vu*OLuBgU3>t$rov z_1viN4(h-I4`UTOD)7ow@~!u5=mqS{Bo8jNkcTi(zv#P<`N`(bUqgRtl#ib!=HC7Z z!#riKGySpBMx}LT%WlixI<;=RYof4Y_S^Es+*lFNHwmnm|vB2>$YPb36ZUe&gb z`(!2+=cdbOd+@!lniurJ4j8GElsN1g&&cx3?;ejxr?#^%PsOgL#RP3*uZ8Q(c^fp{ zN2fu-x^f!3(e@!5Jdl{TdtuyxL6XEjcl4(729v2esudkkH3-xEF9p0X>^C#rh`SV3 zXFvg8V=vE#<&}hlE(QpbEO+8U^Z7Fp6mq`B?YDlRpKGr%BOj}fDOd{!7{UlOY0%mo z?e|HEe>?ZgpXCy0!>mGaqha&k^MaK0DKN+EB)bwIQt~P9O!pV2vNgN=i$(4l=yh)a zOi&OylXMs(c9E+IWPMa;57wOBToQ3e8E7G_}2`Q%4N>t(8I)rj97( zr~GY3BseP!%}61sii8hpfBnc*d4`ARPKU>HkY(gNO8UuM)PR- zWGxxs0U#Ka4jTIl=y{69+%fL!*Ub59A2b80?VN@@`M|K3x}wmY;m9=U3Ic=1hQ%ZX z9_HCrsJdZGiz7>xUD7Hww|H(a58GNgLn|YurW^3$${J2YfvT{_%jtXv_aOiJd6S+z zthAso_sdx^twN^`J7>_LJOUKXOjBb}%yE|#)8&tvvHSAXol_f^R#hUKSsrD0&_r702&g*8BWvNqG+>xS=) zMbwN-!9Uf4d>g*w$WL-0AI!PDymb8+Kz2YynmJObxe!(L5KVGPXcdw;6gNq3kHpvb z4>8xpw~@~Y_ur$ESc>scnBw4|U`}GF?o)n3-64&PEi031Bt}9VH=QE%UwN!3r9|K>*%wZ_{OHFyC>4$t6lE6cF=?aJ{=pG-loucT z{sM}$G+zklnHsl=4YKFg_C?x#w|@5VbKRFtXKxAwWQdt~f`iI?apXD7le8j52Fvkd zmvHXz&kr_PLFLkb4IG^U@xaf$CyIqdFXx?+83XqH7#?Wueom;q<7D|f*`Ko!j=O?? 
z0r6iHJapve&A1EQT?I>Us>YLg=-lYEP9?EU2?()y)47#xBg~{}ry{g?+#vF<6?VD6 z4nmmnV^g$I>iL}AP%_uH-4|9P_-?S~%#PccM)RVMhMDc>3T4yY(9b!5)Eb>AiM;SN z!1L!i3NQ4r4{58T5z+#_<}ZE*$KD-cQ*@`cTgGTteWA7C*`X!PK>RON$w=m3+8BLswCh)T4M;5cc+KCK~%Sj^dTD%JT9@#S?)Eu|KThj^{bdS01cF)(h7dh89v+{fLD%)`W{#V&3i{&o zRUwGag#K4nG@xu|-Gx(d2g1${}Z*?y!DN5OTMfu$bxoprE8>=ns{4MP>#j>(SLYvUQ!=|j z=EMD6g6>t)E*mcx?+LBB{U~YjVaoCT!uI#(vSiFNDztf8TNmPj^hd z*eQ%mbNQk{M9*RtT`N;>%|%(B{Hzrif3qW%`o64_|M~^hyhkqw zNVevZlTLTE)d`a^5=7wu4}`R^ZqGW}e&7mw^tHZgA=z*iKt0L8voS>K183LjUC+ zUNd!ntv0=4AD7VPr*t>afQC-{zcW|=T+!jPuXwpS{GR~qzjJ5*QcGLvRQY&$X6dDJ Z;PY)4VWAiDY&4|lp(I<8ZQuS*{|5?yN#y_l literal 27462 zcmeI5`*RyPlIMT#zoL#8*K0?crFvJfNAjJ=H?i&wdnRIf)*H7Q8;TNH5;-oiG-OGB zbj<(0pD$1kkfLn4$8+u87@I7j@JIlOL?V$$q`o=*AB$$*zQ3-#WxKAQesiiHuP!%N zXOn6(@#b~8+nr4|&GgT^$k#^PA)zd##*LC^+->M7$y1c5q z{|n616Px6A(FNi3YuCQ7tKG7yKxWswu47}$?qprAHhvJLag@w1{WUN1ogGDI_=a($ zX(QrBFdwVnnjvc2$`7*gG7A5Iazuz`Z`_pu~UQElyx)DJ( zo2e*eI^8sF_5IDZe)La^vMs+~tr@)&=;yn4>-snSBrtQ}$tlqu(HkbvvTd)wKRJ2( z_O1Um_M7e1NfZQu3Qjyb2_|6%s;kx2l6j%<<*J5)=QrCeQ{{P68!EKZ@nYp&tftr1 zX1=PY$~W;exqjQcJDUVv;Ds5V$x}LXUA9YaaW;7sMt+edewbt>B$|?m|5HB?axZup z2E>GZK8w9D@gw5MQbXcNE<)ccV@nnFG@iyE)=Km;2q1hpGvy;7#eM{&*^Agu$I3!a z_1^mF@^6i$eRbxiQ@$V8)7($8!ZXijL(f7F3{d^+FkVjg>f?>i>oB?}KD;MBu<^-k z=JPe^3#WdV11*T7TEQ9LV(c*34MRW7Kvm>9hzLACOZn94G;(r#%Bmu@zzAOPVRG{H zR5bGRpC13_I60OVyLMa7+pmNqp~Y%imD_c>eKT#VckKhIb7m+ec#)Km7j?8r=r)YT z2oj=Y;l59!*NG_jS?GnfkOo~Ooj}r_U^+=%9Uor7qao4sb{f^O2Yp0kUp}1j|HG?< z46|`)fISVD`Cfzvo#f>7G&u>Ea}qijutuRg^q{5LTPZ<|`or zHDx+qZRgO!!<&K#DP0B07H5jgQ|W}s3er=}!@IRphz3IN1K$UsLn$A?#ixNApq zhVtMK0|nCb=mEpsuCBk5(XQShBO?E3iTtcVXOSv~K{vxb3_94zyg-;@GR%ThUl3zf zteHQXrL`ZMIUHzKri@|QaSc{6Kq;AVN?{HA#UZT(T4HRyY#0Fn9!EfxJY)oRH}iS5 z+kGXYFke>lH(wh6Yj8Nrc?5Ck=P93V5JcC^yXYm$Zk~IZml7G`B4Htpqr16TjhQen zGr)N)iU;tNm@_*97&4<9O%tB`HTnUZ*$>hOOpU+XR69}QS28kvbH4Q8ykes6mcm5hhfa31@nWED(u4_DJ1@u5Q_k)5Kp8L z0n3<4ew;HR5&U$PNluJ&xLcOP2#Xm|qQDNX08%LQXGLB!$7n#v{Gw>Nmn5UT zVR$D+ZM~)C@ODY2Vc|!4EEWyT*zr$_%+J)DBvV791QtAl0BtaWKIjY0NJCLelxI`e zZW8jOwOyF_44GGPsaQS%=$d>5l5rI0`dTsP!nVN63T+|pr|-z`A*W@&vRJNjFrOAvm5|6 zu#^B8GDt-sA)R%d6&r3s22f7Um&I6_S&HWf4I)o}#`Y-5BHu1B4DHbsMuf%q!}&ru z9!t^w3KO!zuq!!OVMH+F3d7rjR+!6jRo`sCmfhyNDT<=rDPiEWLMZT|+=BMIpU>$xl3{yf%m zar3n-E41W+0}V=85{h)#P>atptjv6NZi!ZkIE|i>tw@<4dYUE1i@j_a2IZXuhAxRC zvyX@cfOi1&ETV_5O&~kIXvRvFW&)pU`sr!GLn?xu&rGaU9ZCGi<4|IdDQnuPIk~5@ zqhYt<3#jduMJrFArC#4;t&!nzMViw-ZKe^Z*lWUO_3f9`R8;vQj9<@|1o{$Ps^W8C zKiezih18DX%+1YKJh*ks!?MT&0Jx!8J4aMva#3@Q(p8ERD{{=}hzaU|QkWV0DU<>%#Q;g7vyg)2nF$9o6wM5Ad4(nP`MXUniF`feL)J7HJhv=^}|T} zFLXmxQ34yXw$Gjk24tc(P5AC%Q~XSOkt8jivB?M{NLo+Z*%@zi^))ukZx%-X&Cn9xj}~_h{FT|DgUhGi@NYPRG@)zxZakFd=V^~X*<4}didyV~-4 zwG+G(PE-H)Ww~C}@6Z0&Tr_R-eH;W&_^zHL1o*C=KfZi4Ihj1Eo4HXEKYn5| zEX&>UtU7!8AHMHb|LJe5#m&5W^ypFbiPujDwcEJ40kAL_*=j#|IJUaI&@k3j!AGO4^OPM5mH}NOm zRRFbxI*0(-gIVsw{Hfi(|Ecw{A~4X(187f7ogB)cnOR-Fe{}x%@n7lqeEFz){L^1G z5Ee@#{WBcnOCLHt>Fq3t=_8MaPA{6pdvCGYo=xf&O9h=v>*#u}BV)`cmX(Yz-c}cy zDcePLC(HGgQ|#?~tR*&!)m3u{p+wIuTa7MN6K8DD>1uO*)AnW})Y>*zOpy_WeLtHl zw&hz@En5>#wWY4UXx>e{6^GO3=Y48<(>Ba3E_K^1f>N)O)L` z%ZsXZdehuiTP7uk@eSx5+R3tRa#|*Gx>(&hSYKZ^8-uXjo7MF&iw(+7PssZ5Lgi-D z+-&C6y#+_yTf+GUn;lGYs;$%PE`s5(q3LwDEEmmN&xxs5muO405cs$J^x!>lS0AQPxa> z4qi8wWjB(CL}StC_Q0AU9ng@0(#<9D_=g#b?Aduli5Df4r=yjJ*~%8T(F=B9ftN1B zyWwF$?s{=Z$eqQPGZJRpK0+u{o1w?EnGScMRCT?&-mOOaBl@g~b2n$(T(9N>{cF8C z-CdU(C+87Z%65b-IwTt)CxX(q5}@N`G|3-p%(3a+O|75ZTBm8MfA7_KVa2$=*-jsv zU{~7w9DC6B&!pLoagN9v8u(@C)|g~GL#&sM&B)pa(%Ro~6ofFIX&*2{e&_55nO?iB z2$f8Pwi8W7KC9WnD?v(*DgSTTPZ*^PyM-thT+Vy-kQ5&Ztf22#hrv=;{n!N4EGAO) 
zoa0UY>q*>?oH!*G3a3AO*f4bOSo)ZRbbp$O+MJ2T>pr#h3)2xE)+cQnF;8@#Fd@sD z5695vJU>5NwEDqwnsBuGj;&8TTUB4o4D~6@YwgUyzAi;iJZs;%SAZ^B(ZDYF5?Ea> z-o?Y89nw!J0B|&sC zu2A9au3;fY2sxV|aPJ&}#aRb!Bd1-nw$)#5R>-OSB)Bt&hCZ<&(Ym$A^=+cV)&4Pq zK63^+Eb_}Rok-_Vtd&JAT_s;wNMiQ>(M!}g9BN0Xx-du3<0S`+CKHD%VK^S2bi?S( zEwh;Hi%mUcG}kgr*c~PDo~+z%b~2EiLfeWx$I&^oM{k{!^_+bw;)u?$x)ijV2t9En9FssVyHIR#F0o2lhek|Ony$rdjNpb$7EqN z6z>@7jPvQlqOC{h=?R0;zl^z#G2rv_-Obe%<|9X?osJ#!S=a;sV-N5tN{8**M%}|= zo12ym)4&Jr$;BqIsq44f@*1&u7_VS#md$Xvde_%~&yq5QYHsR}_NU7+0{S>8_R84% z4;>KUc-{}zXn*yAU9V^HG+-KVE~1qEv~f3C9bRoa&d=+W@Nd^3v^zWYQbuuroO{x6 zML_`cZx5>TG6cK8@ZQa=4!{BCd!c>T+-#lQ7uEDzm|hihwQ83)7e)c(Y^qO&?SnDs zA9g$c+<0xnWrXWyw?aw5@-_;*U0uh7OQ#tJ4Yet()$XjfSTQ|&=+ynWR_C_+$MwWC zVBCvv{`DN5X^+9W0Qz8=-m+~$zbgSOhVLf1)Q1?6$)Gbu^=mY~=!PlE3#wm?zp!vJ zzp!YT>;$qx;#X(*CL$x7H1)C$iWG@jSnm{*H3^YKgaDVmSm(cjiOE4mQUE8BMVmR* zjbh6990y?fqj)Su3!TklqmyCijzo!0S-?oZ%9Db!k(b4`jtXTIicypaV9SOqYcit9 zMl?lR9nogBLdWnlBF+SW?wG@9E1zPkM4GV}RZ9dh)NwOMkqEP%dD06>vXrO8n3gDk z3SvPs$W_OMWc^~bL?ZuXriFSv(3v&&0Z_|xIBG0!kfcq14*4`drc3l1TN<6dV0xpz zNF~n+5UP7jzZgS?!nQ(4F*w9VR{`n;41J)hG6KQQD#p~4qzFR>J*_sK2I+tRvm0~D zT1?GGtamOv4d|^}$RZ^C12}@c|P)ShoLU zVvY#xCacra*nJ%@nssG$k-|~0-Z-gOYF3{>(Ec0RWn#w11DZ*eo~zo1R_lvuv0(8y`DBy( z#`eFz>x=dcblR?;;eS2wFg_4$4HSIT<-tg@fk ziPMuCvmy>l-5~0yQ&x#wZ7o%qo*mF6Ye>r_v3;HJpgtQ@59qQ(Qir$U=TsNY90Jwr zD6%*kE0fT}1e5!+m{Jc_jAQ1Zwt)(Jg4FNOG8=gSc?X6sN~5^!g41cI{XW{xF4LH1 zC2^3vy0S8f=QiwPbs}1hGTq%mcaYFk-S5edm7upG7aj-9N}CV%pZpBn8{T`hue&k$ zz0;qbq1Dx`E>H%BwcOe20{Z;C(+eI`YmU?V!Fpc<@>iqu-Ct$Q^-YGt*##OxJRm>Q zMMts|y$qQ8hj?i&HU2}q{D*k?s}?T@fmQrqPtG&b-|lXEE_<+zH%wz##CUT*fZG%Q z47i6%e_h>2guhRmtr(e5oUK2F=p~eU1jJ8t!6fhJ8Tu=B*d+~tqwhz6d?MdHnCn(W z7FmR7?slX3;YhS!YY$-#jRGBq$KD3N_N-e#yU=~-(8?W69NA&0%Bf>7cx)vxb}M$B zKR>@Ks|lU7^dW-wKpo!Dd0X1FfyRA;NJbg_7^PTH1HCwq`=EXu6zKK1$YHuXoW^%( zv?c`R<=mW!?CZ?>;S^N+@31$f`}_Kok>2}4`gPX#G1_VPF!ychkaAZ(8%4@a&g9+ZWR;&fHRWNB4pFa7VpI#}N#DE)(oQ z4cI$Crd~gm)}@nW_=ud`owM!WM>ft?zo|UIWM(%-@!qmI-RZS+T&}KeWFh1||HB`Dq|iuipJ1;R7k5wqi2h%{CA9B@azn2sSHB~+6?Fy2A40>Y zD={WonBDsT(FqJm_mufr>88*=z@u=+eD{p@FUjuXUxeM?qm&p(_h<5ZM_+M_b}gCw zoftld92tK6-;v=Do%&)7KXzD@2Ir7LJ&@t=4Z9s=4ga2Dg8~1UVs8&>nAJ97?{)8` zV@!QH{gUo}XxHa)d}pTAO?gKZIm{RbleWG?Tplp{mll}!470@5ZME=zVAFyU=|9-r=mK!i|@v^fCmXYwU3@6B8nAaX^tbaC{{H_N#5(;91 zoTUA_NCi97FNc&LEwB+KzrwkL1rjo1HHE)?8R_v47N7?+{<5U;*@B}juqR=0z^6zv@WPz3Yxbbq%Bks}jV~g%eZGB^oSNC^P@Za4! 
[remaining GIT binary patch data truncated]

From a4ca9893ee422acc8b25442d71066b2df3251a86 Mon Sep 17 00:00:00 2001
From: Tessa Walsh
Date: Mon, 9 Dec 2024 16:09:09 -0500
Subject: [PATCH 40/57] Allow unlisted collections in single public collection
 GET

---
 backend/btrixcloud/colls.py      | 10 +++++--
 backend/test/test_collections.py | 50 ++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py
index 7e60544309..40e4180658 100644
--- a/backend/btrixcloud/colls.py
+++ b/backend/btrixcloud/colls.py
@@ -258,12 +258,16 @@ async def get_collection_out(
         return CollOut.from_dict(result)
 
     async def get_public_collection_out(
-        self, coll_id: UUID, org: Organization
+        self, coll_id: UUID, org: Organization, allow_unlisted: bool = False
     ) -> PublicCollOut:
         """Get PublicCollOut by id"""
         result = await self.get_collection_raw(coll_id)
 
-        if result.get("access") != "public":
+        allowed_access = [CollAccessType.PUBLIC]
+        if allow_unlisted:
+            allowed_access.append(CollAccessType.UNLISTED)
+
+        if result.get("access") not in
allowed_access: raise HTTPException(status_code=404, detail="collection_not_found") result["resources"] = await self.get_collection_crawl_resources(coll_id) @@ -1004,7 +1008,7 @@ async def get_public_collection( if not org.enablePublicProfile: raise HTTPException(status_code=404, detail="collection_not_found") - return await colls.get_public_collection_out(coll_id, org) + return await colls.get_public_collection_out(coll_id, org, allow_unlisted=True) @app.get( "/orgs/{oid}/collections/{coll_id}/urls", diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 854c2c430b..2ca5c9159a 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -1128,6 +1128,56 @@ def test_get_public_collection(default_org_id): assert r.json()["detail"] == "collection_not_found" +def test_get_public_collection_unlisted(crawler_auth_headers, default_org_id): + # Make second public coll unlisted + r = requests.patch( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_second_public_coll_id}", + headers=crawler_auth_headers, + json={ + "access": "unlisted", + }, + ) + assert r.status_code == 200 + assert r.json()["updated"] + + # Verify single public collection GET endpoint works for unlisted collection + r = requests.get( + f"{API_PREFIX}/public/orgs/{default_org_slug}/collections/{_second_public_coll_id}" + ) + assert r.status_code == 200 + coll = r.json() + + assert coll["id"] == _public_coll_id + assert coll["oid"] == default_org_id + assert coll["name"] + assert coll["resources"] + assert coll["dateEarliest"] + assert coll["dateLatest"] + assert coll["crawlCount"] > 0 + assert coll["pageCount"] > 0 + assert coll["totalSize"] > 0 + + for field in NON_PUBLIC_COLL_FIELDS: + assert field not in coll + + assert coll["caption"] == CAPTION + + assert coll["homeUrl"] + assert coll["homeUrlTs"] + + thumbnail = coll["thumbnail"] + assert thumbnail + + assert thumbnail["name"] + assert thumbnail["path"] + assert thumbnail["hash"] + assert thumbnail["size"] + assert thumbnail["mime"] + + for field in NON_PUBLIC_IMAGE_FIELDS: + assert field not in thumbnail + + def test_delete_thumbnail(crawler_auth_headers, default_org_id): r = requests.delete( f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}/thumbnail", From af0d798693bf19fe4b8f86ec41f6b5b5fa1118dc Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 9 Dec 2024 16:13:20 -0500 Subject: [PATCH 41/57] Add access field to PublicCollOut --- backend/btrixcloud/models.py | 4 +++- backend/test/test_collections.py | 12 +++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 430ca14a22..118900b969 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -1292,7 +1292,7 @@ class CollOut(BaseMongoModel): # Sorted by count, descending tags: Optional[List[str]] = [] - access: CollAccessType = CollAccessType.PRIVATE + access: CollAccessType homeUrl: Optional[AnyHttpUrl] = None homeUrlTs: Optional[datetime] = None @@ -1318,6 +1318,8 @@ class PublicCollOut(BaseMongoModel): dateEarliest: Optional[datetime] = None dateLatest: Optional[datetime] = None + access: CollAccessType + homeUrl: Optional[AnyHttpUrl] = None homeUrlTs: Optional[datetime] = None diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 2ca5c9159a..9350d4c6b4 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -19,7 +19,6 @@ NON_PUBLIC_COLL_FIELDS = ( "modified", "tags", - 
"access", "homeUrlPageId", ) NON_PUBLIC_IMAGE_FIELDS = ("originalFilename", "userid", "userName", "created") @@ -874,6 +873,7 @@ def test_list_public_collections( for collection in collections: assert collection["id"] in (_public_coll_id, _second_public_coll_id) assert collection["oid"] + assert collection["access"] == "public" assert collection["name"] assert collection["dateEarliest"] assert collection["dateLatest"] @@ -1034,6 +1034,7 @@ def test_list_public_colls_home_url_thumbnail(): for coll in collections: assert coll["id"] in (_public_coll_id, _second_public_coll_id) assert coll["oid"] + assert coll["access"] == "public" assert coll["name"] assert coll["resources"] assert coll["dateEarliest"] @@ -1076,6 +1077,7 @@ def test_get_public_collection(default_org_id): assert coll["id"] == _public_coll_id assert coll["oid"] == default_org_id + assert coll["access"] == "public" assert coll["name"] assert coll["resources"] assert coll["dateEarliest"] @@ -1147,8 +1149,9 @@ def test_get_public_collection_unlisted(crawler_auth_headers, default_org_id): assert r.status_code == 200 coll = r.json() - assert coll["id"] == _public_coll_id + assert coll["id"] == _second_public_coll_id assert coll["oid"] == default_org_id + assert coll["access"] == "unlisted" assert coll["name"] assert coll["resources"] assert coll["dateEarliest"] @@ -1160,11 +1163,6 @@ def test_get_public_collection_unlisted(crawler_auth_headers, default_org_id): for field in NON_PUBLIC_COLL_FIELDS: assert field not in coll - assert coll["caption"] == CAPTION - - assert coll["homeUrl"] - assert coll["homeUrlTs"] - thumbnail = coll["thumbnail"] assert thumbnail From 72afd60a8f837fd62eced201479d0b2a21a52fd6 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 9 Dec 2024 17:14:30 -0500 Subject: [PATCH 42/57] Don't expect thumbnail for unlisted collection --- backend/test/test_collections.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 9350d4c6b4..13ddb62dac 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -1163,18 +1163,6 @@ def test_get_public_collection_unlisted(crawler_auth_headers, default_org_id): for field in NON_PUBLIC_COLL_FIELDS: assert field not in coll - thumbnail = coll["thumbnail"] - assert thumbnail - - assert thumbnail["name"] - assert thumbnail["path"] - assert thumbnail["hash"] - assert thumbnail["size"] - assert thumbnail["mime"] - - for field in NON_PUBLIC_IMAGE_FIELDS: - assert field not in thumbnail - def test_delete_thumbnail(crawler_auth_headers, default_org_id): r = requests.delete( From 902e49e0ab4da0bd7a4d34de291b7543cc79190c Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 10 Dec 2024 10:28:16 -0500 Subject: [PATCH 43/57] Fix thumbnail-related 500 error with private collections list --- backend/btrixcloud/colls.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 40e4180658..629d94f14e 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -294,7 +294,7 @@ async def list_collections( access: Optional[str] = None, ): """List all collections for org""" - # pylint: disable=too-many-locals, duplicate-code + # pylint: disable=too-many-locals, duplicate-code, too-many-branches # Zero-index page for query page = page - 1 skip = page * page_size @@ -351,16 +351,22 @@ async def list_collections( collections: List[Union[CollOut, PublicCollOut]] = [] for res in 
items: - if public_colls_out: - res["resources"] = await self.get_collection_crawl_resources(res["_id"]) + res["resources"] = await self.get_collection_crawl_resources(res["_id"]) + + thumbnail = res.get("thumbnail") + if thumbnail: + image_file = ImageFile(**thumbnail) - thumbnail = res.get("thumbnail") - if thumbnail: - image_file = ImageFile(**thumbnail) + if public_colls_out: res["thumbnail"] = await image_file.get_public_image_file_out( org, self.storage_ops ) + else: + res["thumbnail"] = await image_file.get_image_file_out( + org, self.storage_ops + ) + if public_colls_out: collections.append(PublicCollOut.from_dict(res)) else: collections.append(CollOut.from_dict(res)) From 57825a2de39dc8d98b37d6d058fa64671fe893e9 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 10 Dec 2024 16:21:26 -0500 Subject: [PATCH 44/57] Add defaultThumbnailName field to collections This allows the frontend to specify which default thumbnail to use without needing to re-upload the same file to the backend and stream if from s3 each time it's used. The field can be set on collection creation and is is updatable via the collection PATCH endpoint. --- backend/btrixcloud/colls.py | 1 + backend/btrixcloud/models.py | 6 ++++++ backend/test/test_collections.py | 34 ++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 629d94f14e..c41d090b6f 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -118,6 +118,7 @@ async def add_collection(self, oid: UUID, coll_in: CollIn): caption=coll_in.caption, modified=modified, access=coll_in.access, + defaultThumbnailName=coll_in.defaultThumbnailName, ) try: await self.collections.insert_one(coll.to_dict()) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 118900b969..97fb4816f8 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -1258,6 +1258,7 @@ class Collection(BaseMongoModel): homeUrlPageId: Optional[UUID] = None thumbnail: Optional[ImageFile] = None + defaultThumbnailName: Optional[str] = None # ============================================================================ @@ -1271,6 +1272,8 @@ class CollIn(BaseModel): access: CollAccessType = CollAccessType.PRIVATE + defaultThumbnailName: Optional[str] = None + # ============================================================================ class CollOut(BaseMongoModel): @@ -1300,6 +1303,7 @@ class CollOut(BaseMongoModel): resources: List[CrawlFileOut] = [] thumbnail: Optional[ImageFileOut] = None + defaultThumbnailName: Optional[str] = None # ============================================================================ @@ -1325,6 +1329,7 @@ class PublicCollOut(BaseMongoModel): resources: List[CrawlFileOut] = [] thumbnail: Optional[PublicImageFileOut] = None + defaultThumbnailName: Optional[str] = None # ============================================================================ @@ -1335,6 +1340,7 @@ class UpdateColl(BaseModel): description: Optional[str] = None caption: Optional[str] = None access: Optional[CollAccessType] = None + defaultThumbnailName: Optional[str] = None # ============================================================================ diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 13ddb62dac..630b95690e 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -38,6 +38,8 @@ def test_create_collection( crawler_auth_headers, default_org_id, crawler_crawl_id, 
admin_crawl_id ): + default_thumbnail_name = "default-thumbnail.jpg" + r = requests.post( f"{API_PREFIX}/orgs/{default_org_id}/collections", headers=crawler_auth_headers, @@ -45,6 +47,7 @@ def test_create_collection( "crawlIds": [crawler_crawl_id], "name": COLLECTION_NAME, "caption": CAPTION, + "defaultThumbnailName": default_thumbnail_name, }, ) assert r.status_code == 200 @@ -83,6 +86,8 @@ def test_create_collection( assert data["dateEarliest"] assert data["dateLatest"] + assert data["defaultThumbnailName"] == default_thumbnail_name + def test_create_public_collection( crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id @@ -176,6 +181,7 @@ def test_update_collection( assert modified.endswith("Z") assert data["dateEarliest"] assert data["dateLatest"] + assert data["defaultThumbnailName"] def test_rename_collection( @@ -327,6 +333,7 @@ def test_get_collection(crawler_auth_headers, default_org_id): assert data["tags"] == ["wr-test-2", "wr-test-1"] assert data["dateEarliest"] assert data["dateLatest"] + assert data["defaultThumbnailName"] def test_get_collection_replay(crawler_auth_headers, default_org_id): @@ -348,6 +355,7 @@ def test_get_collection_replay(crawler_auth_headers, default_org_id): assert data["tags"] == ["wr-test-2", "wr-test-1"] assert data["dateEarliest"] assert data["dateLatest"] + assert data["defaultThumbnailName"] resources = data["resources"] assert resources @@ -466,6 +474,7 @@ def test_add_upload_to_collection(crawler_auth_headers, default_org_id): assert data["tags"] == ["wr-test-2", "wr-test-1"] assert data["dateEarliest"] assert data["dateLatest"] + assert data["defaultThumbnailName"] # Verify it was added r = requests.get( @@ -525,6 +534,7 @@ def test_list_collections( assert first_coll["access"] == "private" assert first_coll["dateEarliest"] assert first_coll["dateLatest"] + assert first_coll["defaultThumbnailName"] second_coll = [coll for coll in items if coll["name"] == SECOND_COLLECTION_NAME][0] assert second_coll["id"] @@ -1011,6 +1021,28 @@ def test_upload_collection_thumbnail(crawler_auth_headers, default_org_id): assert thumbnail["created"] +def test_set_collection_default_thumbnail(crawler_auth_headers, default_org_id): + default_thumbnail_name = "orange-default.avif" + + r = requests.patch( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_second_public_coll_id}", + headers=crawler_auth_headers, + json={"defaultThumbnailName": default_thumbnail_name}, + ) + assert r.status_code == 200 + assert r.json()["updated"] + + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_second_public_coll_id}", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + + assert data["id"] == _second_public_coll_id + assert data["defaultThumbnailName"] == default_thumbnail_name + + def test_list_public_colls_home_url_thumbnail(): # Check we get expected data for each public collection # and nothing we don't expect @@ -1066,6 +1098,7 @@ def test_list_public_colls_home_url_thumbnail(): if coll["id"] == _second_public_coll_id: assert coll["description"] + assert coll["defaultThumbnailName"] == "orange-default.avif" def test_get_public_collection(default_org_id): @@ -1159,6 +1192,7 @@ def test_get_public_collection_unlisted(crawler_auth_headers, default_org_id): assert coll["crawlCount"] > 0 assert coll["pageCount"] > 0 assert coll["totalSize"] > 0 + assert coll["defaultThumbnailName"] == "orange-default.avif" for field in NON_PUBLIC_COLL_FIELDS: assert field not in coll From 
35474a47bece5bdac7631d65365af06c32c453b7 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 11 Dec 2024 10:43:00 -0500 Subject: [PATCH 45/57] Add fallback values for access in coll out models --- backend/btrixcloud/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 97fb4816f8..d189ee1d71 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -1295,7 +1295,7 @@ class CollOut(BaseMongoModel): # Sorted by count, descending tags: Optional[List[str]] = [] - access: CollAccessType + access: CollAccessType = CollAccessType.PRIVATE homeUrl: Optional[AnyHttpUrl] = None homeUrlTs: Optional[datetime] = None @@ -1322,7 +1322,7 @@ class PublicCollOut(BaseMongoModel): dateEarliest: Optional[datetime] = None dateLatest: Optional[datetime] = None - access: CollAccessType + access: CollAccessType = CollAccessType.PUBLIC homeUrl: Optional[AnyHttpUrl] = None homeUrlTs: Optional[datetime] = None From 7e8583aa2043a1c6563519cf2e43c957f24c68c2 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 11 Dec 2024 14:36:15 -0500 Subject: [PATCH 46/57] Add ability to unset home url via /home-url endpoint --- backend/btrixcloud/colls.py | 20 +++++++++++++------- backend/btrixcloud/models.py | 2 +- backend/test/test_collections.py | 24 ++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index c41d090b6f..0e1cedd990 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -690,13 +690,19 @@ async def set_home_url( self, coll_id: UUID, update: UpdateCollHomeUrl, org: Organization ) -> Dict[str, bool]: """Set home URL for collection and save thumbnail to database""" - page = await self.page_ops.get_page(update.pageId, org.id) - - update_query = { - "homeUrl": page.url, - "homeUrlTs": page.ts, - "homeUrlPageId": page.id, - } + if update.pageId: + page = await self.page_ops.get_page(update.pageId, org.id) + update_query = { + "homeUrl": page.url, + "homeUrlTs": page.ts, + "homeUrlPageId": page.id, + } + else: + update_query = { + "homeUrl": None, + "homeUrlTs": None, + "homeUrlPageId": None, + } await self.collections.find_one_and_update( {"_id": coll_id, "oid": org.id}, diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index d189ee1d71..1e37592366 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -1347,7 +1347,7 @@ class UpdateColl(BaseModel): class UpdateCollHomeUrl(BaseModel): """Update home url for collection""" - pageId: UUID + pageId: Optional[UUID] = None # ============================================================================ diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 630b95690e..6d503c0f9d 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -1221,6 +1221,30 @@ def test_delete_thumbnail(crawler_auth_headers, default_org_id): assert r.json()["detail"] == "thumbnail_not_found" +def test_unset_collection_home_url( + crawler_auth_headers, default_org_id, crawler_crawl_id +): + # Unset home url + r = requests.post( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}/home-url", + headers=crawler_auth_headers, + json={"pageId": None}, + ) + assert r.status_code == 200 + assert r.json()["updated"] + + # Check that fields were set in collection as expected + r = requests.get( + 
f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + assert data.get("homeUrl") is None + assert data.get("homeUrlTs") is None + assert data.get("homeUrlPageId") is None + + def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id): # Delete second collection r = requests.delete( From 4b9a60df08af4d61099f9782332d7109f761809c Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 11 Dec 2024 17:04:21 -0500 Subject: [PATCH 47/57] Add public collection download endpoint --- backend/btrixcloud/colls.py | 24 ++++++++++++++++++++++++ backend/test/test_collections.py | 20 ++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 0e1cedd990..580b37098a 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -1023,6 +1023,30 @@ async def get_public_collection( return await colls.get_public_collection_out(coll_id, org, allow_unlisted=True) + @app.get( + "/public/orgs/{org_slug}/collections/{coll_id}/download", + tags=["collections", "public"], + response_model=bytes, + ) + async def download_public_collection( + org_slug: str, + coll_id: UUID, + ): + try: + org = await colls.orgs.get_org_by_slug(org_slug) + # pylint: disable=broad-exception-caught + except Exception: + # pylint: disable=raise-missing-from + raise HTTPException(status_code=404, detail="collection_not_found") + + if not org.enablePublicProfile: + raise HTTPException(status_code=404, detail="collection_not_found") + + # Make sure collection exists and is public/unlisted + coll = await colls.get_collection(coll_id, public_or_unlisted_only=True) + + return await colls.download_collection(coll.id, org) + @app.get( "/orgs/{oid}/collections/{coll_id}/urls", tags=["collections"], diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 6d503c0f9d..2e44580093 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -1245,6 +1245,26 @@ def test_unset_collection_home_url( assert data.get("homeUrlPageId") is None +def test_download_streaming_public_collection(): + with TemporaryFile() as fh: + with requests.get( + f"{API_PREFIX}/public/orgs/{default_org_slug}/collections/{_public_coll_id}/download", + stream=True, + ) as r: + assert r.status_code == 200 + for chunk in r.iter_content(): + fh.write(chunk) + + fh.seek(0) + with ZipFile(fh, "r") as zip_file: + contents = zip_file.namelist() + + assert len(contents) == 2 + for filename in contents: + assert filename.endswith(".wacz") or filename == "datapackage.json" + assert zip_file.getinfo(filename).compress_type == ZIP_STORED + + def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id): # Delete second collection r = requests.delete( From 7cf0b6238a33d6c1767034da9e9749529b0d8cfe Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 11 Dec 2024 17:14:06 -0500 Subject: [PATCH 48/57] Implement and enforce Collection.allowPublicDownload --- backend/btrixcloud/colls.py | 4 ++++ backend/btrixcloud/models.py | 8 ++++++++ backend/test/test_collections.py | 28 +++++++++++++++++++++++++++- 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 580b37098a..87c7893d45 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -119,6 +119,7 @@ async def add_collection(self, oid: UUID, coll_in: CollIn): 
modified=modified, access=coll_in.access, defaultThumbnailName=coll_in.defaultThumbnailName, + allowPublicDownload=coll_in.allowPublicDownload, ) try: await self.collections.insert_one(coll.to_dict()) @@ -1045,6 +1046,9 @@ async def download_public_collection( # Make sure collection exists and is public/unlisted coll = await colls.get_collection(coll_id, public_or_unlisted_only=True) + if coll.allowPublicDownload is False: + raise HTTPException(status_code=403, detail="not_allowed") + return await colls.download_collection(coll.id, org) @app.get( diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 1e37592366..315013a0fa 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -1260,6 +1260,8 @@ class Collection(BaseMongoModel): thumbnail: Optional[ImageFile] = None defaultThumbnailName: Optional[str] = None + allowPublicDownload: Optional[bool] = True + # ============================================================================ class CollIn(BaseModel): @@ -1273,6 +1275,7 @@ class CollIn(BaseModel): access: CollAccessType = CollAccessType.PRIVATE defaultThumbnailName: Optional[str] = None + allowPublicDownload: bool = True # ============================================================================ @@ -1305,6 +1308,8 @@ class CollOut(BaseMongoModel): thumbnail: Optional[ImageFileOut] = None defaultThumbnailName: Optional[str] = None + allowPublicDownload: Optional[bool] = True + # ============================================================================ class PublicCollOut(BaseMongoModel): @@ -1331,6 +1336,8 @@ class PublicCollOut(BaseMongoModel): thumbnail: Optional[PublicImageFileOut] = None defaultThumbnailName: Optional[str] = None + allowPublicDownload: Optional[bool] = True + # ============================================================================ class UpdateColl(BaseModel): @@ -1341,6 +1348,7 @@ class UpdateColl(BaseModel): caption: Optional[str] = None access: Optional[CollAccessType] = None defaultThumbnailName: Optional[str] = None + allowPublicDownload: Optional[bool] = None # ============================================================================ diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 2e44580093..5ec7b5d326 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -87,6 +87,7 @@ def test_create_collection( assert data["dateLatest"] assert data["defaultThumbnailName"] == default_thumbnail_name + assert data["allowPublicDownload"] def test_create_public_collection( @@ -100,6 +101,7 @@ def test_create_public_collection( "name": PUBLIC_COLLECTION_NAME, "caption": CAPTION, "access": "public", + "allowPublicDownload": False, }, ) assert r.status_code == 200 @@ -1079,6 +1081,8 @@ def test_list_public_colls_home_url_thumbnail(): assert field not in coll if coll["id"] == _public_coll_id: + assert coll["allowPublicDownload"] is False + assert coll["caption"] == CAPTION assert coll["homeUrl"] @@ -1099,6 +1103,7 @@ def test_list_public_colls_home_url_thumbnail(): if coll["id"] == _second_public_coll_id: assert coll["description"] assert coll["defaultThumbnailName"] == "orange-default.avif" + assert coll["allowPublicDownload"] def test_get_public_collection(default_org_id): @@ -1127,6 +1132,8 @@ def test_get_public_collection(default_org_id): assert coll["homeUrl"] assert coll["homeUrlTs"] + assert coll["allowPublicDownload"] is False + thumbnail = coll["thumbnail"] assert thumbnail @@ -1193,6 +1200,7 @@ def 
test_get_public_collection_unlisted(crawler_auth_headers, default_org_id): assert coll["pageCount"] > 0 assert coll["totalSize"] > 0 assert coll["defaultThumbnailName"] == "orange-default.avif" + assert coll["allowPublicDownload"] for field in NON_PUBLIC_COLL_FIELDS: assert field not in coll @@ -1245,7 +1253,25 @@ def test_unset_collection_home_url( assert data.get("homeUrlPageId") is None -def test_download_streaming_public_collection(): +def test_download_streaming_public_collection(crawler_auth_headers, default_org_id): + # Check that download is blocked if allowPublicDownload is False + with requests.get( + f"{API_PREFIX}/public/orgs/{default_org_slug}/collections/{_public_coll_id}/download", + stream=True, + ) as r: + assert r.status_code == 403 + + # Set allowPublicDownload to True and then check downloading works + r = requests.patch( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}", + headers=crawler_auth_headers, + json={ + "allowPublicDownload": True, + }, + ) + assert r.status_code == 200 + assert r.json()["updated"] + with TemporaryFile() as fh: with requests.get( f"{API_PREFIX}/public/orgs/{default_org_slug}/collections/{_public_coll_id}/download", From 7ac420d992cb1c8b238e4d9774cdd156bb9538ef Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 11 Dec 2024 17:21:42 -0500 Subject: [PATCH 49/57] Fix linting --- backend/btrixcloud/colls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 87c7893d45..53f7185f16 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -1049,7 +1049,7 @@ async def download_public_collection( if coll.allowPublicDownload is False: raise HTTPException(status_code=403, detail="not_allowed") - return await colls.download_collection(coll.id, org) + return await colls.download_collection(coll_id, org) @app.get( "/orgs/{oid}/collections/{coll_id}/urls", From d00cc39384d8f310c1d56df4e14ec5cda0ddbbe1 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 11 Dec 2024 17:24:47 -0500 Subject: [PATCH 50/57] Make sure coll out models return allowPublicDownload as bool --- backend/btrixcloud/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 315013a0fa..5517067948 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -1308,7 +1308,7 @@ class CollOut(BaseMongoModel): thumbnail: Optional[ImageFileOut] = None defaultThumbnailName: Optional[str] = None - allowPublicDownload: Optional[bool] = True + allowPublicDownload: bool = True # ============================================================================ @@ -1336,7 +1336,7 @@ class PublicCollOut(BaseMongoModel): thumbnail: Optional[PublicImageFileOut] = None defaultThumbnailName: Optional[str] = None - allowPublicDownload: Optional[bool] = True + allowPublicDownload: bool = True # ============================================================================ From 4482b381fa05faa9ff126ac0475b26e68dc5be37 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 17 Dec 2024 15:35:22 -0500 Subject: [PATCH 51/57] Make migration idempotent - don't readd existing upload pages --- backend/btrixcloud/db.py | 6 ++--- .../migrations/migration_0037_upload_pages.py | 26 +++++++++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/backend/btrixcloud/db.py b/backend/btrixcloud/db.py index 460f1a1082..0723258e40 100644 --- a/backend/btrixcloud/db.py +++ 
b/backend/btrixcloud/db.py @@ -95,7 +95,7 @@ async def update_and_prepare_db( """ await ping_db(mdb) print("Database setup started", flush=True) - if await run_db_migrations(mdb, user_manager, background_job_ops): + if await run_db_migrations(mdb, user_manager, background_job_ops, page_ops): await drop_indexes(mdb) await create_indexes( org_ops, @@ -114,7 +114,7 @@ async def update_and_prepare_db( # ============================================================================ -async def run_db_migrations(mdb, user_manager, background_job_ops): +async def run_db_migrations(mdb, user_manager, background_job_ops, page_ops): """Run database migrations.""" # if first run, just set version and exit @@ -147,7 +147,7 @@ async def run_db_migrations(mdb, user_manager, background_job_ops): migration_module = importlib.util.module_from_spec(spec) spec.loader.exec_module(migration_module) migration = migration_module.Migration( - mdb, background_job_ops=background_job_ops + mdb, background_job_ops=background_job_ops, page_ops=page_ops ) if await migration.run(): migrations_run = True diff --git a/backend/btrixcloud/migrations/migration_0037_upload_pages.py b/backend/btrixcloud/migrations/migration_0037_upload_pages.py index 9bb4408a0d..174e0e31d2 100644 --- a/backend/btrixcloud/migrations/migration_0037_upload_pages.py +++ b/backend/btrixcloud/migrations/migration_0037_upload_pages.py @@ -2,6 +2,8 @@ Migration 0037 -- upload pages """ +from uuid import UUID + from btrixcloud.migrations import BaseMigration @@ -16,6 +18,17 @@ def __init__(self, mdb, **kwargs): super().__init__(mdb, migration_version=MIGRATION_VERSION) self.background_job_ops = kwargs.get("background_job_ops") + self.page_ops = kwargs.get("page_ops") + + async def org_upload_pages_already_added(self, oid: UUID) -> bool: + """Check if upload pages have already been added for this org""" + mdb_crawls = self.mdb["crawls"] + async for upload in mdb_crawls.find({"oid": oid, "type": "upload"}): + upload_id = upload["_id"] + _, total = await self.page_ops.list_pages(upload_id) + if total > 0: + return True + return False async def migrate_up(self): """Perform migration up. 
@@ -28,9 +41,22 @@ async def migrate_up(self): ) return + if self.page_ops is None: + print( + "Unable to start background job, missing page_ops", flush=True + ) + return + mdb_orgs = self.mdb["organizations"] async for org in mdb_orgs.find(): oid = org["_id"] + + pages_already_added = await self.org_upload_pages_already_added(oid) + + if pages_already_added: + print(f"Skipping org {oid}, upload pages already added to db", flush=True) + continue + try: await self.background_job_ops.create_re_add_org_pages_job( oid, crawl_type="upload" From 73813ddab0731b73c2a569c8b4122b22f05cf536 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 17 Dec 2024 15:58:43 -0500 Subject: [PATCH 52/57] Reformat migration --- .../btrixcloud/migrations/migration_0037_upload_pages.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/btrixcloud/migrations/migration_0037_upload_pages.py b/backend/btrixcloud/migrations/migration_0037_upload_pages.py index 174e0e31d2..bab57b97ea 100644 --- a/backend/btrixcloud/migrations/migration_0037_upload_pages.py +++ b/backend/btrixcloud/migrations/migration_0037_upload_pages.py @@ -42,9 +42,7 @@ async def migrate_up(self): return if self.page_ops is None: - print( - "Unable to start background job, missing page_ops", flush=True - ) + print("Unable to start background job, missing page_ops", flush=True) return mdb_orgs = self.mdb["organizations"] @@ -54,7 +52,9 @@ async def migrate_up(self): pages_already_added = await self.org_upload_pages_already_added(oid) if pages_already_added: - print(f"Skipping org {oid}, upload pages already added to db", flush=True) + print( + f"Skipping org {oid}, upload pages already added to db", flush=True + ) continue try: From 2dce950508e7963eae2e8ead6ea5510950e62a07 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 17 Dec 2024 16:04:37 -0500 Subject: [PATCH 53/57] Fix typing --- .../migrations/migration_0037_upload_pages.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/backend/btrixcloud/migrations/migration_0037_upload_pages.py b/backend/btrixcloud/migrations/migration_0037_upload_pages.py index bab57b97ea..62bfe98237 100644 --- a/backend/btrixcloud/migrations/migration_0037_upload_pages.py +++ b/backend/btrixcloud/migrations/migration_0037_upload_pages.py @@ -22,6 +22,13 @@ def __init__(self, mdb, **kwargs): async def org_upload_pages_already_added(self, oid: UUID) -> bool: """Check if upload pages have already been added for this org""" + if self.page_ops is None: + print( + f"page_ops missing, assuming pages need to be added for org {oid}", + flush=True, + ) + return False + mdb_crawls = self.mdb["crawls"] async for upload in mdb_crawls.find({"oid": oid, "type": "upload"}): upload_id = upload["_id"] @@ -41,10 +48,6 @@ async def migrate_up(self): ) return - if self.page_ops is None: - print("Unable to start background job, missing page_ops", flush=True) - return - mdb_orgs = self.mdb["organizations"] async for org in mdb_orgs.find(): oid = org["_id"] From 979884eed0d8a67292e6339f586ee463ff0102e5 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 17 Dec 2024 23:13:45 -0500 Subject: [PATCH 54/57] Allow getting and downloading public collections if org profile disabled --- backend/btrixcloud/colls.py | 6 --- backend/test/test_collections.py | 82 ++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 6 deletions(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 53f7185f16..b0c65671fb 100644 --- a/backend/btrixcloud/colls.py +++ 
b/backend/btrixcloud/colls.py @@ -1019,9 +1019,6 @@ async def get_public_collection( # pylint: disable=raise-missing-from raise HTTPException(status_code=404, detail="collection_not_found") - if not org.enablePublicProfile: - raise HTTPException(status_code=404, detail="collection_not_found") - return await colls.get_public_collection_out(coll_id, org, allow_unlisted=True) @app.get( @@ -1040,9 +1037,6 @@ async def download_public_collection( # pylint: disable=raise-missing-from raise HTTPException(status_code=404, detail="collection_not_found") - if not org.enablePublicProfile: - raise HTTPException(status_code=404, detail="collection_not_found") - # Make sure collection exists and is public/unlisted coll = await colls.get_collection(coll_id, public_or_unlisted_only=True) diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 5ec7b5d326..c2999bb5ad 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -1206,6 +1206,55 @@ def test_get_public_collection_unlisted(crawler_auth_headers, default_org_id): assert field not in coll +def test_get_public_collection_unlisted_org_profile_disabled( + admin_auth_headers, default_org_id +): + # Disable org profile + r = requests.post( + f"{API_PREFIX}/orgs/{default_org_id}/public-profile", + headers=admin_auth_headers, + json={ + "enablePublicProfile": False, + }, + ) + assert r.status_code == 200 + assert r.json()["updated"] + + # Verify we can still get public details for unlisted collection + r = requests.get( + f"{API_PREFIX}/public/orgs/{default_org_slug}/collections/{_second_public_coll_id}" + ) + assert r.status_code == 200 + coll = r.json() + + assert coll["id"] == _second_public_coll_id + assert coll["oid"] == default_org_id + assert coll["access"] == "unlisted" + assert coll["name"] + assert coll["resources"] + assert coll["dateEarliest"] + assert coll["dateLatest"] + assert coll["crawlCount"] > 0 + assert coll["pageCount"] > 0 + assert coll["totalSize"] > 0 + assert coll["defaultThumbnailName"] == "orange-default.avif" + assert coll["allowPublicDownload"] + + for field in NON_PUBLIC_COLL_FIELDS: + assert field not in coll + + # Re-enable org profile + r = requests.post( + f"{API_PREFIX}/orgs/{default_org_id}/public-profile", + headers=admin_auth_headers, + json={ + "enablePublicProfile": True, + }, + ) + assert r.status_code == 200 + assert r.json()["updated"] + + def test_delete_thumbnail(crawler_auth_headers, default_org_id): r = requests.delete( f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}/thumbnail", @@ -1291,6 +1340,39 @@ def test_download_streaming_public_collection(crawler_auth_headers, default_org_ assert zip_file.getinfo(filename).compress_type == ZIP_STORED +def test_download_streaming_public_collection_profile_disabled( + admin_auth_headers, default_org_id +): + # Disable org public profile and ensure download still works for public collection + r = requests.post( + f"{API_PREFIX}/orgs/{default_org_id}/public-profile", + headers=admin_auth_headers, + json={ + "enablePublicProfile": False, + }, + ) + assert r.status_code == 200 + assert r.json()["updated"] + + with TemporaryFile() as fh: + with requests.get( + f"{API_PREFIX}/public/orgs/{default_org_slug}/collections/{_public_coll_id}/download", + stream=True, + ) as r: + assert r.status_code == 200 + for chunk in r.iter_content(): + fh.write(chunk) + + fh.seek(0) + with ZipFile(fh, "r") as zip_file: + contents = zip_file.namelist() + + assert len(contents) == 2 + for filename in contents: + assert 
filename.endswith(".wacz") or filename == "datapackage.json" + assert zip_file.getinfo(filename).compress_type == ZIP_STORED + + def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id): # Delete second collection r = requests.delete( From 320b218d64a26ec43aa1af76a5bf267f635187eb Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 17 Dec 2024 23:53:34 -0500 Subject: [PATCH 55/57] URL decode urlPrefix in /urls endpoint --- backend/btrixcloud/colls.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index b0c65671fb..d934db0a7a 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -8,6 +8,7 @@ from uuid import UUID, uuid4 from typing import Optional, List, TYPE_CHECKING, cast, Dict, Tuple, Any, Union import os +import urllib.parse import asyncio import pymongo @@ -1058,10 +1059,11 @@ async def get_collection_url_list( page: int = 1, ): """Retrieve paginated list of urls in collection sorted by snapshot count""" + url_prefix = urllib.parse.unquote(urlPrefix) if urlPrefix else None pages, total = await colls.list_urls_in_collection( coll_id=coll_id, oid=oid, - url_prefix=urlPrefix, + url_prefix=url_prefix, page_size=pageSize, page=page, ) From fb0ad7bbc468ef50a2a4a67ec59750101b871f78 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 18 Dec 2024 00:45:22 -0500 Subject: [PATCH 56/57] Do URL decoding inside list urls method --- backend/btrixcloud/colls.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index d934db0a7a..be21c25a5c 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -635,6 +635,7 @@ async def list_urls_in_collection( match_query: dict[str, object] = {"oid": oid, "crawl_id": {"$in": crawl_ids}} if url_prefix: + url_prefix = urllib.parse.unquote(url_prefix) regex_pattern = f"^{url_prefix}" match_query["url"] = {"$regex": regex_pattern, "$options": "i"} @@ -1059,11 +1060,10 @@ async def get_collection_url_list( page: int = 1, ): """Retrieve paginated list of urls in collection sorted by snapshot count""" - url_prefix = urllib.parse.unquote(urlPrefix) if urlPrefix else None pages, total = await colls.list_urls_in_collection( coll_id=coll_id, oid=oid, - url_prefix=url_prefix, + url_prefix=urlPrefix, page_size=pageSize, page=page, ) From f8824092b0d2a2c0eab5a477f47b5bfe0b665049 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 18 Dec 2024 00:47:54 -0500 Subject: [PATCH 57/57] Escape special characters in url_prefix regex --- backend/btrixcloud/colls.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index be21c25a5c..d9ab766aa9 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -8,6 +8,7 @@ from uuid import UUID, uuid4 from typing import Optional, List, TYPE_CHECKING, cast, Dict, Tuple, Any, Union import os +import re import urllib.parse import asyncio @@ -636,7 +637,7 @@ async def list_urls_in_collection( if url_prefix: url_prefix = urllib.parse.unquote(url_prefix) - regex_pattern = f"^{url_prefix}" + regex_pattern = f"^{re.escape(url_prefix)}" match_query["url"] = {"$regex": regex_pattern, "$options": "i"} aggregate = [{"$match": match_query}]