diff --git a/backend/btrixcloud/background_jobs.py b/backend/btrixcloud/background_jobs.py index b9667078e0..35a2e75d59 100644 --- a/backend/btrixcloud/background_jobs.py +++ b/backend/btrixcloud/background_jobs.py @@ -1,7 +1,6 @@ """k8s background jobs""" import asyncio -import os from datetime import datetime from typing import Optional, Tuple, Union, List, Dict, TYPE_CHECKING, cast from uuid import UUID @@ -22,6 +21,7 @@ DeleteReplicaJob, DeleteOrgJob, RecalculateOrgStatsJob, + ReAddOrgPagesJob, PaginatedBackgroundJobResponse, AnyJob, StorageRef, @@ -286,8 +286,6 @@ async def create_delete_org_job( try: job_id = await self.crawl_manager.run_delete_org_job( oid=str(org.id), - backend_image=os.environ.get("BACKEND_IMAGE", ""), - pull_policy=os.environ.get("BACKEND_IMAGE_PULL_POLICY", ""), existing_job_id=existing_job_id, ) if existing_job_id: @@ -331,8 +329,6 @@ async def create_recalculate_org_stats_job( try: job_id = await self.crawl_manager.run_recalculate_org_stats_job( oid=str(org.id), - backend_image=os.environ.get("BACKEND_IMAGE", ""), - pull_policy=os.environ.get("BACKEND_IMAGE_PULL_POLICY", ""), existing_job_id=existing_job_id, ) if existing_job_id: @@ -366,6 +362,52 @@ async def create_recalculate_org_stats_job( print(f"warning: recalculate org stats job could not be started: {exc}") return None + async def create_re_add_org_pages_job( + self, + oid: UUID, + crawl_type: Optional[str] = None, + existing_job_id: Optional[str] = None, + ): + """Create job to (re)add all pages in an org, optionally filtered by crawl type""" + + try: + job_id = await self.crawl_manager.run_re_add_org_pages_job( + oid=str(oid), + crawl_type=crawl_type, + existing_job_id=existing_job_id, + ) + if existing_job_id: + readd_pages_job = await self.get_background_job(existing_job_id, oid) + previous_attempt = { + "started": readd_pages_job.started, + "finished": readd_pages_job.finished, + } + if readd_pages_job.previousAttempts: + readd_pages_job.previousAttempts.append(previous_attempt) + else: + readd_pages_job.previousAttempts = [previous_attempt] + readd_pages_job.started = dt_now() + readd_pages_job.finished = None + readd_pages_job.success = None + else: + readd_pages_job = ReAddOrgPagesJob( + id=job_id, + oid=oid, + crawl_type=crawl_type, + started=dt_now(), + ) + + await self.jobs.find_one_and_update( + {"_id": job_id}, {"$set": readd_pages_job.to_dict()}, upsert=True + ) + + return job_id + # pylint: disable=broad-exception-caught + except Exception as exc: + # pylint: disable=raise-missing-from + print(f"warning: re-add org pages job could not be started: {exc}") + return None + async def job_finished( self, job_id: str, @@ -411,7 +453,11 @@ async def job_finished( async def get_background_job( self, job_id: str, oid: Optional[UUID] = None ) -> Union[ - CreateReplicaJob, DeleteReplicaJob, DeleteOrgJob, RecalculateOrgStatsJob + CreateReplicaJob, + DeleteReplicaJob, + DeleteOrgJob, + RecalculateOrgStatsJob, + ReAddOrgPagesJob, ]: """Get background job""" query: dict[str, object] = {"_id": job_id} @@ -435,6 +481,9 @@ def _get_job_by_type_from_data(self, data: dict[str, object]): if data["type"] == BgJobType.RECALCULATE_ORG_STATS: return RecalculateOrgStatsJob.from_dict(data) + if data["type"] == BgJobType.READD_ORG_PAGES: + return ReAddOrgPagesJob.from_dict(data) + return DeleteOrgJob.from_dict(data) async def list_background_jobs( @@ -575,6 +624,13 @@ async def retry_background_job( existing_job_id=job_id, ) + if job.type == BgJobType.READD_ORG_PAGES: + await self.create_re_add_org_pages_job( + 
org.id, + job.crawl_type, + existing_job_id=job_id, + ) + return {"success": True} async def retry_failed_background_jobs( diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index d913d3362b..01e700f8b5 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -1,6 +1,5 @@ """ base crawl type """ -import os from datetime import timedelta from typing import Optional, List, Union, Dict, Any, Type, TYPE_CHECKING, cast, Tuple from uuid import UUID @@ -29,6 +28,7 @@ UpdatedResponse, DeletedResponseQuota, CrawlSearchValuesResponse, + PRESIGN_DURATION_SECONDS, ) from .pagination import paginated_format, DEFAULT_PAGE_SIZE from .utils import dt_now, date_to_str @@ -47,11 +47,6 @@ CrawlConfigOps = UserManager = OrgOps = CollectionOps = PageOps = object StorageOps = EventWebhookOps = BackgroundJobOps = object -# Presign duration must be less than 604800 seconds (one week), -# so set this one minute short of a week. -PRESIGN_MINUTES_MAX = 10079 -PRESIGN_MINUTES_DEFAULT = PRESIGN_MINUTES_MAX - # ============================================================================ # pylint: disable=too-many-instance-attributes, too-many-public-methods, too-many-lines @@ -93,16 +88,8 @@ def __init__( self.background_job_ops = background_job_ops self.page_ops = cast(PageOps, None) - presign_duration_minutes = int( - os.environ.get("PRESIGN_DURATION_MINUTES") or PRESIGN_MINUTES_DEFAULT - ) - - self.presign_duration_seconds = ( - min(presign_duration_minutes, PRESIGN_MINUTES_MAX) * 60 - ) - # renew when <25% of time remaining - self.expire_at_duration_seconds = int(self.presign_duration_seconds * 0.75) + self.expire_at_duration_seconds = int(PRESIGN_DURATION_SECONDS * 0.75) def set_page_ops(self, page_ops): """set page ops reference""" @@ -336,8 +323,9 @@ async def delete_crawls( status_code=400, detail=f"Error Stopping Crawl: {exc}" ) + await self.page_ops.delete_crawl_pages(crawl_id, org.id) + if type_ == "crawl": - await self.page_ops.delete_crawl_pages(crawl_id, org.id) await self.delete_all_crawl_qa_files(crawl_id, org) crawl_size = await self._delete_crawl_files(crawl, org) @@ -382,7 +370,7 @@ async def _delete_crawl_files( size = 0 for file_ in crawl.files: size += file_.size - if not await self.storage_ops.delete_crawl_file_object(org, file_): + if not await self.storage_ops.delete_file_object(org, file_): raise HTTPException(status_code=400, detail="file_deletion_error") # Not replicating QA run WACZs yet if not isinstance(crawl, QARun): @@ -474,7 +462,7 @@ async def resolve_signed_urls( ): exp = now + delta presigned_url = await self.storage_ops.get_presigned_url( - org, file_, self.presign_duration_seconds + org, file_, PRESIGN_DURATION_SECONDS ) prefix = "files" diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 411e659ac9..d9ab766aa9 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -2,14 +2,20 @@ Collections API """ +# pylint: disable=too-many-lines + from collections import Counter from uuid import UUID, uuid4 -from typing import Optional, List, TYPE_CHECKING, cast, Dict +from typing import Optional, List, TYPE_CHECKING, cast, Dict, Tuple, Any, Union +import os +import re +import urllib.parse import asyncio import pymongo from fastapi import Depends, HTTPException, Response from fastapi.responses import StreamingResponse +from starlette.requests import Request from .pagination import DEFAULT_PAGE_SIZE, paginated_format from .models import ( @@ -29,10 +35,21 @@ EmptyResponse, 
UpdatedResponse, SuccessResponse, + AddedResponse, + DeletedResponse, CollectionSearchValuesResponse, OrgPublicCollections, PublicOrgDetails, CollAccessType, + PageUrlCount, + PageIdTimestamp, + PaginatedPageUrlCountResponse, + UpdateCollHomeUrl, + User, + ImageFile, + ImageFilePreparer, + MIN_UPLOAD_PART_SIZE, + PublicCollOut, ) from .utils import dt_now @@ -45,11 +62,14 @@ OrgOps = StorageOps = EventWebhookOps = CrawlOps = object +THUMBNAIL_MAX_SIZE = 2_000_000 + + # ============================================================================ class CollectionOps: """ops for working with named collections of crawls""" - # pylint: disable=too-many-arguments + # pylint: disable=too-many-arguments, too-many-instance-attributes, too-many-public-methods orgs: OrgOps storage_ops: StorageOps @@ -60,6 +80,7 @@ def __init__(self, mdb, storage_ops, orgs, event_webhook_ops): self.collections = mdb["collections"] self.crawls = mdb["crawls"] self.crawl_configs = mdb["crawl_configs"] + self.pages = mdb["pages"] self.crawl_ops = cast(CrawlOps, None) self.orgs = orgs @@ -70,6 +91,11 @@ def set_crawl_ops(self, ops): """set crawl ops""" self.crawl_ops = ops + def set_page_ops(self, ops): + """set page ops""" + # pylint: disable=attribute-defined-outside-init + self.page_ops = ops + async def init_index(self): """init lookup index""" await self.collections.create_index( @@ -91,8 +117,11 @@ async def add_collection(self, oid: UUID, coll_in: CollIn): oid=oid, name=coll_in.name, description=coll_in.description, + caption=coll_in.caption, modified=modified, access=coll_in.access, + defaultThumbnailName=coll_in.defaultThumbnailName, + allowPublicDownload=coll_in.allowPublicDownload, ) try: await self.collections.insert_one(coll.to_dict()) @@ -100,6 +129,7 @@ async def add_collection(self, oid: UUID, coll_in: CollIn): if crawl_ids: await self.crawl_ops.add_to_collection(crawl_ids, coll_id, org) await self.update_collection_counts_and_tags(coll_id) + await self.update_collection_dates(coll_id) asyncio.create_task( self.event_webhook_ops.create_added_to_collection_notification( crawl_ids, coll_id, org @@ -153,6 +183,7 @@ async def add_crawls_to_collection( raise HTTPException(status_code=404, detail="collection_not_found") await self.update_collection_counts_and_tags(coll_id) + await self.update_collection_dates(coll_id) asyncio.create_task( self.event_webhook_ops.create_added_to_collection_notification( @@ -160,7 +191,7 @@ async def add_crawls_to_collection( ) ) - return await self.get_collection(coll_id, org) + return await self.get_collection_out(coll_id, org) async def remove_crawls_from_collection( self, coll_id: UUID, crawl_ids: List[str], org: Organization @@ -177,6 +208,7 @@ async def remove_crawls_from_collection( raise HTTPException(status_code=404, detail="collection_not_found") await self.update_collection_counts_and_tags(coll_id) + await self.update_collection_dates(coll_id) asyncio.create_task( self.event_webhook_ops.create_removed_from_collection_notification( @@ -184,29 +216,79 @@ async def remove_crawls_from_collection( ) ) - return await self.get_collection(coll_id, org) + return await self.get_collection_out(coll_id, org) - async def get_collection( - self, coll_id: UUID, org: Organization, resources=False, public_only=False - ) -> CollOut: - """Get collection by id""" + async def get_collection_raw( + self, coll_id: UUID, public_or_unlisted_only: bool = False + ) -> Dict[str, Any]: + """Get collection by id as dict from database""" query: dict[str, object] = {"_id": coll_id} - if public_only: + 
if public_or_unlisted_only: query["access"] = {"$in": ["public", "unlisted"]} result = await self.collections.find_one(query) if not result: raise HTTPException(status_code=404, detail="collection_not_found") + return result + + async def get_collection( + self, coll_id: UUID, public_or_unlisted_only: bool = False + ) -> Collection: + """Get collection by id""" + result = await self.get_collection_raw(coll_id, public_or_unlisted_only) + return Collection.from_dict(result) + + async def get_collection_out( + self, + coll_id: UUID, + org: Organization, + resources=False, + public_or_unlisted_only=False, + ) -> CollOut: + """Get CollOut by id""" + result = await self.get_collection_raw(coll_id, public_or_unlisted_only) + if resources: - result["resources"] = await self.get_collection_crawl_resources( - coll_id, org + result["resources"] = await self.get_collection_crawl_resources(coll_id) + + thumbnail = result.get("thumbnail") + if thumbnail: + image_file = ImageFile(**thumbnail) + result["thumbnail"] = await image_file.get_image_file_out( + org, self.storage_ops ) + return CollOut.from_dict(result) + async def get_public_collection_out( + self, coll_id: UUID, org: Organization, allow_unlisted: bool = False + ) -> PublicCollOut: + """Get PublicCollOut by id""" + result = await self.get_collection_raw(coll_id) + + allowed_access = [CollAccessType.PUBLIC] + if allow_unlisted: + allowed_access.append(CollAccessType.UNLISTED) + + if result.get("access") not in allowed_access: + raise HTTPException(status_code=404, detail="collection_not_found") + + result["resources"] = await self.get_collection_crawl_resources(coll_id) + + thumbnail = result.get("thumbnail") + if thumbnail: + image_file = ImageFile(**thumbnail) + result["thumbnail"] = await image_file.get_public_image_file_out( + org, self.storage_ops + ) + + return PublicCollOut.from_dict(result) + async def list_collections( self, - oid: UUID, + org: Organization, + public_colls_out: bool = False, page_size: int = DEFAULT_PAGE_SIZE, page: int = 1, sort_by: Optional[str] = None, @@ -216,21 +298,22 @@ async def list_collections( access: Optional[str] = None, ): """List all collections for org""" - # pylint: disable=too-many-locals, duplicate-code + # pylint: disable=too-many-locals, duplicate-code, too-many-branches # Zero-index page for query page = page - 1 skip = page * page_size - match_query: dict[str, object] = {"oid": oid} + match_query: dict[str, object] = {"oid": org.id} if name: match_query["name"] = name - elif name_prefix: regex_pattern = f"^{name_prefix}" match_query["name"] = {"$regex": regex_pattern, "$options": "i"} - if access: + if public_colls_out: + match_query["access"] = CollAccessType.PUBLIC + elif access: match_query["access"] = access aggregate = [{"$match": match_query}] @@ -269,15 +352,35 @@ async def list_collections( except (IndexError, ValueError): total = 0 - collections = [CollOut.from_dict(res) for res in items] + collections: List[Union[CollOut, PublicCollOut]] = [] + + for res in items: + res["resources"] = await self.get_collection_crawl_resources(res["_id"]) + + thumbnail = res.get("thumbnail") + if thumbnail: + image_file = ImageFile(**thumbnail) + + if public_colls_out: + res["thumbnail"] = await image_file.get_public_image_file_out( + org, self.storage_ops + ) + else: + res["thumbnail"] = await image_file.get_image_file_out( + org, self.storage_ops + ) + + if public_colls_out: + collections.append(PublicCollOut.from_dict(res)) + else: + collections.append(CollOut.from_dict(res)) return collections, total 
- async def get_collection_crawl_resources(self, coll_id: UUID, org: Organization): + async def get_collection_crawl_resources(self, coll_id: UUID): """Return pre-signed resources for all collection crawl files.""" - coll = await self.get_collection(coll_id, org) - if not coll: - raise HTTPException(status_code=404, detail="collection_not_found") + # Ensure collection exists + _ = await self.get_collection_raw(coll_id) all_files = [] @@ -312,6 +415,17 @@ async def get_collection_search_values(self, org: Organization): names = [name for name in names if name] return {"names": names} + async def get_collection_crawl_ids(self, coll_id: UUID) -> List[str]: + """Return list of crawl ids in collection""" + crawl_ids = [] + async for crawl_raw in self.crawls.find( + {"collectionIds": coll_id}, projection=["_id"] + ): + crawl_id = crawl_raw.get("_id") + if crawl_id: + crawl_ids.append(crawl_id) + return crawl_ids + async def delete_collection(self, coll_id: UUID, org: Organization): """Delete collection and remove from associated crawls.""" await self.crawl_ops.remove_collection_from_all_crawls(coll_id) @@ -328,7 +442,7 @@ async def delete_collection(self, coll_id: UUID, org: Organization): async def download_collection(self, coll_id: UUID, org: Organization): """Download all WACZs in collection as streaming nested WACZ""" - coll = await self.get_collection(coll_id, org, resources=True) + coll = await self.get_collection_out(coll_id, org, resources=True) metadata = { "type": "collection", @@ -346,6 +460,15 @@ async def download_collection(self, coll_id: UUID, org: Organization): resp, headers=headers, media_type="application/wacz+zip" ) + async def recalculate_org_collection_counts_tags(self, org: Organization): + """Recalculate counts and tags for collections in org""" + collections, _ = await self.list_collections( + org, + page_size=100_000, + ) + for coll in collections: + await self.update_collection_counts_and_tags(coll.id) + async def update_collection_counts_and_tags(self, collection_id: UUID): """Set current crawl info in config when crawl begins""" crawl_count = 0 @@ -353,6 +476,9 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): total_size = 0 tags = [] + coll = await self.get_collection(collection_id) + org = await self.orgs.get_org_by_id(coll.oid) + async for crawl_raw in self.crawls.find({"collectionIds": collection_id}): crawl = BaseCrawl.from_dict(crawl_raw) if crawl.state not in SUCCESSFUL_STATES: @@ -361,8 +487,16 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): files = crawl.files or [] for file in files: total_size += file.size - if crawl.stats: - page_count += crawl.stats.done + + try: + _, crawl_pages = await self.page_ops.list_pages( + crawl.id, org, page_size=1_000_000 + ) + page_count += crawl_pages + # pylint: disable=broad-exception-caught + except Exception: + pass + if crawl.tags: tags.extend(crawl.tags) @@ -380,6 +514,55 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): }, ) + async def recalculate_org_collection_dates(self, org: Organization): + """Recalculate earliest and latest dates for collections in org""" + collections, _ = await self.list_collections( + org, + page_size=100_000, + ) + for coll in collections: + await self.update_collection_dates(coll.id) + + async def update_collection_dates(self, coll_id: UUID): + """Update collection earliest and latest dates from page timestamps""" + coll = await self.get_collection(coll_id) + crawl_ids = await self.get_collection_crawl_ids(coll_id) 
+ + earliest_ts = None + latest_ts = None + + match_query = { + "oid": coll.oid, + "crawl_id": {"$in": crawl_ids}, + "ts": {"$ne": None}, + } + + cursor = self.pages.find(match_query).sort("ts", 1).limit(1) + pages = await cursor.to_list(length=1) + try: + earliest_page = pages[0] + earliest_ts = earliest_page.get("ts") + except IndexError: + pass + + cursor = self.pages.find(match_query).sort("ts", -1).limit(1) + pages = await cursor.to_list(length=1) + try: + latest_page = pages[0] + latest_ts = latest_page.get("ts") + except IndexError: + pass + + await self.collections.find_one_and_update( + {"_id": coll_id}, + { + "$set": { + "dateEarliest": earliest_ts, + "dateLatest": latest_ts, + } + }, + ) + async def update_crawl_collections(self, crawl_id: str): """Update counts and tags for all collections in crawl""" crawl = await self.crawls.find_one({"_id": crawl_id}) @@ -398,7 +581,14 @@ async def add_successful_crawl_to_collections(self, crawl_id: str, cid: UUID): ) await self.update_crawl_collections(crawl_id) - async def get_org_public_collections(self, org_slug: str): + async def get_org_public_collections( + self, + org_slug: str, + page_size: int = DEFAULT_PAGE_SIZE, + page: int = 1, + sort_by: Optional[str] = None, + sort_direction: int = 1, + ): """List public collections for org""" try: org = await self.orgs.get_org_by_slug(org_slug) @@ -411,7 +601,12 @@ async def get_org_public_collections(self, org_slug: str): raise HTTPException(status_code=404, detail="public_profile_not_found") collections, _ = await self.list_collections( - org.id, access=CollAccessType.PUBLIC + org, + page_size=page_size, + page=page, + sort_by=sort_by, + sort_direction=sort_direction, + public_colls_out=True, ) public_org_details = PublicOrgDetails( @@ -422,10 +617,192 @@ async def get_org_public_collections(self, org_slug: str): return OrgPublicCollections(org=public_org_details, collections=collections) + async def list_urls_in_collection( + self, + coll_id: UUID, + oid: UUID, + url_prefix: Optional[str] = None, + page_size: int = DEFAULT_PAGE_SIZE, + page: int = 1, + ) -> Tuple[List[PageUrlCount], int]: + """List all URLs in collection sorted desc by snapshot count""" + # pylint: disable=duplicate-code, too-many-locals, too-many-branches, too-many-statements + # Zero-index page for query + page = page - 1 + skip = page_size * page + + crawl_ids = await self.get_collection_crawl_ids(coll_id) + + match_query: dict[str, object] = {"oid": oid, "crawl_id": {"$in": crawl_ids}} + + if url_prefix: + url_prefix = urllib.parse.unquote(url_prefix) + regex_pattern = f"^{re.escape(url_prefix)}" + match_query["url"] = {"$regex": regex_pattern, "$options": "i"} + + aggregate = [{"$match": match_query}] + + aggregate.extend( + [ + { + "$group": { + "_id": "$url", + "pages": {"$push": "$$ROOT"}, + "count": {"$sum": 1}, + }, + }, + {"$sort": {"count": -1}}, + {"$set": {"url": "$_id"}}, + { + "$facet": { + "items": [ + {"$skip": skip}, + {"$limit": page_size}, + ], + "total": [{"$count": "count"}], + } + }, + ] + ) + + # Get total + cursor = self.pages.aggregate(aggregate) + results = await cursor.to_list(length=1) + result = results[0] + items = result["items"] + + try: + total = int(result["total"][0]["count"]) + except (IndexError, ValueError): + total = 0 + + return [ + PageUrlCount( + url=data.get("url", ""), + count=data.get("count", 0), + snapshots=[ + PageIdTimestamp( + pageId=p["_id"], ts=p.get("ts"), status=p.get("status", 200) + ) + for p in data.get("pages", []) + ], + ) + for data in items + ], total + + async 
def set_home_url( + self, coll_id: UUID, update: UpdateCollHomeUrl, org: Organization + ) -> Dict[str, bool]: + """Set home URL for collection and save thumbnail to database""" + if update.pageId: + page = await self.page_ops.get_page(update.pageId, org.id) + update_query = { + "homeUrl": page.url, + "homeUrlTs": page.ts, + "homeUrlPageId": page.id, + } + else: + update_query = { + "homeUrl": None, + "homeUrlTs": None, + "homeUrlPageId": None, + } + + await self.collections.find_one_and_update( + {"_id": coll_id, "oid": org.id}, + {"$set": update_query}, + ) + + return {"updated": True} + + async def upload_thumbnail_stream( + self, stream, filename: str, coll_id: UUID, org: Organization, user: User + ) -> Dict[str, bool]: + """Upload file as stream to use as collection thumbnail""" + coll = await self.get_collection(coll_id) + + _, extension = os.path.splitext(filename) + + image_filename = f"thumbnail-{str(coll_id)}{extension}" + + prefix = org.storage.get_storage_extra_path(str(org.id)) + "images/" + + file_prep = ImageFilePreparer( + prefix, + image_filename, + original_filename=filename, + user=user, + created=dt_now(), + ) + + async def stream_iter(): + """iterate over each chunk and compute and digest + total size""" + async for chunk in stream: + file_prep.add_chunk(chunk) + yield chunk + + print("Collection thumbnail stream upload starting", flush=True) + + if not await self.storage_ops.do_upload_multipart( + org, + file_prep.upload_name, + stream_iter(), + MIN_UPLOAD_PART_SIZE, + ): + print("Collection thumbnail stream upload failed", flush=True) + raise HTTPException(status_code=400, detail="upload_failed") + + print("Collection thumbnail stream upload complete", flush=True) + + thumbnail_file = file_prep.get_image_file(org.storage) + + if thumbnail_file.size > THUMBNAIL_MAX_SIZE: + print( + "Collection thumbnail stream upload failed: max size (2 MB) exceeded", + flush=True, + ) + await self.storage_ops.delete_file_object(org, thumbnail_file) + raise HTTPException(status_code=400, detail="upload_failed") + + if coll.thumbnail: + if not await self.storage_ops.delete_file_object(org, coll.thumbnail): + print( + f"Unable to delete previous collection thumbnail: {coll.thumbnail.filename}" + ) + + coll.thumbnail = thumbnail_file + + # Update entire document to avoid bson.errors.InvalidDocument exception + await self.collections.find_one_and_update( + {"_id": coll_id, "oid": org.id}, + {"$set": coll.to_dict()}, + ) + + return {"added": True} + + async def delete_thumbnail(self, coll_id: UUID, org: Organization): + """Delete collection thumbnail""" + coll = await self.get_collection(coll_id) + + if not coll.thumbnail: + raise HTTPException(status_code=404, detail="thumbnail_not_found") + + if not await self.storage_ops.delete_file_object(org, coll.thumbnail): + print(f"Unable to delete collection thumbnail: {coll.thumbnail.filename}") + raise HTTPException(status_code=400, detail="file_deletion_error") + + # Delete from database + await self.collections.find_one_and_update( + {"_id": coll_id, "oid": org.id}, + {"$set": {"thumbnail": None}}, + ) + + return {"deleted": True} + # ============================================================================ # pylint: disable=too-many-locals -def init_collections_api(app, mdb, orgs, storage_ops, event_webhook_ops): +def init_collections_api(app, mdb, orgs, storage_ops, event_webhook_ops, user_dep): """init collections api""" # pylint: disable=invalid-name, unused-argument, too-many-arguments @@ -461,7 +838,7 @@ async def list_collection_all( 
access: Optional[str] = None, ): collections, total = await colls.list_collections( - org.id, + org, page_size=pageSize, page=page, sort_by=sortBy, @@ -480,10 +857,10 @@ async def list_collection_all( async def get_collection_all(org: Organization = Depends(org_viewer_dep)): results = {} try: - all_collections, _ = await colls.list_collections(org.id, page_size=10_000) + all_collections, _ = await colls.list_collections(org, page_size=10_000) for collection in all_collections: results[collection.name] = await colls.get_collection_crawl_resources( - collection.id, org + collection.id ) except Exception as exc: # pylint: disable=raise-missing-from @@ -511,7 +888,7 @@ async def get_collection_search_values( async def get_collection( coll_id: UUID, org: Organization = Depends(org_viewer_dep) ): - return await colls.get_collection(coll_id, org) + return await colls.get_collection_out(coll_id, org) @app.get( "/orgs/{oid}/collections/{coll_id}/replay.json", @@ -521,7 +898,7 @@ async def get_collection( async def get_collection_replay( coll_id: UUID, org: Organization = Depends(org_viewer_dep) ): - return await colls.get_collection(coll_id, org, resources=True) + return await colls.get_collection_out(coll_id, org, resources=True) @app.get( "/orgs/{oid}/collections/{coll_id}/public/replay.json", @@ -533,8 +910,8 @@ async def get_collection_public_replay( coll_id: UUID, org: Organization = Depends(org_public), ): - coll = await colls.get_collection( - coll_id, org, resources=True, public_only=True + coll = await colls.get_collection_out( + coll_id, org, resources=True, public_or_unlisted_only=True ) response.headers["Access-Control-Allow-Origin"] = "*" response.headers["Access-Control-Allow-Headers"] = "*" @@ -610,11 +987,126 @@ async def download_collection( return await colls.download_collection(coll_id, org) @app.get( - "/public-collections/{org_slug}", - tags=["collections"], + "/public/orgs/{org_slug}/collections", + tags=["collections", "public"], response_model=OrgPublicCollections, ) - async def get_org_public_collections(org_slug: str): - return await colls.get_org_public_collections(org_slug) + async def get_org_public_collections( + org_slug: str, + pageSize: int = DEFAULT_PAGE_SIZE, + page: int = 1, + sortBy: Optional[str] = None, + sortDirection: int = 1, + ): + return await colls.get_org_public_collections( + org_slug, + page_size=pageSize, + page=page, + sort_by=sortBy, + sort_direction=sortDirection, + ) + + @app.get( + "/public/orgs/{org_slug}/collections/{coll_id}", + tags=["collections", "public"], + response_model=PublicCollOut, + ) + async def get_public_collection( + org_slug: str, + coll_id: UUID, + ): + try: + org = await colls.orgs.get_org_by_slug(org_slug) + # pylint: disable=broad-exception-caught + except Exception: + # pylint: disable=raise-missing-from + raise HTTPException(status_code=404, detail="collection_not_found") + + return await colls.get_public_collection_out(coll_id, org, allow_unlisted=True) + + @app.get( + "/public/orgs/{org_slug}/collections/{coll_id}/download", + tags=["collections", "public"], + response_model=bytes, + ) + async def download_public_collection( + org_slug: str, + coll_id: UUID, + ): + try: + org = await colls.orgs.get_org_by_slug(org_slug) + # pylint: disable=broad-exception-caught + except Exception: + # pylint: disable=raise-missing-from + raise HTTPException(status_code=404, detail="collection_not_found") + + # Make sure collection exists and is public/unlisted + coll = await colls.get_collection(coll_id, public_or_unlisted_only=True) 
+ + if coll.allowPublicDownload is False: + raise HTTPException(status_code=403, detail="not_allowed") + + return await colls.download_collection(coll_id, org) + + @app.get( + "/orgs/{oid}/collections/{coll_id}/urls", + tags=["collections"], + response_model=PaginatedPageUrlCountResponse, + ) + async def get_collection_url_list( + coll_id: UUID, + oid: UUID, + urlPrefix: Optional[str] = None, + pageSize: int = DEFAULT_PAGE_SIZE, + page: int = 1, + ): + """Retrieve paginated list of urls in collection sorted by snapshot count""" + pages, total = await colls.list_urls_in_collection( + coll_id=coll_id, + oid=oid, + url_prefix=urlPrefix, + page_size=pageSize, + page=page, + ) + return paginated_format(pages, total, page, pageSize) + + @app.post( + "/orgs/{oid}/collections/{coll_id}/home-url", + tags=["collections"], + response_model=UpdatedResponse, + ) + async def set_collection_home_url( + update: UpdateCollHomeUrl, + coll_id: UUID, + org: Organization = Depends(org_crawl_dep), + ): + return await colls.set_home_url(coll_id, update, org) + + @app.put( + "/orgs/{oid}/collections/{coll_id}/thumbnail", + tags=["collections"], + response_model=AddedResponse, + ) + async def upload_thumbnail_stream( + request: Request, + filename: str, + coll_id: UUID, + org: Organization = Depends(org_crawl_dep), + user: User = Depends(user_dep), + ): + return await colls.upload_thumbnail_stream( + request.stream(), filename, coll_id, org, user + ) + + @app.delete( + "/orgs/{oid}/collections/{coll_id}/thumbnail", + tags=["collections"], + response_model=DeletedResponse, + ) + async def delete_thumbnail_stream( + coll_id: UUID, + org: Organization = Depends(org_crawl_dep), + ): + return await colls.delete_thumbnail(coll_id, org) return colls diff --git a/backend/btrixcloud/crawlmanager.py b/backend/btrixcloud/crawlmanager.py index 7921ca4856..6810929f51 100644 --- a/backend/btrixcloud/crawlmanager.py +++ b/backend/btrixcloud/crawlmanager.py @@ -115,8 +115,6 @@ async def run_replica_job( async def run_delete_org_job( self, oid: str, - backend_image: str, - pull_policy: str, existing_job_id: Optional[str] = None, ) -> str: """run job to delete org and all of its data""" @@ -127,14 +125,12 @@ async def run_delete_org_job( job_id = f"delete-org-{oid}-{secrets.token_hex(5)}" return await self._run_bg_job_with_ops_classes( - oid, backend_image, pull_policy, job_id, job_type=BgJobType.DELETE_ORG.value + oid, job_id, job_type=BgJobType.DELETE_ORG.value ) async def run_recalculate_org_stats_job( self, oid: str, - backend_image: str, - pull_policy: str, existing_job_id: Optional[str] = None, ) -> str: """run job to recalculate storage stats for the org""" @@ -146,19 +142,32 @@ async def run_recalculate_org_stats_job( return await self._run_bg_job_with_ops_classes( oid, - backend_image, - pull_policy, job_id, job_type=BgJobType.RECALCULATE_ORG_STATS.value, ) - async def _run_bg_job_with_ops_classes( + async def run_re_add_org_pages_job( self, oid: str, - backend_image: str, - pull_policy: str, - job_id: str, - job_type: str, + crawl_type: Optional[str] = None, + existing_job_id: Optional[str] = None, + ) -> str: + """run job to recalculate storage stats for the org""" + + if existing_job_id: + job_id = existing_job_id + else: + job_id = f"org-pages-{oid}-{secrets.token_hex(5)}" + + return await self._run_bg_job_with_ops_classes( + oid, + job_id, + job_type=BgJobType.READD_ORG_PAGES.value, + crawl_type=crawl_type, + ) + + async def _run_bg_job_with_ops_classes( + self, oid: str, job_id: str, job_type: str, **kwargs ) -> str: 
"""run background job with access to ops classes""" @@ -166,8 +175,9 @@ async def _run_bg_job_with_ops_classes( "id": job_id, "oid": oid, "job_type": job_type, - "backend_image": backend_image, - "pull_policy": pull_policy, + "backend_image": os.environ.get("BACKEND_IMAGE", ""), + "pull_policy": os.environ.get("BACKEND_IMAGE_PULL_POLICY", ""), + **kwargs, } data = self.templates.env.get_template("background_job.yaml").render(params) diff --git a/backend/btrixcloud/crawls.py b/backend/btrixcloud/crawls.py index 5a0994fe70..539c408ee6 100644 --- a/backend/btrixcloud/crawls.py +++ b/backend/btrixcloud/crawls.py @@ -918,7 +918,7 @@ async def delete_crawl_qa_run_files( """delete crawl qa wacz files""" qa_run = await self.get_qa_run(crawl_id, qa_run_id, org) for file_ in qa_run.files: - if not await self.storage_ops.delete_crawl_file_object(org, file_): + if not await self.storage_ops.delete_file_object(org, file_): raise HTTPException(status_code=400, detail="file_deletion_error") # Not replicating QA run WACZs yet # await self.background_job_ops.create_delete_replica_jobs( diff --git a/backend/btrixcloud/db.py b/backend/btrixcloud/db.py index f453442191..0723258e40 100644 --- a/backend/btrixcloud/db.py +++ b/backend/btrixcloud/db.py @@ -17,7 +17,7 @@ from .migrations import BaseMigration -CURR_DB_VERSION = "0036" +CURR_DB_VERSION = "0037" # ============================================================================ @@ -82,6 +82,7 @@ async def update_and_prepare_db( invite_ops, storage_ops, page_ops, + background_job_ops, db_inited, ): """Prepare database for application. @@ -94,7 +95,7 @@ async def update_and_prepare_db( """ await ping_db(mdb) print("Database setup started", flush=True) - if await run_db_migrations(mdb, user_manager, page_ops): + if await run_db_migrations(mdb, user_manager, background_job_ops, page_ops): await drop_indexes(mdb) await create_indexes( org_ops, @@ -113,7 +114,7 @@ async def update_and_prepare_db( # ============================================================================ -async def run_db_migrations(mdb, user_manager, page_ops): +async def run_db_migrations(mdb, user_manager, background_job_ops, page_ops): """Run database migrations.""" # if first run, just set version and exit @@ -145,7 +146,9 @@ async def run_db_migrations(mdb, user_manager, page_ops): assert spec.loader migration_module = importlib.util.module_from_spec(spec) spec.loader.exec_module(migration_module) - migration = migration_module.Migration(mdb, page_ops=page_ops) + migration = migration_module.Migration( + mdb, background_job_ops=background_job_ops, page_ops=page_ops + ) if await migration.run(): migrations_run = True except ImportError as err: diff --git a/backend/btrixcloud/main.py b/backend/btrixcloud/main.py index f6b678cb82..927a03dcb8 100644 --- a/backend/btrixcloud/main.py +++ b/backend/btrixcloud/main.py @@ -223,7 +223,9 @@ def main() -> None: profiles, ) - coll_ops = init_collections_api(app, mdb, org_ops, storage_ops, event_webhook_ops) + coll_ops = init_collections_api( + app, mdb, org_ops, storage_ops, event_webhook_ops, current_active_user + ) base_crawl_init = ( app, @@ -243,14 +245,15 @@ def main() -> None: crawls = init_crawls_api(crawl_manager, *base_crawl_init) + upload_ops = init_uploads_api(*base_crawl_init) + page_ops = init_pages_api( - app, mdb, crawls, org_ops, storage_ops, current_active_user + app, mdb, crawls, org_ops, storage_ops, background_job_ops, current_active_user ) base_crawl_ops.set_page_ops(page_ops) crawls.set_page_ops(page_ops) - - 
init_uploads_api(*base_crawl_init) + upload_ops.set_page_ops(page_ops) org_ops.set_ops(base_crawl_ops, profiles, coll_ops, background_job_ops) @@ -260,6 +263,8 @@ def main() -> None: crawl_config_ops.set_coll_ops(coll_ops) + coll_ops.set_page_ops(page_ops) + # run only in first worker if run_once_lock("btrix-init-db"): asyncio.create_task( @@ -273,6 +278,7 @@ def main() -> None: invites, storage_ops, page_ops, + background_job_ops, db_inited, ) ) diff --git a/backend/btrixcloud/main_bg.py b/backend/btrixcloud/main_bg.py index 2fba05e53f..709139d8d2 100644 --- a/backend/btrixcloud/main_bg.py +++ b/backend/btrixcloud/main_bg.py @@ -12,6 +12,7 @@ job_type = os.environ.get("BG_JOB_TYPE") oid = os.environ.get("OID") +crawl_type = os.environ.get("CRAWL_TYPE") # ============================================================================ @@ -27,7 +28,7 @@ async def main(): ) return 1 - (org_ops, _, _, _, _, _, _, _, _, _, user_manager) = init_ops() + (org_ops, _, _, _, _, page_ops, coll_ops, _, _, _, _, user_manager) = init_ops() if not oid: print("Org id missing, quitting") @@ -57,6 +58,17 @@ async def main(): traceback.print_exc() return 1 + if job_type == BgJobType.READD_ORG_PAGES: + try: + await page_ops.re_add_all_crawl_pages(org, crawl_type=crawl_type) + await coll_ops.recalculate_org_collection_dates(org) + await coll_ops.recalculate_org_collection_counts_tags(org) + return 0 + # pylint: disable=broad-exception-caught + except Exception: + traceback.print_exc() + return 1 + print(f"Provided job type {job_type} not currently supported") return 1 diff --git a/backend/btrixcloud/main_op.py b/backend/btrixcloud/main_op.py index a6f6654be3..af7a2d0956 100644 --- a/backend/btrixcloud/main_op.py +++ b/backend/btrixcloud/main_op.py @@ -31,6 +31,7 @@ def main(): crawl_config_ops, _, crawl_ops, + _, page_ops, coll_ops, _, diff --git a/backend/btrixcloud/migrations/migration_0037_upload_pages.py b/backend/btrixcloud/migrations/migration_0037_upload_pages.py new file mode 100644 index 0000000000..62bfe98237 --- /dev/null +++ b/backend/btrixcloud/migrations/migration_0037_upload_pages.py @@ -0,0 +1,72 @@ +""" +Migration 0037 -- upload pages +""" + +from uuid import UUID + +from btrixcloud.migrations import BaseMigration + + +MIGRATION_VERSION = "0037" + + +class Migration(BaseMigration): + """Migration class.""" + + # pylint: disable=unused-argument + def __init__(self, mdb, **kwargs): + super().__init__(mdb, migration_version=MIGRATION_VERSION) + + self.background_job_ops = kwargs.get("background_job_ops") + self.page_ops = kwargs.get("page_ops") + + async def org_upload_pages_already_added(self, oid: UUID) -> bool: + """Check if upload pages have already been added for this org""" + if self.page_ops is None: + print( + f"page_ops missing, assuming pages need to be added for org {oid}", + flush=True, + ) + return False + + mdb_crawls = self.mdb["crawls"] + async for upload in mdb_crawls.find({"oid": oid, "type": "upload"}): + upload_id = upload["_id"] + _, total = await self.page_ops.list_pages(upload_id) + if total > 0: + return True + return False + + async def migrate_up(self): + """Perform migration up. 
+ + Start background jobs to parse uploads and add their pages to db + """ + if self.background_job_ops is None: + print( + "Unable to start background job, missing background_job_ops", flush=True + ) + return + + mdb_orgs = self.mdb["organizations"] + async for org in mdb_orgs.find(): + oid = org["_id"] + + pages_already_added = await self.org_upload_pages_already_added(oid) + + if pages_already_added: + print( + f"Skipping org {oid}, upload pages already added to db", flush=True + ) + continue + + try: + await self.background_job_ops.create_re_add_org_pages_job( + oid, crawl_type="upload" + ) + # pylint: disable=broad-exception-caught + except Exception as err: + print( + f"Error starting background job to add upload pges to org {oid}: {err}", + flush=True, + ) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 93e708c4d0..5517067948 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -5,6 +5,9 @@ from datetime import datetime from enum import Enum, IntEnum from uuid import UUID +import base64 +import hashlib +import mimetypes import os from typing import Optional, List, Dict, Union, Literal, Any, get_args @@ -21,6 +24,7 @@ BeforeValidator, TypeAdapter, ) +from pathvalidate import sanitize_filename # from fastapi_users import models as fastapi_users_models @@ -29,6 +33,20 @@ # crawl scale for constraint MAX_CRAWL_SCALE = int(os.environ.get("MAX_CRAWL_SCALE", 3)) +# Presign duration must be less than 604800 seconds (one week), +# so set this one minute short of a week +PRESIGN_MINUTES_MAX = 10079 +PRESIGN_MINUTES_DEFAULT = PRESIGN_MINUTES_MAX + +# Expire duration seconds for presigned urls +PRESIGN_DURATION_MINUTES = int( + os.environ.get("PRESIGN_DURATION_MINUTES") or PRESIGN_MINUTES_DEFAULT +) +PRESIGN_DURATION_SECONDS = min(PRESIGN_DURATION_MINUTES, PRESIGN_MINUTES_MAX) * 60 + +# Minimum part size for file uploads +MIN_UPLOAD_PART_SIZE = 10000000 + # annotated types # ============================================================================ @@ -779,6 +797,9 @@ class BaseCrawl(CoreCrawlable, BaseMongoModel): reviewStatus: ReviewStatus = None + filePageCount: Optional[int] = 0 + errorPageCount: Optional[int] = 0 + # ============================================================================ class CollIdName(BaseModel): @@ -995,9 +1016,6 @@ class Crawl(BaseCrawl, CrawlConfigCore): qa: Optional[QARun] = None qaFinished: Optional[Dict[str, QARun]] = {} - filePageCount: Optional[int] = 0 - errorPageCount: Optional[int] = 0 - # ============================================================================ class CrawlCompleteIn(BaseModel): @@ -1050,6 +1068,155 @@ class UpdateUpload(UpdateCrawl): """Update modal that also includes name""" +# ============================================================================ +class FilePreparer: + """wrapper to compute digest / name for streaming upload""" + + def __init__(self, prefix, filename): + self.upload_size = 0 + self.upload_hasher = hashlib.sha256() + self.upload_name = prefix + self.prepare_filename(filename) + + def add_chunk(self, chunk): + """add chunk for file""" + self.upload_size += len(chunk) + self.upload_hasher.update(chunk) + + def get_crawl_file(self, storage: StorageRef): + """get crawl file""" + return CrawlFile( + filename=self.upload_name, + hash=self.upload_hasher.hexdigest(), + size=self.upload_size, + storage=storage, + ) + + def prepare_filename(self, filename): + """prepare filename by sanitizing and adding extra string + to avoid duplicates""" + name = 
sanitize_filename(filename.rsplit("/", 1)[-1]) + parts = name.split(".") + randstr = base64.b32encode(os.urandom(5)).lower() + parts[0] += "-" + randstr.decode("utf-8") + return ".".join(parts) + + +# ============================================================================ + +### USER-UPLOADED IMAGES ### + + +# ============================================================================ +class ImageFileOut(BaseModel): + """output for user-upload imaged file (conformance to Data Resource Spec)""" + + name: str + path: str + hash: str + size: int + + originalFilename: str + mime: str + userid: UUID + userName: str + created: datetime + + +# ============================================================================ +class PublicImageFileOut(BaseModel): + """public output for user-upload imaged file (conformance to Data Resource Spec)""" + + name: str + path: str + hash: str + size: int + + mime: str + + +# ============================================================================ +class ImageFile(BaseFile): + """User-uploaded image file""" + + originalFilename: str + mime: str + userid: UUID + userName: str + created: datetime + + async def get_image_file_out(self, org, storage_ops) -> ImageFileOut: + """Get ImageFileOut with new presigned url""" + presigned_url = await storage_ops.get_presigned_url( + org, self, PRESIGN_DURATION_SECONDS + ) + + return ImageFileOut( + name=self.filename, + path=presigned_url or "", + hash=self.hash, + size=self.size, + originalFilename=self.originalFilename, + mime=self.mime, + userid=self.userid, + userName=self.userName, + created=self.created, + ) + + async def get_public_image_file_out(self, org, storage_ops) -> PublicImageFileOut: + """Get PublicImageFileOut with new presigned url""" + presigned_url = await storage_ops.get_presigned_url( + org, self, PRESIGN_DURATION_SECONDS + ) + + return PublicImageFileOut( + name=self.filename, + path=presigned_url or "", + hash=self.hash, + size=self.size, + mime=self.mime, + ) + + +# ============================================================================ +class ImageFilePreparer(FilePreparer): + """Wrapper for user image streaming uploads""" + + # pylint: disable=too-many-arguments, too-many-function-args + + def __init__( + self, + prefix, + filename, + original_filename: str, + user: User, + created: datetime, + ): + super().__init__(prefix, filename) + + self.original_filename = original_filename + self.mime, _ = mimetypes.guess_type(original_filename) or ("image/jpeg", None) + self.userid = user.id + self.user_name = user.name + self.created = created + + def get_image_file( + self, + storage: StorageRef, + ) -> ImageFile: + """get user-uploaded image file""" + return ImageFile( + filename=self.upload_name, + hash=self.upload_hasher.hexdigest(), + size=self.upload_size, + storage=storage, + originalFilename=self.original_filename, + mime=self.mime, + userid=self.userid, + userName=self.user_name, + created=self.created, + ) + + # ============================================================================ ### COLLECTIONS ### @@ -1071,17 +1238,30 @@ class Collection(BaseMongoModel): name: str = Field(..., min_length=1) oid: UUID description: Optional[str] = None + caption: Optional[str] = None modified: Optional[datetime] = None crawlCount: Optional[int] = 0 pageCount: Optional[int] = 0 totalSize: Optional[int] = 0 + dateEarliest: Optional[datetime] = None + dateLatest: Optional[datetime] = None + # Sorted by count, descending tags: Optional[List[str]] = [] access: CollAccessType = 
CollAccessType.PRIVATE + homeUrl: Optional[AnyHttpUrl] = None + homeUrlTs: Optional[datetime] = None + homeUrlPageId: Optional[UUID] = None + + thumbnail: Optional[ImageFile] = None + defaultThumbnailName: Optional[str] = None + + allowPublicDownload: Optional[bool] = True + # ============================================================================ class CollIn(BaseModel): @@ -1089,16 +1269,74 @@ class CollIn(BaseModel): name: str = Field(..., min_length=1) description: Optional[str] = None + caption: Optional[str] = None crawlIds: Optional[List[str]] = [] access: CollAccessType = CollAccessType.PRIVATE + defaultThumbnailName: Optional[str] = None + allowPublicDownload: bool = True + # ============================================================================ -class CollOut(Collection): +class CollOut(BaseMongoModel): """Collection output model with annotations.""" + name: str + oid: UUID + description: Optional[str] = None + caption: Optional[str] = None + modified: Optional[datetime] = None + + crawlCount: Optional[int] = 0 + pageCount: Optional[int] = 0 + totalSize: Optional[int] = 0 + + dateEarliest: Optional[datetime] = None + dateLatest: Optional[datetime] = None + + # Sorted by count, descending + tags: Optional[List[str]] = [] + + access: CollAccessType = CollAccessType.PRIVATE + + homeUrl: Optional[AnyHttpUrl] = None + homeUrlTs: Optional[datetime] = None + homeUrlPageId: Optional[UUID] = None + + resources: List[CrawlFileOut] = [] + thumbnail: Optional[ImageFileOut] = None + defaultThumbnailName: Optional[str] = None + + allowPublicDownload: bool = True + + +# ============================================================================ +class PublicCollOut(BaseMongoModel): + """Collection output model with annotations.""" + + name: str + oid: UUID + description: Optional[str] = None + caption: Optional[str] = None + + crawlCount: Optional[int] = 0 + pageCount: Optional[int] = 0 + totalSize: Optional[int] = 0 + + dateEarliest: Optional[datetime] = None + dateLatest: Optional[datetime] = None + + access: CollAccessType = CollAccessType.PUBLIC + + homeUrl: Optional[AnyHttpUrl] = None + homeUrlTs: Optional[datetime] = None + resources: List[CrawlFileOut] = [] + thumbnail: Optional[PublicImageFileOut] = None + defaultThumbnailName: Optional[str] = None + + allowPublicDownload: bool = True # ============================================================================ @@ -1107,7 +1345,17 @@ class UpdateColl(BaseModel): name: Optional[str] = None description: Optional[str] = None + caption: Optional[str] = None access: Optional[CollAccessType] = None + defaultThumbnailName: Optional[str] = None + allowPublicDownload: Optional[bool] = None + + +# ============================================================================ +class UpdateCollHomeUrl(BaseModel): + """Update home url for collection""" + + pageId: Optional[UUID] = None # ============================================================================ @@ -1167,7 +1415,7 @@ class OrgPublicCollections(BaseModel): org: PublicOrgDetails - collections: List[CollOut] = [] + collections: List[PublicCollOut] = [] # ============================================================================ @@ -2067,6 +2315,7 @@ class BgJobType(str, Enum): DELETE_REPLICA = "delete-replica" DELETE_ORG = "delete-org" RECALCULATE_ORG_STATS = "recalculate-org-stats" + READD_ORG_PAGES = "readd-org-pages" # ============================================================================ @@ -2119,6 +2368,14 @@ class RecalculateOrgStatsJob(BackgroundJob): type: 
Literal[BgJobType.RECALCULATE_ORG_STATS] = BgJobType.RECALCULATE_ORG_STATS +# ============================================================================ +class ReAddOrgPagesJob(BackgroundJob): + """Model for tracking jobs to readd an org's pages""" + + type: Literal[BgJobType.READD_ORG_PAGES] = BgJobType.READD_ORG_PAGES + crawl_type: Optional[str] = None + + # ============================================================================ # Union of all job types, for response model @@ -2129,6 +2386,7 @@ class RecalculateOrgStatsJob(BackgroundJob): BackgroundJob, DeleteOrgJob, RecalculateOrgStatsJob, + ReAddOrgPagesJob, ] ] @@ -2240,7 +2498,7 @@ class PageWithAllQA(Page): class PageOut(Page): """Model for pages output, no QA""" - status: Optional[int] = 200 + status: int = 200 # ============================================================================ @@ -2266,6 +2524,24 @@ class PageNoteUpdatedResponse(BaseModel): data: PageNote +# ============================================================================ +class PageIdTimestamp(BaseModel): + """Simplified model for page info to include in PageUrlCount""" + + pageId: UUID + ts: Optional[datetime] = None + status: int = 200 + + +# ============================================================================ +class PageUrlCount(BaseModel): + """Model for counting pages by URL""" + + url: AnyHttpUrl + count: int = 0 + snapshots: List[PageIdTimestamp] = [] + + # ============================================================================ ### GENERIC RESPONSE MODELS ### @@ -2512,3 +2788,10 @@ class PaginatedUserEmailsResponse(PaginatedResponse): """Response model for user emails with org info""" items: List[UserEmailWithOrgInfo] + + +# ============================================================================ +class PaginatedPageUrlCountResponse(PaginatedResponse): + """Response model for page count by url""" + + items: List[PageUrlCount] diff --git a/backend/btrixcloud/ops.py b/backend/btrixcloud/ops.py index 23629de2aa..bee24d00c5 100644 --- a/backend/btrixcloud/ops.py +++ b/backend/btrixcloud/ops.py @@ -16,6 +16,7 @@ from .pages import PageOps from .profiles import ProfileOps from .storages import StorageOps +from .uploads import UploadOps from .users import UserManager from .webhooks import EventWebhookOps @@ -26,6 +27,7 @@ def init_ops() -> Tuple[ CrawlConfigOps, BaseCrawlOps, CrawlOps, + UploadOps, PageOps, CollectionOps, ProfileOps, @@ -70,7 +72,7 @@ def init_ops() -> Tuple[ coll_ops = CollectionOps(mdb, crawl_manager, org_ops, event_webhook_ops) - base_crawl_ops = BaseCrawlOps( + base_crawl_init = ( mdb, user_manager, org_ops, @@ -81,23 +83,17 @@ def init_ops() -> Tuple[ background_job_ops, ) - crawl_ops = CrawlOps( - crawl_manager, - mdb, - user_manager, - org_ops, - crawl_config_ops, - coll_ops, - storage_ops, - event_webhook_ops, - background_job_ops, - ) + base_crawl_ops = BaseCrawlOps(*base_crawl_init) - page_ops = PageOps(mdb, crawl_ops, org_ops, storage_ops) + crawl_ops = CrawlOps(crawl_manager, *base_crawl_init) - base_crawl_ops.set_page_ops(page_ops) + upload_ops = UploadOps(*base_crawl_init) + page_ops = PageOps(mdb, crawl_ops, org_ops, storage_ops, background_job_ops) + + base_crawl_ops.set_page_ops(page_ops) crawl_ops.set_page_ops(page_ops) + upload_ops.set_page_ops(page_ops) background_job_ops.set_ops(crawl_ops, profile_ops) @@ -109,11 +105,14 @@ def init_ops() -> Tuple[ crawl_config_ops.set_coll_ops(coll_ops) + coll_ops.set_page_ops(page_ops) + return ( org_ops, crawl_config_ops, base_crawl_ops, crawl_ops, + 
upload_ops, page_ops, coll_ops, profile_ops, diff --git a/backend/btrixcloud/pages.py b/backend/btrixcloud/pages.py index a980567c49..251d959be1 100644 --- a/backend/btrixcloud/pages.py +++ b/backend/btrixcloud/pages.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Optional, Tuple, List, Dict, Any, Union from uuid import UUID, uuid4 -from fastapi import Depends, HTTPException +from fastapi import Depends, HTTPException, Request import pymongo from .models import ( @@ -24,6 +24,7 @@ PageNoteEdit, PageNoteDelete, QARunBucketStats, + StartedResponse, StartedResponseBool, UpdatedResponse, DeletedResponse, @@ -34,11 +35,12 @@ from .utils import str_to_date, str_list_to_bools, dt_now if TYPE_CHECKING: + from .background_jobs import BackgroundJobOps from .crawls import CrawlOps from .orgs import OrgOps from .storages import StorageOps else: - CrawlOps = StorageOps = OrgOps = object + CrawlOps = StorageOps = OrgOps = BackgroundJobOps = object # ============================================================================ @@ -49,18 +51,24 @@ class PageOps: crawl_ops: CrawlOps org_ops: OrgOps storage_ops: StorageOps + background_job_ops: BackgroundJobOps - def __init__(self, mdb, crawl_ops, org_ops, storage_ops): + def __init__(self, mdb, crawl_ops, org_ops, storage_ops, background_job_ops): self.pages = mdb["pages"] self.crawls = mdb["crawls"] self.crawl_ops = crawl_ops self.org_ops = org_ops self.storage_ops = storage_ops + self.background_job_ops = background_job_ops async def init_index(self): """init index for pages db collection""" await self.pages.create_index([("crawl_id", pymongo.HASHED)]) + async def set_ops(self, background_job_ops: BackgroundJobOps): + """Set ops classes as needed""" + self.background_job_ops = background_job_ops + async def add_crawl_pages_to_db_from_wacz(self, crawl_id: str, batch_size=100): """Add pages to database from WACZ files""" pages_buffer: List[Page] = [] @@ -94,9 +102,14 @@ def _get_page_from_dict( self, page_dict: Dict[str, Any], crawl_id: str, oid: UUID ) -> Page: """Return Page object from dict""" - page_id = page_dict.get("id") + page_id = page_dict.get("id", "") if not page_id: - print(f'Page {page_dict.get("url")} has no id - assigning UUID', flush=True) + page_id = uuid4() + + try: + UUID(page_id) + except ValueError: + page_id = uuid4() status = page_dict.get("status") if not status and page_dict.get("loadState"): @@ -199,10 +212,7 @@ async def update_crawl_file_and_error_counts( inc_query["errorPageCount"] = error_count await self.crawls.find_one_and_update( - { - "_id": crawl_id, - "type": "crawl", - }, + {"_id": crawl_id}, {"$inc": inc_query}, ) @@ -554,13 +564,17 @@ async def re_add_crawl_pages(self, crawl_id: str, oid: UUID): print(f"Deleted pages for crawl {crawl_id}", flush=True) await self.add_crawl_pages_to_db_from_wacz(crawl_id) - async def re_add_all_crawl_pages(self, oid: UUID): - """Re-add pages for all crawls in org""" - crawl_ids = await self.crawls.distinct( - "_id", {"type": "crawl", "finished": {"$ne": None}} - ) + async def re_add_all_crawl_pages( + self, org: Organization, crawl_type: Optional[str] = None + ): + """Re-add pages for all crawls and uploads in org""" + match_query: Dict[str, object] = {"finished": {"$ne": None}} + if crawl_type in ("crawl", "upload"): + match_query["type"] = crawl_type + + crawl_ids = await self.crawls.distinct("_id", match_query) for crawl_id in crawl_ids: - await self.re_add_crawl_pages(crawl_id, oid) + await self.re_add_crawl_pages(crawl_id, org.id) async def get_qa_run_aggregate_counts( self, 
@@ -630,47 +644,102 @@ async def get_qa_run_aggregate_counts( return sorted(return_data, key=lambda bucket: bucket.lowerBoundary) + def get_crawl_type_from_pages_route(self, request: Request): + """Get crawl type to filter on from request route""" + crawl_type = None + + try: + route_path = request.scope["route"].path + type_path = route_path.split("/")[4] + + if type_path == "uploads": + crawl_type = "upload" + if type_path == "crawls": + crawl_type = "crawl" + except (IndexError, AttributeError): + pass + + return crawl_type + # ============================================================================ # pylint: disable=too-many-arguments, too-many-locals, invalid-name, fixme -def init_pages_api(app, mdb, crawl_ops, org_ops, storage_ops, user_dep): +def init_pages_api( + app, mdb, crawl_ops, org_ops, storage_ops, background_job_ops, user_dep +): """init pages API""" # pylint: disable=invalid-name - ops = PageOps(mdb, crawl_ops, org_ops, storage_ops) + ops = PageOps(mdb, crawl_ops, org_ops, storage_ops, background_job_ops) org_crawl_dep = org_ops.org_crawl_dep @app.post( "/orgs/{oid}/crawls/all/pages/reAdd", - tags=["pages"], - response_model=StartedResponseBool, + tags=["pages", "crawls"], + response_model=StartedResponse, + ) + @app.post( + "/orgs/{oid}/uploads/all/pages/reAdd", + tags=["pages", "uploads"], + response_model=StartedResponse, + ) + @app.post( + "/orgs/{oid}/all-crawls/all/pages/reAdd", + tags=["pages", "all-crawls"], + response_model=StartedResponse, ) async def re_add_all_crawl_pages( - org: Organization = Depends(org_crawl_dep), user: User = Depends(user_dep) + request: Request, + org: Organization = Depends(org_crawl_dep), + user: User = Depends(user_dep), ): - """Re-add pages for all crawls in org (superuser only)""" + """Re-add pages for all crawls in org (superuser only, may delete page QA data!)""" if not user.is_superuser: raise HTTPException(status_code=403, detail="Not Allowed") - asyncio.create_task(ops.re_add_all_crawl_pages(org.id)) - return {"started": True} + crawl_type = ops.get_crawl_type_from_pages_route(request) + job_id = await ops.background_job_ops.create_re_add_org_pages_job( + org.id, crawl_type=crawl_type + ) + return {"started": job_id or ""} @app.post( "/orgs/{oid}/crawls/{crawl_id}/pages/reAdd", - tags=["pages"], + tags=["pages", "crawls"], + response_model=StartedResponseBool, + ) + @app.post( + "/orgs/{oid}/uploads/{crawl_id}/pages/reAdd", + tags=["pages", "uploads"], + response_model=StartedResponseBool, + ) + @app.post( + "/orgs/{oid}/all-crawls/{crawl_id}/pages/reAdd", + tags=["pages", "all-crawls"], response_model=StartedResponseBool, ) async def re_add_crawl_pages( - crawl_id: str, org: Organization = Depends(org_crawl_dep) + crawl_id: str, + org: Organization = Depends(org_crawl_dep), ): - """Re-add pages for crawl""" + """Re-add pages for crawl (may delete page QA data!)""" asyncio.create_task(ops.re_add_crawl_pages(crawl_id, org.id)) return {"started": True} @app.get( "/orgs/{oid}/crawls/{crawl_id}/pages/{page_id}", - tags=["pages"], + tags=["pages", "crawls"], + response_model=PageOut, + ) + @app.get( + "/orgs/{oid}/uploads/{crawl_id}/pages/{page_id}", + tags=["pages", "uploads"], + response_model=PageOut, + ) + @app.get( + "/orgs/{oid}/all-crawls/{crawl_id}/pages/{page_id}", + tags=["pages", "all-crawls"], response_model=PageOut, ) async def get_page( @@ -692,7 +761,7 @@ async def get_page_with_qa( page_id: UUID, org: Organization = Depends(org_crawl_dep), ): - """GET single page""" + """GET single page with QA details""" return 
await ops.get_page_out(page_id, org.id, crawl_id, qa_run_id=qa_run_id) @app.patch( @@ -753,12 +822,22 @@ async def delete_page_notes( delete: PageNoteDelete, org: Organization = Depends(org_crawl_dep), ): - """Edit page note""" + """Delete page note""" return await ops.delete_page_notes(page_id, org.id, delete, crawl_id) @app.get( "/orgs/{oid}/crawls/{crawl_id}/pages", - tags=["pages"], + tags=["pages", "crawls"], + response_model=PaginatedPageOutResponse, + ) + @app.get( + "/orgs/{oid}/uploads/{crawl_id}/pages", + tags=["pages", "uploads"], + response_model=PaginatedPageOutResponse, + ) + @app.get( + "/orgs/{oid}/all-crawls/{crawl_id}/pages", + tags=["pages", "all-crawls"], response_model=PaginatedPageOutResponse, ) async def get_pages_list( diff --git a/backend/btrixcloud/profiles.py b/backend/btrixcloud/profiles.py index ab72422472..9b8ae8da8f 100644 --- a/backend/btrixcloud/profiles.py +++ b/backend/btrixcloud/profiles.py @@ -426,7 +426,7 @@ async def delete_profile( # Delete file from storage if profile.resource: - await self.storage_ops.delete_crawl_file_object(org, profile.resource) + await self.storage_ops.delete_file_object(org, profile.resource) await self.orgs.inc_org_bytes_stored( org.id, -profile.resource.size, "profile" ) diff --git a/backend/btrixcloud/storages.py b/backend/btrixcloud/storages.py index 50b9557a92..43b6fabcd5 100644 --- a/backend/btrixcloud/storages.py +++ b/backend/btrixcloud/storages.py @@ -476,9 +476,7 @@ async def get_presigned_url( return presigned_url - async def delete_crawl_file_object( - self, org: Organization, crawlfile: BaseFile - ) -> bool: + async def delete_file_object(self, org: Organization, crawlfile: BaseFile) -> bool: """delete crawl file from storage.""" return await self._delete_file(org, crawlfile.filename, crawlfile.storage) diff --git a/backend/btrixcloud/uploads.py b/backend/btrixcloud/uploads.py index ded0630719..e95b30427f 100644 --- a/backend/btrixcloud/uploads.py +++ b/backend/btrixcloud/uploads.py @@ -1,9 +1,6 @@ """ handle user uploads into browsertrix """ import uuid -import hashlib -import os -import base64 from urllib.parse import unquote from uuid import UUID @@ -13,7 +10,6 @@ from fastapi import Depends, UploadFile, File from fastapi import HTTPException from starlette.requests import Request -from pathvalidate import sanitize_filename from .basecrawls import BaseCrawlOps from .storages import CHUNK_SIZE @@ -27,18 +23,16 @@ Organization, PaginatedCrawlOutResponse, User, - StorageRef, UpdatedResponse, DeletedResponseQuota, AddedResponseIdQuota, + FilePreparer, + MIN_UPLOAD_PART_SIZE, ) from .pagination import paginated_format, DEFAULT_PAGE_SIZE from .utils import dt_now -MIN_UPLOAD_PART_SIZE = 10000000 - - # ============================================================================ class UploadOps(BaseCrawlOps): """upload ops""" @@ -105,9 +99,10 @@ async def stream_iter(): if prev_upload: try: await self._delete_crawl_files(prev_upload, org) + await self.page_ops.delete_crawl_pages(prev_upload.id, org.id) # pylint: disable=broad-exception-caught except Exception as exc: - print("replace file deletion failed", exc) + print(f"Error handling previous upload: {exc}", flush=True) return await self._create_upload( files, name, description, collections, tags, id_, org, user @@ -195,6 +190,8 @@ async def _create_upload( self.event_webhook_ops.create_upload_finished_notification(crawl_id, org.id) ) + asyncio.create_task(self.page_ops.add_crawl_pages_to_db_from_wacz(crawl_id)) + await self.orgs.inc_org_bytes_stored(org.id, 
file_size, "upload") quota_reached = self.orgs.storage_quota_reached(org) @@ -224,39 +221,6 @@ async def delete_uploads( return {"deleted": True, "storageQuotaReached": quota_reached} -# ============================================================================ -class FilePreparer: - """wrapper to compute digest / name for streaming upload""" - - def __init__(self, prefix, filename): - self.upload_size = 0 - self.upload_hasher = hashlib.sha256() - self.upload_name = prefix + self.prepare_filename(filename) - - def add_chunk(self, chunk): - """add chunk for file""" - self.upload_size += len(chunk) - self.upload_hasher.update(chunk) - - def get_crawl_file(self, storage: StorageRef): - """get crawl file""" - return CrawlFile( - filename=self.upload_name, - hash=self.upload_hasher.hexdigest(), - size=self.upload_size, - storage=storage, - ) - - def prepare_filename(self, filename): - """prepare filename by sanitizing and adding extra string - to avoid duplicates""" - name = sanitize_filename(filename.rsplit("/", 1)[-1]) - parts = name.split(".") - randstr = base64.b32encode(os.urandom(5)).lower() - parts[0] += "-" + randstr.decode("utf-8") - return ".".join(parts) - - # ============================================================================ class UploadFileReader(BufferedReader): """Compute digest on file upload""" @@ -446,3 +410,5 @@ async def delete_uploads( org: Organization = Depends(org_crawl_dep), ): return await ops.delete_uploads(delete_list, org, user) + + return ops diff --git a/backend/test/data/thumbnail.jpg b/backend/test/data/thumbnail.jpg new file mode 100644 index 0000000000..133cfae2b0 Binary files /dev/null and b/backend/test/data/thumbnail.jpg differ diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 4faf42540d..c2999bb5ad 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -1,5 +1,6 @@ import requests import os +from uuid import uuid4 from zipfile import ZipFile, ZIP_STORED from tempfile import TemporaryFile @@ -12,12 +13,24 @@ UPDATED_NAME = "Updated tést cöllection" SECOND_COLLECTION_NAME = "second-collection" DESCRIPTION = "Test description" +CAPTION = "Short caption" +UPDATED_CAPTION = "Updated caption" + +NON_PUBLIC_COLL_FIELDS = ( + "modified", + "tags", + "homeUrlPageId", +) +NON_PUBLIC_IMAGE_FIELDS = ("originalFilename", "userid", "userName", "created") + _coll_id = None _second_coll_id = None _public_coll_id = None +_second_public_coll_id = None upload_id = None modified = None +default_org_slug = None curr_dir = os.path.dirname(os.path.realpath(__file__)) @@ -25,12 +38,16 @@ def test_create_collection( crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id ): + default_thumbnail_name = "default-thumbnail.jpg" + r = requests.post( f"{API_PREFIX}/orgs/{default_org_id}/collections", headers=crawler_auth_headers, json={ "crawlIds": [crawler_crawl_id], "name": COLLECTION_NAME, + "caption": CAPTION, + "defaultThumbnailName": default_thumbnail_name, }, ) assert r.status_code == 200 @@ -49,6 +66,29 @@ def test_create_collection( assert _coll_id in r.json()["collectionIds"] assert r.json()["collections"] == [{"name": COLLECTION_NAME, "id": _coll_id}] + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + + assert data["id"] == _coll_id + assert data["name"] == COLLECTION_NAME + assert data["caption"] == CAPTION + assert data["crawlCount"] == 1 + assert 
data["pageCount"] > 0 + assert data["totalSize"] > 0 + modified = data["modified"] + assert modified + assert modified.endswith("Z") + + assert data["dateEarliest"] + assert data["dateLatest"] + + assert data["defaultThumbnailName"] == default_thumbnail_name + assert data["allowPublicDownload"] + def test_create_public_collection( crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id @@ -59,7 +99,9 @@ def test_create_public_collection( json={ "crawlIds": [crawler_crawl_id], "name": PUBLIC_COLLECTION_NAME, + "caption": CAPTION, "access": "public", + "allowPublicDownload": False, }, ) assert r.status_code == 200 @@ -115,6 +157,7 @@ def test_update_collection( headers=crawler_auth_headers, json={ "description": DESCRIPTION, + "caption": UPDATED_CAPTION, }, ) assert r.status_code == 200 @@ -130,6 +173,7 @@ def test_update_collection( assert data["id"] == _coll_id assert data["name"] == COLLECTION_NAME assert data["description"] == DESCRIPTION + assert data["caption"] == UPDATED_CAPTION assert data["crawlCount"] == 1 assert data["pageCount"] > 0 assert data["totalSize"] > 0 @@ -137,6 +181,9 @@ def test_update_collection( modified = data["modified"] assert modified assert modified.endswith("Z") + assert data["dateEarliest"] + assert data["dateLatest"] + assert data["defaultThumbnailName"] def test_rename_collection( @@ -211,6 +258,8 @@ def test_add_remove_crawl_from_collection( assert data["totalSize"] > 0 assert data["modified"] >= modified assert data["tags"] == ["wr-test-2", "wr-test-1"] + assert data["dateEarliest"] + assert data["dateLatest"] # Verify it was added r = requests.get( @@ -233,6 +282,8 @@ def test_add_remove_crawl_from_collection( assert data["totalSize"] == 0 assert data["modified"] >= modified assert data.get("tags", []) == [] + assert data.get("dateEarliest") is None + assert data.get("dateLatest") is None # Verify they were removed r = requests.get( @@ -261,6 +312,8 @@ def test_add_remove_crawl_from_collection( assert data["totalSize"] > 0 assert data["modified"] >= modified assert data["tags"] == ["wr-test-2", "wr-test-1"] + assert data["dateEarliest"] + assert data["dateLatest"] def test_get_collection(crawler_auth_headers, default_org_id): @@ -274,11 +327,15 @@ def test_get_collection(crawler_auth_headers, default_org_id): assert data["name"] == UPDATED_NAME assert data["oid"] == default_org_id assert data["description"] == DESCRIPTION + assert data["caption"] == UPDATED_CAPTION assert data["crawlCount"] == 2 assert data["pageCount"] > 0 assert data["totalSize"] > 0 assert data["modified"] >= modified assert data["tags"] == ["wr-test-2", "wr-test-1"] + assert data["dateEarliest"] + assert data["dateLatest"] + assert data["defaultThumbnailName"] def test_get_collection_replay(crawler_auth_headers, default_org_id): @@ -292,11 +349,15 @@ def test_get_collection_replay(crawler_auth_headers, default_org_id): assert data["name"] == UPDATED_NAME assert data["oid"] == default_org_id assert data["description"] == DESCRIPTION + assert data["caption"] == UPDATED_CAPTION assert data["crawlCount"] == 2 assert data["pageCount"] > 0 assert data["totalSize"] > 0 assert data["modified"] >= modified assert data["tags"] == ["wr-test-2", "wr-test-1"] + assert data["dateEarliest"] + assert data["dateLatest"] + assert data["defaultThumbnailName"] resources = data["resources"] assert resources @@ -413,6 +474,9 @@ def test_add_upload_to_collection(crawler_auth_headers, default_org_id): assert data["totalSize"] > 0 assert data["modified"] assert data["tags"] == ["wr-test-2", 
"wr-test-1"] + assert data["dateEarliest"] + assert data["dateLatest"] + assert data["defaultThumbnailName"] # Verify it was added r = requests.get( @@ -459,16 +523,20 @@ def test_list_collections( assert len(items) == 3 first_coll = [coll for coll in items if coll["name"] == UPDATED_NAME][0] - assert first_coll["id"] + assert first_coll["id"] == _coll_id assert first_coll["name"] == UPDATED_NAME assert first_coll["oid"] == default_org_id assert first_coll["description"] == DESCRIPTION + assert first_coll["caption"] == UPDATED_CAPTION assert first_coll["crawlCount"] == 3 assert first_coll["pageCount"] > 0 assert first_coll["totalSize"] > 0 assert first_coll["modified"] assert first_coll["tags"] == ["wr-test-2", "wr-test-1"] assert first_coll["access"] == "private" + assert first_coll["dateEarliest"] + assert first_coll["dateLatest"] + assert first_coll["defaultThumbnailName"] second_coll = [coll for coll in items if coll["name"] == SECOND_COLLECTION_NAME][0] assert second_coll["id"] @@ -481,6 +549,8 @@ def test_list_collections( assert second_coll["modified"] assert second_coll["tags"] == ["wr-test-2"] assert second_coll["access"] == "private" + assert second_coll["dateEarliest"] + assert second_coll["dateLatest"] def test_remove_upload_from_collection(crawler_auth_headers, default_org_id): @@ -742,11 +812,14 @@ def test_list_public_collections( json={ "crawlIds": [crawler_crawl_id], "name": "Second public collection", + "description": "Lorem ipsum", "access": "public", }, ) assert r.status_code == 200 - second_public_coll_id = r.json()["id"] + + global _second_public_coll_id + _second_public_coll_id = r.json()["id"] # Get default org slug r = requests.get( @@ -755,7 +828,10 @@ def test_list_public_collections( ) assert r.status_code == 200 data = r.json() - org_slug = data["slug"] + + global default_org_slug + default_org_slug = data["slug"] + org_name = data["name"] # Verify that public profile isn't enabled @@ -764,7 +840,7 @@ def test_list_public_collections( assert data["publicUrl"] == "" # Try listing public collections without org public profile enabled - r = requests.get(f"{API_PREFIX}/public-collections/{org_slug}") + r = requests.get(f"{API_PREFIX}/public/orgs/{default_org_slug}/collections") assert r.status_code == 404 assert r.json()["detail"] == "public_profile_not_found" @@ -795,7 +871,7 @@ def test_list_public_collections( assert data["publicUrl"] == public_url # List public collections with no auth (no public profile) - r = requests.get(f"{API_PREFIX}/public-collections/{org_slug}") + r = requests.get(f"{API_PREFIX}/public/orgs/{default_org_slug}/collections") assert r.status_code == 200 data = r.json() @@ -807,12 +883,19 @@ def test_list_public_collections( collections = data["collections"] assert len(collections) == 2 for collection in collections: - assert collection["id"] in (_public_coll_id, second_public_coll_id) + assert collection["id"] in (_public_coll_id, _second_public_coll_id) + assert collection["oid"] assert collection["access"] == "public" + assert collection["name"] + assert collection["dateEarliest"] + assert collection["dateLatest"] + assert collection["crawlCount"] > 0 + assert collection["pageCount"] > 0 + assert collection["totalSize"] > 0 # Test non-existing slug - it should return a 404 but not reveal # whether or not an org exists with that slug - r = requests.get(f"{API_PREFIX}/public-collections/nonexistentslug") + r = requests.get(f"{API_PREFIX}/public/orgs/nonexistentslug/collections") assert r.status_code == 404 assert r.json()["detail"] == 
"public_profile_not_found" @@ -820,7 +903,7 @@ def test_list_public_collections( def test_list_public_collections_no_colls(non_default_org_id, admin_auth_headers): # Test existing org that's not public - should return same 404 as # if org doesn't exist - r = requests.get(f"{API_PREFIX}/public-collections/{NON_DEFAULT_ORG_SLUG}") + r = requests.get(f"{API_PREFIX}/public/orgs/{NON_DEFAULT_ORG_SLUG}/collections") assert r.status_code == 404 assert r.json()["detail"] == "public_profile_not_found" @@ -837,13 +920,459 @@ def test_list_public_collections_no_colls(non_default_org_id, admin_auth_headers # List public collections with no auth - should still get profile even # with no public collections - r = requests.get(f"{API_PREFIX}/public-collections/{NON_DEFAULT_ORG_SLUG}") + r = requests.get(f"{API_PREFIX}/public/orgs/{NON_DEFAULT_ORG_SLUG}/collections") assert r.status_code == 200 data = r.json() assert data["org"]["name"] == NON_DEFAULT_ORG_NAME assert data["collections"] == [] +def test_set_collection_home_url( + crawler_auth_headers, default_org_id, crawler_crawl_id +): + # Get a page id from crawler_crawl_id + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + assert data["total"] >= 1 + + page = data["items"][0] + assert page + + page_id = page["id"] + assert page_id + + page_url = page["url"] + page_ts = page["ts"] + + # Set page as home url + r = requests.post( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}/home-url", + headers=crawler_auth_headers, + json={"pageId": page_id}, + ) + assert r.status_code == 200 + assert r.json()["updated"] + + # Check that fields were set in collection as expected + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + assert data["homeUrl"] == page_url + assert data["homeUrlTs"] == page_ts + assert data["homeUrlPageId"] == page_id + + +def test_collection_url_list(crawler_auth_headers, default_org_id): + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}/urls", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + + assert data["total"] >= 1 + urls = data["items"] + assert urls + + for url in urls: + assert url["url"] + assert url["count"] >= 1 + + snapshots = url["snapshots"] + assert snapshots + + for snapshot in snapshots: + assert snapshot["pageId"] + assert snapshot["ts"] + assert snapshot["status"] + + +def test_upload_collection_thumbnail(crawler_auth_headers, default_org_id): + with open(os.path.join(curr_dir, "data", "thumbnail.jpg"), "rb") as fh: + r = requests.put( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}/thumbnail?filename=thumbnail.jpg", + headers=crawler_auth_headers, + data=read_in_chunks(fh), + ) + assert r.status_code == 200 + assert r.json()["added"] + + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + thumbnail = r.json()["thumbnail"] + + assert thumbnail["name"] + assert thumbnail["path"] + assert thumbnail["hash"] + assert thumbnail["size"] > 0 + + assert thumbnail["originalFilename"] == "thumbnail.jpg" + assert thumbnail["mime"] == "image/jpeg" + assert thumbnail["userid"] + assert thumbnail["userName"] + assert thumbnail["created"] + + +def 
test_set_collection_default_thumbnail(crawler_auth_headers, default_org_id): + default_thumbnail_name = "orange-default.avif" + + r = requests.patch( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_second_public_coll_id}", + headers=crawler_auth_headers, + json={"defaultThumbnailName": default_thumbnail_name}, + ) + assert r.status_code == 200 + assert r.json()["updated"] + + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_second_public_coll_id}", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + + assert data["id"] == _second_public_coll_id + assert data["defaultThumbnailName"] == default_thumbnail_name + + +def test_list_public_colls_home_url_thumbnail(): + # Check we get expected data for each public collection + # and nothing we don't expect + non_public_fields = ( + "oid", + "modified", + "crawlCount", + "pageCount", + "totalSize", + "tags", + "access", + "homeUrlPageId", + ) + non_public_image_fields = ("originalFilename", "userid", "userName", "created") + + r = requests.get(f"{API_PREFIX}/public/orgs/{default_org_slug}/collections") + assert r.status_code == 200 + collections = r.json()["collections"] + assert len(collections) == 2 + + for coll in collections: + assert coll["id"] in (_public_coll_id, _second_public_coll_id) + assert coll["oid"] + assert coll["access"] == "public" + assert coll["name"] + assert coll["resources"] + assert coll["dateEarliest"] + assert coll["dateLatest"] + assert coll["crawlCount"] > 0 + assert coll["pageCount"] > 0 + assert coll["totalSize"] > 0 + + for field in NON_PUBLIC_COLL_FIELDS: + assert field not in coll + + if coll["id"] == _public_coll_id: + assert coll["allowPublicDownload"] is False + + assert coll["caption"] == CAPTION + + assert coll["homeUrl"] + assert coll["homeUrlTs"] + + thumbnail = coll["thumbnail"] + assert thumbnail + + assert thumbnail["name"] + assert thumbnail["path"] + assert thumbnail["hash"] + assert thumbnail["size"] + assert thumbnail["mime"] + + for field in NON_PUBLIC_IMAGE_FIELDS: + assert field not in thumbnail + + if coll["id"] == _second_public_coll_id: + assert coll["description"] + assert coll["defaultThumbnailName"] == "orange-default.avif" + assert coll["allowPublicDownload"] + + +def test_get_public_collection(default_org_id): + r = requests.get( + f"{API_PREFIX}/public/orgs/{default_org_slug}/collections/{_public_coll_id}" + ) + assert r.status_code == 200 + coll = r.json() + + assert coll["id"] == _public_coll_id + assert coll["oid"] == default_org_id + assert coll["access"] == "public" + assert coll["name"] + assert coll["resources"] + assert coll["dateEarliest"] + assert coll["dateLatest"] + assert coll["crawlCount"] > 0 + assert coll["pageCount"] > 0 + assert coll["totalSize"] > 0 + + for field in NON_PUBLIC_COLL_FIELDS: + assert field not in coll + + assert coll["caption"] == CAPTION + + assert coll["homeUrl"] + assert coll["homeUrlTs"] + + assert coll["allowPublicDownload"] is False + + thumbnail = coll["thumbnail"] + assert thumbnail + + assert thumbnail["name"] + assert thumbnail["path"] + assert thumbnail["hash"] + assert thumbnail["size"] + assert thumbnail["mime"] + + for field in NON_PUBLIC_IMAGE_FIELDS: + assert field not in thumbnail + + # Invalid org slug - don't reveal whether org exists or not, use + # same exception as if collection doesn't exist + r = requests.get( + f"{API_PREFIX}/public/orgs/doesntexist/collections/{_public_coll_id}" + ) + assert r.status_code == 404 + assert r.json()["detail"] == "collection_not_found" + + # 
Invalid collection id + random_uuid = uuid4() + r = requests.get( + f"{API_PREFIX}/public/orgs/{default_org_slug}/collections/{random_uuid}" + ) + assert r.status_code == 404 + assert r.json()["detail"] == "collection_not_found" + + # Collection isn't public + r = requests.get( + f"{API_PREFIX}/public/orgs/{default_org_slug}/collections/{ _coll_id}" + ) + assert r.status_code == 404 + assert r.json()["detail"] == "collection_not_found" + + +def test_get_public_collection_unlisted(crawler_auth_headers, default_org_id): + # Make second public coll unlisted + r = requests.patch( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_second_public_coll_id}", + headers=crawler_auth_headers, + json={ + "access": "unlisted", + }, + ) + assert r.status_code == 200 + assert r.json()["updated"] + + # Verify single public collection GET endpoint works for unlisted collection + r = requests.get( + f"{API_PREFIX}/public/orgs/{default_org_slug}/collections/{_second_public_coll_id}" + ) + assert r.status_code == 200 + coll = r.json() + + assert coll["id"] == _second_public_coll_id + assert coll["oid"] == default_org_id + assert coll["access"] == "unlisted" + assert coll["name"] + assert coll["resources"] + assert coll["dateEarliest"] + assert coll["dateLatest"] + assert coll["crawlCount"] > 0 + assert coll["pageCount"] > 0 + assert coll["totalSize"] > 0 + assert coll["defaultThumbnailName"] == "orange-default.avif" + assert coll["allowPublicDownload"] + + for field in NON_PUBLIC_COLL_FIELDS: + assert field not in coll + + +def test_get_public_collection_unlisted_org_profile_disabled( + admin_auth_headers, default_org_id +): + # Disable org profile + r = requests.post( + f"{API_PREFIX}/orgs/{default_org_id}/public-profile", + headers=admin_auth_headers, + json={ + "enablePublicProfile": False, + }, + ) + assert r.status_code == 200 + assert r.json()["updated"] + + # Verify we can still get public details for unlisted collection + r = requests.get( + f"{API_PREFIX}/public/orgs/{default_org_slug}/collections/{_second_public_coll_id}" + ) + assert r.status_code == 200 + coll = r.json() + + assert coll["id"] == _second_public_coll_id + assert coll["oid"] == default_org_id + assert coll["access"] == "unlisted" + assert coll["name"] + assert coll["resources"] + assert coll["dateEarliest"] + assert coll["dateLatest"] + assert coll["crawlCount"] > 0 + assert coll["pageCount"] > 0 + assert coll["totalSize"] > 0 + assert coll["defaultThumbnailName"] == "orange-default.avif" + assert coll["allowPublicDownload"] + + for field in NON_PUBLIC_COLL_FIELDS: + assert field not in coll + + # Re-enable org profile + r = requests.post( + f"{API_PREFIX}/orgs/{default_org_id}/public-profile", + headers=admin_auth_headers, + json={ + "enablePublicProfile": True, + }, + ) + assert r.status_code == 200 + assert r.json()["updated"] + + +def test_delete_thumbnail(crawler_auth_headers, default_org_id): + r = requests.delete( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}/thumbnail", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + assert r.json()["deleted"] + + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + assert r.json().get("thumbnail") is None + + r = requests.delete( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_second_public_coll_id}/thumbnail", + headers=crawler_auth_headers, + ) + assert r.status_code == 404 + assert r.json()["detail"] == "thumbnail_not_found" + + +def 
test_unset_collection_home_url( + crawler_auth_headers, default_org_id, crawler_crawl_id +): + # Unset home url + r = requests.post( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}/home-url", + headers=crawler_auth_headers, + json={"pageId": None}, + ) + assert r.status_code == 200 + assert r.json()["updated"] + + # Check that fields were set in collection as expected + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + assert data.get("homeUrl") is None + assert data.get("homeUrlTs") is None + assert data.get("homeUrlPageId") is None + + +def test_download_streaming_public_collection(crawler_auth_headers, default_org_id): + # Check that download is blocked if allowPublicDownload is False + with requests.get( + f"{API_PREFIX}/public/orgs/{default_org_slug}/collections/{_public_coll_id}/download", + stream=True, + ) as r: + assert r.status_code == 403 + + # Set allowPublicDownload to True and then check downloading works + r = requests.patch( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_public_coll_id}", + headers=crawler_auth_headers, + json={ + "allowPublicDownload": True, + }, + ) + assert r.status_code == 200 + assert r.json()["updated"] + + with TemporaryFile() as fh: + with requests.get( + f"{API_PREFIX}/public/orgs/{default_org_slug}/collections/{_public_coll_id}/download", + stream=True, + ) as r: + assert r.status_code == 200 + for chunk in r.iter_content(): + fh.write(chunk) + + fh.seek(0) + with ZipFile(fh, "r") as zip_file: + contents = zip_file.namelist() + + assert len(contents) == 2 + for filename in contents: + assert filename.endswith(".wacz") or filename == "datapackage.json" + assert zip_file.getinfo(filename).compress_type == ZIP_STORED + + +def test_download_streaming_public_collection_profile_disabled( + admin_auth_headers, default_org_id +): + # Disable org public profile and ensure download still works for public collection + r = requests.post( + f"{API_PREFIX}/orgs/{default_org_id}/public-profile", + headers=admin_auth_headers, + json={ + "enablePublicProfile": False, + }, + ) + assert r.status_code == 200 + assert r.json()["updated"] + + with TemporaryFile() as fh: + with requests.get( + f"{API_PREFIX}/public/orgs/{default_org_slug}/collections/{_public_coll_id}/download", + stream=True, + ) as r: + assert r.status_code == 200 + for chunk in r.iter_content(): + fh.write(chunk) + + fh.seek(0) + with ZipFile(fh, "r") as zip_file: + contents = zip_file.namelist() + + assert len(contents) == 2 + for filename in contents: + assert filename.endswith(".wacz") or filename == "datapackage.json" + assert zip_file.getinfo(filename).compress_type == ZIP_STORED + + def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id): # Delete second collection r = requests.delete( diff --git a/backend/test/test_uploads.py b/backend/test/test_uploads.py index fb7543d0a2..3fb1c1c44b 100644 --- a/backend/test/test_uploads.py +++ b/backend/test/test_uploads.py @@ -232,6 +232,49 @@ def test_get_upload_replay_json_admin( assert "files" not in data +def test_get_upload_pages(admin_auth_headers, default_org_id, upload_id): + # Give time for pages to finish being uploaded + time.sleep(10) + + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}/pages", + headers=admin_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + + assert data["total"] > 0 + + pages = data["items"] + for page 
in pages: + assert page["id"] + assert page["oid"] + assert page["crawl_id"] == upload_id + assert page["url"] + assert page["ts"] + assert page.get("title") or page.get("title") is None + + page_id = pages[0]["id"] + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}/pages/{page_id}", + headers=admin_auth_headers, + ) + assert r.status_code == 200 + page = r.json() + + assert page["id"] == page_id + assert page["oid"] + assert page["crawl_id"] + assert page["url"] + assert page["ts"] + assert page.get("title") or page.get("title") is None + + assert page["notes"] == [] + assert page.get("userid") is None + assert page.get("modified") is None + assert page.get("approved") is None + + def test_replace_upload( admin_auth_headers, default_org_id, uploads_collection_id, upload_id ): diff --git a/chart/app-templates/background_job.yaml b/chart/app-templates/background_job.yaml index 132d3bf8fe..f47dd2acfd 100644 --- a/chart/app-templates/background_job.yaml +++ b/chart/app-templates/background_job.yaml @@ -8,7 +8,7 @@ metadata: btrix.org: {{ oid }} spec: - ttlSecondsAfterFinished: 0 + ttlSecondsAfterFinished: 90 backoffLimit: 3 template: spec: @@ -38,6 +38,9 @@ spec: - name: OID value: {{ oid }} + - name: CRAWL_TYPE + value: {{ crawl_type }} + envFrom: - configMapRef: name: backend-env-config
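The CRAWL_TYPE value templated into background_job.yaml above is consumed by the backend job entrypoint together with OID. A minimal sketch of that consumer, assuming an entrypoint function along these lines and an org_ops.get_org_by_id lookup helper (neither is shown in this diff):

# Hypothetical consumer of the OID / CRAWL_TYPE env vars set in background_job.yaml.
# Function and helper names here are illustrative, not part of this change.
import os
from uuid import UUID

async def run_re_add_org_pages_job(org_ops, page_ops) -> int:
    oid = UUID(os.environ["OID"])
    # An empty CRAWL_TYPE (no filter) behaves the same as unset:
    # re-add pages for both crawls and uploads.
    crawl_type = os.environ.get("CRAWL_TYPE") or None

    org = await org_ops.get_org_by_id(oid)  # assumed lookup helper
    await page_ops.re_add_all_crawl_pages(org, crawl_type=crawl_type)
    return 0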
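For reference, a worked example of the index-4 lookup in get_crawl_type_from_pages_route (pages.py above). It assumes the pages routes are served under an /api prefix at runtime, as the API_PREFIX used by the tests suggests, which is why the type segment sits at position 4 of the route path:

# Sketch of the split performed in get_crawl_type_from_pages_route, assuming
# the runtime route path includes the /api prefix.
route_path = "/api/orgs/{oid}/uploads/all/pages/reAdd"
parts = route_path.split("/")
# parts == ["", "api", "orgs", "{oid}", "uploads", "all", "pages", "reAdd"]
assert parts[4] == "uploads"  # mapped to crawl_type == "upload"
# ".../crawls/all/pages/reAdd" yields "crawls" -> crawl_type == "crawl";
# ".../all-crawls/all/pages/reAdd" matches neither, so crawl_type stays None
# and pages are re-added for both crawls and uploads.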
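FilePreparer and MIN_UPLOAD_PART_SIZE now come from models.py rather than being defined locally in uploads.py, so callers only change their import. A rough usage sketch based on the removed class body; the prefix, filename, and storage ref below are placeholders:

# Illustrative use of the relocated FilePreparer (values are made up).
from btrixcloud.models import FilePreparer, StorageRef

prep = FilePreparer("uploads/", "my upload.wacz")   # sanitizes name, appends random suffix
prep.add_chunk(b"first chunk of the streamed WACZ")  # updates running size and sha256 digest
crawl_file = prep.get_crawl_file(StorageRef(name="default"))
print(crawl_file.filename, crawl_file.hash, crawl_file.size)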