diff --git a/backend/btrixcloud/db.py b/backend/btrixcloud/db.py
index 504830f068..bd889be25a 100644
--- a/backend/btrixcloud/db.py
+++ b/backend/btrixcloud/db.py
@@ -17,7 +17,7 @@
 from .migrations import BaseMigration
 
 
-CURR_DB_VERSION = "0039"
+CURR_DB_VERSION = "0040"
 
 
 # ============================================================================
diff --git a/backend/btrixcloud/migrations/migration_0040_archived_item_page_count.py b/backend/btrixcloud/migrations/migration_0040_archived_item_page_count.py
new file mode 100644
index 0000000000..2f72fc39bd
--- /dev/null
+++ b/backend/btrixcloud/migrations/migration_0040_archived_item_page_count.py
@@ -0,0 +1,43 @@
+"""
+Migration 0040 -- archived item pageCount
+"""
+
+from btrixcloud.migrations import BaseMigration
+
+
+MIGRATION_VERSION = "0040"
+
+
+class Migration(BaseMigration):
+    """Migration class."""
+
+    # pylint: disable=unused-argument
+    def __init__(self, mdb, **kwargs):
+        super().__init__(mdb, migration_version=MIGRATION_VERSION)
+
+        self.page_ops = kwargs.get("page_ops")
+
+    async def migrate_up(self):
+        """Perform migration up.
+
+        Calculate and store pageCount for archived items that don't have it yet
+        """
+        crawls_mdb = self.mdb["crawls"]
+
+        if self.page_ops is None:
+            print(
+                "Unable to set pageCount for archived items, missing page_ops",
+                flush=True,
+            )
+            return
+
+        async for crawl_raw in crawls_mdb.find({"pageCount": None}):
+            crawl_id = crawl_raw["_id"]
+            try:
+                await self.page_ops.set_archived_item_page_count(crawl_id)
+            # pylint: disable=broad-exception-caught
+            except Exception as err:
+                print(
+                    f"Error saving pageCount for archived item {crawl_id}: {err}",
+                    flush=True,
+                )
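One detail worth noting: in MongoDB, the filter `{"pageCount": None}` matches documents where the field is `null` *or* missing entirely, so every pre-0040 item is picked up by the backfill. For illustration, here is a standalone sketch of the same pattern; the database name, the `pages` collection, and its `crawl_id` field are assumptions for the sketch, not btrixcloud internals:

```python
# Standalone sketch of the migration 0040 backfill pattern (assumed names).
import asyncio

from motor.motor_asyncio import AsyncIOMotorClient


async def backfill_page_counts(mongo_url: str) -> None:
    db = AsyncIOMotorClient(mongo_url)["btrix"]  # db name assumed
    crawls = db["crawls"]
    pages = db["pages"]  # pages keyed by "crawl_id": an assumption

    # {"pageCount": None} matches both a literal null and a missing field,
    # so items created before the field existed are included.
    async for crawl_raw in crawls.find({"pageCount": None}):
        crawl_id = crawl_raw["_id"]
        try:
            page_count = await pages.count_documents({"crawl_id": crawl_id})
            await crawls.find_one_and_update(
                {"_id": crawl_id}, {"$set": {"pageCount": page_count}}
            )
        # pylint: disable=broad-exception-caught
        except Exception as err:
            # keep going; one bad item shouldn't abort the whole backfill
            print(f"Error saving pageCount for {crawl_id}: {err}", flush=True)


if __name__ == "__main__":
    asyncio.run(backfill_page_counts("mongodb://localhost:27017"))
```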
diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py
index 02184b6fa3..9bcd7557c3 100644
--- a/backend/btrixcloud/models.py
+++ b/backend/btrixcloud/models.py
@@ -797,6 +797,8 @@ class BaseCrawl(CoreCrawlable, BaseMongoModel):
 
     reviewStatus: ReviewStatus = None
 
+    pageCount: Optional[int] = 0
+
     filePageCount: Optional[int] = 0
     errorPageCount: Optional[int] = 0
 
@@ -872,6 +874,7 @@ class CrawlOut(BaseMongoModel):
     lastQAState: Optional[str] = None
     lastQAStarted: Optional[datetime] = None
 
+    pageCount: Optional[int] = 0
     filePageCount: Optional[int] = 0
     errorPageCount: Optional[int] = 0
 
@@ -1914,6 +1917,8 @@ class OrgMetrics(BaseModel):
     crawlCount: int
     uploadCount: int
     pageCount: int
+    crawlPageCount: int
+    uploadPageCount: int
     profileCount: int
     workflowsRunningCount: int
     maxConcurrentCrawls: int
diff --git a/backend/btrixcloud/operator/crawls.py b/backend/btrixcloud/operator/crawls.py
index 737397fbf5..bad9fa49f8 100644
--- a/backend/btrixcloud/operator/crawls.py
+++ b/backend/btrixcloud/operator/crawls.py
@@ -1503,6 +1503,7 @@ async def do_crawl_finished_tasks(
         )
 
         if state in SUCCESSFUL_STATES and crawl.oid:
+            await self.page_ops.set_archived_item_page_count(crawl.id)
             await self.org_ops.inc_org_bytes_stored(
                 crawl.oid, status.filesAddedSize, "crawl"
             )
diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py
index 318cf6e2fd..3486cc29a4 100644
--- a/backend/btrixcloud/orgs.py
+++ b/backend/btrixcloud/orgs.py
@@ -939,7 +939,10 @@ async def get_org_metrics(self, org: Organization) -> dict[str, int]:
         archived_item_count = 0
         crawl_count = 0
         upload_count = 0
+        page_count = 0
+        crawl_page_count = 0
+        upload_page_count = 0
 
         async for item_data in self.crawls_db.find({"oid": org.id}):
             item = BaseCrawl.from_dict(item_data)
@@ -948,10 +951,12 @@
             archived_item_count += 1
             if item.type == "crawl":
                 crawl_count += 1
+                crawl_page_count += item.pageCount or 0
             if item.type == "upload":
                 upload_count += 1
-            if item.stats:
-                page_count += item.stats.done
+                upload_page_count += item.pageCount or 0
+            if item.pageCount:
+                page_count += item.pageCount
 
         profile_count = await self.profiles_db.count_documents({"oid": org.id})
         workflows_running_count = await self.crawls_db.count_documents(
@@ -975,6 +980,8 @@
             "crawlCount": crawl_count,
             "uploadCount": upload_count,
             "pageCount": page_count,
+            "crawlPageCount": crawl_page_count,
+            "uploadPageCount": upload_page_count,
             "profileCount": profile_count,
             "workflowsRunningCount": workflows_running_count,
             "maxConcurrentCrawls": max_concurrent_crawls,
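Note the semantic change in `get_org_metrics`: `pageCount` previously counted only `stats.done` on crawls; it now sums the stored `pageCount` across crawls *and* uploads, with two new per-type counters. The loop also tallies item counts and sizes, so it stays, but the page totals alone could be computed server-side. A sketch of the equivalent aggregation, assuming only the `oid`, `type`, and `pageCount` fields shown in the diff:

```python
# Sketch: the three page counters from get_org_metrics as one $group stage.
from uuid import UUID


def page_count_pipeline(oid: UUID) -> list[dict]:
    """Build a pipeline summing pageCount overall and per item type."""

    def sum_if_type(item_type: str) -> dict:
        # Only add pageCount when the item's type matches; treat missing as 0.
        return {
            "$sum": {
                "$cond": [
                    {"$eq": ["$type", item_type]},
                    {"$ifNull": ["$pageCount", 0]},
                    0,
                ]
            }
        }

    return [
        {"$match": {"oid": oid}},
        {
            "$group": {
                "_id": None,
                "pageCount": {"$sum": {"$ifNull": ["$pageCount", 0]}},
                "crawlPageCount": sum_if_type("crawl"),
                "uploadPageCount": sum_if_type("upload"),
            }
        },
    ]
```

Running `crawls_db.aggregate(page_count_pipeline(org.id))` would yield a single document with the three totals, should the per-item loop ever become a bottleneck.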
diff --git a/backend/btrixcloud/pages.py b/backend/btrixcloud/pages.py
index 251d959be1..f796d94a49 100644
--- a/backend/btrixcloud/pages.py
+++ b/backend/btrixcloud/pages.py
@@ -92,6 +92,8 @@ async def add_crawl_pages_to_db_from_wacz(self, crawl_id: str, batch_size=100):
             if pages_buffer:
                 await self._add_pages_to_db(crawl_id, pages_buffer)
 
+            await self.set_archived_item_page_count(crawl_id)
+
             print(f"Added pages for crawl {crawl_id} to db", flush=True)
         # pylint: disable=broad-exception-caught, raise-missing-from
         except Exception as err:
@@ -661,6 +663,14 @@ def get_crawl_type_from_pages_route(self, request: Request):
 
         return crawl_type
 
+    async def set_archived_item_page_count(self, crawl_id: str):
+        """Store archived item page count in crawl document"""
+        _, page_count = await self.list_pages(crawl_id)
+
+        await self.crawls.find_one_and_update(
+            {"_id": crawl_id}, {"$set": {"pageCount": page_count}}
+        )
+
 
 # ============================================================================
 # pylint: disable=too-many-arguments, too-many-locals, invalid-name, fixme
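The helper leans on `list_pages`, which returns an `(items, total)` tuple, and keeps only the total; that keeps the definition of "page" consistent with the listing API at the cost of materializing one page of results. If that ever matters, a direct count would do the same job. A sketch, assuming `PageOps` keeps its collection handles on `self.pages`/`self.crawls` and that page documents carry a `crawl_id` field:

```python
# Sketch of the same helper using a direct count instead of list_pages.
async def set_archived_item_page_count(self, crawl_id: str):
    """Store archived item page count in crawl document"""
    # count_documents avoids fetching any page documents at all
    page_count = await self.pages.count_documents({"crawl_id": crawl_id})

    await self.crawls.find_one_and_update(
        {"_id": crawl_id}, {"$set": {"pageCount": page_count}}
    )
```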
diff --git a/backend/test/test_run_crawl.py b/backend/test/test_run_crawl.py
index 36f0237f7d..26f5574681 100644
--- a/backend/test/test_run_crawl.py
+++ b/backend/test/test_run_crawl.py
@@ -877,6 +877,14 @@ def test_re_add_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
     )
     assert r.status_code == 403
 
+    # Check that pageCount was stored on crawl
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["pageCount"] > 0
+
 
 def test_crawl_page_notes(crawler_auth_headers, default_org_id, crawler_crawl_id):
     note_text = "testing"
diff --git a/backend/test/test_uploads.py b/backend/test/test_uploads.py
index 3fb1c1c44b..5a55b36c5c 100644
--- a/backend/test/test_uploads.py
+++ b/backend/test/test_uploads.py
@@ -274,6 +274,14 @@ def test_get_upload_pages(admin_auth_headers, default_org_id, upload_id):
         assert page.get("modified") is None
         assert page.get("approved") is None
 
+    # Check that pageCount was stored on upload
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["pageCount"] > 0
+
 
 def test_replace_upload(
     admin_auth_headers, default_org_id, uploads_collection_id, upload_id
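Both new assertions only check `pageCount > 0`. A stricter variant would pin the stored value to the total reported by the pages listing endpoint; a sketch in the style of the surrounding tests, where the route shape and the paginated `total` field are assumptions based on the existing pages tests:

```python
# Sketch: cross-check stored pageCount against the pages listing total.
import requests

from .conftest import API_PREFIX  # as in the neighboring test modules


def test_crawl_page_count_matches_listing(
    crawler_auth_headers, default_org_id, crawler_crawl_id
):
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    page_count = r.json()["pageCount"]

    # The pages listing is assumed to return a paginated {"items", "total"} body
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    assert page_count == r.json()["total"]
```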
diff --git a/frontend/src/features/archived-items/archived-item-list.ts b/frontend/src/features/archived-items/archived-item-list.ts
index 858d0360e8..eab15f1bb2 100644
--- a/frontend/src/features/archived-items/archived-item-list.ts
+++ b/frontend/src/features/archived-items/archived-item-list.ts
@@ -252,7 +252,24 @@ export class ArchivedItemListItem extends BtrixElement {
           ${isUpload
-            ? notApplicable
+            ? html`
+                ${this.localize.number(
+                  this.item.pageCount ? +this.item.pageCount : 0,
+                  {
+                    notation: "compact",
+                  },
+                )}
+              `
             : html`
                 ${this.safeRender((crawl) => {
-                  const pagesComplete = +(crawl.stats?.done || 0);
                   const pagesFound = +(crawl.stats?.found || 0);
                   if (crawl.finished) {
+                    const pagesComplete = crawl.pageCount ? +crawl.pageCount : 0;
                     return `${this.localize.number(pagesComplete, { notation: "compact" })} ${pluralOf("pages", pagesComplete)}`;
                   }
+                  const pagesComplete = +(crawl.stats?.done || 0);
                   return `${this.localize.number(pagesComplete, { notation: "compact" })} / ${this.localize.number(pagesFound, { notation: "compact" })} ${pluralOf("pages", pagesFound)}`;
                 })}
diff --git a/frontend/src/pages/org/archived-item-detail/archived-item-detail.ts b/frontend/src/pages/org/archived-item-detail/archived-item-detail.ts
index 5f64d806c3..49a201af3f 100644
--- a/frontend/src/pages/org/archived-item-detail/archived-item-detail.ts
+++ b/frontend/src/pages/org/archived-item-detail/archived-item-detail.ts
@@ -859,7 +859,7 @@ export class ArchivedItemDetail extends BtrixElement {
                 ? html`${this.item.fileSize
                     ? html`${this.localize.bytes(this.item.fileSize || 0, {
                         unitDisplay: "narrow",
-                      })}${this.item.stats
+                      })}${this.item.stats?.done
                         ? html`,`
-                        : ""}`
+                        : html`,
+                          ${this.localize.number(
+                            this.item.pageCount ? +this.item.pageCount : 0,
+                          )}
+
+                          ${pluralOf(
+                            "pages",
+                            this.item.pageCount ? +this.item.pageCount : 0,
+                          )}`}`
                     : html`${msg("Unknown")}`}`
                 : html``}
diff --git a/frontend/src/pages/org/dashboard.ts b/frontend/src/pages/org/dashboard.ts
index 1e22e677cd..56d17f8b39 100644
--- a/frontend/src/pages/org/dashboard.ts
+++ b/frontend/src/pages/org/dashboard.ts
@@ -28,6 +28,8 @@ type Metrics = {
   crawlCount: number;
   uploadCount: number;
   pageCount: number;
+  crawlPageCount: number;
+  uploadPageCount: number;
   profileCount: number;
   workflowsRunningCount: number;
   maxConcurrentCrawls: number;
@@ -236,10 +238,31 @@
           pluralLabel: msg("Crawl Workflows Waiting"),
           iconProps: { name: "hourglass-split", color: "violet" },
         })}
+
         ${this.renderStat({
-          value: metrics.pageCount,
+          value: metrics.crawlPageCount,
           singleLabel: msg("Page Crawled"),
           pluralLabel: msg("Pages Crawled"),
+          iconProps: {
+            name: "file-richtext-fill",
+            color: this.colors.crawls,
+          },
+        })}
+        ${this.renderStat({
+          value: metrics.uploadPageCount,
+          singleLabel: msg("Page Uploaded"),
+          pluralLabel: msg("Pages Uploaded"),
+          iconProps: {
+            name: "file-richtext-fill",
+            color: this.colors.uploads,
+          },
+        })}
+        ${this.renderStat({
+          value: metrics.pageCount,
+          singleLabel: msg("Page Total"),
+          pluralLabel: msg("Pages Total"),
           iconProps: { name: "file-richtext-fill" },
         })}
diff --git a/frontend/src/types/crawler.ts b/frontend/src/types/crawler.ts
index 5ec383204c..95edaba426 100644
--- a/frontend/src/types/crawler.ts
+++ b/frontend/src/types/crawler.ts
@@ -164,6 +164,7 @@ type ArchivedItemBase = {
   activeQAStats: { done: number; found: number } | null;
   lastQAState: CrawlState | null;
   lastQAStarted: string | null;
+  pageCount?: number;
   filePageCount?: number;
   errorPageCount?: number;
 };
diff --git a/frontend/webpack.config.js b/frontend/webpack.config.js
index aba2ffacc3..999abe87aa 100644
--- a/frontend/webpack.config.js
+++ b/frontend/webpack.config.js
@@ -5,7 +5,6 @@
 const childProcess = require("child_process");
 const fs = require("fs");
 const path = require("path");
-
 const CopyPlugin = require("copy-webpack-plugin");
 const ForkTsCheckerWebpackPlugin = require("fork-ts-checker-webpack-plugin");
 const HtmlWebpackPlugin = require("html-webpack-plugin");
diff --git a/frontend/xliff/de.xlf b/frontend/xliff/de.xlf
index 12ec9a718c..4baf473ed0 100644
--- a/frontend/xliff/de.xlf
+++ b/frontend/xliff/de.xlf
@@ -1597,18 +1597,6 @@
       <trans-unit id="…">
         <source>Are you sure you want to delete ?</source>
       </trans-unit>
-      <trans-unit id="…">
-        <source>Start building your Collection.</source>
-      </trans-unit>
-      <trans-unit id="…">
-        <source>No Collections Found</source>
-      </trans-unit>
-      <trans-unit id="…">
-        <source>Organize your crawls into a Collection to easily replay them together.</source>
-      </trans-unit>
-      <trans-unit id="…">
-        <source>Your organization doesn't have any Collections, yet.</source>
-      </trans-unit>
       <trans-unit id="…">
         <source>Search by Name</source>
       </trans-unit>
@@ -1624,9 +1612,6 @@
       <trans-unit id="…">
         <source>Row Actions</source>
       </trans-unit>
-      <trans-unit id="…">
-        <source>No Collections Yet.</source>
-      </trans-unit>
       <trans-unit id="…">
         <source>Shareable Collection</source>
       </trans-unit>
@@ -3018,7 +3003,7 @@
-
+
@@ -3861,9 +3846,6 @@
       <trans-unit id="…">
         <source>Start typing a URL...</source>
       </trans-unit>
-      <trans-unit id="…">
-        <source>No matching pages found.</source>
-      </trans-unit>
      <trans-unit id="…">
         <source>ReplayWeb.Page default view</source>
       </trans-unit>
@@ -3873,9 +3855,6 @@
       <trans-unit id="…">
         <source>Show a single URL snapshot</source>
       </trans-unit>
-      <trans-unit id="…">
-        <source>Configure Replay Home</source>
-      </trans-unit>
       <trans-unit id="…">
         <source>Preview</source>
       </trans-unit>
@@ -3992,9 +3971,6 @@
       <trans-unit id="…">
         <source>Add items to select a home page</source>
       </trans-unit>
-      <trans-unit id="…">
-        <source>Configure Home</source>
-      </trans-unit>
       <trans-unit id="…">
         <source>Edit About Section</source>
       </trans-unit>
@@ -4166,6 +4142,78 @@
         <source>to</source>
         <note>Date range formatted to show full month name and year</note>
       </trans-unit>
+      <trans-unit id="…">
+        <source>Last Crawl</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>No Subscription</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Active Subscription</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Trial</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Trial Cancelled</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Trial Canceled</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Payment Failed</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Choose a snapshot</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Enter a page URL to choose snapshot</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Search for a page in this collection</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Page exists in collection</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Page not found in collection. Please check the URL and try again</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>No matching page found.</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Couldn't load preview. Try another snapshot</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Configure Replay View</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Choose a page snapshot</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Configure View</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Your org doesn’t have any collections yet.</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Collections let you easily organize, replay, and share multiple crawls.</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Page Uploaded</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Pages Uploaded</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Page Total</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Pages Total</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Copy Link to Profile</source>
+      </trans-unit>
diff --git a/frontend/xliff/es.xlf b/frontend/xliff/es.xlf
index e4f436593a..dec450d65c 100644
--- a/frontend/xliff/es.xlf
+++ b/frontend/xliff/es.xlf
@@ -1766,19 +1766,6 @@
         <source>Are you sure you want to delete ?</source>
         <target>¿Está seguro que desea eliminar ?</target>
       </trans-unit>
-      <trans-unit id="…">
-        <source>Start building your Collection.</source>
-      </trans-unit>
-      <trans-unit id="…">
-        <source>No Collections Found</source>
-        <target>No se han encontrado colecciones</target>
-      </trans-unit>
-      <trans-unit id="…">
-        <source>Organize your crawls into a Collection to easily replay them together.</source>
-      </trans-unit>
-      <trans-unit id="…">
-        <source>Your organization doesn't have any Collections, yet.</source>
-      </trans-unit>
       <trans-unit id="…">
         <source>Search by Name</source>
         <target>Buscar por nombre</target>
@@ -1797,10 +1784,6 @@
       <trans-unit id="…">
         <source>Row Actions</source>
       </trans-unit>
-      <trans-unit id="…">
-        <source>No Collections Yet.</source>
-        <target>Aún no hay colecciones.</target>
-      </trans-unit>
       <trans-unit id="…">
         <source>Shareable Collection</source>
         <target>Colección compartible</target>
@@ -3273,7 +3256,7 @@
-
+
@@ -4501,9 +4484,6 @@
       <trans-unit id="…">
         <source>Start typing a URL...</source>
       </trans-unit>
-      <trans-unit id="…">
-        <source>No matching pages found.</source>
-      </trans-unit>
       <trans-unit id="…">
         <source>ReplayWeb.Page default view</source>
       </trans-unit>
@@ -4513,9 +4493,6 @@
       <trans-unit id="…">
         <source>Show a single URL snapshot</source>
       </trans-unit>
-      <trans-unit id="…">
-        <source>Configure Replay Home</source>
-      </trans-unit>
       <trans-unit id="…">
         <source>Preview</source>
       </trans-unit>
@@ -4632,9 +4609,6 @@
       <trans-unit id="…">
         <source>Add items to select a home page</source>
       </trans-unit>
-      <trans-unit id="…">
-        <source>Configure Home</source>
-      </trans-unit>
       <trans-unit id="…">
         <source>Edit About Section</source>
       </trans-unit>
@@ -4806,6 +4780,78 @@
         <source>to</source>
         <note>Date range formatted to show full month name and year</note>
       </trans-unit>
+      <trans-unit id="…">
+        <source>Last Crawl</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>No Subscription</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Active Subscription</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Trial</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Trial Cancelled</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Trial Canceled</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Payment Failed</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Choose a snapshot</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Enter a page URL to choose snapshot</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Search for a page in this collection</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Page exists in collection</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Page not found in collection. Please check the URL and try again</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>No matching page found.</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Couldn't load preview. Try another snapshot</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Configure Replay View</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Choose a page snapshot</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Configure View</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Your org doesn’t have any collections yet.</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Collections let you easily organize, replay, and share multiple crawls.</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Page Uploaded</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Pages Uploaded</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Page Total</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Pages Total</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Copy Link to Profile</source>
+      </trans-unit>
diff --git a/frontend/xliff/fr.xlf b/frontend/xliff/fr.xlf
index be27a8019c..37b0cc394d 100644
--- a/frontend/xliff/fr.xlf
+++ b/frontend/xliff/fr.xlf
@@ -2125,22 +2125,6 @@
         <source>Are you sure you want to delete ?</source>
         <target>Êtes-vous certain de vouloir supprimer ?</target>
       </trans-unit>
-      <trans-unit id="…">
-        <source>Start building your Collection.</source>
-        <target>Démarrer la création de la collection.</target>
-      </trans-unit>
-      <trans-unit id="…">
-        <source>No Collections Found</source>
-        <target>Aucune collection trouvée</target>
-      </trans-unit>
-      <trans-unit id="…">
-        <source>Organize your crawls into a Collection to easily replay them together.</source>
-        <target>Organiser vos collectes en collection pour pouvoir les rejouer ensemble.</target>
-      </trans-unit>
-      <trans-unit id="…">
-        <source>Your organization doesn't have any Collections, yet.</source>
-        <target>Votre organisation ne détient aucune collection présentement.</target>
-      </trans-unit>
       <trans-unit id="…">
         <source>Search by Name</source>
         <target>Rechercher par nom</target>
@@ -2161,10 +2145,6 @@
       <trans-unit id="…">
         <source>Row Actions</source>
         <target>Rangée actions</target>
       </trans-unit>
-      <trans-unit id="…">
-        <source>No Collections Yet.</source>
-        <target>Aucune collection jusqu'à présent.</target>
-      </trans-unit>
       <trans-unit id="…">
         <source>Shareable Collection</source>
         <target>Collection partageable</target>
@@ -3782,7 +3762,7 @@
-
+
@@ -4625,9 +4605,6 @@
       <trans-unit id="…">
         <source>Start typing a URL...</source>
       </trans-unit>
-      <trans-unit id="…">
-        <source>No matching pages found.</source>
-      </trans-unit>
       <trans-unit id="…">
         <source>ReplayWeb.Page default view</source>
       </trans-unit>
@@ -4637,9 +4614,6 @@
       <trans-unit id="…">
         <source>Show a single URL snapshot</source>
       </trans-unit>
-      <trans-unit id="…">
-        <source>Configure Replay Home</source>
-      </trans-unit>
       <trans-unit id="…">
         <source>Preview</source>
       </trans-unit>
@@ -4756,9 +4730,6 @@
       <trans-unit id="…">
         <source>Add items to select a home page</source>
       </trans-unit>
-      <trans-unit id="…">
-        <source>Configure Home</source>
-      </trans-unit>
       <trans-unit id="…">
         <source>Edit About Section</source>
       </trans-unit>
@@ -4930,6 +4901,78 @@
         <source>to</source>
         <note>Date range formatted to show full month name and year</note>
       </trans-unit>
+      <trans-unit id="…">
+        <source>Last Crawl</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>No Subscription</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Active Subscription</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Trial</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Trial Cancelled</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Trial Canceled</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Payment Failed</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Choose a snapshot</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Enter a page URL to choose snapshot</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Search for a page in this collection</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Page exists in collection</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Page not found in collection. Please check the URL and try again</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>No matching page found.</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Couldn't load preview. Try another snapshot</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Configure Replay View</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Choose a page snapshot</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Configure View</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Your org doesn’t have any collections yet.</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Collections let you easily organize, replay, and share multiple crawls.</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Page Uploaded</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Pages Uploaded</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Page Total</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Pages Total</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Copy Link to Profile</source>
+      </trans-unit>
diff --git a/frontend/xliff/pt.xlf b/frontend/xliff/pt.xlf
index 6de5673f02..c365279e79 100644
--- a/frontend/xliff/pt.xlf
+++ b/frontend/xliff/pt.xlf
@@ -1597,18 +1597,6 @@
       <trans-unit id="…">
         <source>Are you sure you want to delete ?</source>
       </trans-unit>
-      <trans-unit id="…">
-        <source>Start building your Collection.</source>
-      </trans-unit>
-      <trans-unit id="…">
-        <source>No Collections Found</source>
-      </trans-unit>
-      <trans-unit id="…">
-        <source>Organize your crawls into a Collection to easily replay them together.</source>
-      </trans-unit>
-      <trans-unit id="…">
-        <source>Your organization doesn't have any Collections, yet.</source>
-      </trans-unit>
       <trans-unit id="…">
         <source>Search by Name</source>
       </trans-unit>
@@ -1624,9 +1612,6 @@
       <trans-unit id="…">
         <source>Row Actions</source>
       </trans-unit>
-      <trans-unit id="…">
-        <source>No Collections Yet.</source>
-      </trans-unit>
       <trans-unit id="…">
         <source>Shareable Collection</source>
       </trans-unit>
@@ -3018,7 +3003,7 @@
-
+
@@ -3861,9 +3846,6 @@
       <trans-unit id="…">
         <source>Start typing a URL...</source>
       </trans-unit>
-      <trans-unit id="…">
-        <source>No matching pages found.</source>
-      </trans-unit>
       <trans-unit id="…">
         <source>ReplayWeb.Page default view</source>
       </trans-unit>
@@ -3873,9 +3855,6 @@
       <trans-unit id="…">
         <source>Show a single URL snapshot</source>
       </trans-unit>
-      <trans-unit id="…">
-        <source>Configure Replay Home</source>
-      </trans-unit>
       <trans-unit id="…">
         <source>Preview</source>
       </trans-unit>
@@ -3992,9 +3971,6 @@
       <trans-unit id="…">
         <source>Add items to select a home page</source>
       </trans-unit>
-      <trans-unit id="…">
-        <source>Configure Home</source>
-      </trans-unit>
       <trans-unit id="…">
         <source>Edit About Section</source>
       </trans-unit>
@@ -4166,6 +4142,78 @@
         <source>to</source>
         <note>Date range formatted to show full month name and year</note>
       </trans-unit>
+      <trans-unit id="…">
+        <source>Last Crawl</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>No Subscription</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Active Subscription</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Trial</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Trial Cancelled</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Trial Canceled</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Payment Failed</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Choose a snapshot</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Enter a page URL to choose snapshot</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Search for a page in this collection</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Page exists in collection</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Page not found in collection. Please check the URL and try again</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>No matching page found.</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Couldn't load preview. Try another snapshot</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Configure Replay View</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Choose a page snapshot</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Configure View</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Your org doesn’t have any collections yet.</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Collections let you easily organize, replay, and share multiple crawls.</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Page Uploaded</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Pages Uploaded</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Page Total</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Pages Total</source>
+      </trans-unit>
+      <trans-unit id="…">
+        <source>Copy Link to Profile</source>
+      </trans-unit>
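For completeness, the dashboard's three counters can be cross-checked end to end after deploying. A sketch, assuming the OrgMetrics route is mounted at `/orgs/{oid}/metrics` (as `get_org_metrics` suggests) and reusing the test suite's conventions for the prefix and auth headers:

```python
# Sketch: sanity-check the new per-type metrics against the overall total.
import requests


def check_org_page_metrics(api_prefix: str, org_id: str, headers: dict) -> None:
    # Route path is an assumption based on get_org_metrics / OrgMetrics
    r = requests.get(f"{api_prefix}/orgs/{org_id}/metrics", headers=headers)
    r.raise_for_status()
    data = r.json()

    # Crawls and uploads are the only archived item types, so the two new
    # per-type counters should sum to the overall pageCount.
    assert data["pageCount"] == data["crawlPageCount"] + data["uploadPageCount"]
```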