Skip to content

Commit 634bd69

Browse files
✨ Add stats on conversations (#828)
* ✨ Add stats on conversations * 🩹 Fix conversation stats compute * ⚡️ Slightly better index * 🔊 * 🩹 Some fixes * 🐛 Fix aggregation query * 🐛 Fix $merge stage of query * 🐛 Fix query on sessionId * ✨ Compute weekly/monthly unique users * 🩹 Final tweaks * ⚡️ Split aggregations for better perf No need to recompute DAUs / WAUs and so on * ♻️ Deprecate PARQUET_EXPORT_SECRET & fix sec vuln * Add ADMIN_API_SECRET to CD action --------- Co-authored-by: Nathan Sarrazin <sarrazin.nathan@gmail.com>
1 parent 6244c37 commit 634bd69

File tree

9 files changed

+316
-22
lines changed

9 files changed

+316
-22
lines changed

.env

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,9 @@ PUBLIC_ANNOUNCEMENT_BANNERS=`[
109109

110110
PARQUET_EXPORT_DATASET=
111111
PARQUET_EXPORT_HF_TOKEN=
112-
PARQUET_EXPORT_SECRET=
112+
ADMIN_API_SECRET=# secret used to authenticate admin API calls, like computing usage stats or exporting parquet data
113+
114+
PARQUET_EXPORT_SECRET=#DEPRECATED, use ADMIN_API_SECRET instead
113115

114116
RATE_LIMIT= # requests per minute
115117
MESSAGES_BEFORE_LOGIN=# how many messages a user can send in a conversation before having to login. set to 0 to force login right away

.github/workflows/deploy-release.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ jobs:
2626
MONGODB_URL: ${{ secrets.MONGODB_URL }}
2727
HF_DEPLOYMENT_TOKEN: ${{ secrets.HF_DEPLOYMENT_TOKEN }}
2828
WEBHOOK_URL_REPORT_ASSISTANT: ${{ secrets.WEBHOOK_URL_REPORT_ASSISTANT }}
29+
ADMIN_API_SECRET: ${{ secrets.ADMIN_API_SECRET }}
2930
run: npm run updateProdEnv
3031
sync-to-hub:
3132
runs-on: ubuntu-latest

package-lock.json

Lines changed: 26 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

scripts/updateProdEnv.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ const OPENID_CONFIG = process.env.OPENID_CONFIG;
77
const MONGODB_URL = process.env.MONGODB_URL;
88
const HF_TOKEN = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN; // token used for API requests in prod
99
const WEBHOOK_URL_REPORT_ASSISTANT = process.env.WEBHOOK_URL_REPORT_ASSISTANT; // slack webhook url used to get "report assistant" events
10+
const ADMIN_API_SECRET = process.env.ADMIN_API_SECRET;
1011

1112
// Read the content of the file .env.template
1213
const PUBLIC_CONFIG = fs.readFileSync(".env.template", "utf8");
@@ -18,6 +19,7 @@ OPENID_CONFIG=${OPENID_CONFIG}
1819
SERPER_API_KEY=${SERPER_API_KEY}
1920
HF_TOKEN=${HF_TOKEN}
2021
WEBHOOK_URL_REPORT_ASSISTANT=${WEBHOOK_URL_REPORT_ASSISTANT}
22+
ADMIN_API_SECRET=${ADMIN_API_SECRET}
2123
`;
2224

2325
// Make an HTTP POST request to add the space secrets

src/hooks.server.ts

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
1-
import { COOKIE_NAME, EXPOSE_API, MESSAGES_BEFORE_LOGIN } from "$env/static/private";
1+
import {
2+
ADMIN_API_SECRET,
3+
COOKIE_NAME,
4+
EXPOSE_API,
5+
MESSAGES_BEFORE_LOGIN,
6+
PARQUET_EXPORT_SECRET,
7+
} from "$env/static/private";
28
import type { Handle } from "@sveltejs/kit";
39
import {
410
PUBLIC_GOOGLE_ANALYTICS_ID,
@@ -29,6 +35,18 @@ export const handle: Handle = async ({ event, resolve }) => {
2935
});
3036
}
3137

38+
if (event.url.pathname.startsWith(`${base}/admin/`) || event.url.pathname === `${base}/admin`) {
39+
const ADMIN_SECRET = ADMIN_API_SECRET || PARQUET_EXPORT_SECRET;
40+
41+
if (!ADMIN_SECRET) {
42+
return errorResponse(500, "Admin API is not configured");
43+
}
44+
45+
if (event.request.headers.get("Authorization") !== `Bearer ${ADMIN_SECRET}`) {
46+
return errorResponse(401, "Unauthorized");
47+
}
48+
}
49+
3250
const token = event.cookies.get(COOKIE_NAME);
3351

3452
let secretSessionId: string;

src/lib/server/database.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import type { MessageEvent } from "$lib/types/MessageEvent";
99
import type { Session } from "$lib/types/Session";
1010
import type { Assistant } from "$lib/types/Assistant";
1111
import type { Report } from "$lib/types/Report";
12+
import type { ConversationStats } from "$lib/types/ConversationStats";
1213

1314
if (!MONGODB_URL) {
1415
throw new Error(
@@ -24,7 +25,10 @@ export const connectPromise = client.connect().catch(console.error);
2425

2526
const db = client.db(MONGODB_DB_NAME + (import.meta.env.MODE === "test" ? "-test" : ""));
2627

28+
export const CONVERSATION_STATS_COLLECTION = "conversations.stats";
29+
2730
const conversations = db.collection<Conversation>("conversations");
31+
const conversationStats = db.collection<ConversationStats>(CONVERSATION_STATS_COLLECTION);
2832
const assistants = db.collection<Assistant>("assistants");
2933
const reports = db.collection<Report>("reports");
3034
const sharedConversations = db.collection<SharedConversation>("sharedConversations");
@@ -38,6 +42,7 @@ const bucket = new GridFSBucket(db, { bucketName: "files" });
3842
export { client, db };
3943
export const collections = {
4044
conversations,
45+
conversationStats,
4146
assistants,
4247
reports,
4348
sharedConversations,
@@ -68,6 +73,33 @@ client.on("open", () => {
6873
{ partialFilterExpression: { userId: { $exists: true } } }
6974
)
7075
.catch(console.error);
76+
// To do stats on conversations
77+
conversations.createIndex({ updatedAt: 1 }).catch(console.error);
78+
// Not strictly necessary, could use _id, but more convenient. Also for stats
79+
conversations.createIndex({ createdAt: 1 }).catch(console.error);
80+
// To do stats on conversation messages
81+
conversations.createIndex({ "messages.createdAt": 1 }, { sparse: true }).catch(console.error);
82+
// Unique index for stats
83+
conversationStats
84+
.createIndex(
85+
{
86+
type: 1,
87+
"date.field": 1,
88+
"date.span": 1,
89+
"date.at": 1,
90+
distinct: 1,
91+
},
92+
{ unique: true }
93+
)
94+
.catch(console.error);
95+
// Allow easy check of last computed stat for given type/dateField
96+
conversationStats
97+
.createIndex({
98+
type: 1,
99+
"date.field": 1,
100+
"date.at": 1,
101+
})
102+
.catch(console.error);
71103
abortedGenerations.createIndex({ updatedAt: 1 }, { expireAfterSeconds: 30 }).catch(console.error);
72104
abortedGenerations.createIndex({ conversationId: 1 }, { unique: true }).catch(console.error);
73105
sharedConversations.createIndex({ hash: 1 }, { unique: true }).catch(console.error);

src/lib/types/ConversationStats.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import type { Timestamps } from "./Timestamps";
2+
3+
export interface ConversationStats extends Timestamps {
4+
date: {
5+
at: Date;
6+
span: "day" | "week" | "month";
7+
field: "updatedAt" | "createdAt";
8+
};
9+
type: "conversation" | "message";
10+
/** _id => number of conversations/messages in the date span (day, week or month) */
11+
distinct: "sessionId" | "userId" | "userOrSessionId" | "_id";
12+
count: number;
13+
}

src/routes/admin/export/+server.ts

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
1-
import {
2-
PARQUET_EXPORT_DATASET,
3-
PARQUET_EXPORT_HF_TOKEN,
4-
PARQUET_EXPORT_SECRET,
5-
} from "$env/static/private";
1+
import { PARQUET_EXPORT_DATASET, PARQUET_EXPORT_HF_TOKEN } from "$env/static/private";
62
import { collections } from "$lib/server/database";
73
import type { Message } from "$lib/types/Message";
84
import { error } from "@sveltejs/kit";
@@ -13,17 +9,13 @@ import parquet from "parquetjs";
139
import { z } from "zod";
1410

1511
// Trigger like this:
16-
// curl -X POST "http://localhost:5173/chat/admin/export" -H "Authorization: Bearer <PARQUET_EXPORT_SECRET>" -H "Content-Type: application/json" -d '{"model": "OpenAssistant/oasst-sft-6-llama-30b-xor"}'
12+
// curl -X POST "http://localhost:5173/chat/admin/export" -H "Authorization: Bearer <ADMIN_API_SECRET>" -H "Content-Type: application/json" -d '{"model": "OpenAssistant/oasst-sft-6-llama-30b-xor"}'
1713

1814
export async function POST({ request }) {
19-
if (!PARQUET_EXPORT_SECRET || !PARQUET_EXPORT_DATASET || !PARQUET_EXPORT_HF_TOKEN) {
15+
if (!PARQUET_EXPORT_DATASET || !PARQUET_EXPORT_HF_TOKEN) {
2016
throw error(500, "Parquet export is not configured.");
2117
}
2218

23-
if (request.headers.get("Authorization") !== `Bearer ${PARQUET_EXPORT_SECRET}`) {
24-
throw error(403);
25-
}
26-
2719
const { model } = z
2820
.object({
2921
model: z.string(),

0 commit comments

Comments
 (0)