@@ -1135,3 +1135,94 @@ async def chat_about_transcript(
11351135 )
11361136
11371137 return response
1138+
async def get_user_statistics(self) -> Dict[str, Any]:
    """
    Aggregate per-user usage statistics from the cost and event collections.

    Reads every document in ``self.db.costs`` and every document in
    ``self.db.events`` whose ``event_type`` is ``"file_submission"``, then
    folds them into one record per user id.

    Returns:
        Dictionary with two keys:

        * ``"user_stats"`` maps each user id to a record holding
          ``total_cost``, ``total_requests``, ``total_minutes``,
          ``operations`` and ``models`` breakdowns (each entry is
          ``{"cost": float, "count": int}``), and the ``first_activity`` /
          ``last_activity`` timestamps seen on that user's cost entries.
        * ``"summary"`` holds ``total_users``, ``total_cost``,
          ``total_requests`` and ``total_minutes`` across all users.

    Note:
        Only users with at least one cost entry appear in the result;
        file-submission events for any other user id are not counted.
    """
    # length=None fetches every matching document. A fixed cap would silently
    # drop entries past the limit and under-report every total below.
    costs = await self.db.costs.find({}).to_list(length=None)
    events = await self.db.events.find(
        {"event_type": "file_submission"}
    ).to_list(length=None)

    user_stats: Dict[str, Any] = {}

    for cost_entry in costs:
        user_id = cost_entry.get("user_id")
        if not user_id:
            # Entries that cannot be attributed to a user are ignored.
            continue

        stats = user_stats.setdefault(
            user_id,
            {
                "total_cost": 0.0,
                "total_requests": 0,
                "total_minutes": 0.0,
                "operations": {},
                "models": {},
                "first_activity": None,
                "last_activity": None,
            },
        )

        # A missing or explicit-None cost contributes nothing.
        raw_cost = cost_entry.get("cost")
        cost = float(raw_cost) if raw_cost is not None else 0.0
        stats["total_cost"] += cost

        # `or "unknown"` also covers an explicit None/empty value, so the
        # breakdown dictionaries never end up with a None key.
        operation = cost_entry.get("operation") or "unknown"
        model = cost_entry.get("model") or "unknown"
        for breakdown, key in (
            (stats["operations"], operation),
            (stats["models"], model),
        ):
            bucket = breakdown.setdefault(key, {"cost": 0.0, "count": 0})
            bucket["cost"] += cost
            bucket["count"] += 1

        # Track the earliest and latest timestamp seen for this user.
        timestamp = cost_entry.get("timestamp")
        if timestamp:
            first_seen = stats["first_activity"]
            last_seen = stats["last_activity"]
            if first_seen is None or timestamp < first_seen:
                stats["first_activity"] = timestamp
            if last_seen is None or timestamp > last_seen:
                stats["last_activity"] = timestamp

        # Minutes are only accumulated for transcription entries; prefer
        # estimated_minutes and fall back to audio_duration_seconds / 60.
        usage = cost_entry.get("usage", {})
        if operation == "transcription" and isinstance(usage, dict):
            estimated_minutes = usage.get("estimated_minutes", 0)
            audio_seconds = usage.get("audio_duration_seconds")
            if estimated_minutes:
                stats["total_minutes"] += float(estimated_minutes)
            elif audio_seconds:
                stats["total_minutes"] += float(audio_seconds) / 60

    # Each file_submission event counts as one request, but only for users
    # already known from the cost data.
    for event in events:
        user_id = event.get("user_id")
        if user_id and user_id in user_stats:
            user_stats[user_id]["total_requests"] += 1

    all_stats = list(user_stats.values())
    return {
        "user_stats": user_stats,
        "summary": {
            "total_users": len(user_stats),
            # Start the float sums at 0.0 so an empty result is still a float.
            "total_cost": sum((s["total_cost"] for s in all_stats), 0.0),
            "total_requests": sum(s["total_requests"] for s in all_stats),
            "total_minutes": sum((s["total_minutes"] for s in all_stats), 0.0),
        },
    }
0 commit comments