Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
9e50bff
feat: async loading benchmark data
yaoyifan-yyf Sep 25, 2025
bc14084
opt: code format
yaoyifan-yyf Sep 25, 2025
4de41d1
feat: benchmark post_dispatch service
yaoyifan-yyf Sep 26, 2025
92c251d
opt: async load benchmark data on init
yaoyifan-yyf Sep 29, 2025
6293629
feat(benchmark): execute benchmark task
Oct 8, 2025
4875df1
Merge remote-tracking branch 'origin/feat_dataset_benchmark' into fea…
Oct 8, 2025
57e8bab
feat: query benchmark dataset api
yaoyifan-yyf Oct 9, 2025
c923e64
feat: add benchmark result query api
yaoyifan-yyf Oct 9, 2025
61e350f
Merge remote-tracking branch 'origin/feat_dataset_benchmark' into fea…
chenliang15405 Oct 10, 2025
0330dd3
chore: resolve confict
chenliang15405 Oct 10, 2025
f6351ae
feat(benchmark): optimize benchmark task and write evaluate result ro…
chenliang15405 Oct 10, 2025
81e2a1c
fix: add table mapping
yaoyifan-yyf Oct 11, 2025
8217408
feat(benchmark): create benchmark task
chenliang15405 Oct 11, 2025
5b27b6e
Merge remote-tracking branch 'origin/feat_dataset_benchmark' into fea…
chenliang15405 Oct 11, 2025
33a4e04
fix(benchmark): fix post dispatch param
chenliang15405 Oct 13, 2025
9b81a10
opt: compare result write to excel not db
yaoyifan-yyf Oct 13, 2025
87f11b5
feat(benchmark): multi model post process
chenliang15405 Oct 13, 2025
8d8d455
opt: multi model compare write result
yaoyifan-yyf Oct 13, 2025
c14b68d
feat(benchmark): query benchmark task list
chenliang15405 Oct 13, 2025
ab96ebb
opt: add standard result col to output excel
yaoyifan-yyf Oct 13, 2025
838bc35
Merge remote-tracking branch 'origin/feat_dataset_benchmark' into fea…
yaoyifan-yyf Oct 13, 2025
5df8d94
feat(benchmark): benchmark result file download
chenliang15405 Oct 14, 2025
92243cb
fix(benchmark): parse multi standard anwser
chenliang15405 Oct 15, 2025
65fd87b
fix(benchmark): update standard anwser result field
chenliang15405 Oct 15, 2025
24064d7
fix: ant_icube table mapping correct
yaoyifan-yyf Oct 15, 2025
05b1fb6
fix: col name sanitize modification
yaoyifan-yyf Oct 15, 2025
ff34064
fix: benchmark compare summary write to db
yaoyifan-yyf Oct 16, 2025
41da1b3
fix: benchmark compare summary write to db
yaoyifan-yyf Oct 16, 2025
fb83e30
opt: benchmark result api output adjust
yaoyifan-yyf Oct 16, 2025
ba80df5
opt: api name adjuest
yaoyifan-yyf Oct 16, 2025
2a823ee
feat: support multi benchmark datasets
yaoyifan-yyf Oct 16, 2025
8e025c8
feat(benchmark): update benchmark task status & benchmark task info list
chenliang15405 Oct 16, 2025
8f0b2c3
fix: fix page list request
chenliang15405 Oct 16, 2025
39ac73d
fix(benchmark): execute benchmark with model param
chenliang15405 Oct 16, 2025
cda5b74
fix(benchmark): process sql query timeout
chenliang15405 Oct 16, 2025
ed59a71
fix(benchmark): fix sql query db timeout for blocking thread
chenliang15405 Oct 17, 2025
b410e85
fix(benchmark): remove useless code
chenliang15405 Oct 17, 2025
d2e92e9
feat: add datasets evaluation page (#2908)
iterminatorheart Oct 17, 2025
19bbc6f
feat: evaluation dataset info pages (#2911)
iterminatorheart Oct 19, 2025
9de988f
fix(benchmark): custom model temperature and max token
chenliang15405 Oct 19, 2025
cfdb1db
feat: multi language for models evaluation (#2912)
iterminatorheart Oct 20, 2025
e69a4e5
fix: table error fix
yaoyifan-yyf Oct 20, 2025
3f0d63b
Merge branch 'feat_dataset_benchmark' of github.com:eosphoros-ai/DB-G…
yaoyifan-yyf Oct 20, 2025
48a3798
fix: table error fix
yaoyifan-yyf Oct 20, 2025
48aa204
feat(benchmark): show task name
chenliang15405 Oct 20, 2025
9ada90b
chore: web build file
chenliang15405 Oct 20, 2025
2caa317
fix(benchmark): fix download result url
chenliang15405 Oct 21, 2025
2cd22a2
chore: update ignore
chenliang15405 Oct 21, 2025
8c1bfed
Merge remote-tracking branch 'origin/feat_dataset_benchmark' into fea…
chenliang15405 Oct 21, 2025
6904d02
fix: answer adjustment
yaoyifan-yyf Oct 21, 2025
3beb281
Merge branch 'feat_dataset_benchmark' of github.com:eosphoros-ai/DB-G…
yaoyifan-yyf Oct 21, 2025
59e39b2
chore: update falcon repo url
chenliang15405 Oct 21, 2025
61bbba3
docs: add dataset benchmark docs
chenliang15405 Oct 21, 2025
81cee77
docs: update image
chenliang15405 Oct 22, 2025
f3263c8
docs: update benchmark doc
chenliang15405 Oct 22, 2025
f0de206
docs: update benchmark doc
chenliang15405 Oct 22, 2025
dc2e994
docs: add v0.7.4 upgrade schema sql
chenliang15405 Oct 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ logswebserver.log.*
.plugin_env
/pilot/meta_data/alembic/versions/*
/pilot/meta_data/*.db
/pilot/benchmark_meta_data/*.db
/pilot/benchmark_meta_data/result/*
# Ignore for now
thirdparty

Expand Down
41 changes: 41 additions & 0 deletions assets/schema/dbgpt.sql
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,47 @@ CREATE TABLE `dbgpt_serve_dbgpts_hub` (
UNIQUE KEY `name` (`name`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;

-- dbgpt.evaluate_manage definition
-- One row per evaluation task; `evaluate_code` is the unique business key.
-- IF NOT EXISTS makes the statement a no-op (with a warning) when the table is
-- already present, so this schema file can be re-applied to an existing
-- database. Note that MySQL does not compare the existing table's structure.
-- NOTE(review): this table names its creation column `gmt_create`, while
-- `benchmark_summary` uses `gmt_created`; left unchanged because renaming would
-- presumably break existing readers -- confirm before aligning.
CREATE TABLE IF NOT EXISTS `evaluate_manage` (
  `id` int NOT NULL AUTO_INCREMENT COMMENT 'autoincrement id',
  `evaluate_code` varchar(256) NOT NULL COMMENT 'evaluate unique code',
  `scene_key` varchar(100) DEFAULT NULL COMMENT 'scene key',
  `scene_value` varchar(256) DEFAULT NULL COMMENT 'scene value',
  `context` text DEFAULT NULL COMMENT 'context',
  `evaluate_metrics` varchar(599) DEFAULT NULL COMMENT 'evaluate metrics',
  `datasets_name` varchar(256) DEFAULT NULL COMMENT 'datasets name',
  `datasets` text DEFAULT NULL COMMENT 'datasets content',
  `storage_type` varchar(256) DEFAULT NULL COMMENT 'result storage type',
  `parallel_num` int DEFAULT NULL COMMENT 'execute parallel thread number',
  `state` VARCHAR(100) DEFAULT NULL COMMENT 'execute state',
  `result` text DEFAULT NULL COMMENT 'evaluate result',
  `log_info` text DEFAULT NULL COMMENT 'evaluate error log',
  `average_score` text DEFAULT NULL COMMENT 'metrics average score',
  `user_id` varchar(100) DEFAULT NULL COMMENT 'user id',
  `user_name` varchar(128) DEFAULT NULL COMMENT 'user name',
  `sys_code` varchar(128) DEFAULT NULL COMMENT 'system code',
  -- Defaults to the insert time when the writer supplies no value.
  `gmt_create` TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'benchmark create time',
  -- Refreshed by the server whenever the row is updated.
  `gmt_modified` TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'benchmark finish time',
  PRIMARY KEY (`id`),
  -- Enforces one record per evaluate code.
  UNIQUE KEY `uk_evaluate` (`evaluate_code`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;

-- dbgpt.benchmark_summary definition
-- Per-round benchmark counters (right / wrong / failed / exception) together
-- with the llm code, evaluate code and output file path they belong to.
-- IF NOT EXISTS makes the statement a no-op (with a warning) when the table is
-- already present, so this schema file can be re-applied to an existing
-- database. Note that MySQL does not compare the existing table's structure.
CREATE TABLE IF NOT EXISTS `benchmark_summary` (
  `id` int NOT NULL AUTO_INCREMENT COMMENT 'autoincrement id',
  `round_id` int NOT NULL COMMENT 'task round id',
  `output_path` varchar(512) NULL COMMENT 'output file path',
  -- RIGHT is a reserved word in MySQL; the backticks are required.
  `right` int DEFAULT NULL COMMENT 'right number',
  `wrong` int DEFAULT NULL COMMENT 'wrong number',
  `failed` int DEFAULT NULL COMMENT 'failed number',
  `exception` int DEFAULT NULL COMMENT 'exception number',
  `llm_code` varchar(256) DEFAULT NULL COMMENT 'benchmark llm code',
  `evaluate_code` varchar(256) DEFAULT NULL COMMENT 'benchmark evaluate code',
  -- Defaults to the insert time when the writer supplies no value.
  `gmt_created` TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'benchmark create time',
  -- Refreshed by the server whenever the row is updated.
  `gmt_modified` TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'benchmark finish time',
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;


-- Create the EXAMPLE_1 database unless it already exists.
CREATE DATABASE IF NOT EXISTS EXAMPLE_1;
Expand Down
43 changes: 43 additions & 0 deletions assets/schema/upgrade/v0_7_4/upgrade_to_v0.7.4.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
-- Upgrade script: schema changes from 0.7.1 to 0.7.4.
-- Every statement below runs against the `dbgpt` database selected here.
USE dbgpt;

-- evaluate_manage: stores the dataset benchmark task records.
-- One row per evaluation task; `evaluate_code` is the unique business key.
-- IF NOT EXISTS keeps the migration idempotent: re-running it, or running it
-- against a database that already has the table, is a no-op (with a warning)
-- instead of aborting with "table already exists". MySQL does not compare the
-- existing table's structure.
-- NOTE(review): this table names its creation column `gmt_create`, while
-- `benchmark_summary` uses `gmt_created`; left unchanged because renaming would
-- presumably break existing readers -- confirm before aligning.
CREATE TABLE IF NOT EXISTS `evaluate_manage` (
  `id` int NOT NULL AUTO_INCREMENT COMMENT 'autoincrement id',
  `evaluate_code` varchar(256) NOT NULL COMMENT 'evaluate unique code',
  `scene_key` varchar(100) DEFAULT NULL COMMENT 'scene key',
  `scene_value` varchar(256) DEFAULT NULL COMMENT 'scene value',
  `context` text DEFAULT NULL COMMENT 'context',
  `evaluate_metrics` varchar(599) DEFAULT NULL COMMENT 'evaluate metrics',
  `datasets_name` varchar(256) DEFAULT NULL COMMENT 'datasets name',
  `datasets` text DEFAULT NULL COMMENT 'datasets content',
  `storage_type` varchar(256) DEFAULT NULL COMMENT 'result storage type',
  `parallel_num` int DEFAULT NULL COMMENT 'execute parallel thread number',
  `state` VARCHAR(100) DEFAULT NULL COMMENT 'execute state',
  `result` text DEFAULT NULL COMMENT 'evaluate result',
  `log_info` text DEFAULT NULL COMMENT 'evaluate error log',
  `average_score` text DEFAULT NULL COMMENT 'metrics average score',
  `user_id` varchar(100) DEFAULT NULL COMMENT 'user id',
  `user_name` varchar(128) DEFAULT NULL COMMENT 'user name',
  `sys_code` varchar(128) DEFAULT NULL COMMENT 'system code',
  -- Defaults to the insert time when the writer supplies no value.
  `gmt_create` TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'benchmark create time',
  -- Refreshed by the server whenever the row is updated.
  `gmt_modified` TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'benchmark finish time',
  PRIMARY KEY (`id`),
  -- Enforces one record per evaluate code.
  UNIQUE KEY `uk_evaluate` (`evaluate_code`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;

-- benchmark_summary: stores the dataset benchmark summary metric results.
-- Per-round counters (right / wrong / failed / exception) together with the
-- llm code, evaluate code and output file path they belong to.
-- IF NOT EXISTS keeps the migration idempotent: re-running it, or running it
-- against a database that already has the table, is a no-op (with a warning)
-- instead of aborting with "table already exists". MySQL does not compare the
-- existing table's structure.
CREATE TABLE IF NOT EXISTS `benchmark_summary` (
  `id` int NOT NULL AUTO_INCREMENT COMMENT 'autoincrement id',
  `round_id` int NOT NULL COMMENT 'task round id',
  `output_path` varchar(512) NULL COMMENT 'output file path',
  -- RIGHT is a reserved word in MySQL; the backticks are required.
  `right` int DEFAULT NULL COMMENT 'right number',
  `wrong` int DEFAULT NULL COMMENT 'wrong number',
  `failed` int DEFAULT NULL COMMENT 'failed number',
  `exception` int DEFAULT NULL COMMENT 'exception number',
  `llm_code` varchar(256) DEFAULT NULL COMMENT 'benchmark llm code',
  `evaluate_code` varchar(256) DEFAULT NULL COMMENT 'benchmark evaluate code',
  -- Defaults to the insert time when the writer supplies no value.
  `gmt_created` TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'benchmark create time',
  -- Refreshed by the server whenever the row is updated.
  `gmt_modified` TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'benchmark finish time',
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
Loading