
Commit bd859d1

working dashboard for viewing test results and comparing multiple tests
1 parent 861fb47 commit bd859d1

13 files changed, +863 -0 lines changed


sources/dashboard/app.py

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
import dash
import dash_bootstrap_components as dbc
from components.layout import create_layout
from data.data_loader import load_metadata
from callbacks import register_callbacks

# Initialize the app
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.CYBORG])

# Load metadata
metadata_list = load_metadata()

# Create layout
app.layout = create_layout(metadata_list)

# Register callbacks
register_callbacks(app, metadata_list)

if __name__ == '__main__':
    app.run_server(debug=True, port=8270)
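None of the diffs shown here include components/layout.py or data/data_loader.py, but the file-selection callback further down indexes metadata_list by row and reads its 'file_path' and 'uuid' keys. A minimal sketch of what load_metadata() presumably returns, assuming results are plain JSON files in a directory (the directory name and the glob-based scan are assumptions, not part of this commit):

# Hypothetical sketch of the metadata shape app.py expects from load_metadata().
# The 'results' directory and the glob-based scan are assumptions; only the
# 'file_path' and 'uuid' keys are implied by the callbacks in this commit.
import glob
import json

def load_metadata(results_dir='results'):
    metadata_list = []
    for path in sorted(glob.glob(f'{results_dir}/*.json')):
        with open(path, 'r') as f:
            data = json.load(f)
        metadata_list.append({
            'file_path': path,               # read by the file-selection callback
            'uuid': data.get('uuid', path),  # shown in headings and graph legends
        })
    return metadata_list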
Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
from .file_selection import register_file_selection_callbacks
from .graph_updates import register_graph_callbacks
from .metrics_updates import register_metrics_callbacks
from .table_updates import register_table_callbacks

def register_callbacks(app, metadata_list):
    """Register all callbacks with the app."""
    register_file_selection_callbacks(app, metadata_list)
    register_graph_callbacks(app)
    register_metrics_callbacks(app)
    register_table_callbacks(app)
Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
import json
import os
from dash.dependencies import Input, Output
from dash import html, dcc

def register_file_selection_callbacks(app, metadata_list):
    @app.callback(
        [Output('selected-file-path', 'data'),
         Output('selected-uuid', 'data'),
         Output('file-selected-message', 'children')],
        Input('metadata-table', 'selected_rows'),
        prevent_initial_call=True
    )
    def select_file(selected_rows):
        if selected_rows is None or len(selected_rows) == 0:
            return None, None, 'No file selected.'
        selected_row = selected_rows[0]
        selected_file = metadata_list[selected_row]['file_path']
        selected_uuid = metadata_list[selected_row]['uuid']
        message = f"Selected File: {os.path.basename(selected_file)}"
        return selected_file, selected_uuid, message

    @app.callback(
        [Output('main-question-dropdown', 'options'),
         Output('unit-tests-store', 'data'),
         Output('test-results-heading', 'children'),
         Output('question-answer-container', 'style')],
        [Input('selected-file-path', 'data'),
         Input('selected-uuid', 'data')]
    )
    def update_main_questions(selected_file, selected_uuid):
        if selected_file is None:
            return [], None, '', {'display': 'none'}
        try:
            with open(selected_file, 'r') as f:
                data = json.load(f)
            unit_tests = data.get('unit_tests', [])
            options = [{'label': test['question'], 'value': idx}
                       for idx, test in enumerate(unit_tests)]
            heading = f"Test Results: {selected_uuid}"
            return options, unit_tests, heading, {'marginTop': '20px', 'display': 'block'}
        except Exception as e:
            print(f"Error loading {selected_file}: {e}")
            return [], None, '', {'display': 'none'}
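Judging by the imports registered in the package __init__ above, this module is presumably callbacks/file_selection.py. update_main_questions expects each selected result file to be JSON with a top-level 'unit_tests' list whose entries carry at least a 'question' key. A hedged sketch of the minimum structure that satisfies this callback (the 'answer' field and the concrete strings are illustrative assumptions):

# Minimal example of a result file update_main_questions can consume.
# Only 'unit_tests' and each entry's 'question' key are required by this
# callback; everything else here is an illustrative assumption.
example_result = {
    'uuid': 'a1b2c3d4-0000-0000-0000-000000000000',
    'unit_tests': [
        {'question': 'What is the refund policy?', 'answer': 'Refunds within 30 days.'},
        {'question': 'How do I reset my password?', 'answer': 'Use the reset link.'},
    ],
}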
Lines changed: 133 additions & 0 deletions
@@ -0,0 +1,133 @@
import json
from dash.dependencies import Input, Output, State
from dash import html, dcc
import plotly.graph_objects as go

def register_graph_callbacks(app):
    @app.callback(
        [Output('file-accuracy-graph', 'children'),
         Output('question-accuracy-graph', 'children')],
        [Input('metadata-table', 'data')]
    )
    def update_comparison_graphs(data):
        if not data:
            return "Please select files to compare.", "Please select files to compare."

        try:
            # Get selected files for comparison (checked boxes)
            selected_files = [row for row in data if row.get('compare') == '☒']

            if not selected_files:
                return "Please select files to compare.", "Please select files to compare."

            file_accuracies = process_file_accuracies(selected_files)
            question_accuracies = process_question_accuracies(selected_files)

            file_accuracy_fig = create_file_accuracy_figure(file_accuracies)
            question_accuracy_fig = create_question_accuracy_figure(question_accuracies)

            return [
                dcc.Graph(figure=file_accuracy_fig),
                dcc.Graph(figure=question_accuracy_fig)
            ]

        except Exception as e:
            print(f"Error updating comparison graphs: {e}")
            return "Error loading comparison.", "Error loading comparison."

def process_file_accuracies(selected_files):
    file_accuracies = []
    for idx, row in enumerate(selected_files):
        with open(row['file_path'], 'r') as f:
            data = json.load(f)
        for metric in data.get('metrics', []):
            if metric.get('metric_name') == 'Accuracy':
                file_accuracies.append({
                    'File': row['uuid'][:8],
                    'Accuracy': metric.get('metric_result', 0) * 100,
                    'Index': idx
                })
    return sorted(file_accuracies, key=lambda x: x['File'])

def process_question_accuracies(selected_files):
    question_accuracies = []
    for row in selected_files:
        with open(row['file_path'], 'r') as f:
            data = json.load(f)
        for metric in data.get('metrics', []):
            if metric.get('metric_name') == 'Accuracy':
                question_wise = metric.get('metric_result_question_test_wise', [[]])[0]
                question_accuracies.append({
                    'file_id': row['uuid'][:8],
                    'accuracies': [acc * 100 for acc in question_wise]
                })
    return question_accuracies

def create_file_accuracy_figure(file_accuracies):
    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=list(range(len(file_accuracies))),
        y=[fa['Accuracy'] for fa in file_accuracies],
        mode='lines+markers',
        name='Accuracy',
        line=dict(color='#5cb85c', width=2),
        marker=dict(size=8)
    ))

    fig.update_layout(
        title=f'Accuracy Comparison Across Files ({len(file_accuracies)} files)',
        plot_bgcolor='#2b2b2b',
        paper_bgcolor='#2b2b2b',
        font_color='white',
        xaxis_title="File ID",
        yaxis_title="Accuracy (%)",
        yaxis_range=[0, 100],
        xaxis=dict(
            showgrid=True,
            gridcolor='rgba(255, 255, 255, 0.1)',
            tickangle=45,
            tickmode='array',
            ticktext=[fa['File'] for fa in file_accuracies],
            tickvals=list(range(len(file_accuracies)))
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor='rgba(255, 255, 255, 0.1)'
        ),
        showlegend=False
    )

    return fig

def create_question_accuracy_figure(question_accuracies):
    fig = go.Figure()

    for qa in question_accuracies:
        fig.add_trace(go.Scatter(
            x=list(range(1, len(qa['accuracies']) + 1)),
            y=qa['accuracies'],
            mode='lines+markers',
            name=qa['file_id']
        ))

    fig.update_layout(
        title=f'Question-wise Accuracy Comparison ({len(question_accuracies)} files)',
        xaxis_title="Question Number",
        yaxis_title="Accuracy (%)",
        plot_bgcolor='#2b2b2b',
        paper_bgcolor='#2b2b2b',
        font_color='white',
        yaxis_range=[0, 100],
        showlegend=True,
        xaxis=dict(
            showgrid=True,
            gridcolor='rgba(255, 255, 255, 0.1)'
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor='rgba(255, 255, 255, 0.1)'
        )
    )

    return fig
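This module is presumably callbacks/graph_updates.py. Both comparison helpers open each selected file, read its 'metrics' list, and use only the entry whose 'metric_name' is 'Accuracy'; the per-question series comes from the first inner list of 'metric_result_question_test_wise'. A hedged sketch of an entry these helpers can parse (all numbers are made up):

# Illustrative 'Accuracy' entry as read by process_file_accuracies and
# process_question_accuracies. Values are fractions in [0, 1]; both helpers
# multiply by 100 before plotting. The numbers shown are made up.
accuracy_metric = {
    'metric_name': 'Accuracy',
    'metric_result': 0.82,
    'metric_result_question_test_wise': [
        [0.90, 0.75, 0.80],  # only this first list is used: one value per question
    ],
}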
Lines changed: 156 additions & 0 deletions
@@ -0,0 +1,156 @@
import json
from dash.dependencies import Input, Output
from dash import html, dcc
import plotly.graph_objects as go

def register_metrics_callbacks(app):
    @app.callback(
        [Output('test-statistics-content', 'children'),
         Output('accuracy-chart', 'children'),
         Output('hallucination-rate', 'children'),
         Output('llm-drift-rate', 'children'),
         Output('custom-metric-1', 'children'),
         Output('custom-metric-2', 'children')],
        [Input('selected-file-path', 'data')]
    )
    def update_metrics(selected_file):
        default_values = get_default_metric_values()

        if selected_file is None:
            return default_values

        try:
            with open(selected_file, 'r') as f:
                data = json.load(f)
            metrics = data.get('metrics', [])

            return process_metrics(metrics)

        except Exception as e:
            print(f"Error loading metrics from {selected_file}: {e}")
            return default_values

def get_default_metric_values():
    """Return default values for all metrics."""
    default_stats = html.Div([
        html.P("Total Tests: N/A"),
        html.P("Test Cases: N/A"),
        html.P("Paraphrased: N/A"),
        html.P("Iterations: N/A"),
        html.P("Passed: N/A"),
        html.P("Failed: N/A")
    ])

    default_gauge = html.Div([
        html.H3("N/A", style={'textAlign': 'center', 'color': 'gray', 'marginTop': '20px'})
    ])

    default_percentage = html.Div([
        html.H3("N/A", style={'textAlign': 'center', 'color': 'gray'})
    ])

    return (
        default_stats,
        default_gauge,
        default_percentage,
        default_percentage,
        default_percentage,
        default_percentage
    )

def process_metrics(metrics):
    """Process metrics data and return formatted components."""
    statistics = get_default_metric_values()[0]  # Default value
    accuracy = get_default_metric_values()[1]  # Default value
    hallucination = get_default_metric_values()[2]  # Default value
    drift = get_default_metric_values()[3]  # Default value
    custom1 = get_default_metric_values()[4]  # Default value
    custom2 = get_default_metric_values()[5]  # Default value

    for metric in metrics:
        metric_name = metric.get('metric_name', '')

        if metric_name == 'Statistics':
            tests = metric.get('tests', {})
            statistics = html.Div([
                html.P(f"Total Tests: {tests.get('Total', 'N/A')}"),
                html.P(f"Test Cases: {tests.get('Test_cases', 'N/A')}"),
                html.P(f"Paraphrased: {tests.get('Paraphrased', 'N/A')}"),
                html.P(f"Iterations: {tests.get('Iteration', 'N/A')}"),
                html.P(f"Passed: {tests.get('Passed', 'N/A')}"),
                html.P(f"Failed: {tests.get('Failed', 'N/A')}")
            ])

        elif metric_name == 'Accuracy':
            accuracy_value = metric.get('metric_result', 0) * 100
            accuracy = create_accuracy_gauge(accuracy_value)

        elif metric_name == 'Hallucination_rate':
            hall_rate = metric.get('metric_result', 0)
            hallucination = html.Div([
                html.H3(f"{hall_rate:.1f}%",
                        style={'textAlign': 'center',
                               'color': '#d9534f' if hall_rate > 20 else '#5cb85c'})
            ])

        elif metric_name == 'LLM Drift rate':
            drift_rate = metric.get('metric_result', 0)
            drift = html.Div([
                html.H3(f"{drift_rate:.1f}%",
                        style={'textAlign': 'center',
                               'color': '#d9534f' if drift_rate > 30 else '#5cb85c'})
            ])

        elif metric_name == 'Custom metric - 1':
            custom_rate1 = metric.get('metric_result', 0)
            custom1 = html.Div([
                html.H3(f"{custom_rate1:.1f}%",
                        style={'textAlign': 'center',
                               'color': '#d9534f' if custom_rate1 > 50 else '#5cb85c'})
            ])

        elif metric_name == 'Custom metric - 2':
            custom_rate2 = metric.get('metric_result', 0)
            custom2 = html.Div([
                html.H3(f"{custom_rate2:.1f}%",
                        style={'textAlign': 'center',
                               'color': '#d9534f' if custom_rate2 > 50 else '#5cb85c'})
            ])

    return statistics, accuracy, hallucination, drift, custom1, custom2

def create_accuracy_gauge(accuracy_value):
    """Create an accuracy gauge figure."""
    fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=accuracy_value,
        domain={'x': [0, 1], 'y': [0, 1]},
        gauge={
            'axis': {'range': [0, 100], 'tickwidth': 1},
            'bar': {'color': "#5cb85c"},
            'bgcolor': "white",
            'borderwidth': 2,
            'bordercolor': "gray",
            'steps': [
                {'range': [0, 30], 'color': '#d9534f'},
                {'range': [30, 70], 'color': '#f0ad4e'},
                {'range': [70, 100], 'color': '#5cb85c'}
            ],
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 70
            }
        }
    ))

    fig.update_layout(
        paper_bgcolor='#2b2b2b',
        font={'color': "white", 'family': "Arial"},
        height=200,
        margin=dict(l=30, r=30, t=30, b=0)
    )

    return html.Div([
        dcc.Graph(figure=fig, config={'displayModeBar': False})
    ])
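This module is presumably callbacks/metrics_updates.py. process_metrics dispatches purely on 'metric_name', so a single 'metrics' list drives the whole panel. Note that the Accuracy result is multiplied by 100 while the rate metrics are formatted as-is, which suggests (the commit does not say so explicitly) that rates are stored as percentages and accuracy as a fraction. A hedged, illustrative example of a metrics list matching the names checked above (all values are made up):

# Illustrative 'metrics' list matching the metric names process_metrics checks.
# All numbers are made up. 'metric_result' for Accuracy looks like a fraction,
# while the rate metrics appear to be stored directly as percentages.
example_metrics = [
    {'metric_name': 'Statistics',
     'tests': {'Total': 60, 'Test_cases': 20, 'Paraphrased': 20,
               'Iteration': 3, 'Passed': 48, 'Failed': 12}},
    {'metric_name': 'Accuracy', 'metric_result': 0.80,
     'metric_result_question_test_wise': [[0.90, 0.70, 0.80]]},
    {'metric_name': 'Hallucination_rate', 'metric_result': 12.5},
    {'metric_name': 'LLM Drift rate', 'metric_result': 8.0},
    {'metric_name': 'Custom metric - 1', 'metric_result': 40.0},
    {'metric_name': 'Custom metric - 2', 'metric_result': 55.0},
]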
