
Commit 2fea642

metadata and submission results
1 parent 6d3091d · commit 2fea642

File tree

1 file changed: +71 −93 lines changed


evaluation_script/main.py

Lines changed: 71 additions & 93 deletions
@@ -1,16 +1,15 @@
 import os
 import io
 import zipfile
-print("Starting Evaluation.....")
-print("Starting Evaluation.....")
+import json
 from .evo_script import TrajectoryEvaluator, read_tum_trajectory_matrix
 import sys


 def evaluate(test_annotation_file, user_submission_file, phase_codename, **kwargs):
     # script_dir = os.path.dirname(os.path.abspath(__file__))
-    print("Starting Evaluation.....")
-    print(kwargs['submission_metadata'])
+    print("\033[92mStarting Evaluation.....\033[0m")
+    # print(kwargs['submission_metadata'])
     output = {}
     # evaluated_metrics = []

@@ -121,45 +120,38 @@ def evaluate(test_annotation_file, user_submission_file, phase_codename, **kwarg
     # # output["result"] = []


-    print("Possible error", file=sys.stderr)
-
-    # output["submission_metadata"] = [
-    #     {
-    #         "average-evaluation-time": "5 secs",
-    #         "foo": "bar"
-    #     }
-    # ]
+    print("\033[91mPossible error\033[0m", file=sys.stderr)

     output["result"] = [
-        {
-            "heap": {
-                "ATE": 1.34,  # Use .get for safety if keys might be missing
-                "RTE": 2.45,
-                "LE": 3.51
-            }
-        },
-        {
-            "eiger": {
-                "ATE": 1.34,  # Use .get for safety if keys might be missing
-                "RTE": 2.45,
-                "LE": 3.51
-            }
-        },
-        {
-            "tt3": {
-                "ATE": 1.34,  # Use .get for safety if keys might be missing
-                "RTE": 2.45,
-                "LE": 3.51
-            }
-        },
-        {
-            "tt4": {
-                "ATE": 1.34,  # Use .get for safety if keys might be missing
-                "RTE": 2.45,
-                "LE": 3.51
-            }
-        },
-    ]
+        {
+            "heap": {
+                "ATE": 1.34,  # Use .get for safety if keys might be missing
+                "RTE": 2.45,
+                "LE": 3.51
+            }
+        },
+        {
+            "eiger": {
+                "ATE": 1.34,  # Use .get for safety if keys might be missing
+                "RTE": 2.45,
+                "LE": 3.51
+            }
+        },
+        {
+            "tt3": {
+                "ATE": 1.34,  # Use .get for safety if keys might be missing
+                "RTE": 2.45,
+                "LE": 3.51
+            }
+        },
+        {
+            "tt4": {
+                "ATE": 1.34,  # Use .get for safety if keys might be missing
+                "RTE": 2.45,
+                "LE": 3.51
+            }
+        },
+    ]

     # for i, eval_result in enumerate(evaluated_metrics):
     #     metrics = eval_result["metrics"]
@@ -181,62 +173,48 @@ def evaluate(test_annotation_file, user_submission_file, phase_codename, **kwarg
     # you might need to aggregate or select specific results.
     # For now, let's keep the first split's results for submission_result as an example.
     if output["result"]:
-        first_split_key = list(output["result"][0].keys())[0]
-        output["submission_result"] = output["result"][0][first_split_key]
+        total_ate = 0
+        total_rte = 0
+        total_le = 0
+        count = 0
+
+        for split_result in output["result"]:
+            # Each split_result is a dict like {"split_name": {"ATE": x, "RTE": y, "LE": z}}
+            # Get the inner metrics dictionary (assuming only one key per outer dict)
+            split_name = list(split_result.keys())[0]
+            metrics = split_result[split_name]
+
+            # Accumulate metrics if they exist and are not None
+            if metrics.get("ATE") is not None:
+                total_ate += metrics["ATE"]
+            if metrics.get("RTE") is not None:
+                total_rte += metrics["RTE"]
+            if metrics.get("LE") is not None:
+                total_le += metrics["LE"]
+            count += 1  # Increment count for each split processed
+
+        # Calculate averages, handle division by zero if count is 0
+        if count > 0:
+            avg_ate = total_ate / count
+            avg_rte = total_rte / count
+            avg_le = total_le / count
+            output["submission_result"] = {
+                "ATE": avg_ate,
+                "RTE": avg_rte,
+                "LE": avg_le
+            }
+        else:
+            # Handle case with no valid metrics found
+            output["submission_result"] = {
+                "ATE": None,
+                "RTE": None,
+                "LE": None
+            }
     else:
         output["submission_result"] = {}  # Handle case with no evaluated metrics
     # Placeholder for submission metadata based on the requested format.
     # Actual values should be populated based on evaluation results if applicable.
-    output["submission_metadata"] = {
-        "heap": {
-            "metrics": {
-                "time": 5,  # Replace with actual accuracy
-                "length": 10
-                # Add more qid: acc pairs as needed
-            },
-            "logs": {
-                "rate": 100,  # Replace with actual accuracy
-                "suspicious": "no"
-                # Add more qtype: acc pairs as needed
-            }
-        },
-        "eiger": {
-            "metrics": {
-                "time": 5,  # Replace with actual accuracy
-                "length": 10
-                # Add more qid: acc pairs as needed
-            },
-            "logs": {
-                "rate": 100,  # Replace with actual accuracy
-                "suspicious": "no"
-                # Add more qtype: acc pairs as needed
-            }
-        },
-        "tt3": {
-            "metrics": {
-                "time": 5,  # Replace with actual accuracy
-                "length": 10
-                # Add more qid: acc pairs as needed
-            },
-            "logs": {
-                "rate": 100,  # Replace with actual accuracy
-                "suspicious": "no"
-                # Add more qtype: acc pairs as needed
-            }
-        },
-        "tt4": {
-            "metrics": {
-                "time": 5,  # Replace with actual accuracy
-                "length": 10
-                # Add more qid: acc pairs as needed
-            },
-            "logs": {
-                "rate": 100,  # Replace with actual accuracy
-                "suspicious": "no"
-                # Add more qtype: acc pairs as needed
-            }
-        }
-    }
+    output["submission_metadata"] = json.dumps(kwargs['submission_metadata'])

     print("Completed evaluation for Dev Phase")

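For a quick local check of the new behaviour, the sketch below replays the aggregation outside the challenge harness. It assumes the placeholder result list from this diff (splits heap, eiger, tt3, tt4 with hard-coded ATE/RTE/LE values) and substitutes a made-up dict for kwargs['submission_metadata'], which is only supplied by the evaluation platform at runtime; the metadata keys shown are illustrative, not the platform's actual schema.

import json

# Placeholder per-split results, same shape as output["result"] above
result = [
    {"heap":  {"ATE": 1.34, "RTE": 2.45, "LE": 3.51}},
    {"eiger": {"ATE": 1.34, "RTE": 2.45, "LE": 3.51}},
    {"tt3":   {"ATE": 1.34, "RTE": 2.45, "LE": 3.51}},
    {"tt4":   {"ATE": 1.34, "RTE": 2.45, "LE": 3.51}},
]

# Average each metric across splits, skipping None values, mirroring the
# submission_result logic introduced in this commit
totals = {"ATE": 0.0, "RTE": 0.0, "LE": 0.0}
count = 0
for split_result in result:
    metrics = next(iter(split_result.values()))  # one inner metrics dict per split
    for key in totals:
        if metrics.get(key) is not None:
            totals[key] += metrics[key]
    count += 1

if count > 0:
    submission_result = {key: total / count for key, total in totals.items()}
else:
    submission_result = {"ATE": None, "RTE": None, "LE": None}
print(submission_result)  # averages equal the shared placeholder values here

# submission_metadata is now forwarded as JSON; a stand-in dict is used because
# the real kwargs['submission_metadata'] only exists inside the platform
fake_submission_metadata = {"method_name": "example", "status": "running"}
print(json.dumps(fake_submission_metadata))

Note that count is incremented once per split, so each average divides by the number of splits rather than by the number of non-None values, matching the loop added in this commit.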