@@ -1,16 +1,15 @@
 import os
 import io
 import zipfile
-print("Starting Evaluation.....")
-print("Starting Evaluation.....")
+import json
 from .evo_script import TrajectoryEvaluator, read_tum_trajectory_matrix
 import sys


 def evaluate(test_annotation_file, user_submission_file, phase_codename, **kwargs):
     # script_dir = os.path.dirname(os.path.abspath(__file__))
-    print("Starting Evaluation.....")
-    print(kwargs['submission_metadata'])
+    print("\033[92mStarting Evaluation.....\033[0m")
+    # print(kwargs['submission_metadata'])
     output = {}
     # evaluated_metrics = []

@@ -121,45 +120,38 @@ def evaluate(test_annotation_file, user_submission_file, phase_codename, **kwarg
     # # output["result"] = []


-    print("Possible error", file=sys.stderr)
-
-    # output["submission_metadata"] = [
-    #     {
-    #         "average-evaluation-time": "5 secs",
-    #         "foo": "bar"
-    #     }
-    # ]
+    print("\033[91mPossible error\033[0m", file=sys.stderr)

     output["result"] = [
-        {
-            "heap": {
-                "ATE": 1.34,  # Use .get for safety if keys might be missing
-                "RTE": 2.45,
-                "LE": 3.51
-            }
-        },
-        {
-            "eiger": {
-                "ATE": 1.34,  # Use .get for safety if keys might be missing
-                "RTE": 2.45,
-                "LE": 3.51
-            }
-        },
-        {
-            "tt3": {
-                "ATE": 1.34,  # Use .get for safety if keys might be missing
-                "RTE": 2.45,
-                "LE": 3.51
-            }
-        },
-        {
-            "tt4": {
-                "ATE": 1.34,  # Use .get for safety if keys might be missing
-                "RTE": 2.45,
-                "LE": 3.51
-            }
-        },
-    ]
+        {
+            "heap": {
+                "ATE": 1.34,  # Use .get for safety if keys might be missing
+                "RTE": 2.45,
+                "LE": 3.51
+            }
+        },
+        {
+            "eiger": {
+                "ATE": 1.34,  # Use .get for safety if keys might be missing
+                "RTE": 2.45,
+                "LE": 3.51
+            }
+        },
+        {
+            "tt3": {
+                "ATE": 1.34,  # Use .get for safety if keys might be missing
+                "RTE": 2.45,
+                "LE": 3.51
+            }
+        },
+        {
+            "tt4": {
+                "ATE": 1.34,  # Use .get for safety if keys might be missing
+                "RTE": 2.45,
+                "LE": 3.51
+            }
+        },
+    ]

     # for i, eval_result in enumerate(evaluated_metrics):
     #     metrics = eval_result["metrics"]
@@ -181,62 +173,48 @@ def evaluate(test_annotation_file, user_submission_file, phase_codename, **kwarg
     # you might need to aggregate or select specific results.
     # For now, let's keep the first split's results for submission_result as an example.
     if output["result"]:
-        first_split_key = list(output["result"][0].keys())[0]
-        output["submission_result"] = output["result"][0][first_split_key]
+        total_ate = 0
+        total_rte = 0
+        total_le = 0
+        count = 0
+
+        for split_result in output["result"]:
+            # Each split_result is a dict like {"split_name": {"ATE": x, "RTE": y, "LE": z}}
+            # Get the inner metrics dictionary (assuming only one key per outer dict)
+            split_name = list(split_result.keys())[0]
+            metrics = split_result[split_name]
+
+            # Accumulate metrics if they exist and are not None
+            if metrics.get("ATE") is not None:
+                total_ate += metrics["ATE"]
+            if metrics.get("RTE") is not None:
+                total_rte += metrics["RTE"]
+            if metrics.get("LE") is not None:
+                total_le += metrics["LE"]
+            count += 1  # Increment count for each split processed
+
+        # Calculate averages, handle division by zero if count is 0
+        if count > 0:
+            avg_ate = total_ate / count
+            avg_rte = total_rte / count
+            avg_le = total_le / count
+            output["submission_result"] = {
+                "ATE": avg_ate,
+                "RTE": avg_rte,
+                "LE": avg_le
+            }
+        else:
+            # Handle case with no valid metrics found
+            output["submission_result"] = {
+                "ATE": None,
+                "RTE": None,
+                "LE": None
+            }
     else:
         output["submission_result"] = {}  # Handle case with no evaluated metrics
     # Placeholder for submission metadata based on the requested format.
     # Actual values should be populated based on evaluation results if applicable.
-    output["submission_metadata"] = {
-        "heap": {
-            "metrics": {
-                "time": 5,  # Replace with actual accuracy
-                "length": 10
-                # Add more qid: acc pairs as needed
-            },
-            "logs": {
-                "rate": 100,  # Replace with actual accuracy
-                "suspicious": "no"
-                # Add more qtype: acc pairs as needed
-            }
-        },
-        "eiger": {
-            "metrics": {
-                "time": 5,  # Replace with actual accuracy
-                "length": 10
-                # Add more qid: acc pairs as needed
-            },
-            "logs": {
-                "rate": 100,  # Replace with actual accuracy
-                "suspicious": "no"
-                # Add more qtype: acc pairs as needed
-            }
-        },
-        "tt3": {
-            "metrics": {
-                "time": 5,  # Replace with actual accuracy
-                "length": 10
-                # Add more qid: acc pairs as needed
-            },
-            "logs": {
-                "rate": 100,  # Replace with actual accuracy
-                "suspicious": "no"
-                # Add more qtype: acc pairs as needed
-            }
-        },
-        "tt4": {
-            "metrics": {
-                "time": 5,  # Replace with actual accuracy
-                "length": 10
-                # Add more qid: acc pairs as needed
-            },
-            "logs": {
-                "rate": 100,  # Replace with actual accuracy
-                "suspicious": "no"
-                # Add more qtype: acc pairs as needed
-            }
-        }
-    }
+    output["submission_metadata"] = json.dumps(kwargs['submission_metadata'])

     print("Completed evaluation for Dev Phase")
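For context, the new submission_result block above replaces the old "take the first split" behaviour with a mean of ATE/RTE/LE over all splits. A minimal standalone sketch of that aggregation is below, using the same list-of-single-key-dicts shape as output["result"]; the helper name average_split_metrics and the sample numbers are illustrative only, not code from this repository.

# Illustrative sketch only: mirrors the averaging logic added in this commit.
# `average_split_metrics` is a hypothetical name, not defined in the repo.
def average_split_metrics(result):
    """Average ATE/RTE/LE over entries like [{"heap": {"ATE": ..., ...}}, ...]."""
    totals = {"ATE": 0.0, "RTE": 0.0, "LE": 0.0}
    count = 0
    for split_result in result:
        # Each entry is a single-key dict: {split_name: metrics_dict}.
        metrics = next(iter(split_result.values()))
        for key in totals:
            if metrics.get(key) is not None:
                totals[key] += metrics[key]
        count += 1  # one per split, matching the commit's count += 1
    if count == 0:
        return {"ATE": None, "RTE": None, "LE": None}
    return {key: total / count for key, total in totals.items()}


# Example with the placeholder numbers used in the diff:
splits = [
    {"heap":  {"ATE": 1.34, "RTE": 2.45, "LE": 3.51}},
    {"eiger": {"ATE": 1.34, "RTE": 2.45, "LE": 3.51}},
]
print(average_split_metrics(splits))  # -> {'ATE': 1.34, 'RTE': 2.45, 'LE': 3.51}

As in the committed code, an empty output["result"] still yields an empty submission_result dict via the unchanged else branch.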