@@ -184,51 +184,47 @@ def evaluate(test_annotation_file, user_submission_file, phase_codename, **kwarg
     # }
     # )
 
-    # The following line might need adjustment depending on EvalAI requirements.
-    # If EvalAI expects a specific structure like the original one,
-    # you might need to aggregate or select specific results.
-    # For now, let's keep the first split's results for submission_result as an example.
-    # if output["result"]:
-    #     total_ate = 0
-    #     total_rte = 0
-    #     total_le = 0
-    #     count = 0
-
-    #     for split_result in output["result"]:
-    #         # Each split_result is a dict like {"split_name": {"ATE": x, "RTE": y, "LE": z}}
-    #         # Get the inner metrics dictionary (assuming only one key per outer dict)
-    #         split_name = list(split_result.keys())[0]
-    #         metrics = split_result[split_name]
-
-    #         # Accumulate metrics if they exist and are not None
-    #         if metrics.get("ATE") is not None:
-    #             total_ate += metrics["ATE"]
-    #         if metrics.get("RTE") is not None:
-    #             total_rte += metrics["RTE"]
-    #         if metrics.get("LE") is not None:
-    #             total_le += metrics["LE"]
-    #         count += 1  # Increment count for each split processed
-
-    #     # Calculate averages, handle division by zero if count is 0
-    #     if count > 0:
-    #         avg_ate = total_ate / count
-    #         avg_rte = total_rte / count
-    #         avg_le = total_le / count
-    #         output["submission_result"] = {
-    #             "ATE": avg_ate,
-    #             "RTE": avg_rte,
-    #             "LE": avg_le
-    #         }
-    #     else:
-    #         # Handle case with no valid metrics found
-    #         output["submission_result"] = {
-    #             "ATE": None,
-    #             "RTE": None,
-    #             "LE": None
-    #         }
-    # else:
-    #     output["submission_result"] = {}  # Handle case with no evaluated metrics
-    output["submission_result"] = output["result"][0]["heap"]
+    if output["result"]:
+        total_ate = 0
+        total_rte = 0
+        total_le = 0
+        count = 0
+
+        for split_result in output["result"]:
+            # Each split_result is a dict like {"split_name": {"ATE": x, "RTE": y, "LE": z}}
+            # Get the inner metrics dictionary (assuming only one key per outer dict)
+            split_name = list(split_result.keys())[0]
+            metrics = split_result[split_name]
+
+            # Accumulate metrics if they exist and are not None
+            if metrics.get("ATE") is not None:
+                total_ate += metrics["ATE"]
+            if metrics.get("RTE") is not None:
+                total_rte += metrics["RTE"]
+            if metrics.get("LE") is not None:
+                total_le += metrics["LE"]
+            count += 1  # Increment count for each split processed
+
+        # Calculate averages, handle division by zero if count is 0
+        if count > 0:
+            avg_ate = total_ate / count
+            avg_rte = total_rte / count
+            avg_le = total_le / count
+            output["submission_result"] = {
+                "ATE": avg_ate,
+                "RTE": avg_rte,
+                "LE": avg_le
+            }
+        else:
+            # Handle case with no valid metrics found
+            output["submission_result"] = {
+                "ATE": None,
+                "RTE": None,
+                "LE": None
+            }
+    else:
+        output["submission_result"] = {}  # Handle case with no evaluated metrics
+
     # Placeholder for submission metadata based on the requested format.
     # Actual values should be populated based on evaluation results if applicable.
     output["submission_metadata"] = json.dumps(kwargs['submission_metadata'])