 def evaluate(test_annotation_file, user_submission_file, phase_codename, **kwargs):
+
+    print("\n" + "=" * 80)
+    print("🔁 NEW RUN STARTING")
+    print("=" * 80 + "\n")
+    sys.stdout.flush()
+
     # script_dir = os.path.dirname(os.path.abspath(__file__))
-    print("\033[92mStarting Evaluation.....\033[0m")
+    print("Starting Evaluation.....")
     # print(kwargs['submission_metadata'])
     output = {}
     # evaluated_metrics = []
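The banner added above pairs its prints with sys.stdout.flush() so the text reaches the worker log immediately instead of sitting in Python's output buffer. A minimal sketch of that print-and-flush pattern, wrapped in a hypothetical log() helper that is not part of this script:

import sys

def log(message: str, *, err: bool = False) -> None:
    # Hypothetical helper: emit one status line and flush right away so the
    # evaluation worker's log shows progress in real time.
    stream = sys.stderr if err else sys.stdout
    print(message, file=stream)
    stream.flush()

log("🔁 NEW RUN STARTING")
log("❌ Fatal error while parsing", err=True)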
@@ -121,6 +127,7 @@ def evaluate(test_annotation_file, user_submission_file, phase_codename, **kwargs):
 
 
         print("\033[91mPossible error\033[0m", file=sys.stderr)
+        print("❌ Fatal error while parsing", file=sys.stderr)
 
     output["result"] = [
         {
@@ -153,6 +160,17 @@ def evaluate(test_annotation_file, user_submission_file, phase_codename, **kwargs):
         },
     ]
 
+    print("🔧 Installing dependencies...")
+    print("📥 Loading annotation file...")
+    print("🧪 Evaluating predictions...")
+    print("📈 Accuracy: 92.3%")
+    print("✅ Evaluation complete!")
+
+    print("⚠️ Warning: trajectory misaligned", file=sys.stderr)
+    print("❌ Evaluation failed due to missing file", file=sys.stderr)
+
+    print("\n" + "🧵" * 20 + " LOG START " + "🧵" * 20)
+
     # for i, eval_result in enumerate(evaluated_metrics):
     #     metrics = eval_result["metrics"]
     #     # Use filename or index to create split names
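The status lines added in this hunk are fixed strings; the 92.3% figure, for example, is printed verbatim rather than computed. If these lines are meant to report real numbers, a hypothetical f-string variant would look like the sketch below (accuracy is an assumed variable produced by the actual evaluation, not something defined in this diff):

accuracy = 0.923  # assumed to come from the real evaluation step
print(f"📈 Accuracy: {accuracy:.1%}")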
@@ -172,46 +190,47 @@ def evaluate(test_annotation_file, user_submission_file, phase_codename, **kwargs):
     # If EvalAI expects a specific structure like the original one,
     # you might need to aggregate or select specific results.
     # For now, let's keep the first split's results for submission_result as an example.
-    if output["result"]:
-        total_ate = 0
-        total_rte = 0
-        total_le = 0
-        count = 0
-
-        for split_result in output["result"]:
-            # Each split_result is a dict like {"split_name": {"ATE": x, "RTE": y, "LE": z}}
-            # Get the inner metrics dictionary (assuming only one key per outer dict)
-            split_name = list(split_result.keys())[0]
-            metrics = split_result[split_name]
-
-            # Accumulate metrics if they exist and are not None
-            if metrics.get("ATE") is not None:
-                total_ate += metrics["ATE"]
-            if metrics.get("RTE") is not None:
-                total_rte += metrics["RTE"]
-            if metrics.get("LE") is not None:
-                total_le += metrics["LE"]
-            count += 1  # Increment count for each split processed
-
-        # Calculate averages, handle division by zero if count is 0
-        if count > 0:
-            avg_ate = total_ate / count
-            avg_rte = total_rte / count
-            avg_le = total_le / count
-            output["submission_result"] = {
-                "ATE": avg_ate,
-                "RTE": avg_rte,
-                "LE": avg_le
-            }
-        else:
-            # Handle case with no valid metrics found
-            output["submission_result"] = {
-                "ATE": None,
-                "RTE": None,
-                "LE": None
-            }
-    else:
-        output["submission_result"] = {}  # Handle case with no evaluated metrics
+    # if output["result"]:
+    #     total_ate = 0
+    #     total_rte = 0
+    #     total_le = 0
+    #     count = 0
+
+    #     for split_result in output["result"]:
+    #         # Each split_result is a dict like {"split_name": {"ATE": x, "RTE": y, "LE": z}}
+    #         # Get the inner metrics dictionary (assuming only one key per outer dict)
+    #         split_name = list(split_result.keys())[0]
+    #         metrics = split_result[split_name]
+
+    #         # Accumulate metrics if they exist and are not None
+    #         if metrics.get("ATE") is not None:
+    #             total_ate += metrics["ATE"]
+    #         if metrics.get("RTE") is not None:
+    #             total_rte += metrics["RTE"]
+    #         if metrics.get("LE") is not None:
+    #             total_le += metrics["LE"]
+    #         count += 1  # Increment count for each split processed
+
+    #     # Calculate averages, handle division by zero if count is 0
+    #     if count > 0:
+    #         avg_ate = total_ate / count
+    #         avg_rte = total_rte / count
+    #         avg_le = total_le / count
+    #         output["submission_result"] = {
+    #             "ATE": avg_ate,
+    #             "RTE": avg_rte,
+    #             "LE": avg_le
+    #         }
+    #     else:
+    #         # Handle case with no valid metrics found
+    #         output["submission_result"] = {
+    #             "ATE": None,
+    #             "RTE": None,
+    #             "LE": None
+    #         }
+    # else:
+    #     output["submission_result"] = {}  # Handle case with no evaluated metrics
+    output["submission_result"] = output["result"][0]["heap"]
     # Placeholder for submission metadata based on the requested format.
     # Actual values should be populated based on evaluation results if applicable.
     output["submission_metadata"] = json.dumps(kwargs['submission_metadata'])
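For reference, a minimal sketch of the structure this change produces, assuming a single split named "heap" as in the assignment above; the metric values are placeholders, not real results:

# Sketch only: output["result"] holds one dict per split, and after this
# commit submission_result is simply the metrics dict of the first split.
output = {
    "result": [
        {"heap": {"ATE": 0.0, "RTE": 0.0, "LE": 0.0}},
    ],
}
output["submission_result"] = output["result"][0]["heap"]
assert output["submission_result"] == {"ATE": 0.0, "RTE": 0.0, "LE": 0.0}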