@@ -100,6 +100,7 @@ public TransportUndeployModelAction(
100
100
101
101
@ Override
102
102
protected void doExecute (Task task , MLUndeployModelNodesRequest request , ActionListener <MLUndeployModelNodesResponse > listener ) {
103
+ log .info ("Executing undeploy for models: {}" , Arrays .toString (request .getModelIds ()));
103
104
ActionListener <MLUndeployModelNodesResponse > wrappedListener = ActionListener .wrap (undeployModelNodesResponse -> {
104
105
processUndeployModelResponseAndUpdate (request .getTenantId (), undeployModelNodesResponse , listener );
105
106
}, listener ::onFailure );
@@ -112,6 +113,7 @@ void processUndeployModelResponseAndUpdate(
112
113
ActionListener <MLUndeployModelNodesResponse > listener
113
114
) {
114
115
List <MLUndeployModelNodeResponse > responses = undeployModelNodesResponse .getNodes ();
116
+ log .debug ("Processing undeploy model responses from nodes" );
115
117
if (responses == null || responses .isEmpty ()) {
116
118
listener .onResponse (undeployModelNodesResponse );
117
119
return ;
@@ -135,9 +137,10 @@ void processUndeployModelResponseAndUpdate(
135
137
136
138
Map <String , String > modelUndeployStatus = r .getModelUndeployStatus ();
137
139
for (Map .Entry <String , String > entry : modelUndeployStatus .entrySet ()) {
140
+ String modelId = entry .getKey ();
138
141
String status = entry .getValue ();
142
+ log .debug ("Model status of model {} on node {}: {}" , modelId , r .getNode ().getId (), status );
139
143
if (UNDEPLOYED .equals (status )) {
140
- String modelId = entry .getKey ();
141
144
if (!actualRemovedNodesMap .containsKey (modelId )) {
142
145
actualRemovedNodesMap .put (modelId , new ArrayList <>());
143
146
}
@@ -154,6 +157,7 @@ void processUndeployModelResponseAndUpdate(
154
157
MLSyncUpNodesRequest syncUpRequest = new MLSyncUpNodesRequest (nodeFilter .getAllNodes (), syncUpInput );
155
158
try (ThreadContext .StoredContext context = client .threadPool ().getThreadContext ().stashContext ()) {
156
159
if (!actualRemovedNodesMap .isEmpty ()) {
160
+ log .debug ("Models undeployed from nodes: {}" , actualRemovedNodesMap );
157
161
BulkDataObjectRequest bulkRequest = BulkDataObjectRequest .builder ().globalIndex (ML_MODEL_INDEX ).build ();
158
162
Map <String , Boolean > deployToAllNodes = new HashMap <>();
159
163
for (String modelId : actualRemovedNodesMap .keySet ()) {
@@ -166,8 +170,10 @@ void processUndeployModelResponseAndUpdate(
166
170
* we need to update both planning worker nodes (count) and current worker nodes (count)
167
171
* and deployToAllNodes value in model index.
168
172
*/
173
+ log .debug ("Updating metadata for model {}: removedNodes={}" , modelId , removedNodes );
169
174
Map <String , Object > updateDocument = new HashMap <>();
170
- if (modelWorkNodesBeforeRemoval .get (modelId ).length == removedNodeCount ) { // undeploy all nodes.
175
+ if (modelWorkNodesBeforeRemoval .get (modelId ).length == removedNodeCount ) {
176
+ log .debug ("All nodes removed for model {}. Marking as undeployed." , modelId );// undeploy all nodes.
171
177
updateDocument .put (MLModel .PLANNING_WORKER_NODES_FIELD , ImmutableList .of ());
172
178
updateDocument .put (MLModel .PLANNING_WORKER_NODE_COUNT_FIELD , 0 );
173
179
updateDocument .put (MLModel .CURRENT_WORKER_NODE_COUNT_FIELD , 0 );
@@ -180,6 +186,12 @@ void processUndeployModelResponseAndUpdate(
180
186
.stream (modelWorkNodesBeforeRemoval .get (modelId ))
181
187
.filter (x -> !removedNodes .contains (x ))
182
188
.collect (Collectors .toList ());
189
+ log
190
+ .debug (
191
+ "Partially undeployed for model {} with remaining planning worker nodes: {}" ,
192
+ modelId ,
193
+ newPlanningWorkerNodes
194
+ );
183
195
updateDocument .put (MLModel .PLANNING_WORKER_NODES_FIELD , newPlanningWorkerNodes );
184
196
updateDocument .put (MLModel .PLANNING_WORKER_NODE_COUNT_FIELD , newPlanningWorkerNodes .size ());
185
197
updateDocument .put (MLModel .CURRENT_WORKER_NODE_COUNT_FIELD , newPlanningWorkerNodes .size ());
@@ -195,6 +207,7 @@ void processUndeployModelResponseAndUpdate(
195
207
bulkRequest .add (updateRequest ).setRefreshPolicy (WriteRequest .RefreshPolicy .IMMEDIATE );
196
208
}
197
209
syncUpInput .setDeployToAllNodes (deployToAllNodes );
210
+ log .debug ("Sending bulk metadata update request for undeploy" );
198
211
ActionListener <BulkResponse > actionListener = ActionListener .wrap (r -> {
199
212
log
200
213
.debug (
@@ -203,6 +216,7 @@ void processUndeployModelResponseAndUpdate(
203
216
);
204
217
}, e -> { log .error ("Failed to update model state as undeployed" , e ); });
205
218
ActionListener <BulkResponse > wrappedListener = ActionListener .runAfter (actionListener , () -> {
219
+ log .debug ("Triggering sync-up after bulk update for undeploy" );
206
220
syncUpUndeployedModels (syncUpRequest );
207
221
listener .onResponse (undeployModelNodesResponse );
208
222
});
@@ -288,11 +302,14 @@ private MLUndeployModelNodeResponse createUndeployModelNodeResponse(MLUndeployMo
288
302
289
303
boolean specifiedModelIds = modelIds != null && modelIds .length > 0 ;
290
304
String [] removedModelIds = specifiedModelIds ? modelIds : mlModelManager .getAllModelIds ();
305
+
306
+ log .debug ("Models to undeploy: {}" , Arrays .toString (removedModelIds ));
291
307
if (removedModelIds != null ) {
292
308
for (String modelId : removedModelIds ) {
293
309
FunctionName functionName = mlModelManager .getModelFunctionName (modelId );
294
310
String [] workerNodes = mlModelManager .getWorkerNodes (modelId , functionName );
295
311
modelWorkerNodesMap .put (modelId , workerNodes );
312
+ log .debug ("Retrieved worker nodes for model {}: {}" , modelId , Arrays .toString (workerNodes ));
296
313
}
297
314
}
298
315
0 commit comments