@@ -127,23 +127,6 @@ func GetCluster(project, location, name string) (*apis.Cluster, error) {
127
127
}
128
128
129
129
func PauseCluster (cluster * apis.Cluster , dryRun bool ) error {
130
- resize := func (cluster * apis.Cluster , pool * apis.Cluster_NodePool ) error {
131
- _ , err := exec .Run (exec .Command ("gcloud" ,
132
- "container" ,
133
- "clusters" ,
134
- "resize" , cluster .Name ,
135
- "--project" , cluster .Project ,
136
- "--location" , cluster .Location ,
137
- "--node-pool" , pool .Name ,
138
- "--num-nodes" , "0" ,
139
- "--quiet" ,
140
- ))
141
- if err != nil {
142
- return errors .Wrapf (err , "pause cluster: resize pool '%s/%s'" , cluster .Name , pool .Name )
143
- }
144
- return nil
145
- }
146
-
147
130
pause := func (cluster * apis.Cluster , pool * apis.Cluster_NodePool ) error {
148
131
if pool .GetAutoscaling () != nil || pool .GetAutoscaling ().GetEnabled () {
149
132
_ , err := exec .Run (exec .Command ("gcloud" ,
@@ -166,40 +149,10 @@ func PauseCluster(cluster *apis.Cluster, dryRun bool) error {
166
149
return nil
167
150
}
168
151
169
- ticker := time .NewTicker (time .Second )
170
-
171
- // resize node pool isn't completed at the first time. After disable the autoscaling, GCP set the nodeCount is
172
- // the initialNodeCount. Currently, we can't change the initialNodeCount setting.
173
- if err := resize (cluster , pool ); err != nil {
174
- return err
175
- }
176
- // this will ensure that the nodeCount will be 0
177
- for {
178
- select {
179
- case <- ticker .C :
180
- if size := getNodePoolSize (cluster .Project , cluster .Name , pool .Name ); size == 0 {
181
- log .Printf ("INFO: resized pool for '%s/%s' is completed!\n " , cluster .Name , pool .Name )
182
- return nil
183
- } else {
184
- log .Printf ("INFO: current size of '%s/%s': %d\n " , cluster .Name , pool .Name , size )
185
- }
186
- _ , err := exec .Run (exec .Command ("gcloud" ,
187
- "container" ,
188
- "clusters" ,
189
- "resize" , cluster .Name ,
190
- "--project" , cluster .Project ,
191
- "--location" , cluster .Location ,
192
- "--node-pool" , pool .Name ,
193
- "--num-nodes" , "0" ,
194
- "--quiet" ,
195
- ))
196
- if err != nil {
197
- return errors .Wrapf (err , "pause cluster: resize pool '%s/%s'" , cluster .Name , pool .Name )
198
- }
199
- case <- context .Background ().Done ():
200
- return nil
201
- }
152
+ if err := resize (cluster , pool .Name , 0 , func (currentSize int ) bool { return currentSize == 0 }); err != nil {
153
+ return errors .Wrap (err , "pause cluster" )
202
154
}
155
+ return nil
203
156
}
204
157
205
158
var err error
@@ -222,6 +175,7 @@ func PauseCluster(cluster *apis.Cluster, dryRun bool) error {
222
175
223
176
func UnpauseCluster (cluster * apis.Cluster ) error {
224
177
unpause := func (cluster * apis.Cluster , p * apis.Cluster_NodePool ) error {
178
+ // re-enable autoscaling if the setting is presented
225
179
if p .GetAutoscaling () != nil && p .GetAutoscaling ().GetEnabled () {
226
180
_ , err := exec .Run (exec .Command ("gcloud" ,
227
181
"container" ,
@@ -240,21 +194,10 @@ func UnpauseCluster(cluster *apis.Cluster) error {
240
194
}
241
195
log .Printf ("INFO: enabled autoscaling for '%s/%s'\n " , cluster .Name , p .Name )
242
196
}
243
-
244
- _ , err := exec .Run (exec .Command ("gcloud" ,
245
- "container" ,
246
- "clusters" ,
247
- "resize" , cluster .Name ,
248
- "--project" , cluster .Project ,
249
- "--location" , cluster .Location ,
250
- "--node-pool" , p .GetName (),
251
- "--num-nodes" , strconv .Itoa (int (p .GetCurrentSize ())),
252
- "--quiet" ,
253
- ))
254
- if err != nil {
255
- return errors .Wrapf (err , "unpause cluster: resize pool '%s/%s'" , cluster .Name , p .Name )
197
+ size := int (p .GetCurrentSize ())
198
+ if err := resize (cluster , p .GetName (), size , func (currentSize int ) bool { return currentSize >= size }); err != nil {
199
+ return errors .Wrap (err , "unpause cluster" )
256
200
}
257
- log .Printf ("INFO: resized '%s/%s' to %d\n " , cluster .Name , p .GetName (), p .GetCurrentSize ())
258
201
return nil
259
202
}
260
203
@@ -353,6 +296,59 @@ func RefreshCluster(cluster *apis.Cluster, recreate bool) error {
353
296
return eg .Wait ()
354
297
}
355
298
299
+ func resize (cluster * apis.Cluster , pool string , size int , sizeCondition func (currentSize int ) bool ) error {
300
+ ticker := time .NewTicker (time .Second )
301
+ // resize a node pool might take up to 4 hours
302
+ ctx , cancel := context .WithTimeout (context .Background (), 4 * time .Hour )
303
+ defer cancel ()
304
+
305
+ for {
306
+ select {
307
+ case <- ticker .C :
308
+ if currentSize := getNodePoolSize (cluster .Project , cluster .Name , pool ); sizeCondition (currentSize ) {
309
+ log .Printf ("INFO: resized pool for '%s/%s' is completed!\n " , cluster .Name , pool )
310
+ return nil
311
+ } else {
312
+ log .Printf ("INFO: current size of '%s/%s': %d\n " , cluster .Name , pool , currentSize )
313
+ }
314
+
315
+ var op * Operation
316
+ var err error
317
+ if op , err = getOperation (cluster , pool , OperationFilter {Status : Running , OperationType : SetNodePoolSize }); err != nil {
318
+ log .Printf ("WARN: get container operation got error: %v" , err )
319
+ }
320
+
321
+ // if the cluster already in an operation, we must to need all operations complete
322
+ if op != nil {
323
+ log .Printf ("INFO: cluster %q already in an operation. Tell the process to wait until the operation complete.\n " , cluster .GetName ())
324
+ _ , err = exec .Run (exec .Command ("gcloud" , "container" , "operations" , "wait" , op .Name , "--location" , op .Zone , "--project" , cluster .GetProject ()))
325
+ if err != nil {
326
+ log .Printf ("WARN: cluster %q: wait operation %q incomplete: error: %v\n " , cluster .GetName (), op .Name , err )
327
+ }
328
+ // break the select; we will retry the whole process even the op success or not.
329
+ break
330
+ }
331
+
332
+ _ , err = exec .Run (exec .Command ("gcloud" ,
333
+ "container" ,
334
+ "clusters" ,
335
+ "resize" , cluster .Name ,
336
+ "--project" , cluster .Project ,
337
+ "--location" , cluster .Location ,
338
+ "--node-pool" , pool ,
339
+ "--num-nodes" , strconv .Itoa (size ),
340
+ "--quiet" ,
341
+ "--async" ,
342
+ ))
343
+ if err != nil {
344
+ return errors .Wrapf (err , "pause cluster: resize pool '%s/%s'" , cluster .Name , pool )
345
+ }
346
+ case <- ctx .Done ():
347
+ return nil
348
+ }
349
+ }
350
+ }
351
+
356
352
func getNodePoolSize (project , cluster , pool string ) int {
357
353
out , err := exec .Run (exec .Command ("gcloud" ,
358
354
"compute" ,
@@ -369,3 +365,33 @@ func getNodePoolSize(project, cluster, pool string) int {
369
365
val , _ := strconv .Atoi (out )
370
366
return val
371
367
}
368
+
369
+ func getOperation (cluster * apis.Cluster , pool string , filter OperationFilter ) (* Operation , error ) {
370
+ filterQuery := fmt .Sprintf ("target_link:*/%s/nodePools/%s" , cluster .GetName (), pool )
371
+ if filter .Status != "" {
372
+ filterQuery = fmt .Sprintf ("%s AND status:%s" , filterQuery , filter .Status )
373
+ }
374
+ if filter .OperationType != "" {
375
+ filterQuery = fmt .Sprintf ("%s AND operation_type:%s" , filterQuery , filter .OperationType )
376
+ }
377
+
378
+ raw , err := exec .Run (exec .Command ("gcloud" ,
379
+ "container" ,
380
+ "operations" ,
381
+ "list" ,
382
+ "--filter" , filterQuery ,
383
+ "--project" , cluster .GetProject (),
384
+ "--location" , cluster .GetLocation (),
385
+ "--format" , "json" ,
386
+ ))
387
+ if err != nil {
388
+ return nil , errors .Wrapf (err , "get operation" )
389
+ }
390
+
391
+ var ops []* Operation
392
+ _ = json .UnmarshalFromString (raw , & ops )
393
+ if len (ops ) == 0 {
394
+ return nil , nil
395
+ }
396
+ return ops [0 ], nil
397
+ }
0 commit comments