@@ -140,6 +140,8 @@ Resources:
140
140
Action :
141
141
- ecs:PutClusterCapacityProviders
142
142
- ecs:DescribeClusters
143
+ - ecs:UpdateService
144
+ - ecs:DescribeServices
143
145
- logs:CreateLogGroup
144
146
- logs:CreateLogStream
145
147
- logs:PutLogEvents
@@ -162,40 +164,70 @@ Resources:
162
164
ecs_client = boto3.client('ecs')
163
165
cluster_name = os.environ['ECS_CLUSTER_NAME']
164
166
capacity_provider_name = event['ResourceProperties']['CapacityProvider']
167
+
168
def try_update_with_retry():
    """Run ``_do_update``, retrying once if the cluster reports an update in progress.

    Returns:
        Whatever ``_do_update`` returns, from the first attempt or the retry.

    Raises:
        Exception: Any error whose text does not contain
            ``UpdateInProgressException`` is re-raised unchanged. An error
            raised by the second attempt also propagates to the caller.
    """
    try:
        return _do_update()
    except Exception as err:
        # Only the "update in progress" condition is retried; everything
        # else is passed straight back to the caller.
        if 'UpdateInProgressException' not in str(err):
            raise
        print("Cluster busy, waiting 30 seconds before retry...")
        import time
        time.sleep(30)
        # Second and final attempt.
        return _do_update()
180
+
181
def _do_update():
    """Attach the capacity provider to the cluster and make it the default strategy.

    Reads ``ecs_client``, ``cluster_name`` and ``capacity_provider_name``
    from the enclosing scope. The capacity providers already on the cluster
    are kept; the requested one is appended if it is not present yet. The
    default capacity provider strategy is replaced by a single entry for the
    requested provider (weight 1, base 0).

    Returns:
        The response of ``put_cluster_capacity_providers``.

    Raises:
        RuntimeError: If ``describe_clusters`` returns no cluster for
            ``cluster_name``.
    """
    clusters = ecs_client.describe_clusters(clusters=[cluster_name])['clusters']
    if not clusters:
        # Fail with a message that names the cluster instead of the bare
        # IndexError that indexing an empty list would produce.
        raise RuntimeError(f"Cluster {cluster_name} not found")

    # Copy so the list inside the API response is not mutated.
    capacity_providers = list(clusters[0].get('capacityProviders', []))
    if capacity_provider_name not in capacity_providers:
        capacity_providers.append(capacity_provider_name)

    return ecs_client.put_cluster_capacity_providers(
        cluster=cluster_name,
        capacityProviders=capacity_providers,
        defaultCapacityProviderStrategy=[
            {
                'capacityProvider': capacity_provider_name,
                'weight': 1,
                'base': 0,
            }
        ],
    )
199
+
165
200
# Dispatch on the CloudFormation request type and always report the outcome
# back through cfnresponse, so the stack operation does not hang.
try:
    request_type = event['RequestType']
    if request_type in ['Create', 'Update']:
        try_update_with_retry()
    elif request_type == 'Delete':
        def _do_delete():
            """Detach the capacity provider from the cluster.

            Returns the ``put_cluster_capacity_providers`` response, or
            ``None`` when the cluster no longer exists.
            """
            clusters = ecs_client.describe_clusters(clusters=[cluster_name])['clusters']
            if not clusters:
                # The cluster is already gone, so there is nothing to detach.
                # Treating this as success keeps stack deletion from failing.
                print(f"Cluster {cluster_name} not found during delete, nothing to do")
                return None

            current_capacity_providers = clusters[0].get('capacityProviders', [])

            # Remove only the specific capacity provider
            updated_capacity_providers = [
                cp for cp in current_capacity_providers if cp != capacity_provider_name
            ]

            return ecs_client.put_cluster_capacity_providers(
                cluster=cluster_name,
                capacityProviders=updated_capacity_providers,
                defaultCapacityProviderStrategy=[],
            )

        # Simple retry for the delete operation: one extra attempt, and only
        # when the cluster reports that another update is in progress.
        try:
            _do_delete()
        except Exception as e:
            if 'UpdateInProgressException' not in str(e):
                raise
            print("Cluster busy during delete, waiting 30 seconds before retry...")
            import time
            time.sleep(30)
            _do_delete()  # Try once more

    cfnresponse.send(event, context, cfnresponse.SUCCESS, {})
except Exception as e:
    # Log the failure so it shows up in the function's logs as well as in
    # the CloudFormation event.
    print(f"Error updating cluster capacity providers: {str(e)}")
    cfnresponse.send(event, context, cfnresponse.FAILED, {'Error': str(e)})
@@ -240,6 +272,38 @@ Resources:
240
272
response_data = {'DnsName': dns_name}
241
273
cfnresponse.send(event, context, cfnresponse.SUCCESS, response_data)
242
274
275
def force_api_router_deployment(event, context):
    """Force a new deployment of the ``EMD-API-Router`` ECS service.

    The service is looked up in the cluster named by the ``ECS_CLUSTER_NAME``
    environment variable and updated with ``forceNewDeployment=True``. The
    outcome is always reported through ``cfnresponse``; no exception escapes
    this function.

    Args:
        event: Custom resource request dict. ``RequestType`` is read if
            present.
        context: Lambda context object, passed through to ``cfnresponse``.
    """
    service_name = "EMD-API-Router"

    try:
        if event.get('RequestType') == 'Delete':
            # Nothing needs redeploying when the resource is being removed,
            # and a missing service must not block stack deletion.
            response_data = {'Message': f"No action required on delete for {service_name}"}
            cfnresponse.send(event, context, cfnresponse.SUCCESS, response_data)
            return

        # Created inside the try so that setup errors are also reported as FAILED.
        ecs_client = boto3.client('ecs')
        cluster_name = os.environ['ECS_CLUSTER_NAME']

        # Check if the service exists
        response = ecs_client.describe_services(
            cluster=cluster_name,
            services=[service_name]
        )

        services = response['services']
        if not services or services[0]['status'] != 'ACTIVE':
            raise Exception(f"Service {service_name} not found or not active in cluster {cluster_name}")

        # Force a new deployment
        ecs_client.update_service(
            cluster=cluster_name,
            service=service_name,
            forceNewDeployment=True
        )

        response_data = {'Message': f"Forced new deployment for {service_name}"}
        cfnresponse.send(event, context, cfnresponse.SUCCESS, response_data)
    except Exception as e:
        print(f"Error forcing deployment: {str(e)}")
        cfnresponse.send(event, context, cfnresponse.FAILED, {'Error': str(e)})
306
+
243
307
def handler(event, context):
244
308
print(event)
245
309
print(context)
0 commit comments