Skip to content
Open
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
ffab34a
Add json TD specification for production and staging, from AWS unmodi…
frankhereford Oct 21, 2025
32173cd
Rename workflow and helper script, and enhance task names in job defi…
frankhereford Oct 21, 2025
21896fb
Add placeholder job to migration metadata workflow for completion not…
frankhereford Oct 22, 2025
daee945
rename this file to match
frankhereford Oct 23, 2025
6a120da
remove header; there is a `name` directly below it with the same content
frankhereford Oct 23, 2025
8bc92ad
disarm the migration job for development
frankhereford Oct 23, 2025
3489c5f
Update path for triggering files
frankhereford Oct 23, 2025
e6fd5e5
Start to address boilerplate code
frankhereford Oct 23, 2025
e3efa70
allow for this branch to trigger actions for dev
frankhereford Oct 27, 2025
19b9a5b
typo in file name
frankhereford Oct 27, 2025
9623be3
Add ECS task definition validation and update process to deployment w…
frankhereford Oct 27, 2025
5b879d6
Add AWS cli tooling
frankhereford Oct 27, 2025
ecdaca6
Use official installation instructions from AWS for cli
frankhereford Oct 27, 2025
f026504
V2 out of the ubuntu repo
frankhereford Oct 27, 2025
43d88f7
Update AWS CLI installation method in deployment workflow
frankhereford Oct 27, 2025
c036b93
Refactor ECS task definition handling in deployment workflow
frankhereford Oct 27, 2025
bb46adf
Add ECS task family environment variable to deployment helper script
frankhereford Oct 27, 2025
23c1cea
Checking if file changed in this push
frankhereford Oct 27, 2025
337196f
Sync up to latest graphql-engine
frankhereford Oct 27, 2025
8b54352
try a more robust approach to change detection
frankhereford Oct 27, 2025
1828ec6
Update workflow, but not TD, to test normalization technique
frankhereford Oct 27, 2025
ce0a54b
OK, this may be working -- bump the ram for example
frankhereford Oct 27, 2025
78b49b1
Add deregistration support
frankhereford Oct 27, 2025
8fc4d06
we need to make sure we're running the latest before we deregister an…
frankhereford Oct 27, 2025
d071d51
Add ECS service update functionality to deployment helper script
frankhereford Oct 27, 2025
91ce1f6
Restore previous 2G memory allocation
frankhereford Oct 27, 2025
6710547
Allow deregistration of old TDs now
frankhereford Oct 27, 2025
1bc6344
Enhance logging in deployment helper script to indicate task definiti…
frankhereford Oct 27, 2025
fc87608
Reduce CPU allocation in ECS task definition from 512 to 256
frankhereford Oct 27, 2025
c4ea1ab
Got a free test of invalid TD's on that one, worked good!
frankhereford Oct 27, 2025
d21cc06
TD EOF Newlines
frankhereford Oct 27, 2025
1f172a6
Get the version right now that dev is done
frankhereford Oct 27, 2025
ce31eea
john testing: bump memory and memoryReservation to 4096
johnclary Oct 30, 2025
dea7f4d
john testing: bump task memory to 4096
johnclary Oct 30, 2025
8439c60
john testing: revert everything
johnclary Oct 30, 2025
93dc1d7
john testing: add env var
johnclary Oct 30, 2025
ebe3748
john testing: remove env var
johnclary Oct 30, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 0 additions & 50 deletions .github/workflows/atd_moped_database.yml
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're doing some renaming of some files here to give them names that better represent what they do, not what they act upon.

This file was deleted.

58 changes: 0 additions & 58 deletions .github/workflows/aws-moped-migrations-helper.sh

This file was deleted.

261 changes: 261 additions & 0 deletions .github/workflows/migrations-metadata-deployment-helper.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
#!/usr/bin/env bash

# Map the branch being deployed onto a stage: only the "production" branch
# targets production; every other value of BRANCH_NAME (including unset)
# targets staging.
if [[ "${BRANCH_NAME}" == "production" ]]; then
  WORKING_STAGE="production"
else
  WORKING_STAGE="staging"
fi
export WORKING_STAGE

echo "SOURCE -> BRANCH_NAME: ${BRANCH_NAME}"
echo "SOURCE -> WORKING_STAGE: ${WORKING_STAGE}"

################################################################################
# JOB: apply-migrations-and-metadata
# Functions for downloading Hasura settings and applying migrations/metadata
################################################################################

#
# Download the Hasura settings from the AWS Secrets Manager
Copy link
Collaborator

@mddilley mddilley Oct 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thank you for cleaning up this last confusing reference to Zappa 😵‍💫🙏 and improving the comments ✨

#
function download_hasura_settings() {
  # Fetches the Hasura configuration secret for the current stage from AWS
  # Secrets Manager and writes its SecretString payload to ./config.yaml.
  #
  # Globals read:    WORKING_STAGE,
  #                  AWS_MOPED_HASURA_CONFIGURATION_FILE_PRODUCTION,
  #                  AWS_MOPED_HASURA_CONFIGURATION_FILE_STAGING
  # Globals written: AWS_HASURA_CONFIGURATION (exported secret id)
  # Returns:         0 on success; 1 if the secret cannot be fetched or the
  #                  response carries no usable SecretString.
  echo "Downloading Hasura Settings: ${WORKING_STAGE}";

  if [[ "${WORKING_STAGE}" == "production" ]]; then
    export AWS_HASURA_CONFIGURATION="${AWS_MOPED_HASURA_CONFIGURATION_FILE_PRODUCTION}";
  else
    export AWS_HASURA_CONFIGURATION="${AWS_MOPED_HASURA_CONFIGURATION_FILE_STAGING}";
  fi;

  # pipefail is enabled inside a subshell so that a failing `aws` call is not
  # masked by jq's exit status, without changing shell options for the caller.
  # --exit-status makes jq fail when SecretString is null instead of writing
  # the literal text "null" into config.yaml.
  if ! (
    set -o pipefail
    aws secretsmanager get-secret-value \
      --secret-id "${AWS_HASURA_CONFIGURATION}" \
      | jq -rc --exit-status ".SecretString" > config.yaml
  ); then
    echo "Failed to download Hasura settings for stage: ${WORKING_STAGE}" >&2;
    return 1;
  fi
}

#
# Applies migrations and metadata to the Hasura instance
#
# Runs the Hasura CLI in the current working directory: prints the CLI
# version, applies migrations to the "default" database, then applies metadata.
# Returns 0 when both apply steps succeed. Returns 1 as soon as one of them
# fails; metadata is NOT applied when the migration step fails.
#
function run_migration() {
  echo "----- MIGRATIONS STARTED -----";
  # Informational only; its exit status is not checked here.
  hasura --skip-update-check version;

  echo "Applying migration";
  if ! hasura migrate apply \
    --skip-update-check \
    --disable-interactive \
    --database-name default; then
    echo "Applying migration failed; metadata was not applied" >&2;
    return 1;
  fi

  echo "Applying metadata";
  if ! hasura metadata apply \
    --skip-update-check; then
    echo "Applying metadata failed" >&2;
    return 1;
  fi

  echo "----- MIGRATIONS FINISHED -----";
}

#
# Controls the migration process (main entry point for migrations job)
#
# Changes into ./moped-database (relative to the current directory), downloads
# the Hasura settings, then applies migrations and metadata.
# Returns 1 without running anything further if the directory cannot be
# entered or the settings download fails; otherwise returns run_migration's
# status. The working directory stays changed for the caller.
#
function run_migration_process() {
  if ! cd ./moped-database; then
    echo "Cannot enter ./moped-database from ${PWD}" >&2;
    return 1;
  fi
  echo "Running migration process @ ${PWD}"
  download_hasura_settings || return 1;
  run_migration;
}

################################################################################
# JOB: update-ecs-task-deployment
# Functions for validating and deploying ECS task definitions
################################################################################

#
# Selects the ECS deployment target for the current branch.
# BRANCH_NAME "production" maps to production; anything else maps to staging.
# Exports ENVIRONMENT, TD_FILE, FAMILY, CLUSTER and SERVICE, and echoes each
# of them so the chosen target is visible in the job log.
#
function determine_task_definition_file() {
  echo "Branch name: ${BRANCH_NAME}";

  # Pick the per-environment values
  case "${BRANCH_NAME}" in
    "production")
      ENVIRONMENT="production"
      TD_FILE="moped-database/ecs_task_definitions/production.graphql-engine.ecs-td.json"
      FAMILY="atd-moped-production"
      CLUSTER="atd-moped-cluster-production"
      ;;
    *)
      ENVIRONMENT="staging"
      TD_FILE="moped-database/ecs_task_definitions/staging.graphql-engine.ecs-td.json"
      FAMILY="atd-moped-staging"
      CLUSTER="atd-moped-cluster-staging"
      ;;
  esac

  # The service name is the same in both environments
  SERVICE="graphql-engine"

  export ENVIRONMENT TD_FILE FAMILY CLUSTER SERVICE

  echo "Environment: ${ENVIRONMENT}";
  echo "Cluster: ${CLUSTER}";
  echo "Service: ${SERVICE}";
  echo "Family: ${FAMILY}";
  echo "Task definition file: ${TD_FILE}";
}

#
# Compares the local task definition file with the one currently in AWS
# Returns 0 if different (needs update), 1 if identical (no update needed)
#
function check_task_definition_differs() {
# Reads FAMILY (the task definition family to look up) and, further down,
# TD_FILE (the local JSON file). Scratch files are written to fixed paths
# under /tmp.
echo "Fetching current task definition from AWS for family: ${FAMILY}...";

# Describe the current task definition from AWS
# If this is the first task definition, the command will fail and we'll register it
# NOTE(review): stderr is discarded and every non-zero exit is treated as
# "no existing task definition", so any other failure of this call would take
# the same "register new one" path -- confirm that is intended.
# NOTE(review): ${FAMILY} is unquoted; harmless for the family names set in
# determine_task_definition_file, but it would word-split on other values.
if ! aws ecs describe-task-definition \
--task-definition ${FAMILY} \
--output json > /tmp/aws-task-def.json 2>/dev/null; then
echo "No existing task definition found in AWS, will register new one";
# 0 means "differs / needs update" in this function's contract
return 0;
fi

echo "Extracting task definition from AWS response...";

# Extract just the taskDefinition object and remove AWS-managed fields
# These fields are added by AWS and shouldn't be compared
# --sort-keys gives both files the same key order so they can be compared with diff
jq --sort-keys '.taskDefinition | del(.taskDefinitionArn, .revision, .status, .requiresAttributes, .compatibilities, .registeredAt, .registeredBy, .deregisteredAt, .tags)' \
Copy link
Member Author

@frankhereford frankhereford Oct 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sort keys is important here to make the ordering of KV pairs in the JSON deterministic, because we actually compare these json blobs with diff as if they were code, and not by doing a == inside JS or python or what-have-you.

Anyway - with jq here, we strip out things that are intrinsic (such as those items supplied by AWS) and we only look at what we're actually defining.

An upside of doing it this way is that it becomes trivial for us to include the output of that diff in the logs, which is a super easy way for a dev to figure out what change caused the new task definition to be deployed into the service.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's really cool to see how you solved this. How are you feeling about how future-proof this is, and do you have any concerns about false positives due to minor formatting differences?

Copy link
Collaborator

@mddilley mddilley Oct 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@frankhereford I might be way overthinking, but I'm curious if we could consider deploying the task definition if the version-controlled file changes at all instead of comparing to what is currently deployed. Since we will be reviewing any code changes in a PR, we could give ourselves responsibility for making sure that we understand the changes we are pushing to be deployed in the service (like we do right now during the launch parties).

I'm also thinking about needing to maintain this list of fields that are added by AWS in the future. I really could be worrying too much but wanted to get some discussion going on this in case I am overlooking something.

/tmp/aws-task-def.json > /tmp/aws-task-def-normalized.json

# Normalize the local file the same way (remove the same fields if present)
# The local file has no .taskDefinition wrapper, so del() is applied at the top level.
# NOTE(review): jq's exit status is not checked here; presumably a parse
# failure leaves the normalized file empty, which the diff below would then
# report as a difference -- confirm. ${TD_FILE} is also unquoted.
jq --sort-keys 'del(.taskDefinitionArn, .revision, .status, .requiresAttributes, .compatibilities, .registeredAt, .registeredBy, .deregisteredAt, .tags)' \
${TD_FILE} > /tmp/local-task-def-normalized.json

echo "Comparing local task definition with AWS version...";

# Compare the normalized JSON files
# diff -q succeeds only when the files match, which maps to return 1
# ("identical, no update needed"); any other outcome maps to return 0.
if diff -q /tmp/aws-task-def-normalized.json /tmp/local-task-def-normalized.json > /dev/null; then
echo "🛑 Task definitions are identical, no update needed";
return 1;
else
echo "✓ Task definitions differ, update needed";
echo "";
echo "========================================";
echo "Differences (AWS version vs Local file):";
echo "========================================";
# Print the unified diff for the job log. diff exits non-zero when the files
# differ, so `|| true` keeps that expected status from counting as a failure.
diff -u /tmp/aws-task-def-normalized.json /tmp/local-task-def-normalized.json || true
echo "========================================";
echo "";
return 0;
fi
}

#
# Deregisters old task definitions, keeping only the last 3 active ones
#
function cleanup_old_task_definitions() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a really nice touch, and i'm mostly taking it for granted that this works as promised 👍

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i see now that it does work! (with my latest revision the most recent of the three) ✨

Image

Copy link
Member

@johnclary johnclary Oct 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@frankhereford one last thought on this. I wonder if we might want to increase to preserving maybe 10 of the most recent revisions. Reason being that although the latest task definition may install successfully, it is still possible that the definition has a faulty configuration that causes the container deployment to fail. If something goes sideways, it might be a relief to have a working task def available in AWS so we can recover quickly while we triage the issue. This would be excessive caution, of course, because we do have task definitions version controlled in Github as well.

# Body of cleanup_old_task_definitions: reads FAMILY, lists its ACTIVE
# revisions newest-first, and deregisters everything after the first three.
# The retention count (3) is hard-coded in the `-le 3` check, in `tail -n +4`,
# and in the log message; all three must change together.
echo "Cleaning up old task definitions for family: ${FAMILY}...";

# List all ACTIVE task definition revisions for this family, sorted by revision number
# One ARN per line (jq -r), in the order returned by --sort DESC.
# NOTE(review): `local var=$(...)` reports the status of `local`, not of the
# aws/jq pipeline, so a failed listing is not detected here; an empty result
# falls into the "No active task definitions" branch below.
local active_revisions=$(aws ecs list-task-definitions \
--family-prefix ${FAMILY} \
--status ACTIVE \
--sort DESC \
--output json | jq -r '.taskDefinitionArns[]')

if [ -z "$active_revisions" ]; then
echo "No active task definitions found to clean up";
return 0;
fi

# Count total active revisions
# tr strips any spaces around the number printed by wc so it can be used in the
# numeric test below.
local total_count=$(echo "$active_revisions" | wc -l | tr -d ' ')
echo "Found ${total_count} active task definition(s)";

# If we have 3 or fewer, no cleanup needed
if [ "$total_count" -le 3 ]; then
echo "Only ${total_count} active revision(s), no cleanup needed";
return 0;
fi

# Skip the first 3 (most recent) and deregister the rest
# `tail -n +4` starts output at line 4 of the list.
local to_deregister=$(echo "$active_revisions" | tail -n +4)
local deregister_count=$(echo "$to_deregister" | wc -l | tr -d ' ')

echo "Deregistering ${deregister_count} old revision(s), keeping the 3 most recent...";

# Deregister one ARN per line. A failed deregistration is logged but does not
# stop the loop or change this function's return status.
while IFS= read -r task_def_arn; do
if [ -n "$task_def_arn" ]; then
echo "Deregistering: ${task_def_arn}";
if aws ecs deregister-task-definition --task-definition "${task_def_arn}" > /dev/null; then
echo " ✓ Deregistered successfully";
else
echo " ✗ Failed to deregister";
fi
fi
done <<< "$to_deregister"

echo "Cleanup complete!";
}

#
# Registers the ECS task definition using AWS CLI
#
# Globals read: TD_FILE (path to the local task definition JSON),
#               FAMILY (task definition family to register under)
# Returns 0 if a new revision was registered.
# Returns 1 if registration was skipped: the file does not exist, or
# check_task_definition_differs reports that nothing changed.
# Exits the shell with status 1 if the AWS registration call fails.
#
function register_task_definition() {
  # Check if task definition file exists
  if [ ! -f "${TD_FILE}" ]; then
    echo "Task definition file not found: ${TD_FILE}";
    echo "Skipping ECS task definition update";
    return 1;
  fi

  # Check if the task definition differs from what's in AWS
  if ! check_task_definition_differs; then
    echo "Skipping ECS task definition registration";
    return 1;
  fi

  echo "Registering updated task definition...";

  # Register the task definition using AWS CLI.
  # Both expansions are quoted so the family and the file:// URL each reach
  # the CLI as a single argument even if they contain whitespace.
  if aws ecs register-task-definition \
    --family "${FAMILY}" \
    --cli-input-json "file://${TD_FILE}"; then
    echo "✓ Task definition registered successfully!";
    return 0;
  else
    echo "✗ Task definition registration failed!";
    exit 1;
  fi
}

#
# Updates the ECS service to use the latest task definition
#
# Globals read: CLUSTER, SERVICE, FAMILY
# Passing the bare family name as --task-definition asks ECS for the latest
# ACTIVE revision of that family; --force-new-deployment starts a new
# deployment of the service.
# Returns 0 on success; exits the shell with status 1 if the update fails.
#
function update_ecs_service() {
  echo "Updating ECS service to use the new task definition...";
  echo "Cluster: ${CLUSTER}";
  echo "Service: ${SERVICE}";
  echo "Family: ${FAMILY}";

  # Update the service to use the latest task definition from the family.
  # Expansions are quoted so each value reaches the CLI as exactly one argument.
  if aws ecs update-service \
    --cluster "${CLUSTER}" \
    --service "${SERVICE}" \
    --task-definition "${FAMILY}" \
    --force-new-deployment; then
    echo "✓ ECS service updated successfully!";
    echo "The service will now use the new task definition";
    return 0;
  else
    echo "✗ Failed to update ECS service";
    exit 1;
  fi
}

#
# Entry point for the ECS task definition update job.
# Resolves the target environment, registers the local task definition when
# needed, and, only after a new revision was registered, deregisters old
# revisions and points the service at the new one.
#
function update_ecs_task_definition_process() {
  determine_task_definition_file

  # No new revision was registered: nothing to clean up or deploy
  register_task_definition || return 0

  cleanup_old_task_definitions
  update_ecs_service
}
Loading