Skip to content

🎨 Add fallback traefik 503 routes 🚨⚠️ DEVOPS #7899

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions services/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ services:
- traefik.http.services.${SWARM_STACK_NAME}_api-server.loadbalancer.healthcheck.path=/
- traefik.http.services.${SWARM_STACK_NAME}_api-server.loadbalancer.healthcheck.interval=2000ms
- traefik.http.services.${SWARM_STACK_NAME}_api-server.loadbalancer.healthcheck.timeout=1000ms
# NOTE: keep in sync with fallback router (rule and entrypoint)
- traefik.http.routers.${SWARM_STACK_NAME}_api-server.rule=(Path(`/`) || Path(`/v0`) || PathPrefix(`/v0/`) || Path(`/api/v0/openapi.json`))
- traefik.http.routers.${SWARM_STACK_NAME}_api-server.entrypoints=simcore_api
- traefik.http.routers.${SWARM_STACK_NAME}_api-server.priority=3
Expand Down Expand Up @@ -628,6 +629,7 @@ services:
- traefik.http.services.${SWARM_STACK_NAME}_static_webserver.loadbalancer.healthcheck.interval=2000ms
- traefik.http.services.${SWARM_STACK_NAME}_static_webserver.loadbalancer.healthcheck.timeout=1000ms
- traefik.http.middlewares.${SWARM_STACK_NAME}_static_webserver_retry.retry.attempts=2
# NOTE: keep in sync with fallback router (rule and entrypoint)
- traefik.http.routers.${SWARM_STACK_NAME}_static_webserver.rule=(Path(`/osparc`) || Path(`/s4l`) || Path(`/s4llite`) || Path(`/s4lacad`) || Path(`/s4lengine`) || Path(`/s4ldesktop`) || Path(`/s4ldesktopacad`) || Path(`/tis`) || Path(`/tiplite`) || Path(`/transpiled`) || Path(`/resource`) || PathPrefix(`/osparc/`) || PathPrefix(`/s4l/`) || PathPrefix(`/s4llite/`) || PathPrefix(`/s4lacad/`) || PathPrefix(`/s4lengine/`) || PathPrefix(`/s4ldesktop/`) || PathPrefix(`/s4ldesktopacad/`) || PathPrefix(`/tis/`) || PathPrefix(`/tiplite/`) || PathPrefix(`/transpiled/`) || PathPrefix(`/resource/`))
- traefik.http.routers.${SWARM_STACK_NAME}_static_webserver.service=${SWARM_STACK_NAME}_static_webserver
- traefik.http.routers.${SWARM_STACK_NAME}_static_webserver.entrypoints=http
Expand Down Expand Up @@ -871,6 +873,7 @@ services:
# NOTE: stickiness must remain only for specific endpoints, see https://github.com/ITISFoundation/osparc-simcore/pull/4180
- traefik.http.middlewares.${SWARM_STACK_NAME}_webserver_retry.retry.attempts=2
- traefik.http.routers.${SWARM_STACK_NAME}_webserver.service=${SWARM_STACK_NAME}_webserver
# NOTE: keep in sync with fallback router (rule and entrypoint)
- traefik.http.routers.${SWARM_STACK_NAME}_webserver.rule=(Path(`/`) || Path(`/v0`) || Path(`/socket.io/`) || Path(`/static-frontend-data.json`) || PathRegexp(`^/study/(?P<study_uuid>\b[0-9a-f]{8}\b-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-\b[0-9a-f]{12}\b)`) || Path(`/view`) || Path(`/#/view`) || Path(`/#/error`) || PathPrefix(`/v0/`))
- traefik.http.routers.${SWARM_STACK_NAME}_webserver.entrypoints=http
- traefik.http.routers.${SWARM_STACK_NAME}_webserver.priority=6
Expand Down Expand Up @@ -1452,6 +1455,56 @@ services:
- default
- interactive_services_subnet # for legacy dynamic services

# Defines fallback routes for simcore services.
# If a docker healthcheck fails, the container's traefik configuration is removed,
# which leads to a 404 (see https://github.com/traefik/traefik/issues/7842).
#
# The fallback routes return a proper 503 (instead of a 404).
# This service must be running at all times.
traefik-configuration-placeholder:
  image: busybox:1.35.0
  # the container serves no traffic: it only idles so that the traefik labels
  # below stay registered
  command: sleep infinity
  networks:
    - default
  deploy:
    labels:
      # route to internal traefik
      - traefik.enable=true
      - io.simcore.zone=${TRAEFIK_SIMCORE_ZONE}

      ### Fallback for api-server
      # NOTE: keep rule and entrypoint in sync with the api-server router
      - traefik.http.routers.${SWARM_STACK_NAME}_api-server_fallback.rule=(Path(`/`) || Path(`/v0`) || PathPrefix(`/v0/`) || Path(`/api/v0/openapi.json`))
      - traefik.http.routers.${SWARM_STACK_NAME}_api-server_fallback.service=${SWARM_STACK_NAME}_api-server_fallback
      - traefik.http.routers.${SWARM_STACK_NAME}_api-server_fallback.entrypoints=simcore_api
      # lower than the api-server router priority (3), so the fallback is only
      # selected when the api-server router is not available
      - traefik.http.routers.${SWARM_STACK_NAME}_api-server_fallback.priority=1
      # always fail and return 503 via unhealthy loadbalancer healthcheck
      # port is required (otherwise traefik service is not created)
      - traefik.http.services.${SWARM_STACK_NAME}_api-server_fallback.loadbalancer.server.port=0
      - traefik.http.services.${SWARM_STACK_NAME}_api-server_fallback.loadbalancer.healthcheck.path=/some/invalid/path/to/generate/a/503
      - traefik.http.services.${SWARM_STACK_NAME}_api-server_fallback.loadbalancer.healthcheck.interval=10s
      - traefik.http.services.${SWARM_STACK_NAME}_api-server_fallback.loadbalancer.healthcheck.timeout=1ms

      ### Fallback for webserver
      # NOTE: keep rule and entrypoint in sync with the webserver router
      - traefik.http.routers.${SWARM_STACK_NAME}_webserver_fallback.rule=(Path(`/`) || Path(`/v0`) || Path(`/socket.io/`) || Path(`/static-frontend-data.json`) || PathRegexp(`^/study/(?P<study_uuid>\b[0-9a-f]{8}\b-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-\b[0-9a-f]{12}\b)`) || Path(`/view`) || Path(`/#/view`) || Path(`/#/error`) || PathPrefix(`/v0/`))
      - traefik.http.routers.${SWARM_STACK_NAME}_webserver_fallback.service=${SWARM_STACK_NAME}_webserver_fallback
      - traefik.http.routers.${SWARM_STACK_NAME}_webserver_fallback.entrypoints=http
      # lower than the webserver router priority (6), so the fallback is only
      # selected when the webserver router is not available
      - traefik.http.routers.${SWARM_STACK_NAME}_webserver_fallback.priority=1
      # always fail and return 503 via unhealthy loadbalancer healthcheck
      # port is required (otherwise traefik service is not created)
      - traefik.http.services.${SWARM_STACK_NAME}_webserver_fallback.loadbalancer.server.port=0
      # deliberately invalid path, same as the other fallbacks (was `/v0/`,
      # which looked like a real probe of the webserver API)
      - traefik.http.services.${SWARM_STACK_NAME}_webserver_fallback.loadbalancer.healthcheck.path=/some/invalid/path/to/generate/a/503
      - traefik.http.services.${SWARM_STACK_NAME}_webserver_fallback.loadbalancer.healthcheck.interval=10s
      - traefik.http.services.${SWARM_STACK_NAME}_webserver_fallback.loadbalancer.healthcheck.timeout=1ms

      ### Fallback for static-webserver
      # NOTE: keep rule and entrypoint in sync with the static_webserver router
      - traefik.http.routers.${SWARM_STACK_NAME}_static_webserver_fallback.rule=(Path(`/osparc`) || Path(`/s4l`) || Path(`/s4llite`) || Path(`/s4lacad`) || Path(`/s4lengine`) || Path(`/s4ldesktop`) || Path(`/s4ldesktopacad`) || Path(`/tis`) || Path(`/tiplite`) || Path(`/transpiled`) || Path(`/resource`) || PathPrefix(`/osparc/`) || PathPrefix(`/s4l/`) || PathPrefix(`/s4llite/`) || PathPrefix(`/s4lacad/`) || PathPrefix(`/s4lengine/`) || PathPrefix(`/s4ldesktop/`) || PathPrefix(`/s4ldesktopacad/`) || PathPrefix(`/tis/`) || PathPrefix(`/tiplite/`) || PathPrefix(`/transpiled/`) || PathPrefix(`/resource/`))
      - traefik.http.routers.${SWARM_STACK_NAME}_static_webserver_fallback.service=${SWARM_STACK_NAME}_static_webserver_fallback
      - traefik.http.routers.${SWARM_STACK_NAME}_static_webserver_fallback.entrypoints=http
      # NOTE(review): presumably lower than the static_webserver router priority
      # (not visible here) - confirm
      - traefik.http.routers.${SWARM_STACK_NAME}_static_webserver_fallback.priority=1
      # always fail and return 503 via unhealthy loadbalancer healthcheck
      # port is required (otherwise traefik service is not created)
      - traefik.http.services.${SWARM_STACK_NAME}_static_webserver_fallback.loadbalancer.server.port=0
      - traefik.http.services.${SWARM_STACK_NAME}_static_webserver_fallback.loadbalancer.healthcheck.path=/some/invalid/path/to/generate/a/503
      - traefik.http.services.${SWARM_STACK_NAME}_static_webserver_fallback.loadbalancer.healthcheck.interval=10s
      - traefik.http.services.${SWARM_STACK_NAME}_static_webserver_fallback.loadbalancer.healthcheck.timeout=1ms


volumes:
postgres_data:
name: ${SWARM_STACK_NAME}_postgres_data
Expand Down
Loading