
Commit 6e14e7a

(fix) Bump to airflow 1.10.12 based on PR puckel#623
1 parent: bed7779

5 files changed: +101 −16 lines


Dockerfile

Lines changed: 3 additions & 2 deletions
@@ -1,5 +1,6 @@
-# VERSION 1.10.9
+# VERSION 1.10.12
 # AUTHOR: Matthieu "Puckel_" Roisil
+# UPGRADE BY David Wong
 # DESCRIPTION: Basic Airflow container
 # BUILD: docker build --rm -t puckel/docker-airflow .
 # SOURCE: https://github.com/puckel/docker-airflow
@@ -12,7 +13,7 @@ ENV DEBIAN_FRONTEND noninteractive
 ENV TERM linux

 # Airflow
-ARG AIRFLOW_VERSION=1.10.9
+ARG AIRFLOW_VERSION=1.10.12
 ARG AIRFLOW_USER_HOME=/usr/local/airflow
 ARG AIRFLOW_DEPS=""
 ARG PYTHON_DEPS=""
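To rebuild the image against the bumped version, the BUILD line above still applies; a minimal sketch, with the tag matching the compose files below and the --build-arg values purely illustrative:

    # Build and tag the updated image
    docker build --rm -t puckel/docker-airflow:1.10.12 .

    # Optionally pass extra dependencies through the ARGs declared in the Dockerfile
    docker build --rm -t puckel/docker-airflow:1.10.12 \
        --build-arg AIRFLOW_DEPS="datadog,dask" \
        --build-arg PYTHON_DEPS="flask_oauthlib>=0.9" .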

config/airflow.cfg

Lines changed: 90 additions & 6 deletions
@@ -110,6 +110,12 @@ sql_alchemy_pool_pre_ping = True
 # SqlAlchemy supports databases with the concept of multiple schemas.
 sql_alchemy_schema =

+# Import path for connect args in SqlAlchemy. Default to an empty dict.
+# This is useful when you want to configure db engine args that SqlAlchemy won't parse
+# in connection string.
+# See https://docs.sqlalchemy.org/en/13/core/engines.html#sqlalchemy.create_engine.params.connect_args
+# sql_alchemy_connect_args =
+
 # The amount of parallelism as a setting to the executor. This defines
 # the max number of task instances that should run simultaneously
 # on this airflow installation
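The new sql_alchemy_connect_args option takes an import path rather than a literal value. A minimal sketch, assuming Airflow's standard AIRFLOW__SECTION__KEY environment-variable override and a hypothetical module my_local_settings.py on the PYTHONPATH that defines a CONNECT_ARGS dict:

    # Point the option at a dict defined in your own module (names are hypothetical)
    export AIRFLOW__CORE__SQL_ALCHEMY_CONNECT_ARGS=my_local_settings.CONNECT_ARGS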
@@ -124,11 +130,16 @@ dags_are_paused_at_creation = True
 # The maximum number of active DAG runs per DAG
 max_active_runs_per_dag = 16

-# Whether to load the examples that ship with Airflow. It's good to
+# Whether to load the DAG examples that ship with Airflow. It's good to
 # get started, but you probably want to set this to False in a production
 # environment
 load_examples = True

+# Whether to load the default connections that ship with Airflow. It's good to
+# get started, but you probably want to set this to False in a production
+# environment
+load_default_connections = False
+
 # Where your Airflow plugins are stored
 plugins_folder = /usr/local/airflow/plugins

@@ -184,17 +195,51 @@ dag_discovery_safe_mode = True
 # The number of retries each task is going to have by default. Can be overridden at dag or task level.
 default_task_retries = 0

-# Whether to serialises DAGs and persist them in DB.
+# Whether to serialise DAGs and persist them in DB.
 # If set to True, Webserver reads from DB instead of parsing DAG files
 # More details: https://airflow.apache.org/docs/stable/dag-serialization.html
 store_serialized_dags = False

 # Updating serialized DAG can not be faster than a minimum interval to reduce database write rate.
 min_serialized_dag_update_interval = 30

+# Fetching serialized DAG can not be faster than a minimum interval to reduce database
+# read rate. This config controls when your DAGs are updated in the Webserver
+min_serialized_dag_fetch_interval = 10
+
+# Whether to persist DAG files code in DB.
+# If set to True, Webserver reads file contents from DB instead of
+# trying to access files in a DAG folder. Defaults to same as the
+# ``store_serialized_dags`` setting.
+# Example: store_dag_code = False
+# store_dag_code =
+
+# Maximum number of Rendered Task Instance Fields (Template Fields) per task to store
+# in the Database.
+# When Dag Serialization is enabled (``store_serialized_dags=True``), all the template_fields
+# for each of Task Instance are stored in the Database.
+# Keeping this number small may cause an error when you try to view ``Rendered`` tab in
+# TaskInstance view for older tasks.
+max_num_rendered_ti_fields_per_task = 30
+
 # On each dagrun check against defined SLAs
 check_slas = True

+# Path to custom XCom class that will be used to store and resolve operators results
+# Example: xcom_backend = path.to.CustomXCom
+xcom_backend = airflow.models.xcom.BaseXCom
+
+[secrets]
+# Full class name of secrets backend to enable (will precede env vars and metastore in search path)
+# Example: backend = airflow.contrib.secrets.aws_systems_manager.SystemsManagerParameterStoreBackend
+backend =
+
+# The backend_kwargs param is loaded into a dictionary and passed to __init__ of secrets backend class.
+# See documentation for the secrets backend you are using. JSON is expected.
+# Example for AWS Systems Manager ParameterStore:
+# ``{{"connections_prefix": "/airflow/connections", "profile_name": "default"}}``
+backend_kwargs =
+
 [cli]
 # In what way should the cli access the API. The LocalClient will use the
 # database directly, while the json_client will use the api running on the
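The new [secrets] section can also be driven through environment variables. A sketch that simply reuses the example values from the config comments above (the AWS Systems Manager backend is only an example, not a requirement):

    # Enable the secrets backend and pass its kwargs as JSON
    export AIRFLOW__SECRETS__BACKEND=airflow.contrib.secrets.aws_systems_manager.SystemsManagerParameterStoreBackend
    export AIRFLOW__SECRETS__BACKEND_KWARGS='{"connections_prefix": "/airflow/connections", "profile_name": "default"}'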
@@ -212,7 +257,9 @@ endpoint_url = http://localhost:8080
 fail_fast = False

 [api]
-# How to authenticate users of the API
+# How to authenticate users of the API. See
+# https://airflow.apache.org/docs/stable/security.html for possible values.
+# ("airflow.api.auth.backend.default" allows all requests for historic reasons)
 auth_backend = airflow.api.auth.backend.default

 [lineage]
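Since the default backend still allows all requests, a deployment that does not use the experimental API may want to lock it down. A sketch, assuming airflow.api.auth.backend.deny_all is available in your 1.10.x release (check the linked security docs for your exact version):

    # Reject all experimental-API requests instead of allowing them
    export AIRFLOW__API__AUTH_BACKEND=airflow.api.auth.backend.deny_all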
@@ -245,6 +292,12 @@ default_hive_mapred_queue =
 # airflow sends to point links to the right web server
 base_url = http://localhost:8080

+# Default timezone to display all dates in the RBAC UI, can be UTC, system, or
+# any IANA timezone string (e.g. Europe/Amsterdam). If left empty the
+# default value of core/default_timezone will be used
+# Example: default_ui_timezone = America/New_York
+default_ui_timezone =
+
 # The ip specified when starting the web server
 web_server_host = 0.0.0.0

@@ -273,6 +326,10 @@ worker_refresh_batch_size = 1
 # Number of seconds to wait before refreshing a batch of workers.
 worker_refresh_interval = 30

+# If set to True, Airflow will track files in plugins_folder directory. When it detects changes,
+# then reload the gunicorn.
+reload_on_plugin_change = False
+
 # Secret key used to run your flask app
 # It should be as random as possible
 secret_key = temporary_key
@@ -734,18 +791,30 @@ verify_certs = True
 [kubernetes]
 # The repository, tag and imagePullPolicy of the Kubernetes Image for the Worker to Run
 worker_container_repository =
+
+# Path to the YAML pod file. If set, all other kubernetes-related fields are ignored.
+# (This feature is experimental)
+pod_template_file =
 worker_container_tag =
 worker_container_image_pull_policy = IfNotPresent

-# If True (default), worker pods will be deleted upon termination
+# If True, all worker pods will be deleted upon termination
 delete_worker_pods = True

+# If False (and delete_worker_pods is True),
+# failed worker pods will not be deleted so users can investigate them.
+delete_worker_pods_on_failure = False
+
 # Number of Kubernetes Worker Pod creation calls per scheduler loop
 worker_pods_creation_batch_size = 1

 # The Kubernetes namespace where airflow workers should be created. Defaults to ``default``
 namespace = default

+# Allows users to launch pods in multiple namespaces.
+# Will require creating a cluster-role for the scheduler
+multi_namespace_mode = False
+
 # The name of the Kubernetes ConfigMap containing the Airflow Configuration (this file)
 # Example: airflow_configmap = airflow-configmap
 airflow_configmap =
@@ -782,6 +851,9 @@ dags_in_image = False
 # For either git sync or volume mounted DAGs, the worker will look in this subpath for DAGs
 dags_volume_subpath =

+# For either git sync or volume mounted DAGs, the worker will mount the volume in this path
+dags_volume_mount_point =
+
 # For DAGs mounted via a volume claim (mutually exclusive with git-sync and host path)
 dags_volume_claim =

@@ -810,6 +882,10 @@ env_from_secret_ref =
 # Git credentials and repository for DAGs mounted via Git (mutually exclusive with volume claim)
 git_repo =
 git_branch =
+
+# Use a shallow clone with a history truncated to the specified number of commits.
+# 0 - do not use shallow clone.
+git_sync_depth = 1
 git_subpath =

 # The specific rev or hash the git_sync init container will checkout
@@ -931,10 +1007,18 @@ tolerations =
 # Note that if no _request_timeout is specified, the kubernetes client will wait indefinitely
 # for kubernetes api responses, which will cause the scheduler to hang.
 # The timeout is specified as [connect timeout, read timeout]
-kube_client_request_args = {{"_request_timeout" : [60,60] }}
+kube_client_request_args =
+
+# Optional keyword arguments to pass to the ``delete_namespaced_pod`` kubernetes client
+# ``core_v1_api`` method when using the Kubernetes Executor.
+# This should be an object and can contain any of the options listed in the ``v1DeleteOptions``
+# class defined here:
+# https://github.com/kubernetes-client/python/blob/41f11a09995efcd0142e25946adc7591431bfb2f/kubernetes/client/models/v1_delete_options.py#L19
+# Example: delete_option_kwargs = {{"grace_period_seconds": 10}}
+delete_option_kwargs =

 # Specifies the uid to run the first process of the worker pods containers as
-run_as_user =
+run_as_user = 50000

 # Specifies a gid to associate with all containers in the worker pods
 # if using a git_ssh_key_secret_name use an fs_group
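Because kube_client_request_args now defaults to empty, deployments that relied on the old 60-second timeout need to set it explicitly. A sketch of restoring it (and the new pod-deletion knob) through the same AIRFLOW__SECTION__KEY convention; the values mirror the examples in the comments above:

    # Restore the previous client timeout and opt into graceful pod deletion
    export AIRFLOW__KUBERNETES__KUBE_CLIENT_REQUEST_ARGS='{"_request_timeout": [60, 60]}'
    export AIRFLOW__KUBERNETES__DELETE_OPTION_KWARGS='{"grace_period_seconds": 10}'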

docker-compose-CeleryExecutor.yml

Lines changed: 4 additions & 4 deletions
@@ -16,7 +16,7 @@ services:
             # - ./pgdata:/var/lib/postgresql/data/pgdata

     webserver:
-        image: puckel/docker-airflow:1.10.9
+        image: puckel/docker-airflow:1.10.12
         restart: always
         depends_on:
             - postgres
@@ -43,7 +43,7 @@ services:
             retries: 3

     flower:
-        image: puckel/docker-airflow:1.10.9
+        image: puckel/docker-airflow:1.10.12
         restart: always
         depends_on:
             - redis
@@ -55,7 +55,7 @@ services:
         command: flower

     scheduler:
-        image: puckel/docker-airflow:1.10.9
+        image: puckel/docker-airflow:1.10.12
         restart: always
         depends_on:
             - webserver
@@ -74,7 +74,7 @@ services:
         command: scheduler

     worker:
-        image: puckel/docker-airflow:1.10.9
+        image: puckel/docker-airflow:1.10.12
         restart: always
         depends_on:
             - scheduler
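With the tag bumped in every service, a running stack only needs to be recreated so each container picks up the 1.10.12 image. A short sketch (pull assumes the image is published; otherwise use the docker build command from the Dockerfile section above):

    docker-compose -f docker-compose-CeleryExecutor.yml pull
    docker-compose -f docker-compose-CeleryExecutor.yml up -d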

docker-compose-LocalExecutor.yml

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ services:
                 max-file: "3"

     webserver:
-        image: puckel/docker-airflow:1.10.9
+        image: puckel/docker-airflow:1.10.12
         restart: always
         depends_on:
             - postgres

script/entrypoint.sh

Lines changed: 3 additions & 3 deletions
@@ -37,7 +37,7 @@ wait_for_port() {
       echo >&2 "$(date) - $host:$port still not reachable, giving up"
       exit 1
     fi
-    echo "$(date) - waiting for $name... $j/$TRY_LOOP"
+    echo "$(date) - waiting for $name($host:$port)... $j/$TRY_LOOP"
     sleep 5
   done
 }
@@ -100,8 +100,8 @@ if [ "$AIRFLOW__CORE__EXECUTOR" = "CeleryExecutor" ]; then
   else
     # Derive useful variables from the AIRFLOW__ variables provided explicitly by the user
     REDIS_ENDPOINT=$(echo -n "$AIRFLOW__CELERY__BROKER_URL" | cut -d '/' -f3 | sed -e 's,.*@,,')
-    REDIS_HOST=$(echo -n "$POSTGRES_ENDPOINT" | cut -d ':' -f1)
-    REDIS_PORT=$(echo -n "$POSTGRES_ENDPOINT" | cut -d ':' -f2)
+    REDIS_HOST=$(echo -n "$REDIS_ENDPOINT" | cut -d ':' -f1)
+    REDIS_PORT=$(echo -n "$REDIS_ENDPOINT" | cut -d ':' -f2)
   fi

   wait_for_port "Redis" "$REDIS_HOST" "$REDIS_PORT"
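The second hunk fixes a copy-paste bug: the Redis host and port were previously parsed out of $POSTGRES_ENDPOINT. A quick trace of the corrected pipeline with an illustrative broker URL shows what the entrypoint now waits on:

    # Illustrative value; the real URL comes from the environment
    AIRFLOW__CELERY__BROKER_URL=redis://user:redispass@redis:6379/1
    REDIS_ENDPOINT=$(echo -n "$AIRFLOW__CELERY__BROKER_URL" | cut -d '/' -f3 | sed -e 's,.*@,,')  # -> redis:6379
    REDIS_HOST=$(echo -n "$REDIS_ENDPOINT" | cut -d ':' -f1)                                      # -> redis
    REDIS_PORT=$(echo -n "$REDIS_ENDPOINT" | cut -d ':' -f2)                                      # -> 6379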
