# airflow.cfg

[core]
# Absolute path of the folder that holds the DAG definition files, most
# likely a subfolder in a code repository.
dags_folder = /opt/airflow/dags

# Absolute path of the folder where Airflow stores its log files.
base_log_folder = /opt/airflow/logs

# Log line format used when colored logs are enabled.
# A literal percent sign is written %% because values go through configparser
# interpolation. Braces are written singly: this file is read as-is, so a
# doubled brace would show up literally in every log line.
colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s

# Log line format used when colored logs are not enabled.
log_format = [%%(asctime)s] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s

# Full path of the log file for the DAG processor manager.
dag_processor_manager_log_location = /opt/airflow/logs/dag_processor_manager/dag_processor_manager.log

# The SqlAlchemy connection string to the metadata database.
# SqlAlchemy supports many different database engines; see their website
# for more information.
# NOTE(review): intentionally left commented out here. Presumably the value is
# supplied through the AIRFLOW__CORE__SQL_ALCHEMY_CONN environment variable;
# confirm, otherwise Airflow falls back to its built-in default.
# sql_alchemy_conn = sqlite:////tmp/airflow.db

# The amount of parallelism as a setting to the executor. This defines
# the max number of task instances that should run simultaneously
# on this airflow installation
parallelism = 256

# The number of task instances allowed to run concurrently by the scheduler
dag_concurrency = 64

# Whether to load the examples that ship with Airflow. It's good to
# get started, but you probably want to set this to False in a production
# environment
load_examples = False

# Where your Airflow plugins are stored
plugins_folder = /opt/airflow/plugins

# Secret key used to encrypt connection passwords stored in the db.
# The value is taken from the FERNET_KEY environment variable (Airflow expands
# environment variables in option values), so the key itself is not kept in
# this file.
fernet_key = $FERNET_KEY

# How long, in seconds, before timing out a python file import
dagbag_import_timeout = 600

# How long, in seconds, before timing out a DagFileProcessor, which processes
# a dag file
dag_file_processor_timeout = 600

[api]
# How to authenticate users of the API
# NOTE(review): the "default" backend performs no authentication, so every
# API request is accepted. Fine for a local/dev stack; confirm this is
# intended before exposing the API on a shared network.
auth_backend = airflow.api.auth.backend.default


[webserver]
# Number of seconds the webserver waits before killing gunicorn master that doesn't respond
web_server_master_timeout = 600

# Number of seconds the gunicorn webserver waits before timing out on a worker
web_server_worker_timeout = 600

# Secret key used to run your flask app
# It should be as random as possible
# NOTE(review): the key is committed in plain text, and the \x.. sequences are
# not unescaped by the config parser, so they are kept as literal characters.
# Consider supplying the key from the environment (as fernet_key does) and
# rotating this value.
secret_key = l\xba,\xc3\x023\xca\x04\xdb\xf2\xf7\xfa\xb8#\xee>

# Number of workers to run the Gunicorn web server
workers = 2

# Expose the configuration file in the web server
# NOTE(review): with authenticate = False below, anyone who can reach the UI
# can read this configuration, including the secrets stored in this file.
# Confirm this is only used for local development.
expose_config = True

# Allow the UI to be rendered in a frame
x_frame_enabled = True

# Minutes of non-activity before logged out from UI
# 0 means never get forcibly logged out
force_log_out_after = 0

# Whether the web UI requires users to log in.
authenticate = False
# Authentication backend for the web UI.
# NOTE(review): this is the API auth module, not a web UI login backend. It
# looks like it is only consulted when authenticate = True, so it is inert
# today; if Google login (see the [google] section) is the goal, the value
# presumably needs to be the Google auth backend instead. TODO confirm.
auth_backend = airflow.api.auth.backend.default


[celery]
# The concurrency that will be used when starting workers with the
# ``airflow celery worker`` command. This defines the number of task instances that
# a worker will take, so size up your workers based on the resources on
# your worker box and the nature of your tasks
# NOTE: worker_autoscale is set below, so per the note there this value is
# ignored whenever the autoscale option is available.
worker_concurrency = 32

# The maximum and minimum concurrency that will be used when starting workers with the
# ``airflow celery worker`` command (always keep minimum processes, but grow
# to maximum if necessary). Note the value should be max_concurrency,min_concurrency
# Pick these numbers based on resources on worker box and the nature of the task.
# If autoscale option is available, worker_concurrency will be ignored.
# http://docs.celeryproject.org/en/latest/reference/celery.bin.worker.html#cmdoption-celery-worker-autoscale
# Example: worker_autoscale = 16,12
worker_autoscale = 32,12

# The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally
# a sqlalchemy database. Refer to the Celery documentation for more
# information.
# http://docs.celeryproject.org/en/latest/userguide/configuration.html#broker-settings
# Here: Redis at host "redis", port 6379, database 1.
broker_url = redis://redis:6379/1

# The Celery result_backend. When a job finishes, it needs to update the
# metadata of the job. Therefore it will post a message on a message bus,
# or insert it into a database (depending on the backend).
# This status is used by the scheduler to update the state of the task.
# The use of a database is highly recommended
# http://docs.celeryproject.org/en/latest/userguide/configuration.html#task-result-backend-settings
# NOTE(review): the database user and password are embedded in this URL in
# plain text; confirm these are development-only credentials.
result_backend = db+postgresql://airflow:airflow@postgres/airflow

[scheduler]
# Folder where the scheduler's child processes write their log files.
child_process_log_directory = /opt/airflow/logs/scheduler

[elasticsearch]
# Format of the log_id, which is used to query for a given task's logs.
# Airflow looks this option up in the [elasticsearch] section; it has no
# effect under [scheduler]. The placeholders use single braces because this
# file is read as-is: doubled braces would be taken as a Jinja template, in
# which dag_id and task_id are not defined.
log_id_template = {dag_id}-{task_id}-{execution_date}-{try_number}

[kubernetes]
# Keyword parameters to pass while calling a kubernetes client core_v1_api methods
# from Kubernetes Executor provided as a single line formatted JSON dictionary string.
# List of supported params are similar for all core_v1_apis, hence a single config
# variable for all apis.
# See:
# https://raw.githubusercontent.com/kubernetes-client/python/master/kubernetes/client/apis/core_v1_api.py
# Note that if no _request_timeout is specified, the kubernetes client will wait indefinitely
# for kubernetes api responses, which will cause the scheduler to hang.
# The timeout is specified as [connect timeout, read timeout]
# The value must be valid JSON, so the dictionary uses single braces; the
# doubled braces found in Airflow's internal config template are not valid
# JSON when this file is read as-is.
kube_client_request_args = {"_request_timeout" : [60,60] }

# Specifies the uid to run the first process of the worker pods containers as
# (left empty here, i.e. no uid is specified)
run_as_user =

# Settings for Google OAuth login to the web UI.
# ref: https://airflow.apache.org/docs/apache-airflow/1.10.1/security.html#setting-up-google-authentication
# NOTE(review): presumably these only take effect once [webserver]
# authenticate is enabled with the Google auth backend; confirm.
[google]
# OAuth client credentials. The values below are placeholders and must be
# replaced with the ones issued for this deployment (see the doc linked above).
# Avoid committing the real client_secret to version control.
client_id = <check the doc above>
client_secret = <check the doc above>
# Path on the webserver that Google redirects to after the user signs in.
oauth_callback_route = /oauth2callback
# Comma-separated list of domains.
# NOTE(review): presumably the e-mail domains allowed to log in; confirm.
domain = localhost,pycon.tw
# Value of the OAuth "prompt" setting; select_account presumably makes Google
# show the account chooser. TODO confirm.
prompt = select_account