diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index adc3ad3..2c0951b 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -7,6 +7,8 @@ on: branches: [ main ] env: + BASE_URL_HOST: localhost:8081 + ENABLE_PROXY_FIX: False AIRFLOW_PROJ_DIR: /home/runner/work/AlertFlow/AlertFlow/alertflow AIRFLOW_HOME: /opt/airflow AIRFLOW_PORT: 8081 diff --git a/alertflow/airflow.cfg b/alertflow/airflow.cfg index 3d321bd..9001b98 100644 --- a/alertflow/airflow.cfg +++ b/alertflow/airflow.cfg @@ -534,7 +534,7 @@ default_hive_mapred_queue = # The base url of your website as airflow cannot guess what domain or # cname you are using. This is used in automated emails that # airflow sends to point links to the right web server -base_url = http://localhost:8080 +base_url = http://$BASE_URL_HOST # Default timezone to display all dates in the UI, can be UTC, system, or # any IANA timezone string (e.g. Europe/Amsterdam). If left empty the @@ -648,7 +648,7 @@ navbar_color = #fff default_dag_run_display_number = 25 # Enable werkzeug ``ProxyFix`` middleware for reverse proxy -enable_proxy_fix = False +enable_proxy_fix = $ENABLE_PROXY_FIX # Number of values to trust for ``X-Forwarded-For``. # More info: https://werkzeug.palletsprojects.com/en/0.16.x/middleware/proxy_fix/ diff --git a/alertflow/dags/episcanner/episcanner_export_data.py b/alertflow/dags/episcanner/episcanner_export_data.py deleted file mode 100644 index 9e81f5a..0000000 --- a/alertflow/dags/episcanner/episcanner_export_data.py +++ /dev/null @@ -1,103 +0,0 @@ -import os -from datetime import datetime, timedelta - -from airflow import DAG -from airflow.models import Variable -from airflow.operators.bash import BashOperator -from airflow.operators.python import PythonOperator -from dotenv import dotenv_values, set_key - - -def set_airflow_variables(): - """ - Set Airflow variables from environment and write them to the .env file. - """ - # Set Airflow variables from environment variables - PSQL_USER = os.environ.get('AIRFLOW_PSQL_USER_MAIN') - PSQL_PASSWORD = os.environ.get('AIRFLOW_PSQL_PASSWORD_MAIN') - PSQL_HOST = os.environ.get('AIRFLOW_PSQL_HOST_MAIN') - PSQL_PORT = os.environ.get('AIRFLOW_PSQL_PORT_MAIN') - PSQL_DB = os.environ.get('AIRFLOW_PSQL_DB_MAIN') - - Variable.set('PSQL_USER', PSQL_USER) - Variable.set('PSQL_PASSWORD', PSQL_PASSWORD) - Variable.set('PSQL_HOST', PSQL_HOST) - Variable.set('PSQL_PORT', PSQL_PORT) - Variable.set('PSQL_DB', PSQL_DB) - - # Write variables to .env file - dotenv_path = '/opt/airflow/episcanner-downloader/.env' - env_vars = dotenv_values(dotenv_path) - env_vars['PSQL_USER'] = PSQL_USER - env_vars['PSQL_PASSWORD'] = PSQL_PASSWORD - env_vars['PSQL_HOST'] = PSQL_HOST - env_vars['PSQL_PORT'] = PSQL_PORT - env_vars['PSQL_DB'] = PSQL_DB - for key, value in env_vars.items(): - set_key(dotenv_path, key, value) - - -default_args = { - 'owner': 'airflow', - 'depends_on_past': False, - 'start_date': datetime(2023, 5, 21), - 'retries': 1, - 'retry_delay': timedelta(minutes=5), -} - -with DAG( - 'EPISCANNER_DOWNLOADER', - default_args=default_args, - schedule_interval='0 3 * * 0', # Every Sunday at 3 AM - catchup=False, -) as dag: - - # clone the repository from GitHub - clone_repository = BashOperator( - task_id='clone_repository', - bash_command='git clone --branch main --single-branch --depth 1 ' - 'https://github.com/AlertaDengue/episcanner-downloader.git ' - '/opt/airflow/episcanner-downloader', - dag=dag, - ) - - # Set variables for Episcanner-PostgreSQL connection - set_connection_variables = PythonOperator( - task_id='set_connection_variables', - python_callable=set_airflow_variables, - dag=dag, - ) - - # Install the Episcanner package using Poetry - install_episcanner = BashOperator( - task_id='install_episcanner', - bash_command='source /home/airflow/mambaforge/bin/activate episcanner-downloader && ' # NOQA E501 - 'cd /opt/airflow/episcanner-downloader && ' - 'poetry install', - dag=dag, - ) - - # Download all data to the specified directory - episcanner_downloader = BashOperator( - task_id='episcanner_downloader', - bash_command='source /home/airflow/mambaforge/bin/activate episcanner-downloader && ' # NOQA E501 - 'cd /opt/airflow/episcanner-downloader &&' - 'python epi_scanner/downloader/export_data.py ' - '-s all -d dengue chikungunya -o /opt/airflow/episcanner_data', - dag=dag, - ) - - # Remove the episcanner-downloader repository - remove_repository = BashOperator( - task_id='remove_repository', - bash_command='rm -rf /opt/airflow/episcanner-downloader', - dag=dag, - ) - - ( - clone_repository - >> set_connection_variables - >> install_episcanner - >> episcanner_downloader - >> remove_repository - ) diff --git a/alertflow/dags/satellite-weather/brasil.py b/alertflow/dags/satellite-weather/brasil.py index 3618f33..22233cf 100644 --- a/alertflow/dags/satellite-weather/brasil.py +++ b/alertflow/dags/satellite-weather/brasil.py @@ -41,7 +41,7 @@ tags=['Brasil', 'Copernicus'], schedule='@daily', default_args=DEFAULT_ARGS, - start_date=pendulum.datetime(2014, 1, 1), + start_date=pendulum.datetime(2023, 8, 1), catchup=True, max_active_runs=14, ): diff --git a/conda/env.yaml b/conda/env.yaml index a833e98..7e65db9 100644 --- a/conda/env.yaml +++ b/conda/env.yaml @@ -3,10 +3,13 @@ channels: - conda-forge - nodefaults dependencies: - - python 3.* - - docker-compose - - pip + - poetry - pre-commit - blue - flake8 - isort + - python 3.* + - docker-compose + - pip + - pip: + - tomli diff --git a/docker/Dockerfile b/docker/Dockerfile index 9e60945..b42a3ac 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM apache/airflow:2.5.3 +FROM apache/airflow:2.6.3 LABEL maintainer="Luã Bida Vacaro " LABEL org.opencontainers.image.title="AlertFlow" @@ -74,13 +74,4 @@ RUN /usr/local/bin/python -m virtualenv /opt/envs/py310 --python="/opt/py310/bin "satellite-weather-downloader >= 1.8.4" \ psycopg2 -# Install conda and create environment -RUN curl -LO https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh \ - && bash Mambaforge-Linux-x86_64.sh -b -p /home/airflow/mambaforge \ - && rm Mambaforge-Linux-x86_64.sh \ - && /home/airflow/mambaforge/bin/mamba create -y -n episcanner-downloader python=3.11 poetry psycopg2 python-dotenv \ - && chown -R ${HOST_UID}:${HOST_GID} ${AIRFLOW_HOME}/ /home/airflow/mambaforge/ - -RUN echo "alias activate_episcanner='source /home/airflow/mambaforge/bin/activate episcanner-downloader'" >> /home/airflow/.bashrc - ENTRYPOINT [ "/entrypoint.sh" ] diff --git a/env.tpl b/env.tpl index 3a1510c..c5ce15f 100644 --- a/env.tpl +++ b/env.tpl @@ -7,11 +7,13 @@ HOST_UID=${HOST_UID} HOST_GID=${HOST_GID} # Web +BASE_URL_HOST=${BASE_URL_HOST} _AIRFLOW_WWW_USER_USERNAME=${_AIRFLOW_WWW_USER_USERNAME} _AIRFLOW_WWW_USER_PASSWORD=${_AIRFLOW_WWW_USER_USERNAME} _AIRFLOW_WWW_USER_EMAIL=${_AIRFLOW_WWW_USER_EMAIL} _AIRFLOW_WWW_USER_FIRST_NAME=${_AIRFLOW_WWW_USER_FIRST_NAME} _AIRFLOW_WWW_USER_LAST_NAME=${_AIRFLOW_WWW_USER_LAST_NAME} +ENABLE_PROXY_FIX=${ENABLE_PROXY_FIX} # Email EMAIL_MAIN=${EMAIL_MAIN}