From 49ffb9f04a3949169411740d168aa1fc735b0c10 Mon Sep 17 00:00:00 2001 From: JosephKevinMachado Date: Mon, 10 Jun 2024 15:48:38 -0400 Subject: [PATCH 01/14] 2024-06-10-15-48-38 --- .gitignore | 6 +- .tool-versions | 1 + Makefile | 6 +- README.md | 2 +- containers/airflow/Dockerfile | 5 +- containers/airflow/quarto.sh | 12 + containers/airflow/requirements.txt | 22 +- dags/.gitignore | 4 - dags/coincap_el.py | 40 ++ data/.gitkeep | 0 docker-compose.yml | 25 +- visualization/dashboard.html | 575 ++++++++++++++++++++++++++++ visualization/dashboard.qmd | 24 ++ 13 files changed, 680 insertions(+), 42 deletions(-) create mode 100644 .tool-versions create mode 100644 containers/airflow/quarto.sh delete mode 100755 dags/.gitignore create mode 100755 dags/coincap_el.py create mode 100755 data/.gitkeep create mode 100755 visualization/dashboard.html create mode 100755 visualization/dashboard.qmd diff --git a/.gitignore b/.gitignore index ec01476..f545cc8 100644 --- a/.gitignore +++ b/.gitignore @@ -28,9 +28,6 @@ __pycache__ # policy trust-policy.json -# data -data* - # logs logs/* *.log @@ -80,3 +77,6 @@ override.tf.json terraform.rc Footer +dashboard_files/ + +data/*.csv diff --git a/.tool-versions b/.tool-versions new file mode 100644 index 0000000..bc91fdd --- /dev/null +++ b/.tool-versions @@ -0,0 +1 @@ +python 3.11.1 diff --git a/Makefile b/Makefile index 5c56bb3..745b6dd 100644 --- a/Makefile +++ b/Makefile @@ -5,12 +5,14 @@ docker-spin-up: docker compose --env-file env up airflow-init && docker compose --env-file env up --build -d perms: - sudo mkdir -p logs plugins temp dags tests migrations && sudo chmod -R u=rwx,g=rwx,o=rwx logs plugins temp dags tests migrations + sudo mkdir -p logs plugins temp dags tests migrations data visualization && sudo chmod -R u=rwx,g=rwx,o=rwx logs plugins temp dags tests migrations data visualization up: perms docker-spin-up warehouse-migration down: - docker compose down + docker compose down --volumes --rmi all + +restart: down up sh: docker exec -ti webserver bash diff --git a/README.md b/README.md index 199504e..ac702fe 100644 --- a/README.md +++ b/README.md @@ -73,4 +73,4 @@ After you are done, make sure to destroy your cloud infrastructure. ```shell make down # Stop docker containers on your computer make infra-down # type in yes after verifying the changes TF will make -``` \ No newline at end of file +``` diff --git a/containers/airflow/Dockerfile b/containers/airflow/Dockerfile index 53597b4..7558f38 100755 --- a/containers/airflow/Dockerfile +++ b/containers/airflow/Dockerfile @@ -1,3 +1,6 @@ -FROM apache/airflow:2.2.0 +FROM apache/airflow:2.9.2 COPY requirements.txt / RUN pip install --no-cache-dir -r /requirements.txt + +COPY quarto.sh / +RUN cd / && bash /quarto.sh diff --git a/containers/airflow/quarto.sh b/containers/airflow/quarto.sh new file mode 100644 index 0000000..702e10c --- /dev/null +++ b/containers/airflow/quarto.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +curl -L -o ~/quarto-1.5.43-linux-amd64.tar.gz https://github.com/quarto-dev/quarto-cli/releases/download/v1.5.43/quarto-1.5.43-linux-amd64.tar.gz +mkdir ~/opt +tar -C ~/opt -xvzf ~/quarto-1.5.43-linux-amd64.tar.gz + +mkdir ~/.local/bin +ln -s ~/opt/quarto-1.5.43/bin/quarto ~/.local/bin/quarto + +( echo ""; echo 'export PATH=$PATH:~/.local/bin\n' ; echo "" ) >> ~/.profile +source ~/.profile + diff --git a/containers/airflow/requirements.txt b/containers/airflow/requirements.txt index 96958af..30ae22f 100755 --- a/containers/airflow/requirements.txt +++ b/containers/airflow/requirements.txt @@ -1,9 +1,13 @@ -black==22.8.0 -flake8==5.0.4 -mypy==0.971 -isort==5.10.1 -moto[all]==4.0.6 -pytest==7.0.1 -pytest-mock==3.6.1 -apache-airflow-client==2.3.0 -yoyo-migrations==8.0.0 \ No newline at end of file +black==24.4.2 +flake8==7.0.0 +mypy==1.10.0 +isort==5.13.2 +moto[all]==5.0.9 +pytest==8.2.2 +pytest-mock==3.14.0 +apache-airflow-client==2.9.0 +yoyo-migrations==8.2.0 +duckdb==1.0.0 +plotly==5.22.0 +jupyter==1.0.0 +types-requests==2.32.0.20240602 diff --git a/dags/.gitignore b/dags/.gitignore deleted file mode 100755 index 86d0cb2..0000000 --- a/dags/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -# Ignore everything in this directory -* -# Except this file -!.gitignore \ No newline at end of file diff --git a/dags/coincap_el.py b/dags/coincap_el.py new file mode 100755 index 0000000..e1b686e --- /dev/null +++ b/dags/coincap_el.py @@ -0,0 +1,40 @@ +import csv +import os +from datetime import datetime, timedelta + +import requests + +from airflow import DAG +from airflow.decorators import task +from airflow.operators.bash import BashOperator + +with DAG( + 'coincap_el', + description='A simple DAG to fetch data from CoinCap Exchanges API and write to a file', + schedule_interval=timedelta(days=1), + start_date=datetime(2023, 1, 1), + catchup=False, +) as dag: + + url = "https://api.coincap.io/v2/exchanges" + file_path = f'{os.getenv("AIRFLOW_HOME")}/data/coincap_exchanges.csv' + + @task + def fetch_coincap_exchanges(url, file_path): + response = requests.get(url) + data = response.json() + exchanges = data['data'] + if exchanges: + keys = exchanges[0].keys() + with open(file_path, 'w') as f: + dict_writer = csv.DictWriter(f, fieldnames=keys) + dict_writer.writeheader() + dict_writer.writerows(exchanges) + + markdown_path = f'{os.getenv("AIRFLOW_HOME")}/visualization/' + gen_dashboard = BashOperator( + task_id="generate_dashboard", + bash_command=f'cd {markdown_path} && quarto render {markdown_path}/dashboard.qmd', + ) + + fetch_coincap_exchanges(url, file_path) >> gen_dashboard diff --git a/data/.gitkeep b/data/.gitkeep new file mode 100755 index 0000000..e69de29 diff --git a/docker-compose.yml b/docker-compose.yml index a3a8de0..a899116 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -58,6 +58,8 @@ x-airflow-common: volumes: - ./dags:/opt/airflow/dags + - ./data:/opt/airflow/data + - ./visualization:/opt/airflow/visualization - ./logs:/opt/airflow/logs - ./plugins:/opt/airflow/plugins - ./tests:/opt/airflow/tests @@ -71,7 +73,7 @@ x-airflow-common: services: postgres: container_name: postgres - image: postgres:13 + image: postgres:16 environment: POSTGRES_USER: airflow POSTGRES_PASSWORD: airflow @@ -127,24 +129,3 @@ services: _AIRFLOW_WWW_USER_CREATE: 'true' _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow} _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow} - - dashboard: - image: metabase/metabase - container_name: dashboard - ports: - - "3000:3000" - - warehouse: - image: postgres:13 - container_name: warehouse - environment: - POSTGRES_USER: ${POSTGRES_USER} - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} - POSTGRES_DB: ${POSTGRES_DB} - healthcheck: - test: [ "CMD", "pg_isready", "-U", "${POSTGRES_USER}" ] - interval: 5s - retries: 5 - restart: always - ports: - - "5439:5432" diff --git a/visualization/dashboard.html b/visualization/dashboard.html new file mode 100755 index 0000000..2c14ce5 --- /dev/null +++ b/visualization/dashboard.html @@ -0,0 +1,575 @@ + + + + + + + + + + +CoinCap Exchange Dashboard + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ +
+
+
+
Coincap Exchange data analysis
+
+
+
+
+
+ + + + + +
+
+
+ + + +
+ + + + + + + \ No newline at end of file diff --git a/visualization/dashboard.qmd b/visualization/dashboard.qmd new file mode 100755 index 0000000..ab51cec --- /dev/null +++ b/visualization/dashboard.qmd @@ -0,0 +1,24 @@ +--- +title: "CoinCap Exchange Dashboard" +author: "StartDataEngineering" +format: dashboard +--- + +## Row {height=70%} + +```{python} +#| title: Coincap Exchange data analysis + +import pandas as pd +import plotly.express as px +import os +# Load the CSV file +file_path = f'{os.getenv("AIRFLOW_HOME")}/data/coincap_exchanges.csv' +import duckdb + +clean_data = duckdb.sql(f"select name, volumeUsd from '{file_path}' order by 2 desc limit 10").df() +# Plot the top 10 exchanges' volumeUSD +fig = px.bar(clean_data, x='name', y='volumeUsd', title='Top 10 Exchanges by VolumeUSD') +fig.show() + +``` From fde03f46e23272f7753e988f541daf45d83f57fb Mon Sep 17 00:00:00 2001 From: JosephKevinMachado Date: Mon, 10 Jun 2024 15:58:16 -0400 Subject: [PATCH 02/14] 2024-06-10-15-58-15 --- README.md | 2 ++ dags/coincap_el.py | 9 ++++++--- tests/dags/test_dag_validity.py | 2 +- visualization/dashboard.html | 4 ++-- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index ac702fe..2497311 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ Detailed explanation can be found **[`in this post`](https://www.startdataengineering.com/post/data-engineering-projects-with-free-template/)** +Dashboard rendered at: ./visualization/dashboard.html + ## Prerequisites To use the template, please install the following. diff --git a/dags/coincap_el.py b/dags/coincap_el.py index e1b686e..f6eda9f 100755 --- a/dags/coincap_el.py +++ b/dags/coincap_el.py @@ -10,7 +10,8 @@ with DAG( 'coincap_el', - description='A simple DAG to fetch data from CoinCap Exchanges API and write to a file', + description='A simple DAG to fetch data \ + from CoinCap Exchanges API and write to a file', schedule_interval=timedelta(days=1), start_date=datetime(2023, 1, 1), catchup=False, @@ -32,9 +33,11 @@ def fetch_coincap_exchanges(url, file_path): dict_writer.writerows(exchanges) markdown_path = f'{os.getenv("AIRFLOW_HOME")}/visualization/' + q_cmd = ( + f'cd {markdown_path} && quarto render {markdown_path}/dashboard.qmd' + ) gen_dashboard = BashOperator( - task_id="generate_dashboard", - bash_command=f'cd {markdown_path} && quarto render {markdown_path}/dashboard.qmd', + task_id="generate_dashboard", bash_command=q_cmd ) fetch_coincap_exchanges(url, file_path) >> gen_dashboard diff --git a/tests/dags/test_dag_validity.py b/tests/dags/test_dag_validity.py index 6e553a1..d4f7ef8 100755 --- a/tests/dags/test_dag_validity.py +++ b/tests/dags/test_dag_validity.py @@ -5,4 +5,4 @@ def test_no_import_errors(): dag_bag = DagBag() assert len(dag_bag.import_errors) == 0, "No Import Failures" - assert dag_bag.size() == 0 + assert dag_bag.size() == 1 diff --git a/visualization/dashboard.html b/visualization/dashboard.html index 2c14ce5..9942981 100755 --- a/visualization/dashboard.html +++ b/visualization/dashboard.html @@ -100,9 +100,9 @@
Coincap Exchange data analysis
-