Skip to content

ntd: fix failing tests #455

ntd: fix failing tests

ntd: fix failing tests #455

Workflow file for this run

# Workflow triggers and shared environment.
# Runs for pushes to main and for pull requests, in both cases only when this
# workflow file or something under warehouse/ is part of the change.
name: Deploy dbt

on:
  push:
    branches:
      - main
    paths:
      - '.github/workflows/deploy-dbt.yml'
      - 'warehouse/**'
  pull_request:
    paths:
      - '.github/workflows/deploy-dbt.yml'
      - 'warehouse/**'

# Every conditional below evaluates to its first literal when the ref is
# refs/heads/main and to its second literal for any other ref.
# The expressions are folded (>-) purely for line length; each one collapses
# to a single-line string.
env:
  PYTHON_VERSION: '3.11'
  POETRY_VERSION: '2.0.1'
  SERVICE_ACCOUNT: >-
    ${{ github.ref == 'refs/heads/main'
    && 'github-actions-service-account@cal-itp-data-infra.iam.gserviceaccount.com'
    || 'github-actions-service-account@cal-itp-data-infra-staging.iam.gserviceaccount.com' }}
  WORKLOAD_IDENTITY_PROVIDER: >-
    ${{ github.ref == 'refs/heads/main'
    && 'projects/1005246706141/locations/global/workloadIdentityPools/github-actions/providers/data-infra'
    || 'projects/473674835135/locations/global/workloadIdentityPools/github-actions/providers/data-infra' }}
  PROJECT_ID: >-
    ${{ github.ref == 'refs/heads/main'
    && 'cal-itp-data-infra'
    || 'cal-itp-data-infra-staging' }}
  DBT_TARGET: >-
    ${{ github.ref == 'refs/heads/main'
    && 'prod'
    || 'staging' }}
  DBT_ARTIFACTS_BUCKET: >-
    ${{ github.ref == 'refs/heads/main'
    && 'calitp-dbt-artifacts'
    || 'calitp-staging-dbt-artifacts' }}
  DBT_DOCS_BUCKET: >-
    ${{ github.ref == 'refs/heads/main'
    && 'calitp-dbt-docs'
    || 'calitp-staging-dbt-docs' }}
  CALITP_BUCKET__PUBLISH: >-
    ${{ github.ref == 'refs/heads/main'
    && 'calitp-publish'
    || 'calitp-staging-publish' }}
  DBT_SOURCE_DATABASE: >-
    ${{ github.ref == 'refs/heads/main'
    && 'cal-itp-data-infra'
    || 'cal-itp-data-infra-staging' }}
  METABASE_DESTINATION_DATABASE: >-
    ${{ github.ref == 'refs/heads/main'
    && 'Data Marts (formerly Warehouse Views)'
    || '(Internal) Staging Warehouse Views' }}

jobs:
# Compiles the dbt project in warehouse/, generates its documentation, and
# stores the resulting JSON/HTML files as the workflow artifact named "dbt".
compile:
  name: Compile dbt
  runs-on: ubuntu-latest
  permissions:
    contents: read
    # Lets the job request an OIDC token for the workload-identity auth step.
    id-token: write
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    # --- Google Cloud access ---
    - name: Authenticate Google Service Account
      uses: google-github-actions/auth@v2
      with:
        create_credentials_file: 'true'
        project_id: ${{ env.PROJECT_ID }}
        workload_identity_provider: ${{ env.WORKLOAD_IDENTITY_PROVIDER }}
        service_account: ${{ env.SERVICE_ACCOUNT }}

    - name: Setup GCloud utilities
      uses: google-github-actions/setup-gcloud@v2

    # --- Tooling ---
    - name: Setup Graphviz
      uses: ts-graphviz/setup-graphviz@v2

    - name: Setup Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    # Cache key varies by runner OS, Python version and Poetry version.
    - name: Cache Poetry
      uses: actions/cache@v3
      with:
        path: ~/.cache/pypoetry
        key: poetry-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-poetry-${{ env.POETRY_VERSION }}

    - name: Setup Poetry
      uses: abatilo/actions-poetry@v3
      with:
        poetry-version: ${{ env.POETRY_VERSION }}

    # Cache key also tracks the lock file and every workflow definition.
    - name: Cache Python packages
      uses: actions/cache@v3
      with:
        path: ~/.local
        key: python-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-lock-${{ hashFiles('warehouse/poetry.lock') }}-${{ hashFiles('.github/workflows/*.yml') }}

    - name: Install dependencies
      run: poetry install
      working-directory: warehouse

    # --- dbt ---
    - name: Cache dbt packages
      uses: actions/cache@v3
      with:
        path: warehouse/dbt_packages
        key: python-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-lock-${{ hashFiles('warehouse/poetry.lock') }}-dbt-packages-${{ hashFiles('warehouse/packages.yml') }}

    - name: Install dbt dependencies
      run: poetry run dbt deps
      working-directory: warehouse

    - name: Print dbt environment
      run: poetry run dbt debug --target ${{ env.DBT_TARGET }}
      working-directory: warehouse

    - name: Compile dbt
      run: poetry run dbt compile --target ${{ env.DBT_TARGET }} --full-refresh
      working-directory: warehouse

    # --no-compile: the project was already compiled by the previous step.
    - name: Generate dbt documentation
      run: poetry run dbt docs generate --target ${{ env.DBT_TARGET }} --no-compile
      working-directory: warehouse

    # Downstream jobs download this artifact by the name "dbt".
    - name: Archive compilation artifacts
      uses: actions/upload-artifact@v4
      with:
        name: dbt
        path: |
          warehouse/target/*.json
          warehouse/target/*.html
# Pull-request-only job that reports, through its `any_changed` output,
# whether any SQL file under warehouse/models/ was touched.
models-changed:
  name: Detect dbt model changes
  if: ${{ github.event_name == 'pull_request' }}
  runs-on: ubuntu-latest
  outputs:
    any_changed: ${{ steps.changed-files-warehouse.outputs.any_changed }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        # 0 = fetch the complete history rather than a shallow clone.
        fetch-depth: 0

    # NOTE(review): third-party action referenced by a mutable tag; consider
    # pinning to a full commit SHA.
    - id: changed-files-warehouse
      uses: tj-actions/changed-files@v46
      with:
        files: 'warehouse/models/**/*.sql'
# Pushes dbt model metadata to Metabase. On the staging target it first
# builds the models that differ from the latest artifacts stored in GCS.
metabase:
  name: Sync Metabase
  runs-on: ubuntu-latest
  needs:
    - compile
    - models-changed
  # NOTE(review): models-changed is restricted to pull_request events, so this
  # condition can only hold on pull requests — confirm that skipping the sync
  # on pushes to main is intended.
  if: ${{ needs.models-changed.outputs.any_changed == 'true' }}
  permissions:
    contents: read
    # Lets the job request an OIDC token for the workload-identity auth step.
    id-token: write
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    # Files produced by the compile job (manifest.json etc.).
    - name: Download compilation artifacts
      uses: actions/download-artifact@v4
      with:
        name: dbt
        path: warehouse/target

    # --- Google Cloud access ---
    - name: Authenticate Google Service Account
      uses: google-github-actions/auth@v2
      with:
        create_credentials_file: 'true'
        project_id: ${{ env.PROJECT_ID }}
        workload_identity_provider: ${{ env.WORKLOAD_IDENTITY_PROVIDER }}
        service_account: ${{ env.SERVICE_ACCOUNT }}

    - name: Setup GCloud utilities
      uses: google-github-actions/setup-gcloud@v2

    # --- Tooling ---
    - name: Setup Graphviz
      uses: ts-graphviz/setup-graphviz@v2

    - name: Setup Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Cache Poetry
      uses: actions/cache@v3
      with:
        path: ~/.cache/pypoetry
        key: poetry-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-poetry-${{ env.POETRY_VERSION }}

    - name: Setup Poetry
      uses: abatilo/actions-poetry@v3
      with:
        poetry-version: ${{ env.POETRY_VERSION }}

    - name: Cache Python packages
      uses: actions/cache@v3
      with:
        path: ~/.local
        key: python-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-lock-${{ hashFiles('warehouse/poetry.lock') }}-${{ hashFiles('.github/workflows/*.yml') }}

    - name: Install dependencies
      working-directory: warehouse
      run: poetry install

    # --- dbt ---
    - name: Cache dbt packages
      uses: actions/cache@v3
      with:
        path: warehouse/dbt_packages
        key: python-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-lock-${{ hashFiles('warehouse/poetry.lock') }}-dbt-packages-${{ hashFiles('warehouse/packages.yml') }}

    - name: Install dbt dependencies
      working-directory: warehouse
      run: poetry run dbt deps

    - name: Print dbt environment
      working-directory: warehouse
      run: poetry run dbt debug --target ${{ env.DBT_TARGET }}

    # The next two steps run only for the staging target.
    # The recursive copy places the bucket's latest/ prefix at ./target/latest,
    # which is the --state directory used by the following step.
    - name: Download latest artifacts from GCS
      if: ${{ env.DBT_TARGET == 'staging' }}
      working-directory: warehouse
      run: gsutil cp -r gs://${{ env.DBT_ARTIFACTS_BUCKET }}/latest/ ./target/

    - name: Run changed models
      if: ${{ env.DBT_TARGET == 'staging' }}
      working-directory: warehouse
      run: poetry run dbt run --select state:modified+ --target ${{ env.DBT_TARGET }} --state ./target/latest

    # The API key reaches the shell as an environment variable instead of
    # being substituted into the script text, so quotes, `$` or backticks in
    # the secret cannot alter or break the command line.
    - name: Synchronize Metabase
      working-directory: warehouse
      env:
        METABASE_API_KEY: ${{ secrets.METABASE_API_KEY }}
      run: >-
        poetry run dbt-metabase models -v
        --manifest-path=target/manifest.json
        --exclude-schemas="*staging, payments"
        --skip-sources
        --docs-url="https://dbt-docs.calitp.org"
        --metabase-url="https://dashboards.calitp.org"
        --metabase-database="${{ env.METABASE_DESTINATION_DATABASE }}"
        --metabase-api-key="$METABASE_API_KEY"
# Copies the files produced by the compile job into Google Cloud Storage:
# dated/timestamped copies go to the artifacts bucket, and the documentation
# files go to the docs bucket.
upload:
  name: Upload to Google Cloud Storage
  runs-on: ubuntu-latest
  needs:
    - compile
  permissions:
    contents: read
    # Lets the job request an OIDC token for the workload-identity auth step.
    id-token: write
  steps:
    - name: Download compilation artifacts
      uses: actions/download-artifact@v4
      with:
        name: dbt
        path: warehouse/target

    # --- Google Cloud access ---
    - name: Authenticate Google Service Account
      uses: google-github-actions/auth@v2
      with:
        create_credentials_file: 'true'
        project_id: ${{ env.PROJECT_ID }}
        workload_identity_provider: ${{ env.WORKLOAD_IDENTITY_PROVIDER }}
        service_account: ${{ env.SERVICE_ACCOUNT }}

    - name: Setup GCloud utilities
      uses: google-github-actions/setup-gcloud@v2

    # --- Values for the dt=/ts= segments of the destination paths ---
    - name: Get current timestamp
      id: current-time
      uses: josStorer/get-current-time@v2
      with:
        timezone: "UTC"
        format: "YYYY-MM-DDTHH:mm:ss.SSSSSS+00:00"

    - name: Get current date
      id: current-date
      uses: josStorer/get-current-time@v2
      with:
        timezone: "UTC"
        format: "YYYY-MM-DD"

    # --- One upload per file, each under <bucket>/<file>/dt=<date>/ts=<time>/ ---
    - name: Upload catalog.json
      uses: google-github-actions/upload-cloud-storage@v1
      with:
        path: './warehouse/target/'
        glob: 'catalog.json'
        destination: "${{ env.DBT_ARTIFACTS_BUCKET }}/catalog.json/dt=${{ steps.current-date.outputs.formattedTime }}/ts=${{ steps.current-time.outputs.formattedTime }}/"
        parent: false
        process_gcloudignore: false

    - name: Upload manifest.json
      uses: google-github-actions/upload-cloud-storage@v1
      with:
        path: './warehouse/target/'
        glob: 'manifest.json'
        destination: "${{ env.DBT_ARTIFACTS_BUCKET }}/manifest.json/dt=${{ steps.current-date.outputs.formattedTime }}/ts=${{ steps.current-time.outputs.formattedTime }}/"
        parent: false
        process_gcloudignore: false

    - name: Upload run_results.json
      uses: google-github-actions/upload-cloud-storage@v1
      with:
        path: './warehouse/target/'
        glob: 'run_results.json'
        destination: "${{ env.DBT_ARTIFACTS_BUCKET }}/run_results.json/dt=${{ steps.current-date.outputs.formattedTime }}/ts=${{ steps.current-time.outputs.formattedTime }}/"
        parent: false
        process_gcloudignore: false

    - name: Upload index.html
      uses: google-github-actions/upload-cloud-storage@v1
      with:
        path: './warehouse/target/'
        glob: 'index.html'
        destination: "${{ env.DBT_ARTIFACTS_BUCKET }}/index.html/dt=${{ steps.current-date.outputs.formattedTime }}/ts=${{ steps.current-time.outputs.formattedTime }}/"
        parent: false
        process_gcloudignore: false

    # --- Documentation files, sent to the docs bucket itself ---
    - name: Upload documentation
      uses: google-github-actions/upload-cloud-storage@v1
      with:
        path: './warehouse/target/'
        glob: '{catalog.json,manifest.json,index.html}'
        destination: ${{ env.DBT_DOCS_BUCKET }}
        parent: false
        process_gcloudignore: false
# Pull-request-only job: generates a CI report with scripts/visualize.py,
# archives it as the "ci-report" artifact, and posts it as a PR comment.
# Runs only when models-changed reported modified model SQL files.
visualize:
  name: Pull Request visualization
  runs-on: ubuntu-latest
  needs:
    - models-changed
    - compile
  if: ${{ github.event_name == 'pull_request' && needs.models-changed.outputs.any_changed == 'true' }}
  permissions:
    contents: read
    # Lets the job request an OIDC token for the workload-identity auth step.
    id-token: write
    # Required to create/update the pull request comment.
    pull-requests: write
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    # Files produced by the compile job.
    - name: Download compilation artifacts
      uses: actions/download-artifact@v4
      with:
        name: dbt
        path: warehouse/target

    # --- Google Cloud access ---
    - name: Authenticate Google Service Account
      uses: google-github-actions/auth@v2
      with:
        create_credentials_file: 'true'
        project_id: ${{ env.PROJECT_ID }}
        workload_identity_provider: ${{ env.WORKLOAD_IDENTITY_PROVIDER }}
        service_account: ${{ env.SERVICE_ACCOUNT }}

    - name: Setup GCloud utilities
      uses: google-github-actions/setup-gcloud@v2

    # --- Tooling ---
    - name: Setup Graphviz
      uses: ts-graphviz/setup-graphviz@v2

    # Provides the `cml` command used by the final step.
    - name: Setup Continuous Machine Learning tools
      uses: iterative/setup-cml@v2

    - name: Setup Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    # The runner.os expression needs its leading `$`; without it the literal
    # text "{{ runner.os }}" becomes part of the key.
    - name: Cache Poetry
      uses: actions/cache@v3
      with:
        path: ~/.cache/pypoetry
        key: poetry-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-poetry-${{ env.POETRY_VERSION }}

    - name: Setup Poetry
      uses: abatilo/actions-poetry@v3
      with:
        poetry-version: ${{ env.POETRY_VERSION }}

    # Hash warehouse/poetry.lock — the lock file of the directory where
    # `poetry install` runs — so the key changes when dependencies change.
    - name: Cache Python packages
      uses: actions/cache@v3
      with:
        path: ~/.local
        key: python-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-lock-${{ hashFiles('warehouse/poetry.lock') }}-${{ hashFiles('.github/workflows/*.yml') }}

    - name: Install dependencies
      working-directory: warehouse
      run: poetry install

    - name: Install dbt dependencies
      working-directory: warehouse
      run: poetry run dbt deps

    # --- Report ---
    # The recursive copy places the bucket's latest/ prefix at ./target/latest,
    # which the report step reads via --latest-dir.
    - name: Download latest artifacts from GCS
      working-directory: warehouse
      run: gsutil cp -r gs://${{ env.DBT_ARTIFACTS_BUCKET }}/latest/ ./target/

    - name: Create CI report
      working-directory: warehouse
      run: poetry run python scripts/visualize.py ci-report --latest-dir='./target/latest/'

    # `uses:` steps resolve paths from the workspace root, not from warehouse/,
    # so the report files must be addressed as warehouse/target/*.
    - name: Archive CI report
      uses: actions/upload-artifact@v4
      with:
        name: ci-report
        path: |
          warehouse/target/*.md
          warehouse/target/*.png

    # Best effort: a failure to comment is logged but does not fail the job.
    - name: Create GitHub comment
      working-directory: warehouse
      env:
        REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: cml comment update target/report.md || echo "Failed to generate GitHub comment"