# dev changes deployment with volatility changes #105
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# CI/CD workflow for the CO2 data pipeline: a `test` job followed by a
# `deploy` job that is skipped for pull requests.
name: CO2 Data Pipeline CI/CD

on:
  # Pushes to main or dev trigger the workflow.
  push:
    branches: [main, dev]
  # Pull requests targeting main trigger the workflow (the deploy job's own
  # `if:` condition excludes this event).
  pull_request:
    branches: [main]
  # Allows the workflow to be started manually.
  workflow_dispatch:
jobs:
  # ---------------------------------------------------------------------------
  # test: installs requirements.txt, writes password-based Snowflake connection
  # profiles, and runs the pytest suites. The `default` profile points at prod
  # objects when the ref is refs/heads/main and at dev objects otherwise.
  # ---------------------------------------------------------------------------
  test:
    runs-on: ubuntu-latest
    env:
      SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }}
      SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }}
      SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
      AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }}
      AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }}
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Set environment variables based on branch
        run: |
          # Determine environment based on branch. The ref is read from the
          # runner-provided GITHUB_REF variable rather than being templated
          # into the script text.
          if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
            ENVIRONMENT=prod
            DATABASE_NAME=CO2_DB_PROD
            SNOWFLAKE_WAREHOUSE=CO2_WH_PROD
            SNOWFLAKE_ROLE=CO2_ROLE_PROD
          else
            ENVIRONMENT=dev
            DATABASE_NAME=CO2_DB_DEV
            SNOWFLAKE_WAREHOUSE=CO2_WH_DEV
            SNOWFLAKE_ROLE=CO2_ROLE_DEV
          fi
          SCHEMA_NAME=RAW_CO2
          # Values appended to GITHUB_ENV only become visible in later steps,
          # so they are kept as shell variables too for the log line below.
          {
            echo "ENVIRONMENT=$ENVIRONMENT"
            echo "DATABASE_NAME=$DATABASE_NAME"
            echo "SCHEMA_NAME=$SCHEMA_NAME"
            echo "SNOWFLAKE_WAREHOUSE=$SNOWFLAKE_WAREHOUSE"
            echo "SNOWFLAKE_ROLE=$SNOWFLAKE_ROLE"
          } >> "$GITHUB_ENV"
          echo "Testing with $ENVIRONMENT environment"

      - name: Setup Snowflake connection profiles for testing
        run: |
          # Create the directory and file without group/other permissions.
          umask 077
          mkdir -p ~/.snowflake
          # Create connection profile for testing with authenticator.
          # Credentials are expanded from the job-level env, so the secret text
          # itself is never part of the script where the shell would interpret
          # any `$` or backtick it contains.
          # NOTE(review): a value containing `"` or `\` would still need TOML
          # escaping - confirm the credentials never contain those characters.
          cat > ~/.snowflake/connections.toml << EOF
          [default]
          account = "$SNOWFLAKE_ACCOUNT"
          user = "$SNOWFLAKE_USER"
          password = "$SNOWFLAKE_PASSWORD"
          authenticator = "snowflake"
          warehouse = "$SNOWFLAKE_WAREHOUSE"
          database = "$DATABASE_NAME"
          schema = "$SCHEMA_NAME"
          role = "$SNOWFLAKE_ROLE"

          [dev]
          account = "$SNOWFLAKE_ACCOUNT"
          user = "$SNOWFLAKE_USER"
          password = "$SNOWFLAKE_PASSWORD"
          authenticator = "snowflake"
          warehouse = "CO2_WH_DEV"
          role = "CO2_ROLE_DEV"
          database = "CO2_DB_DEV"
          schema = "RAW_CO2"

          [prod]
          account = "$SNOWFLAKE_ACCOUNT"
          user = "$SNOWFLAKE_USER"
          password = "$SNOWFLAKE_PASSWORD"
          authenticator = "snowflake"
          warehouse = "CO2_WH_PROD"
          role = "CO2_ROLE_PROD"
          database = "CO2_DB_PROD"
          schema = "RAW_CO2"
          EOF
          chmod 600 ~/.snowflake/connections.toml

      - name: Run tests
        run: |
          pytest tests/test_snowflake_connection.py
          pytest tests/test_s3_connection.py
          pytest tests/test_volatility_co2ppm.py
          pytest tests/test_daily_co2_changes.py
          pytest tests/test_weekly_co2_changes.py
          pytest tests/test_co2_harmonized_sp.py

  # ---------------------------------------------------------------------------
  # deploy: runs after a successful `test` job for every event except
  # pull_request. Writes key-pair Snowflake profiles, renders configuration,
  # and deploys the UDFs and stored procedures through
  # scripts/deployment_files/snowflake_deployer.py.
  # ---------------------------------------------------------------------------
  deploy:
    needs: test
    if: success() && (github.event_name != 'pull_request')
    runs-on: ubuntu-latest
    # Credentials are provided as job-level env (same approach as the test
    # job) instead of being echoed into GITHUB_ENV: that keeps secret text out
    # of the shell scripts and avoids corrupting GITHUB_ENV with values that
    # contain newlines or shell metacharacters.
    env:
      # AWS credentials
      AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }}
      AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }}
      AWS_REGION: ${{ secrets.AWS_REGION }}
      S3_BUCKET_NAME: ${{ secrets.S3_BUCKET_NAME }}
      S3_OBJECT_NAME: ${{ secrets.S3_OBJECT_NAME }}
      PARENT_FOLDER: ${{ secrets.PARENT_FOLDER }}
      # Snowflake credentials
      SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }}
      SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }}
      SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install snowflake-connector-python pandas

      - name: Set environment variables based on branch
        run: |
          # Determine environment based on branch (GITHUB_REF is provided by
          # the runner, so nothing is templated into the script).
          if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
            ENVIRONMENT=prod
            DATABASE_NAME=CO2_DB_PROD
            CONN_PROFILE=prod
            SNOWFLAKE_WAREHOUSE=CO2_WH_PROD
            SNOWFLAKE_ROLE=CO2_ROLE_PROD
          else
            ENVIRONMENT=dev
            DATABASE_NAME=CO2_DB_DEV
            CONN_PROFILE=dev
            SNOWFLAKE_WAREHOUSE=CO2_WH_DEV
            SNOWFLAKE_ROLE=CO2_ROLE_DEV
          fi
          SCHEMA_NAME=RAW_CO2
          # GITHUB_ENV entries only take effect in subsequent steps; the shell
          # variables above are what the echo lines below print.
          {
            echo "ENVIRONMENT=$ENVIRONMENT"
            echo "DATABASE_NAME=$DATABASE_NAME"
            echo "SCHEMA_NAME=$SCHEMA_NAME"
            echo "CONN_PROFILE=$CONN_PROFILE"
            echo "SNOWFLAKE_WAREHOUSE=$SNOWFLAKE_WAREHOUSE"
            echo "SNOWFLAKE_ROLE=$SNOWFLAKE_ROLE"
          } >> "$GITHUB_ENV"
          echo "Deploying to $ENVIRONMENT environment"
          echo "Using database: $DATABASE_NAME"
          echo "Using warehouse: $SNOWFLAKE_WAREHOUSE"
          echo "Using role: $SNOWFLAKE_ROLE"

      - name: Setup Snowflake connection profiles
        env:
          # Passed through the environment so the key text is never part of
          # the script source.
          SNOWFLAKE_PRIVATE_KEY: ${{ secrets.SNOWFLAKE_PRIVATE_KEY }}
        run: |
          # Create directories and files without group/other permissions.
          umask 077
          mkdir -p ~/.snowflake/keys
          # Absolute key location, exported so the Python check below reads the
          # same path that is written into connections.toml.
          export KEY_PATH="$HOME/.snowflake/keys/rsa_key.p8"
          # Store private key from GitHub secrets
          printf '%s\n' "$SNOWFLAKE_PRIVATE_KEY" > "$KEY_PATH"
          chmod 600 "$KEY_PATH"
          # Debug the Snowflake account format
          echo "Using Snowflake account: ${SNOWFLAKE_ACCOUNT}"
          # Install required packages
          pip install cryptography snowflake-connector-python
          # Create connection profile for the environment with direct key
          # authentication. $KEY_PATH is expanded by the shell while the file
          # is written, so the TOML contains an absolute path.
          cat > ~/.snowflake/connections.toml << EOF
          [dev]
          account = "$SNOWFLAKE_ACCOUNT"
          user = "$SNOWFLAKE_USER"
          private_key_path = "$KEY_PATH"
          warehouse = "CO2_WH_DEV"
          role = "CO2_ROLE_DEV"
          database = "CO2_DB_DEV"
          schema = "RAW_CO2"
          client_request_mfa_token = false

          [prod]
          account = "$SNOWFLAKE_ACCOUNT"
          user = "$SNOWFLAKE_USER"
          private_key_path = "$KEY_PATH"
          warehouse = "CO2_WH_PROD"
          role = "CO2_ROLE_PROD"
          database = "CO2_DB_PROD"
          schema = "RAW_CO2"
          client_request_mfa_token = false
          EOF
          chmod 600 ~/.snowflake/connections.toml
          # Verify the connection file was created properly
          echo "Connection file created with profiles:"
          grep -v private_key ~/.snowflake/connections.toml
          # Test the key can be read properly (without connecting). The step
          # fails if the key cannot be loaded, and no key bytes are printed.
          python - << 'PY'
          import os
          import sys

          from cryptography.hazmat.backends import default_backend
          from cryptography.hazmat.primitives import serialization

          key_path = os.environ['KEY_PATH']
          print(f'Checking key at: {key_path}')
          print(f'Key exists: {os.path.exists(key_path)}')
          try:
              with open(key_path, 'rb') as f:
                  key_data = f.read()
              print(f'Key length: {len(key_data)} bytes')
              print(f'Key has PEM header: {key_data.startswith(b"-----BEGIN")}')
              # Try to load the key to validate format
              p_key = serialization.load_pem_private_key(
                  key_data,
                  password=None,
                  backend=default_backend(),
              )
              print('Key loaded successfully!')
              # Convert to DER format as required by Snowflake
              p_key.private_bytes(
                  encoding=serialization.Encoding.DER,
                  format=serialization.PrivateFormat.PKCS8,
                  encryption_algorithm=serialization.NoEncryption(),
              )
              print('Key converted to DER format successfully!')
          except Exception as e:
              print(f'Error processing key: {e}')
              sys.exit(1)
          PY

      - name: Generate configuration and SQL files
        run: |
          # Create .env file for local tools (values come from the job env)
          cat > .env << EOF
          AWS_ACCESS_KEY=$AWS_ACCESS_KEY
          AWS_SECRET_KEY=$AWS_SECRET_KEY
          AWS_REGION=$AWS_REGION
          S3_BUCKET_NAME=$S3_BUCKET_NAME
          S3_OBJECT_NAME=$S3_OBJECT_NAME
          PARENT_FOLDER=$PARENT_FOLDER
          SNOWFLAKE_ENV=$ENVIRONMENT
          EOF
          # Create environment.json for any tools that use it
          mkdir -p templates
          cat > templates/environment.json << EOF
          {
            "environment": "$ENVIRONMENT"
          }
          EOF
          # Render YAML and SQL setup files
          python scripts/render_yaml.py "$ENVIRONMENT"
          python scripts/render_setup.py "$ENVIRONMENT"

      - name: Deploy UDFs and Stored Procedures
        run: |
          # First verify that environment variables are set. The account is
          # reported only as set/unset: printing a substring of a secret would
          # bypass log masking.
          echo "Checking environment variables:"
          echo "CONN_PROFILE: $CONN_PROFILE"
          echo "SNOWFLAKE_ACCOUNT: ${SNOWFLAKE_ACCOUNT:+<set>}"
          echo "SNOWFLAKE_USER: $SNOWFLAKE_USER"
          echo "ENVIRONMENT: $ENVIRONMENT"
          echo "DATABASE_NAME: $DATABASE_NAME"
          # Configure git for better change detection
          git config --global core.quotepath off
          git fetch --prune --unshallow

          # deploy_component PATH NAME TYPE
          #   Runs snowflake_deployer.py for one component using $CONN_PROFILE.
          #   pull_request / workflow_dispatch events deploy unconditionally;
          #   any other event adds --check-changes. Exits the step with status
          #   1 when the deployer fails.
          deploy_component() {
            local component_path=$1
            local component_name=$2
            local component_type=$3
            local -a deployer_args=(
              deploy
              --profile "$CONN_PROFILE"
              --path "$component_path"
              --name "$component_name"
              --type "$component_type"
            )
            echo "🔍 Checking for changes in $component_path..."
            # For pull requests or workflow_dispatch, always deploy components
            if [[ "$GITHUB_EVENT_NAME" == "pull_request" || "$GITHUB_EVENT_NAME" == "workflow_dispatch" ]]; then
              echo "Pull request or manual dispatch detected - deploying component"
            else
              # For push events, only deploy if component changed
              echo "Push event detected - only deploying changed components"
              deployer_args+=(--check-changes)
            fi
            # The command is tested directly by `if`: the step shell runs with
            # `-e`, so a separate `$?` check after a failing command would
            # never be reached and the failure message would not be printed.
            if PYTHONPATH="$PYTHONPATH:$(pwd)" python -u scripts/deployment_files/snowflake_deployer.py "${deployer_args[@]}"; then
              echo "✅ Component $component_name processed successfully"
            else
              echo "❌ Processing failed for $component_name"
              exit 1
            fi
          }

          echo "🚀 Starting deployments to $ENVIRONMENT environment..."
          # Deploy UDFs
          deploy_component "udfs_and_spoc/python_udf" "CO2_VOLATILITY" "udf"
          deploy_component "udfs_and_spoc/daily_co2_changes" "DAILY_CO2_CHANGES" "udf"
          deploy_component "udfs_and_spoc/weekly_co2_changes" "WEEKLY_CO2_CHANGES" "udf"
          # Deploy stored procedures
          deploy_component "udfs_and_spoc/loading_co2_data_sp" "LOAD_CO2_DATA" "procedure"
          deploy_component "udfs_and_spoc/co2_harmonized_sp" "HARMONIZE_CO2_DATA" "procedure"
          deploy_component "udfs_and_spoc/co2_analytical_sp" "ANALYZE_CO2_DATA" "procedure"
          echo "🎉 All components processed successfully!"