Skip to content

dev changes deployment with volatility changes #105

dev changes deployment with volatility changes

dev changes deployment with volatility changes #105

# CI/CD for the CO2 data pipeline: a test job followed by a deploy job.
name: CO2 Data Pipeline CI/CD

on:
  # Pushes to either long-lived branch.
  push:
    branches:
      - main
      - dev
  # Pull requests that target main.
  pull_request:
    branches:
      - main
  # Manual runs.
  workflow_dispatch:

jobs:
# Test job: installs the project and runs the pytest modules under tests/.
test:
  runs-on: ubuntu-latest
  # Job-wide credentials; every step below sees these as environment variables.
  env:
    # Snowflake login
    SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }}
    SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }}
    SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
    # AWS access keys
    AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }}
    AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }}
  steps:
# Check out the repository onto the runner.
- uses: actions/checkout@v3

# The version stays quoted so YAML keeps it a string ("3.10", not the float 3.1).
- name: Set up Python
  uses: actions/setup-python@v4
  with:
    python-version: "3.10"

# Upgrade pip first, then install the project requirements.
- name: Install dependencies
  run: |
    python -m pip install --upgrade pip
    python -m pip install --requirement requirements.txt
# Chooses the Snowflake target (prod for main, dev for every other ref) and
# publishes it to the later steps of this job through $GITHUB_ENV.
- name: Set environment variables based on branch
  run: |
    # Determine environment based on branch. $GITHUB_REF is the runner-provided
    # copy of github.ref; reading it from the environment keeps the ref text
    # out of the script source.
    if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
      ENVIRONMENT=prod
      DATABASE_NAME=CO2_DB_PROD
      SNOWFLAKE_WAREHOUSE=CO2_WH_PROD
      SNOWFLAKE_ROLE=CO2_ROLE_PROD
    else
      ENVIRONMENT=dev
      DATABASE_NAME=CO2_DB_DEV
      SNOWFLAKE_WAREHOUSE=CO2_WH_DEV
      SNOWFLAKE_ROLE=CO2_ROLE_DEV
    fi
    # The schema name is the same in both environments.
    SCHEMA_NAME=RAW_CO2

    # Values appended to $GITHUB_ENV are only visible to *later* steps, so the
    # choices are held in shell variables as well; echoing a variable that was
    # only written to $GITHUB_ENV would print an empty string here.
    {
      echo "ENVIRONMENT=$ENVIRONMENT"
      echo "DATABASE_NAME=$DATABASE_NAME"
      echo "SCHEMA_NAME=$SCHEMA_NAME"
      echo "SNOWFLAKE_WAREHOUSE=$SNOWFLAKE_WAREHOUSE"
      echo "SNOWFLAKE_ROLE=$SNOWFLAKE_ROLE"
    } >> "$GITHUB_ENV"

    echo "Testing with $ENVIRONMENT environment"
# Writes ~/.snowflake/connections.toml with three password-authenticated
# profiles: [default] (follows the branch-selected environment), [dev], [prod].
- name: Setup Snowflake connection profiles for testing
  run: |
    mkdir -p ~/.snowflake
    # Create connection profile for testing with authenticator.
    # Credentials are read from the job-level env variables instead of being
    # templated into the script with ${{ secrets.* }}: the heredoc below is
    # unquoted, so templated text containing `$`, a backtick or a backslash
    # would be expanded a second time by the shell and corrupt the value.
    # A variable reference is expanded exactly once.
    # The [default] warehouse/database/schema/role come from the values the
    # previous step appended to $GITHUB_ENV.
    cat > ~/.snowflake/connections.toml << EOF
    [default]
    account = "$SNOWFLAKE_ACCOUNT"
    user = "$SNOWFLAKE_USER"
    password = "$SNOWFLAKE_PASSWORD"
    authenticator = "snowflake"
    warehouse = "$SNOWFLAKE_WAREHOUSE"
    database = "$DATABASE_NAME"
    schema = "$SCHEMA_NAME"
    role = "$SNOWFLAKE_ROLE"
    [dev]
    account = "$SNOWFLAKE_ACCOUNT"
    user = "$SNOWFLAKE_USER"
    password = "$SNOWFLAKE_PASSWORD"
    authenticator = "snowflake"
    warehouse = "CO2_WH_DEV"
    role = "CO2_ROLE_DEV"
    database = "CO2_DB_DEV"
    schema = "RAW_CO2"
    [prod]
    account = "$SNOWFLAKE_ACCOUNT"
    user = "$SNOWFLAKE_USER"
    password = "$SNOWFLAKE_PASSWORD"
    authenticator = "snowflake"
    warehouse = "CO2_WH_PROD"
    role = "CO2_ROLE_PROD"
    database = "CO2_DB_PROD"
    schema = "RAW_CO2"
    EOF
    # The file holds a password: restrict it to the owner.
    chmod 600 ~/.snowflake/connections.toml
# Runs each test module in its own pytest process, in a fixed order. With the
# runner's default fail-fast bash shell the step stops at the first module
# that fails.
- name: Run tests
  run: |
    for test_module in \
      tests/test_snowflake_connection.py \
      tests/test_s3_connection.py \
      tests/test_volatility_co2ppm.py \
      tests/test_daily_co2_changes.py \
      tests/test_weekly_co2_changes.py \
      tests/test_co2_harmonized_sp.py
    do
      pytest "$test_module"
    done
# Deploy job: runs only after the test job has succeeded, and never for
# pull_request events (pushes and manual dispatches only).
deploy:
  runs-on: ubuntu-latest
  needs: test
  if: ${{ success() && github.event_name != 'pull_request' }}
  steps:
# Check out the repository onto the runner.
- uses: actions/checkout@v3

# The version stays quoted so YAML keeps it a string ("3.10", not the float 3.1).
- name: Set up Python
  uses: actions/setup-python@v4
  with:
    python-version: "3.10"

# Project requirements first, then the Snowflake connector and pandas, which
# this job installs explicitly.
- name: Install dependencies
  run: |
    python -m pip install --upgrade pip
    python -m pip install --requirement requirements.txt
    python -m pip install snowflake-connector-python pandas
# Publishes credentials and the branch-selected deployment target to the later
# steps of this job through $GITHUB_ENV, then logs the chosen target.
- name: Set environment variables based on branch
  env:
    # Secrets reach the script as environment variables rather than being
    # templated into it: text pasted inside a double-quoted shell string is
    # re-expanded by bash, so a `$` or backtick in a secret would be mangled.
    AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }}
    AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }}
    AWS_REGION: ${{ secrets.AWS_REGION }}
    S3_BUCKET_NAME: ${{ secrets.S3_BUCKET_NAME }}
    S3_OBJECT_NAME: ${{ secrets.S3_OBJECT_NAME }}
    PARENT_FOLDER: ${{ secrets.PARENT_FOLDER }}
    SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }}
    SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }}
    SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
  run: |
    # Set AWS and Snowflake credentials for the following steps.
    # ${!var_name} is bash indirect expansion: the value of the variable whose
    # name is stored in var_name.
    for var_name in \
      AWS_ACCESS_KEY AWS_SECRET_KEY AWS_REGION \
      S3_BUCKET_NAME S3_OBJECT_NAME PARENT_FOLDER \
      SNOWFLAKE_ACCOUNT SNOWFLAKE_USER SNOWFLAKE_PASSWORD
    do
      printf '%s=%s\n' "$var_name" "${!var_name}" >> "$GITHUB_ENV"
    done

    # Determine environment based on branch ($GITHUB_REF mirrors github.ref).
    if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
      ENVIRONMENT=prod
      DATABASE_NAME=CO2_DB_PROD
      CONN_PROFILE=prod
      SNOWFLAKE_WAREHOUSE=CO2_WH_PROD
      SNOWFLAKE_ROLE=CO2_ROLE_PROD
    else
      ENVIRONMENT=dev
      DATABASE_NAME=CO2_DB_DEV
      CONN_PROFILE=dev
      SNOWFLAKE_WAREHOUSE=CO2_WH_DEV
      SNOWFLAKE_ROLE=CO2_ROLE_DEV
    fi
    # The schema name is the same in both environments.
    SCHEMA_NAME=RAW_CO2

    # Values appended to $GITHUB_ENV are only visible to *later* steps, so the
    # choices are held in shell variables as well; otherwise the summary
    # below would print empty values.
    {
      echo "ENVIRONMENT=$ENVIRONMENT"
      echo "DATABASE_NAME=$DATABASE_NAME"
      echo "SCHEMA_NAME=$SCHEMA_NAME"
      echo "CONN_PROFILE=$CONN_PROFILE"
      echo "SNOWFLAKE_WAREHOUSE=$SNOWFLAKE_WAREHOUSE"
      echo "SNOWFLAKE_ROLE=$SNOWFLAKE_ROLE"
    } >> "$GITHUB_ENV"

    echo "Deploying to $ENVIRONMENT environment"
    echo "Using database: $DATABASE_NAME"
    echo "Using warehouse: $SNOWFLAKE_WAREHOUSE"
    echo "Using role: $SNOWFLAKE_ROLE"
# Stores the RSA private key on disk, writes key-pair [dev] and [prod] profiles
# to ~/.snowflake/connections.toml, and verifies that the key parses as an
# unencrypted PEM key. The step fails if the key cannot be loaded.
- name: Setup Snowflake connection profiles
  env:
    # Passed through the environment so the shell never parses the key text
    # as part of the script.
    SNOWFLAKE_PRIVATE_KEY: ${{ secrets.SNOWFLAKE_PRIVATE_KEY }}
  run: |
    # One absolute key location, derived from $HOME, is shared by the file
    # write, the TOML profiles and the Python check, so they cannot disagree
    # when the runner's home directory is not /home/runner.
    KEY_DIR="$HOME/.snowflake/keys"
    KEY_FILE="$KEY_DIR/rsa_key.p8"
    export KEY_FILE
    mkdir -p "$KEY_DIR"
    # Store private key from GitHub secrets (printf adds the trailing newline)
    printf '%s\n' "$SNOWFLAKE_PRIVATE_KEY" > "$KEY_FILE"
    chmod 600 "$KEY_FILE"
    # Debug the Snowflake account format
    echo "Using Snowflake account: ${SNOWFLAKE_ACCOUNT}"
    # Install required packages
    pip install cryptography snowflake-connector-python
    # Create connection profiles for both environments with direct key
    # authentication. SNOWFLAKE_ACCOUNT / SNOWFLAKE_USER were exported through
    # $GITHUB_ENV by the previous step.
    cat > ~/.snowflake/connections.toml << EOF
    [dev]
    account = "$SNOWFLAKE_ACCOUNT"
    user = "$SNOWFLAKE_USER"
    # Absolute path, resolved when this file was written
    private_key_path = "$KEY_FILE"
    warehouse = "CO2_WH_DEV"
    role = "CO2_ROLE_DEV"
    database = "CO2_DB_DEV"
    schema = "RAW_CO2"
    client_request_mfa_token = false
    [prod]
    account = "$SNOWFLAKE_ACCOUNT"
    user = "$SNOWFLAKE_USER"
    # Absolute path, resolved when this file was written
    private_key_path = "$KEY_FILE"
    warehouse = "CO2_WH_PROD"
    role = "CO2_ROLE_PROD"
    database = "CO2_DB_PROD"
    schema = "RAW_CO2"
    client_request_mfa_token = false
    EOF
    chmod 600 ~/.snowflake/connections.toml
    # Verify the connection file was created properly (key path lines hidden)
    echo "Connection file created with profiles:"
    grep -v private_key ~/.snowflake/connections.toml
    # Test the key can be read properly (without connecting). The quoted
    # heredoc delimiter stops the shell from expanding anything inside the
    # Python source.
    python - << 'PY'
    import os
    import sys

    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives import serialization

    key_path = os.environ['KEY_FILE']
    print(f'Checking key at: {key_path}')
    print(f'Key exists: {os.path.exists(key_path)}')
    try:
        with open(key_path, 'rb') as f:
            key_data = f.read()
        print(f'Key length: {len(key_data)} bytes')
        # Report only whether the PEM armor is present; key bytes must never
        # be echoed into the job log.
        has_pem_header = key_data.lstrip().startswith(b'-----BEGIN')
        print(f'Key has PEM header: {has_pem_header}')
        # Try to load the key to validate format
        p_key = serialization.load_pem_private_key(
            key_data,
            password=None,
            backend=default_backend(),
        )
        print('Key loaded successfully!')
        # Convert to DER format as required by Snowflake
        p_key.private_bytes(
            encoding=serialization.Encoding.DER,
            format=serialization.PrivateFormat.PKCS8,
            encryption_algorithm=serialization.NoEncryption(),
        )
        print('Key converted to DER format successfully!')
    except Exception as e:
        # A key that cannot be read or parsed must stop the job here instead
        # of surfacing later as an opaque authentication failure.
        print(f'Error processing key: {e}')
        sys.exit(1)
    PY
# Writes .env and templates/environment.json for the selected environment,
# then runs the two render scripts with that environment name.
- name: Generate configuration and SQL files
  run: |
    # Create .env file for local tools.
    # The AWS/S3 values are the variables the "Set environment variables"
    # step exported through $GITHUB_ENV. Referencing them, instead of
    # templating ${{ secrets.* }} into this unquoted heredoc, means the shell
    # expands each value exactly once.
    cat > .env << EOF
    AWS_ACCESS_KEY=$AWS_ACCESS_KEY
    AWS_SECRET_KEY=$AWS_SECRET_KEY
    AWS_REGION=$AWS_REGION
    S3_BUCKET_NAME=$S3_BUCKET_NAME
    S3_OBJECT_NAME=$S3_OBJECT_NAME
    PARENT_FOLDER=$PARENT_FOLDER
    SNOWFLAKE_ENV=$ENVIRONMENT
    EOF
    # The file contains credentials: restrict it to the owner.
    chmod 600 .env
    # Create environment.json for any tools that use it
    mkdir -p templates
    cat > templates/environment.json << EOF
    {
      "environment": "$ENVIRONMENT"
    }
    EOF
    # Render YAML and SQL setup files
    python scripts/render_yaml.py "$ENVIRONMENT"
    python scripts/render_setup.py "$ENVIRONMENT"
# Deploys three UDFs and three stored procedures through
# scripts/deployment_files/snowflake_deployer.py using the selected connection
# profile. Push events pass --check-changes to the deployer; manual dispatches
# deploy every component.
- name: Deploy UDFs and Stored Procedures
  run: |
    # First verify that environment variables are set
    echo "Checking environment variables:"
    echo "CONN_PROFILE: $CONN_PROFILE"
    echo "SNOWFLAKE_ACCOUNT: ${SNOWFLAKE_ACCOUNT:0:5}..."
    echo "SNOWFLAKE_USER: $SNOWFLAKE_USER"
    echo "ENVIRONMENT: $ENVIRONMENT"
    echo "DATABASE_NAME: $DATABASE_NAME"

    # Configure git for better change detection
    git config --global core.quotepath off
    # `git fetch --unshallow` is an error on a repository that already has
    # full history, so only request it for a shallow checkout.
    if [[ "$(git rev-parse --is-shallow-repository)" == "true" ]]; then
      git fetch --prune --unshallow
    else
      git fetch --prune
    fi

    # deploy_component PATH NAME TYPE
    #   PATH  component directory, relative to the repository root
    #   NAME  object name passed to the deployer (--name)
    #   TYPE  "udf" or "procedure" (--type)
    # Exits the whole script with status 1 if the deployer fails.
    deploy_component() {
      local component_path="$1"
      local component_name="$2"
      local component_type="$3"
      local -a deploy_args=(
        deploy
        --profile "$CONN_PROFILE"
        --path "$component_path"
        --name "$component_name"
        --type "$component_type"
      )

      echo "🔍 Checking for changes in $component_path..."
      # For pull requests or workflow_dispatch, always deploy components.
      # $GITHUB_EVENT_NAME mirrors github.event_name.
      if [[ "$GITHUB_EVENT_NAME" == "pull_request" || "$GITHUB_EVENT_NAME" == "workflow_dispatch" ]]; then
        echo "Pull request or manual dispatch detected - deploying component"
      else
        # For push events, only deploy if component changed
        echo "Push event detected - only deploying changed components"
        deploy_args+=(--check-changes)
      fi

      # The deployer runs as the `if` condition on purpose: the default
      # fail-fast (`bash -e`) shell would otherwise abort the script on a
      # non-zero exit before the failure message below could be printed.
      if PYTHONPATH="$PYTHONPATH:$(pwd)" python -u scripts/deployment_files/snowflake_deployer.py "${deploy_args[@]}"; then
        echo "✅ Component $component_name processed successfully"
      else
        echo "❌ Processing failed for $component_name"
        exit 1
      fi
    }

    echo "🚀 Starting deployments to $ENVIRONMENT environment..."
    # Deploy UDFs
    deploy_component "udfs_and_spoc/python_udf" "CO2_VOLATILITY" "udf"
    deploy_component "udfs_and_spoc/daily_co2_changes" "DAILY_CO2_CHANGES" "udf"
    deploy_component "udfs_and_spoc/weekly_co2_changes" "WEEKLY_CO2_CHANGES" "udf"
    # Deploy stored procedures
    deploy_component "udfs_and_spoc/loading_co2_data_sp" "LOAD_CO2_DATA" "procedure"
    deploy_component "udfs_and_spoc/co2_harmonized_sp" "HARMONIZE_CO2_DATA" "procedure"
    deploy_component "udfs_and_spoc/co2_analytical_sp" "ANALYZE_CO2_DATA" "procedure"
    echo "🎉 All components processed successfully!"