name: DuckDB Commit Watcher
on:
  workflow_dispatch:
  schedule:
    - cron: '10 * * * *' # Runs at minute 10 of every hour
jobs:
  fetch_new_commits:
    runs-on: ubuntu-latest
    env:
      REMOTE_REPO_URL: "https://github.com/duckdb/duckdb.git" # Remote repository URL
      COMMIT_LIMIT: 50
      ARTIFACT_API_URL: "https://api.github.com/repos/quackscience/duckdbot/actions/artifacts?per_page=1"
    steps:
      - name: Checkout this repository
        uses: actions/checkout@v4
      - name: Download latest artifact (last-commit-state)
        run: |
          # Fetch the latest artifact's download URL using the GitHub API
          ARTIFACT_URL=$(curl -s -H "Accept: application/vnd.github+json" "$ARTIFACT_API_URL" | jq -r '.artifacts[0].archive_download_url')
          # If no artifact is found, just continue (i.e., no prior state)
          if [ -n "$ARTIFACT_URL" ] && [ "$ARTIFACT_URL" != "null" ]; then
            echo "Downloading artifact from $ARTIFACT_URL"
            # Download the artifact (the archive URL requires an authenticated request)
            curl -s -H "Accept: application/vnd.github+json" -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -L -o artifact.zip "$ARTIFACT_URL"
            # Extract the downloaded artifact into the 'state' directory
            mkdir -p state
            unzip -q artifact.zip -d state
            echo "Artifact extracted to state/"
          else
            echo "No previous artifact found. Starting fresh."
          fi
          cat state/last_commit.txt || true # File may not exist yet on the first run
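          # Illustrative only: with per_page=1 the API response is expected to look roughly like
          #   {"total_count": 42, "artifacts": [{"name": "last-commit-state",
          #    "archive_download_url": "https://api.github.com/repos/quackscience/duckdbot/actions/artifacts/<id>/zip", ...}]}
          # (hypothetical values), so jq picks the newest artifact's download URL. To inspect by hand:
          #   curl -s "$ARTIFACT_API_URL" | jq '.artifacts[0]'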
      - name: Set up environment and state
        run: |
          mkdir -p state
          # Seed the state file with the sentinel "null" on the very first run
          if [ ! -f state/last_commit.txt ]; then
            echo "null" > state/last_commit.txt
          fi
          echo "Restarting from $(cat state/last_commit.txt)"
      - name: Clone remote repository
        run: |
          git clone --bare "$REMOTE_REPO_URL" remote-repo
          cd remote-repo
          git fetch
          # Detect the default branch dynamically
          DEFAULT_BRANCH=$(git remote show origin | grep 'HEAD branch' | awk '{print $NF}')
          echo "Default branch is $DEFAULT_BRANCH"
          echo "$DEFAULT_BRANCH" > ../state/default_branch.txt
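          # Roughly equivalent alternative, assuming the bare clone's HEAD still points at
          # the remote's default branch:
          #   git symbolic-ref --short HEAD   # prints e.g. "main"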
      - name: Fetch new commits with limits
        run: |
          cd remote-repo
          DEFAULT_BRANCH=$(cat ../state/default_branch.txt)
          LAST_PROCESSED=$(cat ../state/last_commit.txt)
          if [ "$LAST_PROCESSED" = "null" ]; then
            # First run: limit to the most recent $COMMIT_LIMIT commits
            git log "$DEFAULT_BRANCH" -n "$COMMIT_LIMIT" --pretty=format:'{"commit":"%H","author":"%an","date":"%ad","message":"%s"}' | jq -s '.' > ../commits.json
          else
            # Subsequent runs: fetch only commits made since the last processed one
            git log "$LAST_PROCESSED..$DEFAULT_BRANCH" --pretty=format:'{"commit":"%H","author":"%an","date":"%ad","message":"%s"}' | jq -s '.' > ../commits.json
          fi
          # Pretty-print the commit array for the steps below
          jq '.' ../commits.json > ../new_commits.json
      - name: Update last processed commit
        run: |
          cd remote-repo
          DEFAULT_BRANCH=$(cat ../state/default_branch.txt)
          LATEST_COMMIT=$(git rev-parse "$DEFAULT_BRANCH")
          echo "$LATEST_COMMIT" > ../state/last_commit.txt
      - name: Save state for next run
        uses: actions/upload-artifact@v3
        with:
          name: last-commit-state
          path: state
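      # The next scheduled run retrieves this artifact through ARTIFACT_API_URL above.
      # Illustrative check (assumes at least one run has uploaded the artifact):
      #   gh api repos/quackscience/duckdbot/actions/artifacts --jq '.artifacts[0].name'
      #   # expected to print "last-commit-state"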
      - name: Output new commits as JSON
        id: output
        run: |
          cat new_commits.json
      - name: Iterate over commits and call Bluesky post workflow
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          jq -c '.[]' new_commits.json | while read -r commit; do
            # Extract fields
            commit_hash=$(echo "$commit" | jq -r '.commit')
            author=$(echo "$commit" | jq -r '.author')
            date=$(echo "$commit" | jq -r '.date')
            message=$(echo "$commit" | jq -r '.message')
            # Format the post content
            post_content="New DuckDB Commit by $author on $date: $message https://github.com/duckdb/duckdb/commit/$commit_hash"
            # Trigger Bluesky post workflow, with a short random pause between dispatches
            sleep $((1 + RANDOM % 3))
            gh workflow run Post-on-Bluesky \
              --ref main \
              -f post="$post_content"
          done
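          # Illustrative dispatch (hypothetical values): for a commit abc1234 by Jane Doe,
          # the loop effectively runs
          #   gh workflow run Post-on-Bluesky --ref main \
          #     -f post="New DuckDB Commit by Jane Doe on Mon Jan 1 2024: Fix something https://github.com/duckdb/duckdb/commit/abc1234"
          # which assumes this repository defines a "Post-on-Bluesky" workflow exposing a
          # workflow_dispatch input named "post".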