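# DuckDB Commit Watcher (#225)
#
# NOTE: the text that originally stood here ("Skip to content" and repeated
# "DuckDB Commit Watcher #225" titles) appears to be page-navigation residue
# rather than part of the workflow; it is kept only as this comment so the
# document can be parsed as YAML.
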
# Watches the upstream DuckDB repository for new commits and dispatches the
# Post-on-Bluesky workflow once per new commit.
#
# State (the last processed commit hash and the upstream default branch) is
# carried from one run to the next in the `last-commit-state` artifact.
name: DuckDB Commit Watcher
on:
  # Manual trigger; `last_commit` optionally overrides the stored state.
  workflow_dispatch:
    inputs:
      last_commit:
        description: 'Last Commit Hash'
        required: false
        type: string
  # Minute 0 of every second hour.
  schedule:
    - cron: '0 */2 * * *'
jobs:
  fetch_new_commits:
    runs-on: ubuntu-latest
    env:
      REMOTE_REPO_URL: "https://github.com/duckdb/duckdb.git"
      # Number of commits read when there is no usable previous state.
      COMMIT_LIMIT: 50
      # Upper bound on posts dispatched by a single run.
      MAX_POSTS: 15
      # `name=` makes the API return only the state artifact, so `per_page=1`
      # cannot be satisfied by some unrelated, more recent artifact.
      ARTIFACT_API_URL: "https://api.github.com/repos/quackscience/duckdbot/actions/artifacts?name=last-commit-state&per_page=1"
    steps:
      - name: Checkout this repository
        uses: actions/checkout@v4

      - name: Download latest artifact (last-commit-state)
        env:
          # Passed through the environment so the token is never pasted into
          # the script text itself.
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Authenticated lookup of the state artifact; expired artifacts can
          # no longer be downloaded, so they are treated as absent.
          ARTIFACT_URL=$(curl -s \
            -H "Accept: application/vnd.github+json" \
            -H "Authorization: token $GH_TOKEN" \
            "$ARTIFACT_API_URL" \
            | jq -r '.artifacts[] | select(.name == "last-commit-state" and (.expired | not)) | .archive_download_url // empty')
          mkdir -p state
          if [ -n "$ARTIFACT_URL" ]; then
            echo "Downloading artifact from $ARTIFACT_URL"
            # --fail: stop on an HTTP error instead of saving the error body
            # as artifact.zip and failing later inside unzip.
            curl -sS --fail -L \
              -H "Authorization: token $GH_TOKEN" \
              -o artifact.zip "$ARTIFACT_URL"
            unzip -q artifact.zip -d state
            echo "Artifact extracted to state/"
          else
            echo "No previous artifact found. Starting fresh."
            echo "null" > state/last_commit.txt
          fi

      - name: Set up environment and state
        run: |
          mkdir -p state
          # "null" is the sentinel for "no previous commit recorded".
          if [ ! -f state/last_commit.txt ]; then
            echo "null" > state/last_commit.txt
          fi
          echo "Restarting from commit: $(cat state/last_commit.txt)"

      - name: Clone remote repository
        run: |
          git clone --bare "$REMOTE_REPO_URL" remote-repo
          cd remote-repo
          git fetch
          DEFAULT_BRANCH=$(git remote show origin | grep 'HEAD branch' | awk '{print $NF}')
          # Later steps build revision ranges from this value; an empty name
          # would make them misbehave silently, so stop here instead.
          if [ -z "$DEFAULT_BRANCH" ]; then
            echo "Error: could not determine the default branch of $REMOTE_REPO_URL"
            exit 1
          fi
          echo "Default branch is $DEFAULT_BRANCH"
          echo "$DEFAULT_BRANCH" > ../state/default_branch.txt

      - name: overwrite last commit from workflow
        if: "${{ github.event.inputs.last_commit != '' }}"
        env:
          # User-supplied text goes through an environment variable rather
          # than being expanded into the script, so it cannot inject commands.
          LAST_COMMIT_INPUT: ${{ github.event.inputs.last_commit }}
        run: |
          printf '%s\n' "$LAST_COMMIT_INPUT" > state/last_commit.txt

      - name: Fetch new commits with limits
        id: fetch_commits
        run: |
          cd remote-repo
          DEFAULT_BRANCH=$(cat ../state/default_branch.txt)
          LAST_PROCESSED=$(cat ../state/last_commit.txt)

          # `^{commit}` forces git to look the object up; a bare 40-hex string
          # would otherwise be accepted without checking that it exists.
          if [ "$LAST_PROCESSED" != "null" ] \
            && ! git rev-parse --verify --quiet "${LAST_PROCESSED}^{commit}" >/dev/null 2>&1; then
            echo "Warning: Invalid or missing commit hash in last_commit.txt ($LAST_PROCESSED). Restarting from latest commits."
            LAST_PROCESSED="null"
          fi

          # Runs `git log` with the given arguments and prints a JSON array of
          # {commit, author, date, message} objects. Fields are separated by
          # the 0x1F unit separator and the JSON is built by jq, so quotes and
          # backslashes in author names or subjects are escaped correctly.
          commits_as_json() {
            git log "$@" --pretty=format:'%H%x1f%an%x1f%ad%x1f%s' \
              | jq -R 'split("\u001f") | {commit: .[0], author: .[1], date: .[2], message: (.[3] // "")}' \
              | jq -s '.'
          }

          if [ "$LAST_PROCESSED" = "null" ]; then
            echo "Fetching the last $COMMIT_LIMIT commits from $DEFAULT_BRANCH"
            commits_as_json "$DEFAULT_BRANCH" -n "$COMMIT_LIMIT" > ../commits.json
          else
            echo "Fetching commits from $LAST_PROCESSED to $DEFAULT_BRANCH"
            commits_as_json --reverse "$LAST_PROCESSED..$DEFAULT_BRANCH" > ../commits.json
          fi

          # Check if commits.json is empty or contains an empty array
          if [ ! -s ../commits.json ] || [ "$(jq 'length' ../commits.json)" -eq 0 ]; then
            echo "No new commits found. Exiting workflow."
            echo "has_commits=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          echo "Limiting to $MAX_POSTS commits:"
          jq --argjson max "$MAX_POSTS" '.[0:$max]' ../commits.json > ../new_commits.json
          echo "Capped new commits:"
          cat ../new_commits.json
          echo "has_commits=true" >> "$GITHUB_OUTPUT"

      - name: Update last processed commit
        if: steps.fetch_commits.outputs.has_commits == 'true'
        run: |
          cd remote-repo
          DEFAULT_BRANCH=$(cat ../state/default_branch.txt)
          # NOTE(review): this records the tip of the default branch, not the
          # last commit that is actually posted. When more than MAX_POSTS
          # commits arrived since the previous run, the ones beyond the cap
          # are not picked up again by the next run. Confirm this is intended.
          LATEST_COMMIT=$(git log "$DEFAULT_BRANCH" -n 1 --pretty=format:'%H')
          echo "$LATEST_COMMIT" > ../state/last_commit.txt
          echo "Updated last processed commit to $LATEST_COMMIT"

      - name: Save state for next run
        # Runs even when an earlier step failed.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: last-commit-state
          path: state

      - name: Iterate over commits and call Bluesky post workflow
        if: steps.fetch_commits.outputs.has_commits == 'true'
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          POST_COUNT=0
          # `IFS= read -r` keeps each JSON line intact: without -r the
          # backslash escapes produced by `jq -c` would be stripped and the
          # line would no longer parse.
          jq -c '.[]' new_commits.json | while IFS= read -r commit; do
            if [ "$POST_COUNT" -ge "$MAX_POSTS" ]; then
              echo "Max post limit ($MAX_POSTS) reached. Exiting loop."
              break
            fi
            # Quoted here-strings prevent word splitting and glob expansion of
            # commit text (e.g. a subject containing `*`).
            commit_hash=$(jq -r '.commit' <<<"$commit")
            author=$(jq -r '.author' <<<"$commit")
            date=$(jq -r '.date' <<<"$commit")
            message=$(jq -r '.message' <<<"$commit")
            post_content="New DuckDB Commit by $author on $date: $message https://github.com/duckdb/duckdb/commit/$commit_hash"
            echo "Posting: $post_content"
            # Random 1-9 second pause between dispatches.
            sleep $((1 + RANDOM % 9))
            gh workflow run Post-on-Bluesky \
              --ref main \
              -f post="$post_content"
            POST_COUNT=$((POST_COUNT + 1))
          done

      - name: Safeguard check
        if: steps.fetch_commits.outputs.has_commits == 'true'
        run: |
          # Fails the job if the capped list somehow holds more than MAX_POSTS
          # entries.
          if [ "$(jq length new_commits.json)" -gt "$MAX_POSTS" ]; then
            echo "Warning: Commit processing exceeded the max limit!"
            exit 1
          fi