# Skip to content
#
# Prompt Evaluation
#
# Prompt Evaluation #101

# Workflow display name.
name: Prompt Evaluation

# Triggers:
#   * pull requests against main that modify the chat code, the promptfoo
#     configuration/helpers, or this workflow file;
#   * manual dispatch, with optional overrides for the sheet and chatbot URLs.
on:
  pull_request:
    branches:
      - main
    paths:
      - app/src/chat_api.py
      - app/src/chat_engine.py
      - app/src/generate.py
      - app/promptfoo/promptfooconfig.ci.yaml
      - app/promptfoo/generateUniqueId.js
      - app/promptfoo/readability_assessment.py
      - .github/workflows/promptfoo-googlesheet-evaluation.yml
  workflow_dispatch:
    inputs:
      # Every input is optional.
      input_sheet_url:
        description: Google Sheet URL for test case inputs
        type: string
        required: false
      output_sheet_url:
        description: Google Sheet URL for evaluation outputs
        type: string
        required: false
      chatbot_instance_url:
        description: Chatbot API endpoint URL
        type: string
        required: false
        default: 'https://decision-support-tool-dev.navateam.com/api/query'
jobs:
  # Runs the promptfoo suite against the chatbot endpoint, exports the results
  # to Google Sheets when a shared eval URL is available, and posts a summary
  # comment on pull requests.
  evaluate:
    runs-on: ubuntu-latest
    permissions:
      pull-requests: write  # required to create the PR comment
      contents: read
    env:
      # Manual-dispatch inputs win; otherwise fall back to secrets / defaults.
      GOOGLE_SHEET_INPUT_URL: ${{ inputs.input_sheet_url || secrets.GOOGLE_SHEET_INPUT_URL }}
      GOOGLE_SHEET_OUTPUT_URL: ${{ inputs.output_sheet_url || secrets.GOOGLE_SHEET_OUTPUT_URL }}
      # Empty when the secret is not configured; later steps test for that.
      PROMPTFOO_API_KEY: ${{ secrets.PROMPTFOO_API_KEY || '' }}
      CHATBOT_INSTANCE_URL: ${{ inputs.chatbot_instance_url || 'https://decision-support-tool-dev.navateam.com/api/query' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 'lts/*'

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y jq gettext python3-pip

      - name: Install Python dependencies
        run: |
          python3 -m pip install textdescriptives spacy
          python3 -m spacy download en_core_web_sm

      # The secret is handed to the script through the environment rather than
      # being pasted into the script text, so quotes or shell metacharacters
      # inside the JSON cannot break (or inject into) the command.
      - name: Set up Google Cloud credentials
        env:
          GOOGLE_CREDENTIALS_JSON: ${{ secrets.GOOGLE_CREDENTIALS_JSON }}
        run: |
          # Create the credentials file readable by the current user only.
          umask 077
          printf '%s\n' "${GOOGLE_CREDENTIALS_JSON}" > "/tmp/gcp-creds.json"
          # Only warn here; the evaluation step validates the file and fails hard.
          jq -e . "/tmp/gcp-creds.json" > /dev/null || echo "Warning: Invalid JSON format in credentials"
          echo "GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-creds.json" >> "${GITHUB_ENV}"

      - name: Install promptfoo and googleapis
        run: |
          npm install -g promptfoo --no-fund --no-audit --loglevel=error
          npm install -g googleapis
          npm install -g @actions/core @actions/github

      - name: Authenticate with Promptfoo
        if: env.PROMPTFOO_API_KEY != ''
        run: |
          # Authenticate with Promptfoo using the login command
          promptfoo auth login --host https://api.promptfoo.app --api-key "${PROMPTFOO_API_KEY}"

      # Copies the helper scripts to /tmp and expands environment variables in
      # the CI config with envsubst.
      - name: Process config file
        run: |
          cp app/promptfoo/generateUniqueId.js /tmp/generateUniqueId.js
          cp app/promptfoo/readability_assessment.py /tmp/readability_assessment.py
          echo "Using Google Sheet input URL: $(echo "$GOOGLE_SHEET_INPUT_URL" | grep -o 'spreadsheets.*')"
          echo "Using Google Sheet output URL: $(echo "$GOOGLE_SHEET_OUTPUT_URL" | grep -o 'spreadsheets.*')"
          echo "Using Chatbot instance URL: $CHATBOT_INSTANCE_URL"
          envsubst < app/promptfoo/promptfooconfig.ci.yaml > /tmp/promptfooconfig.processed.yaml
          echo "Config file processed, checking..."
          # Informational listing only: grep exits 1 when nothing matches, which
          # would otherwise abort the step under bash -e.
          grep -v "GOOGLE_SHEET\|CHATBOT_INSTANCE" /tmp/promptfooconfig.processed.yaml | grep -i "url\|path" || true

      # Outputs (written only when PROMPTFOO_API_KEY is set):
      #   eval_id   - last path segment of the shared eval URL
      #   share_url - shared eval URL found in the promptfoo console output
      - name: Run promptfoo evaluation
        id: eval
        env:
          GOOGLE_APPLICATION_CREDENTIALS: /tmp/gcp-creds.json
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          # Failing test cases must not fail the job; results are reported instead.
          PROMPTFOO_FAILED_TEST_EXIT_CODE: '0'
        run: |
          # The default shell for run steps is "bash -e" without pipefail, so a
          # crash in "promptfoo eval" would be hidden behind tee's exit status.
          set -o pipefail

          if [ ! -f "$GOOGLE_APPLICATION_CREDENTIALS" ]; then
            echo "Error: Google credentials file not found at $GOOGLE_APPLICATION_CREDENTIALS"
            exit 1
          fi
          if ! jq -e . "$GOOGLE_APPLICATION_CREDENTIALS" > /dev/null 2>&1; then
            echo "Error: Invalid JSON format in Google credentials file"
            exit 1
          fi
          if [ -z "$OPENAI_API_KEY" ]; then
            echo "Error: OPENAI_API_KEY environment variable is not set"
            exit 1
          fi

          OUTPUT_JSON_FILE="/tmp/promptfoo-output.json"
          EVAL_OUTPUT_FILE="/tmp/promptfoo-output.txt"

          # Sharing is requested only when an API key is available.
          EVAL_ARGS=(--max-concurrency 1 --config "/tmp/promptfooconfig.processed.yaml" --output "${OUTPUT_JSON_FILE}" --no-cache)
          if [ -n "$PROMPTFOO_API_KEY" ]; then
            EVAL_ARGS+=(--share)
          fi
          promptfoo eval "${EVAL_ARGS[@]}" | tee "${EVAL_OUTPUT_FILE}"

          if [ -f "${OUTPUT_JSON_FILE}" ]; then
            echo "Output JSON file generated successfully"
            # Keep the first match only so the step outputs stay single-line;
            # "|| true" keeps a missing URL from aborting the script.
            SHARE_URL=$(grep -o 'https://www.promptfoo.app/eval/[^[:space:]]*' "${EVAL_OUTPUT_FILE}" | head -n 1 || true)
            # The eval ID is everything after the last slash of the URL.
            EVAL_ID="${SHARE_URL##*/}"
            if [ -n "${EVAL_ID}" ]; then
              echo "Found evaluation ID: ${EVAL_ID}"
              echo "Exporting results to Google Sheets..."
              promptfoo export "${EVAL_ID}" --output "${GOOGLE_SHEET_OUTPUT_URL}"
            fi
            if [ -n "$PROMPTFOO_API_KEY" ]; then
              echo "eval_id=${EVAL_ID}" >> "${GITHUB_OUTPUT}"
              echo "share_url=${SHARE_URL}" >> "${GITHUB_OUTPUT}"
            fi
          else
            echo "No output JSON file was generated"
          fi

      # Step outputs are passed through env and read from process.env so their
      # contents are never spliced into the JavaScript source.
      - name: Create PR comment
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        env:
          EVAL_ID: ${{ steps.eval.outputs.eval_id }}
          SHARE_URL: ${{ steps.eval.outputs.share_url }}
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const fs = require('fs');

            const evalId = process.env.EVAL_ID || '';
            const shareUrl = process.env.SHARE_URL || '';
            // GOOGLE_SHEET_OUTPUT_URL comes from the job-level env.
            const googleSheetUrl = process.env.GOOGLE_SHEET_OUTPUT_URL || '';

            // Read the stats written by the evaluation step; fall back to zeros.
            let stats = { successes: 0, failures: 0, total: 0 };
            try {
              if (fs.existsSync('/tmp/promptfoo-output.json')) {
                const outputData = JSON.parse(fs.readFileSync('/tmp/promptfoo-output.json', 'utf8'));
                if (outputData.results && outputData.results.stats) {
                  stats = outputData.results.stats;
                }
              }
            } catch (error) {
              console.error('Error parsing output file:', error);
            }

            // Avoid 0/0 (rendered as "NaN%") when no test ran at all.
            const total = stats.total || stats.successes + stats.failures;
            const passRate = total > 0
              ? `${((stats.successes / total) * 100).toFixed(2)}%`
              : 'n/a';

            // Blank lines around the table keep the following link from being
            // parsed as an extra table row.
            const lines = [
              '## Promptfoo Evaluation Results',
              '',
              '| Success | Failure | Total | Pass Rate |',
              '|---------|---------|-------|-----------|',
              `| ${stats.successes} | ${stats.failures} | ${total} | ${passRate} |`,
              '',
            ];

            // Add Google Sheet link
            if (googleSheetUrl) {
              lines.push(`[View detailed results in Google Sheets](${googleSheetUrl})`, '');
            }

            // Add shareable link if available, otherwise point at the local
            // viewer or the CI console.
            if (shareUrl && shareUrl.length > 0) {
              lines.push(`**» [View eval results](${shareUrl}) «**`);
            } else if (evalId && evalId !== 'null' && evalId !== 'unknown') {
              lines.push(`Run \`promptfoo view --id ${evalId}\` locally to view interactive results`);
            } else {
              lines.push('**» View eval results in CI console «**');
            }

            // Post comment to PR
            const { data: comment } = await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: `${lines.join('\n')}\n`
            });
            console.log(`Created comment: ${comment.html_url}`);