Prompt Evaluation #101

Summary
Jobs
- evaluate
Run details
- Usage
- Workflow file

Workflow file for this run

.github/workflows/promptfoo-googlesheet-evaluation.yml at 43134e7

	# Prompt evaluation workflow: runs for pull requests into main and for manual dispatches.
	name: 'Prompt Evaluation'

	on:
	  # Only PRs targeting main that touch the chat code, the promptfoo assets,
	  # or this workflow file itself start a run.
	  pull_request:
	    branches: [main]
	    paths:
	      - 'app/src/chat_api.py'
	      - 'app/src/chat_engine.py'
	      - 'app/src/generate.py'
	      - 'app/promptfoo/promptfooconfig.ci.yaml'
	      - 'app/promptfoo/generateUniqueId.js'
	      - 'app/promptfoo/readability_assessment.py'
	      - '.github/workflows/promptfoo-googlesheet-evaluation.yml'

	  # Manual runs; every input is optional.
	  workflow_dispatch:
	    inputs:
	      input_sheet_url:
	        description: 'Google Sheet URL for test case inputs'
	        type: string
	        required: false
	      output_sheet_url:
	        description: 'Google Sheet URL for evaluation outputs'
	        type: string
	        required: false
	      chatbot_instance_url:
	        description: 'Chatbot API endpoint URL'
	        type: string
	        required: false
	        default: 'https://decision-support-tool-dev.navateam.com/api/query'

	jobs:
	  evaluate:
	    runs-on: ubuntu-latest
	    # Token scopes for this job; the final step posts a comment on the pull request.
	    permissions:
	      pull-requests: write
	      contents: read

	    # Job-wide environment. Each value takes the manual-dispatch input when one
	    # was supplied and otherwise falls back to the secret / literal on the right.
	    env:
	      GOOGLE_SHEET_INPUT_URL: ${{ inputs.input_sheet_url || secrets.GOOGLE_SHEET_INPUT_URL }}
	      GOOGLE_SHEET_OUTPUT_URL: ${{ inputs.output_sheet_url || secrets.GOOGLE_SHEET_OUTPUT_URL }}
	      # Empty string when the secret is absent; later steps test for '' to skip auth/sharing.
	      PROMPTFOO_API_KEY: ${{ secrets.PROMPTFOO_API_KEY || '' }}
	      CHATBOT_INSTANCE_URL: ${{ inputs.chatbot_instance_url || 'https://decision-support-tool-dev.navateam.com/api/query' }}

	    steps:
	- name: Checkout code
	  uses: actions/checkout@v4
	  with:
	    # 0 asks actions/checkout for the complete history instead of a shallow clone.
	    fetch-depth: 0

	- name: Set up Node.js
	  uses: actions/setup-node@v4
	  with:
	    # Quoted: a leading or bare '*' would otherwise be read as a YAML alias.
	    node-version: 'lts/*'

	# jq and envsubst (shipped in gettext) are used by later steps of this job.
	- name: Install system dependencies
	  run: |
	    sudo apt-get update
	    sudo apt-get install -y jq gettext python3-pip

	# Python packages plus the small English spaCy model.
	# NOTE(review): presumably required by readability_assessment.py - confirm.
	- name: Install Python dependencies
	  run: |
	    python3 -m pip install textdescriptives spacy
	    python3 -m spacy download en_core_web_sm

	# Writes the service-account JSON to /tmp/gcp-creds.json and exports its path
	# to all later steps through GITHUB_ENV.
	- name: Set up Google Cloud credentials
	  env:
	    # Hand the secret over via the environment instead of pasting it into the
	    # script text, so quotes or shell metacharacters in the value cannot break
	    # or alter the script.
	    GOOGLE_CREDENTIALS_JSON: ${{ secrets.GOOGLE_CREDENTIALS_JSON }}
	  run: |
	    # Make the key file readable by the current user only.
	    umask 077
	    printf '%s\n' "${GOOGLE_CREDENTIALS_JSON}" > "/tmp/gcp-creds.json"
	    # Warn only: the evaluation step re-validates the file and fails hard there.
	    jq -e . "/tmp/gcp-creds.json" > /dev/null || echo "Warning: Invalid JSON format in credentials"
	    echo "GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-creds.json" >> "${GITHUB_ENV}"

	# Global npm installs: the promptfoo CLI plus the Google and GitHub client libraries.
	- name: Install promptfoo and googleapis
	  run: |
	    npm install -g promptfoo --no-fund --no-audit --loglevel=error
	    npm install -g googleapis
	    npm install -g @actions/core @actions/github

	# Skipped entirely when no API key is configured (the job env defaults it to '').
	- name: Authenticate with Promptfoo
	  if: env.PROMPTFOO_API_KEY != ''
	  run: |
	    # Authenticate with Promptfoo using the login command
	    promptfoo auth login --host https://api.promptfoo.app --api-key "${PROMPTFOO_API_KEY}"

	# Stages the helper scripts in /tmp and renders the CI promptfoo config by
	# substituting environment variables into it.
	- name: Process config file
	  run: |
	    cp app/promptfoo/generateUniqueId.js /tmp/generateUniqueId.js
	    cp app/promptfoo/readability_assessment.py /tmp/readability_assessment.py

	    # Log only the part of each sheet URL from 'spreadsheets' onwards.
	    echo "Using Google Sheet input URL: $(echo "$GOOGLE_SHEET_INPUT_URL" | grep -o 'spreadsheets.*')"
	    echo "Using Google Sheet output URL: $(echo "$GOOGLE_SHEET_OUTPUT_URL" | grep -o 'spreadsheets.*')"
	    echo "Using Chatbot instance URL: $CHATBOT_INSTANCE_URL"
	    envsubst < app/promptfoo/promptfooconfig.ci.yaml > /tmp/promptfooconfig.processed.yaml
	    echo "Config file processed, checking..."
	    # Diagnostic listing only. grep exits 1 when nothing matches, which would
	    # abort the step under the runner's `bash -e`, so that status is ignored.
	    grep -v "GOOGLE_SHEET\|CHATBOT_INSTANCE" /tmp/promptfooconfig.processed.yaml | grep -i "url\|path" || true

	# Runs the evaluation against the rendered config, exports the results to the
	# output Google Sheet and publishes eval_id / share_url as step outputs.
	- name: Run promptfoo evaluation
	  id: eval
	  env:
	    GOOGLE_APPLICATION_CREDENTIALS: /tmp/gcp-creds.json
	    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
	    # Quoted so the value stays a string.
	    # NOTE(review): presumably makes promptfoo exit 0 when test cases fail - confirm.
	    PROMPTFOO_FAILED_TEST_EXIT_CODE: '0'
	  run: |
	    # Pre-flight checks: stop with a clear message if a prerequisite is missing.
	    if [ ! -f "$GOOGLE_APPLICATION_CREDENTIALS" ]; then
	      echo "Error: Google credentials file not found at $GOOGLE_APPLICATION_CREDENTIALS"
	      exit 1
	    fi

	    if ! jq -e . "$GOOGLE_APPLICATION_CREDENTIALS" > /dev/null 2>&1; then
	      echo "Error: Invalid JSON format in Google credentials file"
	      exit 1
	    fi

	    if [ -z "$OPENAI_API_KEY" ]; then
	      echo "Error: OPENAI_API_KEY environment variable is not set"
	      exit 1
	    fi

	    OUTPUT_JSON_FILE="/tmp/promptfoo-output.json"
	    EVAL_OUTPUT_FILE="/tmp/promptfoo-output.txt"

	    # One command line for both cases; --share is added only when an API key exists.
	    EVAL_ARGS=(--max-concurrency 1 --config "/tmp/promptfooconfig.processed.yaml" --output "${OUTPUT_JSON_FILE}" --no-cache)
	    if [ -n "$PROMPTFOO_API_KEY" ]; then
	      EVAL_ARGS+=(--share)
	    fi

	    # NOTE(review): without `set -o pipefail` the pipeline status is tee's, so a
	    # failing `promptfoo eval` does not fail this step - confirm that is intended.
	    promptfoo eval "${EVAL_ARGS[@]}" | tee "${EVAL_OUTPUT_FILE}"

	    if [ -f "${OUTPUT_JSON_FILE}" ]; then
	      echo "Output JSON file generated successfully"

	      # Take the first share URL printed by the CLI. `|| true` keeps a missing URL
	      # from aborting the step, and `head -n 1` keeps the value on a single line
	      # so it is safe to write to GITHUB_OUTPUT.
	      SHARE_URL=$(grep -o 'https://www.promptfoo.app/eval/[^[:space:]]*' "${EVAL_OUTPUT_FILE}" | head -n 1 || true)
	      # The evaluation ID is the last path segment of the share URL.
	      EVAL_ID="${SHARE_URL##*/}"

	      if [ -n "${EVAL_ID}" ]; then
	        echo "Found evaluation ID: ${EVAL_ID}"
	        echo "Exporting results to Google Sheets..."
	        promptfoo export "${EVAL_ID}" --output "${GOOGLE_SHEET_OUTPUT_URL}"
	      fi

	      if [ -n "$PROMPTFOO_API_KEY" ]; then
	        echo "eval_id=${EVAL_ID}" >> "${GITHUB_OUTPUT}"
	        echo "share_url=${SHARE_URL}" >> "${GITHUB_OUTPUT}"
	      fi
	    else
	      echo "No output JSON file was generated"
	    fi

	# Posts a summary table and result links as a comment on the pull request.
	- name: Create PR comment
	  if: github.event_name == 'pull_request'
	  uses: actions/github-script@v7
	  env:
	    # Step outputs are passed as environment variables rather than expanded
	    # into the JavaScript source, so their content can never be run as code.
	    EVAL_ID: ${{ steps.eval.outputs.eval_id }}
	    SHARE_URL: ${{ steps.eval.outputs.share_url }}
	  with:
	    github-token: ${{ secrets.GITHUB_TOKEN }}
	    script: |
	      const fs = require('fs');

	      const evalId = process.env.EVAL_ID || '';
	      const shareUrl = process.env.SHARE_URL || '';
	      // Set at job level, so it is already present in this step's environment.
	      const googleSheetUrl = process.env.GOOGLE_SHEET_OUTPUT_URL || '';

	      // Get the test results from the previous step; keep zeros if the file
	      // is missing or cannot be parsed.
	      let stats = { successes: 0, failures: 0, total: 0 };
	      try {
	        if (fs.existsSync('/tmp/promptfoo-output.json')) {
	          const outputData = JSON.parse(fs.readFileSync('/tmp/promptfoo-output.json', 'utf8'));
	          if (outputData.results && outputData.results.stats) {
	            stats = outputData.results.stats;
	          }
	        }
	      } catch (error) {
	        console.error('Error parsing output file:', error);
	      }

	      const successes = Number(stats.successes) || 0;
	      const failures = Number(stats.failures) || 0;
	      const total = Number(stats.total) || successes + failures;
	      // Avoid rendering "NaN%" when no test ran at all.
	      const passRate = total > 0 ? `${((successes / total) * 100).toFixed(2)}%` : 'N/A';

	      // Create the comment body line by line so the markdown never picks up
	      // indentation from this script.
	      const lines = [
	        '## Promptfoo Evaluation Results',
	        '',
	        '| Success | Failure | Total | Pass Rate |',
	        '|---------|---------|-------|-----------|',
	        `| ${successes} | ${failures} | ${total} | ${passRate} |`,
	        '',
	      ];

	      // Add Google Sheet link (skipped when no URL is configured, to avoid a dead link)
	      if (googleSheetUrl) {
	        lines.push(`[View detailed results in Google Sheets](${googleSheetUrl})`, '');
	      }

	      // Add shareable link if available, otherwise fall back to local/CI hints
	      if (shareUrl && shareUrl.length > 0) {
	        lines.push(`» [View eval results](${shareUrl}) «`);
	      } else if (evalId && evalId !== 'null' && evalId !== 'unknown') {
	        lines.push(`Run \`promptfoo view --id ${evalId}\` locally to view interactive results`);
	      } else {
	        lines.push('» View eval results in CI console «');
	      }

	      const body = `${lines.join('\n')}\n`;

	      // Post comment to PR
	      const { data: comment } = await github.rest.issues.createComment({
	        owner: context.repo.owner,
	        repo: context.repo.repo,
	        issue_number: context.issue.number,
	        body: body
	      });

	      console.log(`Created comment: ${comment.html_url}`);

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Prompt Evaluation #101

Workflow file

Prompt Evaluation #101

Uh oh!

Jobs

Run details

Workflow file for this run