From a18d5b728dd3c2121968fd3a51cd74542aaf91fb Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Fri, 9 May 2025 13:28:43 -0700 Subject: [PATCH 01/22] feat: Setup Github Action workflow file for PromptFoo-GoogleSheet integration --- .../promptfoo-googlesheet-evaluation.yml | 74 +++++++++++++++++++ app/promptfooconfig.ci.yaml | 37 ++++++++++ 2 files changed, 111 insertions(+) create mode 100644 .github/workflows/promptfoo-googlesheet-evaluation.yml create mode 100644 app/promptfooconfig.ci.yaml diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml new file mode 100644 index 00000000..bb10f392 --- /dev/null +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -0,0 +1,74 @@ +name: 'Prompt Evaluation' + +on: + pull_request: + branches: + - main + paths: + - 'app/src/chat_api.py' + - 'app/src/chat_engine.py' + - 'app/src/generate.py' + - 'app/promptfooconfig.ci.yaml' + - 'docs/app/evaluation/generateUniqueId.js' + +jobs: + evaluate: + runs-on: ubuntu-latest + permissions: + pull-requests: write + contents: read + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: 'lts/*' + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y jq + + - name: Set up Google Cloud credentials + run: | + # Ensure the JSON is properly formatted without escaping issues + echo '${{ secrets.GOOGLE_CREDENTIALS_JSON }}' > /tmp/gcp-creds.json + # Verify the JSON is valid + cat /tmp/gcp-creds.json | jq . || echo "Warning: Invalid JSON format in credentials" + echo "GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-creds.json" >> $GITHUB_ENV + + - name: Install promptfoo and googleapis + run: | + npm install -g promptfoo + npm install -g googleapis + + - name: Set up promptfoo cache + uses: actions/cache@v3 + with: + path: ~/.cache/promptfoo + key: ${{ runner.os }}-promptfoo-v1-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-promptfoo-v1- + + - name: Create unique ID generator + run: | + cat > /tmp/generateUniqueId.js << 'EOF' + module.exports = function (varName, prompt, otherVars) { + // Generate a unique ID using timestamp and a random component + const uniqueId = 'promptfoo-eval-test-' + Date.now().toString() + '-' + Math.random().toString(36).substring(2, 9); + return { + output: uniqueId + }; + }; + EOF + + - name: Run promptfoo evaluation + run: | + promptfoo eval --config app/promptfooconfig.ci.yaml + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} \ No newline at end of file diff --git a/app/promptfooconfig.ci.yaml b/app/promptfooconfig.ci.yaml new file mode 100644 index 00000000..a5b99d0a --- /dev/null +++ b/app/promptfooconfig.ci.yaml @@ -0,0 +1,37 @@ +# yaml-language-server: $schema=https://promptfoo.dev/config-schema.json + +description: 'Decision Support Tool Evaluation (CI)' +prompts: + - response +providers: + # Use the hardcoded dev endpoint URL directly in CI + - id: "https://decision-support-tool-dev.navateam.com/api/query" + config: + method: POST + headers: + Content-Type: application/json + body: + chat_history: [] + session_id: "{{uniqueSessionId}}" + new_session: true + message: "{{question}}" + user_id: "promptfoo-evaluator" + transformResponse: "json ? json.response_text : ''" + +defaultTest: + vars: + uniqueSessionId: file:///tmp/generateUniqueId.js # Use the tmp file we make in the GHA workflow + googleCredentials: "/tmp/gcp-creds.json" + options: + timeout: 360000 + +evaluateOptions: + maxConcurrency: 3 + delay: 1000 + showProgressBar: true + +# Read test cases from this sheet +tests: https://docs.google.com/spreadsheets/d/1NWopeJjKy7eHq8D-VcIO7QtTDRD9agn9MAv7SQ6_hSY/edit?gid=711949508#gid=711949508 + +# Create a new tab for each evaluation +outputPath: https://docs.google.com/spreadsheets/d/1NWopeJjKy7eHq8D-VcIO7QtTDRD9agn9MAv7SQ6_hSY/edit \ No newline at end of file From bf50d7dc1dc53d215599078c5718122ac78253e6 Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Fri, 9 May 2025 14:55:39 -0700 Subject: [PATCH 02/22] fix: remove cat from json verify --- .github/workflows/promptfoo-googlesheet-evaluation.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index bb10f392..cdd44618 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -38,8 +38,8 @@ jobs: run: | # Ensure the JSON is properly formatted without escaping issues echo '${{ secrets.GOOGLE_CREDENTIALS_JSON }}' > /tmp/gcp-creds.json - # Verify the JSON is valid - cat /tmp/gcp-creds.json | jq . || echo "Warning: Invalid JSON format in credentials" + # Verify JSON is valid without printing content + jq -e . /tmp/gcp-creds.json > /dev/null || echo "Warning: Invalid JSON format in credentials" echo "GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-creds.json" >> $GITHUB_ENV - name: Install promptfoo and googleapis From 3dabfa03c3ed7e7def4701ab02b25a09a28f5594 Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Fri, 9 May 2025 15:00:09 -0700 Subject: [PATCH 03/22] fix: upgrade actions/cache version to v4 --- .github/workflows/promptfoo-googlesheet-evaluation.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index cdd44618..2886fe86 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -39,7 +39,7 @@ jobs: # Ensure the JSON is properly formatted without escaping issues echo '${{ secrets.GOOGLE_CREDENTIALS_JSON }}' > /tmp/gcp-creds.json # Verify JSON is valid without printing content - jq -e . /tmp/gcp-creds.json > /dev/null || echo "Warning: Invalid JSON format in credentials" + jq -e . "/tmp/gcp-creds.json" > /dev/null || echo "Warning: Invalid JSON format in credentials" echo "GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-creds.json" >> $GITHUB_ENV - name: Install promptfoo and googleapis @@ -48,7 +48,7 @@ jobs: npm install -g googleapis - name: Set up promptfoo cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/promptfoo key: ${{ runner.os }}-promptfoo-v1-${{ github.sha }} From 8b60452ee6dc5e89657bc095894e29863d31c39f Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Fri, 9 May 2025 15:09:43 -0700 Subject: [PATCH 04/22] fix: gha linter errors --- .github/workflows/promptfoo-googlesheet-evaluation.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index 2886fe86..8ac7f5d1 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -37,10 +37,10 @@ jobs: - name: Set up Google Cloud credentials run: | # Ensure the JSON is properly formatted without escaping issues - echo '${{ secrets.GOOGLE_CREDENTIALS_JSON }}' > /tmp/gcp-creds.json + echo '${{ secrets.GOOGLE_CREDENTIALS_JSON }}' > "/tmp/gcp-creds.json" # Verify JSON is valid without printing content jq -e . "/tmp/gcp-creds.json" > /dev/null || echo "Warning: Invalid JSON format in credentials" - echo "GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-creds.json" >> $GITHUB_ENV + echo "GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-creds.json" >> "${GITHUB_ENV}" - name: Install promptfoo and googleapis run: | From cf3b36a09f0d848233e2ef93d1fa24d96bb855fe Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Fri, 9 May 2025 15:28:51 -0700 Subject: [PATCH 05/22] save sheet urls to secrets, add PR comment --- .../promptfoo-googlesheet-evaluation.yml | 90 ++++++++++++++++++- app/promptfooconfig.ci.yaml | 14 +-- 2 files changed, 96 insertions(+), 8 deletions(-) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index 8ac7f5d1..38899a79 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -46,6 +46,7 @@ jobs: run: | npm install -g promptfoo npm install -g googleapis + npm install -g @actions/core @actions/github - name: Set up promptfoo cache uses: actions/cache@v4 @@ -67,8 +68,93 @@ jobs: }; EOF + - name: Process config file + run: | + # Create a temporary copy of the config file + cp app/promptfooconfig.ci.yaml /tmp/promptfooconfig.processed.yaml + + # Replace placeholders with actual secret values + sed -i "s|GOOGLE_SHEET_INPUT_URL|${{ secrets.GOOGLE_SHEET_INPUT_URL }}|g" /tmp/promptfooconfig.processed.yaml + sed -i "s|GOOGLE_SHEET_OUTPUT_URL|${{ secrets.GOOGLE_SHEET_OUTPUT_URL }}|g" /tmp/promptfooconfig.processed.yaml + + # Debug output (redacting sensitive parts) + echo "Config file processed. Validating structure..." + grep -v "GOOGLE_SHEET" /tmp/promptfooconfig.processed.yaml | grep -i "url\|path" + - name: Run promptfoo evaluation + id: eval run: | - promptfoo eval --config app/promptfooconfig.ci.yaml + # Add a JSON output file in addition to Google Sheets output + OUTPUT_JSON_FILE="/tmp/promptfoo-output.json" + # Run evaluation with share flag to get shareable URL, using the processed config + promptfoo eval --config /tmp/promptfooconfig.processed.yaml --share --output $OUTPUT_JSON_FILE + + # Extract evaluation ID and shareable URL if available + if [ -f "$OUTPUT_JSON_FILE" ]; then + EVAL_ID=$(jq -r '.evaluationId' $OUTPUT_JSON_FILE) + SHARE_URL=$(jq -r '.shareableUrl // ""' $OUTPUT_JSON_FILE) + + # Save as outputs + echo "eval_id=$EVAL_ID" >> $GITHUB_OUTPUT + echo "share_url=$SHARE_URL" >> $GITHUB_OUTPUT + + # Print the share URL for debugging + echo "Shareable URL: $SHARE_URL" + else + echo "No output JSON file was generated" + fi env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} \ No newline at end of file + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + + - name: Create PR comment + if: github.event_name == 'pull_request' + uses: actions/github-script@v6 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const evalId = '${{ steps.eval.outputs.eval_id }}'; + const shareUrl = '${{ steps.eval.outputs.share_url }}'; + + // Get the test results from the previous step + const fs = require('fs'); + let stats = { successes: 0, failures: 0, total: 0 }; + + try { + if (fs.existsSync('/tmp/promptfoo-output.json')) { + const outputData = JSON.parse(fs.readFileSync('/tmp/promptfoo-output.json', 'utf8')); + if (outputData.results && outputData.results.stats) { + stats = outputData.results.stats; + } + } + } catch (error) { + console.error('Error parsing output file:', error); + } + + // Create the comment body + let body = `## Promptfoo Evaluation Results + + | Success | Failure | Total | Pass Rate | + |---------|---------|-------|-----------| + | ${stats.successes} | ${stats.failures} | ${stats.total || stats.successes + stats.failures} | ${((stats.successes / (stats.total || stats.successes + stats.failures)) * 100).toFixed(2)}% | + + `; + + // Add Google Sheet link + body += `[View detailed results in Google Sheets](${{ env.GOOGLE_SHEET_OUTPUT_URL }})\n\n`; + + // Add shareable link if available + if (shareUrl) { + body += `[View interactive evaluation results](${shareUrl})\n`; + } else { + body += `Run \`promptfoo view --id ${evalId}\` locally to view interactive results\n`; + } + + // Post comment to PR + const { data: comment } = await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: body + }); + + console.log(`Created comment: ${comment.html_url}`); \ No newline at end of file diff --git a/app/promptfooconfig.ci.yaml b/app/promptfooconfig.ci.yaml index a5b99d0a..0a91cfa3 100644 --- a/app/promptfooconfig.ci.yaml +++ b/app/promptfooconfig.ci.yaml @@ -20,8 +20,7 @@ providers: defaultTest: vars: - uniqueSessionId: file:///tmp/generateUniqueId.js # Use the tmp file we make in the GHA workflow - googleCredentials: "/tmp/gcp-creds.json" + uniqueSessionId: file:///tmp/generateUniqueId.js # Use the tmp script that we make in the GHA workflow options: timeout: 360000 @@ -30,8 +29,11 @@ evaluateOptions: delay: 1000 showProgressBar: true -# Read test cases from this sheet -tests: https://docs.google.com/spreadsheets/d/1NWopeJjKy7eHq8D-VcIO7QtTDRD9agn9MAv7SQ6_hSY/edit?gid=711949508#gid=711949508 -# Create a new tab for each evaluation -outputPath: https://docs.google.com/spreadsheets/d/1NWopeJjKy7eHq8D-VcIO7QtTDRD9agn9MAv7SQ6_hSY/edit \ No newline at end of file +tests: {{env.GOOGLE_SHEET_INPUT_URL}} +outputPath: {{env.GOOGLE_SHEET_OUTPUT_URL}} + +sharing: + enabled: true + includePromptSources: false + includeMetadata: true \ No newline at end of file From 40ae25a4c846e73469c3f2520f02eba2a8203220 Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Fri, 9 May 2025 15:36:49 -0700 Subject: [PATCH 06/22] save sheet urls to secrets, add PR comment --- .../promptfoo-googlesheet-evaluation.yml | 23 +++++++++++-------- app/promptfooconfig.ci.yaml | 4 ++-- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index 38899a79..4f479b41 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -18,6 +18,10 @@ jobs: pull-requests: write contents: read + env: + GOOGLE_SHEET_INPUT_URL: ${{ secrets.GOOGLE_SHEET_INPUT_URL }} + GOOGLE_SHEET_OUTPUT_URL: ${{ secrets.GOOGLE_SHEET_OUTPUT_URL }} + steps: - name: Checkout code uses: actions/checkout@v4 @@ -87,19 +91,19 @@ jobs: # Add a JSON output file in addition to Google Sheets output OUTPUT_JSON_FILE="/tmp/promptfoo-output.json" # Run evaluation with share flag to get shareable URL, using the processed config - promptfoo eval --config /tmp/promptfooconfig.processed.yaml --share --output $OUTPUT_JSON_FILE + promptfoo eval --config "/tmp/promptfooconfig.processed.yaml" --share --output "${OUTPUT_JSON_FILE}" # Extract evaluation ID and shareable URL if available - if [ -f "$OUTPUT_JSON_FILE" ]; then - EVAL_ID=$(jq -r '.evaluationId' $OUTPUT_JSON_FILE) - SHARE_URL=$(jq -r '.shareableUrl // ""' $OUTPUT_JSON_FILE) + if [ -f "${OUTPUT_JSON_FILE}" ]; then + EVAL_ID=$(jq -r '.evaluationId' "${OUTPUT_JSON_FILE}") + SHARE_URL=$(jq -r '.shareableUrl // ""' "${OUTPUT_JSON_FILE}") # Save as outputs - echo "eval_id=$EVAL_ID" >> $GITHUB_OUTPUT - echo "share_url=$SHARE_URL" >> $GITHUB_OUTPUT + echo "eval_id=${EVAL_ID}" >> "${GITHUB_OUTPUT}" + echo "share_url=${SHARE_URL}" >> "${GITHUB_OUTPUT}" # Print the share URL for debugging - echo "Shareable URL: $SHARE_URL" + echo "Shareable URL: ${SHARE_URL}" else echo "No output JSON file was generated" fi @@ -108,12 +112,13 @@ jobs: - name: Create PR comment if: github.event_name == 'pull_request' - uses: actions/github-script@v6 + uses: actions/github-script@v7 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | const evalId = '${{ steps.eval.outputs.eval_id }}'; const shareUrl = '${{ steps.eval.outputs.share_url }}'; + const googleSheetUrl = '${{ env.GOOGLE_SHEET_OUTPUT_URL }}'; // Get the test results from the previous step const fs = require('fs'); @@ -140,7 +145,7 @@ jobs: `; // Add Google Sheet link - body += `[View detailed results in Google Sheets](${{ env.GOOGLE_SHEET_OUTPUT_URL }})\n\n`; + body += `[View detailed results in Google Sheets](${googleSheetUrl})\n\n`; // Add shareable link if available if (shareUrl) { diff --git a/app/promptfooconfig.ci.yaml b/app/promptfooconfig.ci.yaml index 0a91cfa3..943728e7 100644 --- a/app/promptfooconfig.ci.yaml +++ b/app/promptfooconfig.ci.yaml @@ -30,8 +30,8 @@ evaluateOptions: showProgressBar: true -tests: {{env.GOOGLE_SHEET_INPUT_URL}} -outputPath: {{env.GOOGLE_SHEET_OUTPUT_URL}} +tests: GOOGLE_SHEET_INPUT_URL +outputPath: GOOGLE_SHEET_OUTPUT_URL sharing: enabled: true From fcd24a1874e90c52cce11ea11c8b82ee1724c36c Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Fri, 9 May 2025 15:50:00 -0700 Subject: [PATCH 07/22] add share link --- .../promptfoo-googlesheet-evaluation.yml | 35 ++++++++++++++++--- app/promptfooconfig.ci.yaml | 7 +--- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index 4f479b41..29d2e5b0 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -72,6 +72,15 @@ jobs: }; EOF + - name: Authenticate with Promptfoo + if: ${{ secrets.PROMPTFOO_API_KEY != '' }} + run: | + # Authenticate with Promptfoo using the login command + promptfoo auth login --host https://api.promptfoo.app --api-key "${{ secrets.PROMPTFOO_API_KEY }}" + + # Verify authentication + promptfoo account + - name: Process config file run: | # Create a temporary copy of the config file @@ -90,14 +99,27 @@ jobs: run: | # Add a JSON output file in addition to Google Sheets output OUTPUT_JSON_FILE="/tmp/promptfoo-output.json" + # Run evaluation with share flag to get shareable URL, using the processed config promptfoo eval --config "/tmp/promptfooconfig.processed.yaml" --share --output "${OUTPUT_JSON_FILE}" # Extract evaluation ID and shareable URL if available if [ -f "${OUTPUT_JSON_FILE}" ]; then - EVAL_ID=$(jq -r '.evaluationId' "${OUTPUT_JSON_FILE}") + # Print the output file content for debugging + echo "Output JSON file content:" + cat "${OUTPUT_JSON_FILE}" | grep -v "sensitive" + + EVAL_ID=$(jq -r '.evaluationId // "unknown"' "${OUTPUT_JSON_FILE}") SHARE_URL=$(jq -r '.shareableUrl // ""' "${OUTPUT_JSON_FILE}") + # Try to share the results if not already shared + if [ -z "${SHARE_URL}" ] && [ "${EVAL_ID}" != "unknown" ] && [ "${EVAL_ID}" != "null" ]; then + echo "No shareable URL found, attempting to share evaluation..." + SHARE_RESULT=$(promptfoo share --id "${EVAL_ID}" --json || echo '{}') + SHARE_URL=$(echo "${SHARE_RESULT}" | jq -r '.shareableUrl // ""') + echo "Share result: ${SHARE_URL}" + fi + # Save as outputs echo "eval_id=${EVAL_ID}" >> "${GITHUB_OUTPUT}" echo "share_url=${SHARE_URL}" >> "${GITHUB_OUTPUT}" @@ -109,6 +131,7 @@ jobs: fi env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + PROMPTFOO_API_KEY: ${{ secrets.PROMPTFOO_API_KEY }} - name: Create PR comment if: github.event_name == 'pull_request' @@ -147,11 +170,13 @@ jobs: // Add Google Sheet link body += `[View detailed results in Google Sheets](${googleSheetUrl})\n\n`; - // Add shareable link if available - if (shareUrl) { - body += `[View interactive evaluation results](${shareUrl})\n`; - } else { + // Add shareable link if available, matching the format in the TS example + if (shareUrl && shareUrl.length > 0) { + body += `**» [View eval results](${shareUrl}) «**\n`; + } else if (evalId && evalId !== 'null' && evalId !== 'unknown') { body += `Run \`promptfoo view --id ${evalId}\` locally to view interactive results\n`; + } else { + body += `**» View eval results in CI console «**\n`; } // Post comment to PR diff --git a/app/promptfooconfig.ci.yaml b/app/promptfooconfig.ci.yaml index 943728e7..8bd464ca 100644 --- a/app/promptfooconfig.ci.yaml +++ b/app/promptfooconfig.ci.yaml @@ -31,9 +31,4 @@ evaluateOptions: tests: GOOGLE_SHEET_INPUT_URL -outputPath: GOOGLE_SHEET_OUTPUT_URL - -sharing: - enabled: true - includePromptSources: false - includeMetadata: true \ No newline at end of file +outputPath: GOOGLE_SHEET_OUTPUT_URL \ No newline at end of file From dbeb07f7f8cf95ae75189d0148b6ecd4da010ff6 Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Fri, 9 May 2025 15:53:00 -0700 Subject: [PATCH 08/22] fix: lint errors --- .../workflows/promptfoo-googlesheet-evaluation.yml | 10 +++++----- app/promptfooconfig.ci.yaml | 11 ++++++++++- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index 29d2e5b0..d2026b33 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -21,6 +21,7 @@ jobs: env: GOOGLE_SHEET_INPUT_URL: ${{ secrets.GOOGLE_SHEET_INPUT_URL }} GOOGLE_SHEET_OUTPUT_URL: ${{ secrets.GOOGLE_SHEET_OUTPUT_URL }} + PROMPTFOO_API_KEY: ${{ secrets.PROMPTFOO_API_KEY }} steps: - name: Checkout code @@ -73,10 +74,10 @@ jobs: EOF - name: Authenticate with Promptfoo - if: ${{ secrets.PROMPTFOO_API_KEY != '' }} + if: env.PROMPTFOO_API_KEY != '' run: | # Authenticate with Promptfoo using the login command - promptfoo auth login --host https://api.promptfoo.app --api-key "${{ secrets.PROMPTFOO_API_KEY }}" + promptfoo auth login --host https://api.promptfoo.app --api-key "${PROMPTFOO_API_KEY}" # Verify authentication promptfoo account @@ -105,9 +106,9 @@ jobs: # Extract evaluation ID and shareable URL if available if [ -f "${OUTPUT_JSON_FILE}" ]; then - # Print the output file content for debugging + # Print the output file content for debugging (fixing 'useless cat') echo "Output JSON file content:" - cat "${OUTPUT_JSON_FILE}" | grep -v "sensitive" + grep -v "sensitive" "${OUTPUT_JSON_FILE}" EVAL_ID=$(jq -r '.evaluationId // "unknown"' "${OUTPUT_JSON_FILE}") SHARE_URL=$(jq -r '.shareableUrl // ""' "${OUTPUT_JSON_FILE}") @@ -131,7 +132,6 @@ jobs: fi env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - PROMPTFOO_API_KEY: ${{ secrets.PROMPTFOO_API_KEY }} - name: Create PR comment if: github.event_name == 'pull_request' diff --git a/app/promptfooconfig.ci.yaml b/app/promptfooconfig.ci.yaml index 8bd464ca..dd88cb8a 100644 --- a/app/promptfooconfig.ci.yaml +++ b/app/promptfooconfig.ci.yaml @@ -31,4 +31,13 @@ evaluateOptions: tests: GOOGLE_SHEET_INPUT_URL -outputPath: GOOGLE_SHEET_OUTPUT_URL \ No newline at end of file +outputPath: GOOGLE_SHEET_OUTPUT_URL + +# Enhanced sharing configuration for CI +sharing: + enabled: true + persist: true + includePromptSources: true + includeMetadata: true + includeModelResponses: true + includeCitations: true \ No newline at end of file From 62322a4555472d76e7e29b034b480629af477608 Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Fri, 9 May 2025 15:57:41 -0700 Subject: [PATCH 09/22] fix: auth check --- .../workflows/promptfoo-googlesheet-evaluation.yml | 5 ++--- app/promptfooconfig.ci.yaml | 11 +---------- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index d2026b33..bf6fbbb3 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -78,9 +78,8 @@ jobs: run: | # Authenticate with Promptfoo using the login command promptfoo auth login --host https://api.promptfoo.app --api-key "${PROMPTFOO_API_KEY}" - - # Verify authentication - promptfoo account + echo "Authentication completed. Checking auth status..." + promptfoo debug --auth - name: Process config file run: | diff --git a/app/promptfooconfig.ci.yaml b/app/promptfooconfig.ci.yaml index dd88cb8a..8bd464ca 100644 --- a/app/promptfooconfig.ci.yaml +++ b/app/promptfooconfig.ci.yaml @@ -31,13 +31,4 @@ evaluateOptions: tests: GOOGLE_SHEET_INPUT_URL -outputPath: GOOGLE_SHEET_OUTPUT_URL - -# Enhanced sharing configuration for CI -sharing: - enabled: true - persist: true - includePromptSources: true - includeMetadata: true - includeModelResponses: true - includeCitations: true \ No newline at end of file +outputPath: GOOGLE_SHEET_OUTPUT_URL \ No newline at end of file From 9f47e0956b35542babafcda5bfdae5d3ef876896 Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Fri, 9 May 2025 16:01:49 -0700 Subject: [PATCH 10/22] fix: auth check; --- app/promptfooconfig.ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/promptfooconfig.ci.yaml b/app/promptfooconfig.ci.yaml index 8bd464ca..7e8a55d5 100644 --- a/app/promptfooconfig.ci.yaml +++ b/app/promptfooconfig.ci.yaml @@ -31,4 +31,4 @@ evaluateOptions: tests: GOOGLE_SHEET_INPUT_URL -outputPath: GOOGLE_SHEET_OUTPUT_URL \ No newline at end of file +outputPath: GOOGLE_SHEET_OUTPUT_URL From 88aa6fac8e26aae0baaf94d99905180b38b40596 Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Fri, 9 May 2025 17:52:44 -0700 Subject: [PATCH 11/22] fix: auth check; --- .github/workflows/promptfoo-googlesheet-evaluation.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index bf6fbbb3..47090c1a 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -78,8 +78,6 @@ jobs: run: | # Authenticate with Promptfoo using the login command promptfoo auth login --host https://api.promptfoo.app --api-key "${PROMPTFOO_API_KEY}" - echo "Authentication completed. Checking auth status..." - promptfoo debug --auth - name: Process config file run: | From 54003622d6ae105d16e6cdd3b89f82857c63d654 Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Fri, 9 May 2025 18:07:54 -0700 Subject: [PATCH 12/22] fix: continue on error --- .github/workflows/promptfoo-googlesheet-evaluation.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index 47090c1a..d6f09411 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -94,6 +94,7 @@ jobs: - name: Run promptfoo evaluation id: eval + continue-on-error: true run: | # Add a JSON output file in addition to Google Sheets output OUTPUT_JSON_FILE="/tmp/promptfoo-output.json" From db9cc71110830e8e09fa0655380cea62268f9455 Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Fri, 9 May 2025 18:21:46 -0700 Subject: [PATCH 13/22] fix: exit code --- .github/workflows/promptfoo-googlesheet-evaluation.yml | 1 + app/promptfooconfig.ci.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index d6f09411..fcdcb229 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -130,6 +130,7 @@ jobs: fi env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + PROMPTFOO_FAILED_TEST_EXIT_CODE: 0 - name: Create PR comment if: github.event_name == 'pull_request' diff --git a/app/promptfooconfig.ci.yaml b/app/promptfooconfig.ci.yaml index 7e8a55d5..c7585728 100644 --- a/app/promptfooconfig.ci.yaml +++ b/app/promptfooconfig.ci.yaml @@ -21,11 +21,11 @@ providers: defaultTest: vars: uniqueSessionId: file:///tmp/generateUniqueId.js # Use the tmp script that we make in the GHA workflow + rubric: "{{__expected1}}" options: timeout: 360000 evaluateOptions: - maxConcurrency: 3 delay: 1000 showProgressBar: true From 10a92fb1ebd0b3fdb4204f76a1096f4a1c103a74 Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Fri, 9 May 2025 18:42:28 -0700 Subject: [PATCH 14/22] fix: cleanup inline comments and outputs --- .../promptfoo-googlesheet-evaluation.yml | 36 +++++-------------- app/promptfooconfig.ci.yaml | 1 - 2 files changed, 8 insertions(+), 29 deletions(-) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index fcdcb229..76acdea8 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -41,9 +41,7 @@ jobs: - name: Set up Google Cloud credentials run: | - # Ensure the JSON is properly formatted without escaping issues echo '${{ secrets.GOOGLE_CREDENTIALS_JSON }}' > "/tmp/gcp-creds.json" - # Verify JSON is valid without printing content jq -e . "/tmp/gcp-creds.json" > /dev/null || echo "Warning: Invalid JSON format in credentials" echo "GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-creds.json" >> "${GITHUB_ENV}" @@ -53,19 +51,10 @@ jobs: npm install -g googleapis npm install -g @actions/core @actions/github - - name: Set up promptfoo cache - uses: actions/cache@v4 - with: - path: ~/.cache/promptfoo - key: ${{ runner.os }}-promptfoo-v1-${{ github.sha }} - restore-keys: | - ${{ runner.os }}-promptfoo-v1- - - name: Create unique ID generator run: | cat > /tmp/generateUniqueId.js << 'EOF' module.exports = function (varName, prompt, otherVars) { - // Generate a unique ID using timestamp and a random component const uniqueId = 'promptfoo-eval-test-' + Date.now().toString() + '-' + Math.random().toString(36).substring(2, 9); return { output: uniqueId @@ -83,12 +72,10 @@ jobs: run: | # Create a temporary copy of the config file cp app/promptfooconfig.ci.yaml /tmp/promptfooconfig.processed.yaml - - # Replace placeholders with actual secret values + sed -i "s|GOOGLE_SHEET_INPUT_URL|${{ secrets.GOOGLE_SHEET_INPUT_URL }}|g" /tmp/promptfooconfig.processed.yaml sed -i "s|GOOGLE_SHEET_OUTPUT_URL|${{ secrets.GOOGLE_SHEET_OUTPUT_URL }}|g" /tmp/promptfooconfig.processed.yaml - # Debug output (redacting sensitive parts) echo "Config file processed. Validating structure..." grep -v "GOOGLE_SHEET" /tmp/promptfooconfig.processed.yaml | grep -i "url\|path" @@ -96,34 +83,27 @@ jobs: id: eval continue-on-error: true run: | - # Add a JSON output file in addition to Google Sheets output OUTPUT_JSON_FILE="/tmp/promptfoo-output.json" - - # Run evaluation with share flag to get shareable URL, using the processed config + + # Ruan evaluation with share flag to get shareable URL, using the processed config promptfoo eval --config "/tmp/promptfooconfig.processed.yaml" --share --output "${OUTPUT_JSON_FILE}" - + # Extract evaluation ID and shareable URL if available if [ -f "${OUTPUT_JSON_FILE}" ]; then - # Print the output file content for debugging (fixing 'useless cat') - echo "Output JSON file content:" - grep -v "sensitive" "${OUTPUT_JSON_FILE}" - + echo "Output JSON file generated successfully" + EVAL_ID=$(jq -r '.evaluationId // "unknown"' "${OUTPUT_JSON_FILE}") SHARE_URL=$(jq -r '.shareableUrl // ""' "${OUTPUT_JSON_FILE}") - - # Try to share the results if not already shared + if [ -z "${SHARE_URL}" ] && [ "${EVAL_ID}" != "unknown" ] && [ "${EVAL_ID}" != "null" ]; then echo "No shareable URL found, attempting to share evaluation..." SHARE_RESULT=$(promptfoo share --id "${EVAL_ID}" --json || echo '{}') SHARE_URL=$(echo "${SHARE_RESULT}" | jq -r '.shareableUrl // ""') echo "Share result: ${SHARE_URL}" fi - - # Save as outputs + echo "eval_id=${EVAL_ID}" >> "${GITHUB_OUTPUT}" echo "share_url=${SHARE_URL}" >> "${GITHUB_OUTPUT}" - - # Print the share URL for debugging echo "Shareable URL: ${SHARE_URL}" else echo "No output JSON file was generated" diff --git a/app/promptfooconfig.ci.yaml b/app/promptfooconfig.ci.yaml index c7585728..57edfc2f 100644 --- a/app/promptfooconfig.ci.yaml +++ b/app/promptfooconfig.ci.yaml @@ -21,7 +21,6 @@ providers: defaultTest: vars: uniqueSessionId: file:///tmp/generateUniqueId.js # Use the tmp script that we make in the GHA workflow - rubric: "{{__expected1}}" options: timeout: 360000 From 8be97cecf5b092eef6546af4767c4115eafe4cfe Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Fri, 9 May 2025 18:47:33 -0700 Subject: [PATCH 15/22] fix: add no-cache and suppress npm install warnings --- .github/workflows/promptfoo-googlesheet-evaluation.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index 76acdea8..fafe7405 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -47,7 +47,7 @@ jobs: - name: Install promptfoo and googleapis run: | - npm install -g promptfoo + npm install -g promptfoo --no-fund --no-audit --loglevel=error npm install -g googleapis npm install -g @actions/core @actions/github @@ -86,7 +86,7 @@ jobs: OUTPUT_JSON_FILE="/tmp/promptfoo-output.json" # Ruan evaluation with share flag to get shareable URL, using the processed config - promptfoo eval --config "/tmp/promptfooconfig.processed.yaml" --share --output "${OUTPUT_JSON_FILE}" + promptfoo eval --config "/tmp/promptfooconfig.processed.yaml" --share --output "${OUTPUT_JSON_FILE}" --no-cache # Extract evaluation ID and shareable URL if available if [ -f "${OUTPUT_JSON_FILE}" ]; then From 4ab6e4d6b6d2e2ab7756dd8de4f0d930c2fae2de Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Mon, 12 May 2025 11:03:58 -0700 Subject: [PATCH 16/22] fix: add workflow_dispatch for inputs --- .../promptfoo-googlesheet-evaluation.yml | 64 +++++++++++++------ app/promptfooconfig.ci.yaml | 4 +- 2 files changed, 44 insertions(+), 24 deletions(-) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index fafe7405..6ba9a70f 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -10,6 +10,23 @@ on: - 'app/src/generate.py' - 'app/promptfooconfig.ci.yaml' - 'docs/app/evaluation/generateUniqueId.js' + workflow_dispatch: + inputs: + input_sheet_url: + type: string + description: 'Google Sheet URL for test case inputs' + required: true + default: ${{ secrets.GOOGLE_SHEET_INPUT_URL }} + output_sheet_url: + type: string + description: 'Google Sheet URL for evaluation outputs' + required: true + default: ${{ secrets.GOOGLE_SHEET_OUTPUT_URL }} + chatbot_instance_url: + type: string + description: 'Chatbot API endpoint URL' + required: true + default: 'https://decision-support-tool-dev.navateam.com/api/query' jobs: evaluate: @@ -19,9 +36,10 @@ jobs: contents: read env: - GOOGLE_SHEET_INPUT_URL: ${{ secrets.GOOGLE_SHEET_INPUT_URL }} - GOOGLE_SHEET_OUTPUT_URL: ${{ secrets.GOOGLE_SHEET_OUTPUT_URL }} - PROMPTFOO_API_KEY: ${{ secrets.PROMPTFOO_API_KEY }} + GOOGLE_SHEET_INPUT_URL: ${{ inputs.input_sheet_url }} + GOOGLE_SHEET_OUTPUT_URL: ${{ inputs.output_sheet_url }} + PROMPTFOO_API_KEY: ${{ secrets.PROMPTFOO_API_KEY || '' }} + CHATBOT_INSTANCE_URL: ${{ inputs.chatbot_instance_url }} steps: - name: Checkout code @@ -73,11 +91,12 @@ jobs: # Create a temporary copy of the config file cp app/promptfooconfig.ci.yaml /tmp/promptfooconfig.processed.yaml - sed -i "s|GOOGLE_SHEET_INPUT_URL|${{ secrets.GOOGLE_SHEET_INPUT_URL }}|g" /tmp/promptfooconfig.processed.yaml - sed -i "s|GOOGLE_SHEET_OUTPUT_URL|${{ secrets.GOOGLE_SHEET_OUTPUT_URL }}|g" /tmp/promptfooconfig.processed.yaml + sed -i "s|GOOGLE_SHEET_INPUT_URL|${{ env.GOOGLE_SHEET_INPUT_URL }}|g" /tmp/promptfooconfig.processed.yaml + sed -i "s|GOOGLE_SHEET_OUTPUT_URL|${{ env.GOOGLE_SHEET_OUTPUT_URL }}|g" /tmp/promptfooconfig.processed.yaml + sed -i "s|CHATBOT_INSTANCE_URL|${{ env.CHATBOT_INSTANCE_URL }}|g" /tmp/promptfooconfig.processed.yaml - echo "Config file processed. Validating structure..." - grep -v "GOOGLE_SHEET" /tmp/promptfooconfig.processed.yaml | grep -i "url\|path" + echo "Config file processed, checking..." + grep -v "GOOGLE_SHEET\|CHATBOT_INSTANCE" /tmp/promptfooconfig.processed.yaml | grep -i "url\|path" - name: Run promptfoo evaluation id: eval @@ -85,26 +104,29 @@ jobs: run: | OUTPUT_JSON_FILE="/tmp/promptfoo-output.json" - # Ruan evaluation with share flag to get shareable URL, using the processed config - promptfoo eval --config "/tmp/promptfooconfig.processed.yaml" --share --output "${OUTPUT_JSON_FILE}" --no-cache + if [ -n "$PROMPTFOO_API_KEY" ]; then + promptfoo eval --config "/tmp/promptfooconfig.processed.yaml" --share --output "${OUTPUT_JSON_FILE}" --no-cache + else + promptfoo eval --config "/tmp/promptfooconfig.processed.yaml" --output "${OUTPUT_JSON_FILE}" --no-cache + fi - # Extract evaluation ID and shareable URL if available if [ -f "${OUTPUT_JSON_FILE}" ]; then echo "Output JSON file generated successfully" - EVAL_ID=$(jq -r '.evaluationId // "unknown"' "${OUTPUT_JSON_FILE}") - SHARE_URL=$(jq -r '.shareableUrl // ""' "${OUTPUT_JSON_FILE}") + if [ -n "$PROMPTFOO_API_KEY" ]; then + EVAL_ID=$(jq -r '.evaluationId // "unknown"' "${OUTPUT_JSON_FILE}") + SHARE_URL=$(jq -r '.shareableUrl // ""' "${OUTPUT_JSON_FILE}") - if [ -z "${SHARE_URL}" ] && [ "${EVAL_ID}" != "unknown" ] && [ "${EVAL_ID}" != "null" ]; then - echo "No shareable URL found, attempting to share evaluation..." - SHARE_RESULT=$(promptfoo share --id "${EVAL_ID}" --json || echo '{}') - SHARE_URL=$(echo "${SHARE_RESULT}" | jq -r '.shareableUrl // ""') - echo "Share result: ${SHARE_URL}" - fi + if [ -z "${SHARE_URL}" ] && [ "${EVAL_ID}" != "unknown" ] && [ "${EVAL_ID}" != "null" ]; then + echo "No shareable URL found, attempting to share evaluation..." + SHARE_RESULT=$(promptfoo share --id "${EVAL_ID}" --json || echo '{}') + SHARE_URL=$(echo "${SHARE_RESULT}" | jq -r '.shareableUrl // ""') + echo "Share result: ${SHARE_URL}" + fi - echo "eval_id=${EVAL_ID}" >> "${GITHUB_OUTPUT}" - echo "share_url=${SHARE_URL}" >> "${GITHUB_OUTPUT}" - echo "Shareable URL: ${SHARE_URL}" + echo "eval_id=${EVAL_ID}" >> "${GITHUB_OUTPUT}" + echo "share_url=${SHARE_URL}" >> "${GITHUB_OUTPUT}" + fi else echo "No output JSON file was generated" fi diff --git a/app/promptfooconfig.ci.yaml b/app/promptfooconfig.ci.yaml index 57edfc2f..ae5dc148 100644 --- a/app/promptfooconfig.ci.yaml +++ b/app/promptfooconfig.ci.yaml @@ -4,8 +4,7 @@ description: 'Decision Support Tool Evaluation (CI)' prompts: - response providers: - # Use the hardcoded dev endpoint URL directly in CI - - id: "https://decision-support-tool-dev.navateam.com/api/query" + - id: CHATBOT_INSTANCE_URL config: method: POST headers: @@ -28,6 +27,5 @@ evaluateOptions: delay: 1000 showProgressBar: true - tests: GOOGLE_SHEET_INPUT_URL outputPath: GOOGLE_SHEET_OUTPUT_URL From 12fce13d9e05222384c354c34507ddd975e1336e Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Mon, 12 May 2025 11:06:01 -0700 Subject: [PATCH 17/22] fix: remove defaults from dispatch inputs --- .../workflows/promptfoo-googlesheet-evaluation.yml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index 6ba9a70f..be1e73b4 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -15,17 +15,15 @@ on: input_sheet_url: type: string description: 'Google Sheet URL for test case inputs' - required: true - default: ${{ secrets.GOOGLE_SHEET_INPUT_URL }} + required: false output_sheet_url: type: string description: 'Google Sheet URL for evaluation outputs' - required: true - default: ${{ secrets.GOOGLE_SHEET_OUTPUT_URL }} + required: false chatbot_instance_url: type: string description: 'Chatbot API endpoint URL' - required: true + required: false default: 'https://decision-support-tool-dev.navateam.com/api/query' jobs: @@ -36,8 +34,8 @@ jobs: contents: read env: - GOOGLE_SHEET_INPUT_URL: ${{ inputs.input_sheet_url }} - GOOGLE_SHEET_OUTPUT_URL: ${{ inputs.output_sheet_url }} + GOOGLE_SHEET_INPUT_URL: ${{ inputs.input_sheet_url || secrets.GOOGLE_SHEET_INPUT_URL }} + GOOGLE_SHEET_OUTPUT_URL: ${{ inputs.output_sheet_url || secrets.GOOGLE_SHEET_OUTPUT_URL }} PROMPTFOO_API_KEY: ${{ secrets.PROMPTFOO_API_KEY || '' }} CHATBOT_INSTANCE_URL: ${{ inputs.chatbot_instance_url }} From 454ab3e3ef619ae9024d3fa62bfdce66bc5397cb Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Mon, 12 May 2025 11:10:00 -0700 Subject: [PATCH 18/22] fix: add fallback to chatbot instance url --- .github/workflows/promptfoo-googlesheet-evaluation.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index be1e73b4..1f17e3a4 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -24,7 +24,6 @@ on: type: string description: 'Chatbot API endpoint URL' required: false - default: 'https://decision-support-tool-dev.navateam.com/api/query' jobs: evaluate: @@ -37,7 +36,7 @@ jobs: GOOGLE_SHEET_INPUT_URL: ${{ inputs.input_sheet_url || secrets.GOOGLE_SHEET_INPUT_URL }} GOOGLE_SHEET_OUTPUT_URL: ${{ inputs.output_sheet_url || secrets.GOOGLE_SHEET_OUTPUT_URL }} PROMPTFOO_API_KEY: ${{ secrets.PROMPTFOO_API_KEY || '' }} - CHATBOT_INSTANCE_URL: ${{ inputs.chatbot_instance_url }} + CHATBOT_INSTANCE_URL: ${{ inputs.chatbot_instance_url || 'https://decision-support-tool-dev.navateam.com/api/query' }} steps: - name: Checkout code From 95c892e40b49bae20ce727c2051e18b6d4bc1d89 Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Mon, 12 May 2025 11:44:36 -0700 Subject: [PATCH 19/22] Update .github/workflows/promptfoo-googlesheet-evaluation.yml Co-authored-by: Kevin Boyer --- .github/workflows/promptfoo-googlesheet-evaluation.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index 1f17e3a4..53571363 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -24,6 +24,7 @@ on: type: string description: 'Chatbot API endpoint URL' required: false + default: 'https://decision-support-tool-dev.navateam.com/api/query' jobs: evaluate: From 6fc870958001da77666ddc4e06bdd3755ea953d6 Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Mon, 12 May 2025 12:45:07 -0700 Subject: [PATCH 20/22] fix: move config deps to app/promptfoo --- .../promptfoo-googlesheet-evaluation.yml | 17 +++-------------- .../promptfoo}/generateUniqueId.js | 0 app/{ => promptfoo}/promptfooconfig.ci.yaml | 4 ++-- 3 files changed, 5 insertions(+), 16 deletions(-) rename {docs/app/evaluation => app/promptfoo}/generateUniqueId.js (100%) rename app/{ => promptfoo}/promptfooconfig.ci.yaml (81%) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index 53571363..8e9fa5ac 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -8,8 +8,8 @@ on: - 'app/src/chat_api.py' - 'app/src/chat_engine.py' - 'app/src/generate.py' - - 'app/promptfooconfig.ci.yaml' - - 'docs/app/evaluation/generateUniqueId.js' + - 'app/promptfoo/promptfooconfig.ci.yaml' + - 'app/promptfoo/generateUniqueId.js' workflow_dispatch: inputs: input_sheet_url: @@ -67,17 +67,6 @@ jobs: npm install -g googleapis npm install -g @actions/core @actions/github - - name: Create unique ID generator - run: | - cat > /tmp/generateUniqueId.js << 'EOF' - module.exports = function (varName, prompt, otherVars) { - const uniqueId = 'promptfoo-eval-test-' + Date.now().toString() + '-' + Math.random().toString(36).substring(2, 9); - return { - output: uniqueId - }; - }; - EOF - - name: Authenticate with Promptfoo if: env.PROMPTFOO_API_KEY != '' run: | @@ -87,7 +76,7 @@ jobs: - name: Process config file run: | # Create a temporary copy of the config file - cp app/promptfooconfig.ci.yaml /tmp/promptfooconfig.processed.yaml + cp app/promptfoo/promptfooconfig.ci.yaml /tmp/promptfooconfig.processed.yaml sed -i "s|GOOGLE_SHEET_INPUT_URL|${{ env.GOOGLE_SHEET_INPUT_URL }}|g" /tmp/promptfooconfig.processed.yaml sed -i "s|GOOGLE_SHEET_OUTPUT_URL|${{ env.GOOGLE_SHEET_OUTPUT_URL }}|g" /tmp/promptfooconfig.processed.yaml diff --git a/docs/app/evaluation/generateUniqueId.js b/app/promptfoo/generateUniqueId.js similarity index 100% rename from docs/app/evaluation/generateUniqueId.js rename to app/promptfoo/generateUniqueId.js diff --git a/app/promptfooconfig.ci.yaml b/app/promptfoo/promptfooconfig.ci.yaml similarity index 81% rename from app/promptfooconfig.ci.yaml rename to app/promptfoo/promptfooconfig.ci.yaml index ae5dc148..3cd698f2 100644 --- a/app/promptfooconfig.ci.yaml +++ b/app/promptfoo/promptfooconfig.ci.yaml @@ -19,7 +19,7 @@ providers: defaultTest: vars: - uniqueSessionId: file:///tmp/generateUniqueId.js # Use the tmp script that we make in the GHA workflow + uniqueSessionId: file://promptfoo/generateUniqueId.js options: timeout: 360000 @@ -28,4 +28,4 @@ evaluateOptions: showProgressBar: true tests: GOOGLE_SHEET_INPUT_URL -outputPath: GOOGLE_SHEET_OUTPUT_URL +outputPath: GOOGLE_SHEET_OUTPUT_URL \ No newline at end of file From b7d1cbbaa005c7d1ad3c6d908adf305a6cbdf1ea Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Mon, 12 May 2025 12:50:05 -0700 Subject: [PATCH 21/22] fix: path to js script --- .github/workflows/promptfoo-googlesheet-evaluation.yml | 2 +- app/promptfoo/promptfooconfig.ci.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index 8e9fa5ac..5562cd85 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -75,7 +75,7 @@ jobs: - name: Process config file run: | - # Create a temporary copy of the config file + cp app/promptfoo/generateUniqueId.js /tmp/generateUniqueId.js cp app/promptfoo/promptfooconfig.ci.yaml /tmp/promptfooconfig.processed.yaml sed -i "s|GOOGLE_SHEET_INPUT_URL|${{ env.GOOGLE_SHEET_INPUT_URL }}|g" /tmp/promptfooconfig.processed.yaml diff --git a/app/promptfoo/promptfooconfig.ci.yaml b/app/promptfoo/promptfooconfig.ci.yaml index 3cd698f2..3da46619 100644 --- a/app/promptfoo/promptfooconfig.ci.yaml +++ b/app/promptfoo/promptfooconfig.ci.yaml @@ -19,7 +19,7 @@ providers: defaultTest: vars: - uniqueSessionId: file://promptfoo/generateUniqueId.js + uniqueSessionId: file:///tmp/generateUniqueId.js options: timeout: 360000 From 4d1908b84a2e7d7aa17bcf63c3d7ca494c327aa3 Mon Sep 17 00:00:00 2001 From: fg-nava <189638926+fg-nava@users.noreply.github.com> Date: Mon, 12 May 2025 13:13:34 -0700 Subject: [PATCH 22/22] try use envsubst in nunjucks --- .github/workflows/promptfoo-googlesheet-evaluation.yml | 8 ++------ app/promptfoo/promptfooconfig.ci.yaml | 6 +++--- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/.github/workflows/promptfoo-googlesheet-evaluation.yml b/.github/workflows/promptfoo-googlesheet-evaluation.yml index 5562cd85..2417f634 100644 --- a/.github/workflows/promptfoo-googlesheet-evaluation.yml +++ b/.github/workflows/promptfoo-googlesheet-evaluation.yml @@ -53,7 +53,7 @@ jobs: - name: Install system dependencies run: | sudo apt-get update - sudo apt-get install -y jq + sudo apt-get install -y jq gettext - name: Set up Google Cloud credentials run: | @@ -76,11 +76,7 @@ jobs: - name: Process config file run: | cp app/promptfoo/generateUniqueId.js /tmp/generateUniqueId.js - cp app/promptfoo/promptfooconfig.ci.yaml /tmp/promptfooconfig.processed.yaml - - sed -i "s|GOOGLE_SHEET_INPUT_URL|${{ env.GOOGLE_SHEET_INPUT_URL }}|g" /tmp/promptfooconfig.processed.yaml - sed -i "s|GOOGLE_SHEET_OUTPUT_URL|${{ env.GOOGLE_SHEET_OUTPUT_URL }}|g" /tmp/promptfooconfig.processed.yaml - sed -i "s|CHATBOT_INSTANCE_URL|${{ env.CHATBOT_INSTANCE_URL }}|g" /tmp/promptfooconfig.processed.yaml + envsubst < app/promptfoo/promptfooconfig.ci.yaml > /tmp/promptfooconfig.processed.yaml echo "Config file processed, checking..." grep -v "GOOGLE_SHEET\|CHATBOT_INSTANCE" /tmp/promptfooconfig.processed.yaml | grep -i "url\|path" diff --git a/app/promptfoo/promptfooconfig.ci.yaml b/app/promptfoo/promptfooconfig.ci.yaml index 3da46619..bc2b71d2 100644 --- a/app/promptfoo/promptfooconfig.ci.yaml +++ b/app/promptfoo/promptfooconfig.ci.yaml @@ -4,7 +4,7 @@ description: 'Decision Support Tool Evaluation (CI)' prompts: - response providers: - - id: CHATBOT_INSTANCE_URL + - id: ${CHATBOT_INSTANCE_URL} config: method: POST headers: @@ -27,5 +27,5 @@ evaluateOptions: delay: 1000 showProgressBar: true -tests: GOOGLE_SHEET_INPUT_URL -outputPath: GOOGLE_SHEET_OUTPUT_URL \ No newline at end of file +tests: ${GOOGLE_SHEET_INPUT_URL} +outputPath: ${GOOGLE_SHEET_OUTPUT_URL} \ No newline at end of file