sst
diff --git a/cli.ts b/cli.ts
Lines changed: 72 additions & 39 deletions
diff --git a/dataset.yaml b/dataset.yaml
Lines changed: 3 additions & 0 deletions
diff --git a/lib/dataset.ts b/lib/dataset.ts
Lines changed: 1 addition & 0 deletions
diff --git a/lib/github.ts b/lib/github.ts
Lines changed: 10 additions & 5 deletions
@@ -1,32 +1,25 @@
-#!/usr/bin/env node
+#!/usr/bin/env bun
 import { strict as assert } from "node:assert";
 import process from "node:process";
 
 import { execSync } from "node:child_process";
 import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
 import { dirname, join } from "node:path";
 import { tmpdir } from "node:os";
-
-import { createOpencode } from "@opencode-ai/sdk";
-import detectPort from "detect-port";
-
 import { getAgent, listAgents } from "~/agents/index.js";
 import type { AgentRegistration } from "~/agents/index.js";
 import { listScores, scores as scoreRegistry } from "~/scores/index.js";
 import { dataset } from "~/lib/dataset.js";
 import type { DatasetEval, ScoreAssignment } from "~/lib/dataset.js";
-import { generatePlannerTasks, type PlannerTask } from "~/lib/planner.js";
+import { generatePromptsForEval } from "~/lib/prompts.js";
 import {
   generateActionsSummary,
   type EpisodeActions,
 } from "~/lib/summarizer.js";
-import { fetchPlannerCommitDiffs } from "~/lib/github.js";
 import { finalizeAgentChanges } from "~/lib/finalizeAgentChanges.js";
+import { loadPromptsFile } from "~/lib/prompts.js";
 import { judges, getJudgeModelId } from "~/judges.js";
-import {
-  aggregateScores,
-  averageJudgeScore,
-} from "~/lib/utils/scoreAggregation.js";
+import { aggregateScores } from "~/lib/utils/scoreAggregation.js";
 import type { Judge } from "~/lib/judgeTypes.js";
 import type {
   AggregationSummary,
@@ -155,6 +148,62 @@ function isHelpRequest(arg: string | undefined): boolean {
   );
 }
 
+/**
+ * Handles the `orvl prompts` subcommand.
+ *
+ * With no options, prompts are generated for every entry in `dataset`. With
+ * `--eval <repo>`, only the entry whose `repo` equals the given value is
+ * processed. `--help` or `-h` as the first argument prints usage and returns
+ * without generating anything.
+ *
+ * Invalid input (unknown option, missing `--eval` value, unknown repo) is
+ * reported on stderr and signalled via `process.exitCode = 1` instead of
+ * throwing, so all argument errors behave the same way.
+ *
+ * @param args - Arguments that follow the `prompts` command name.
+ * @returns A promise that resolves once `generatePromptsForEval` has completed
+ *   for every selected evaluation; it rejects if any of those calls rejects.
+ */
+async function handlePrompts(args: string[]): Promise<void> {
+  // Only an explicit help flag prints usage. An empty argument list must fall
+  // through, otherwise the "generate for all evaluations" path is unreachable.
+  if (args[0] === "--help" || args[0] === "-h") {
+    console.log("Usage: orvl prompts [--eval <repo>] ");
+    console.log("");
+    console.log(
+      "With no options, prompts are generated for every evaluation in the dataset.",
+    );
+    console.log("");
+    console.log("Options:");
+    console.log(
+      "  --eval <repo>  Generate prompts for a specific evaluation (e.g., DataDog/datadog-lambda-python)",
+    );
+    console.log("");
+    console.log("Examples:");
+    console.log("  orvl prompts");
+    console.log("  orvl prompts --eval DataDog/datadog-lambda-python");
+    return;
+  }
+
+  // `undefined` means "no --eval given", i.e. generate for the whole dataset.
+  let targetEval: string | undefined;
+
+  for (let i = 0; i < args.length; i++) {
+    const arg = args[i];
+    if (arg !== "--eval") {
+      console.error(`Unknown option: ${arg}`);
+      process.exitCode = 1;
+      return;
+    }
+
+    // Consume the option's value; a trailing `--eval` has none.
+    i++;
+    targetEval = args[i];
+    if (!targetEval) {
+      console.error("Option --eval requires a value");
+      process.exitCode = 1;
+      return;
+    }
+  }
+
+  let evalsToGenerate: DatasetEval[];
+
+  if (targetEval === undefined) {
+    evalsToGenerate = [...dataset];
+  } else {
+    const evalDef = dataset.find((entry) => entry.repo === targetEval);
+    if (!evalDef) {
+      console.error(`Evaluation not found: ${targetEval}`);
+      console.error("Available evaluations:");
+      dataset.forEach((entry) => console.error(`  - ${entry.repo}`));
+      process.exitCode = 1;
+      return;
+    }
+    evalsToGenerate = [evalDef];
+  }
+
+  console.log(
+    `Generating prompts for ${evalsToGenerate.length} evaluation(s)...\n`,
+  );
+
+  // Evaluations are independent of each other here, so run them concurrently.
+  await Promise.all(
+    evalsToGenerate.map((evalDef) => generatePromptsForEval(evalDef)),
+  );
+}
+
 async function main(): Promise<void> {
   const args = process.argv.slice(2);
   const agentName = args[0];
@@ -164,6 +213,12 @@ async function main(): Promise<void> {
     return;
   }
 
+  // Handle special commands
+  if (agentName === "prompts") {
+    await handlePrompts(args.slice(1));
+    return;
+  }
+
   let options: ParsedCliOptions;
   try {
     options = parseOptions(args.slice(1));
@@ -215,34 +270,12 @@ async function main(): Promise<void> {
 
     const evalId = evalDefinition.repo;
 
-    let plannerTasks: PlannerTask[] = [];
-
-    try {
-      console.log(`[${evalId} planner] Fetching commit diffs from GitHub...`);
-      const commitDiffs = await fetchPlannerCommitDiffs(evalDefinition);
-
-      assert(
-        commitDiffs.length > 0,
-        `No commits found between ${evalDefinition.from} and ${evalDefinition.to} for ${evalDefinition.repo}.`,
-      );
+    const tasks = loadPromptsFile(evalDefinition.prompts);
 
-      plannerTasks = await generatePlannerTasks(evalDefinition, commitDiffs);
-
-      assert(
-        plannerTasks.length > 0,
-        `Planner produced no tasks for ${evalDefinition.repo} (${evalDefinition.from}..${evalDefinition.to}).`,
-      );
-    } catch (error) {
-      if (error instanceof Error) {
-        console.error(
-          `Failed to prepare evaluation ${evalId}: ${error.message}`,
-        );
-      } else {
-        console.error("Failed to prepare evaluation", evalId);
-      }
-      process.exitCode = 1;
-      assert(false, "evaluation preparation failed");
-    }
+    assert(
+      tasks.length > 0,
+      `No prompts found in ${evalDefinition.prompts} for ${evalDefinition.repo}.`,
+    );
 
     const executeCombination = async (): Promise<{
       lines: string[];
@@ -309,7 +342,7 @@ async function main(): Promise<void> {
           let usage: Usage = { input: 0, output: 0 };
           const episodeActions: string[] = [];
 
-          for (const task of plannerTasks) {
+          for (const task of tasks) {
             const logPrefix = `${prefix} ${task.commit}`;
 
             try {
 
@@ -1,6 +1,7 @@
 - repo: prismicio-community/course-fizzi-next
   from: 15037446358508e153e765da49f8f5defa7fbbf6
   to: 2760114f2647ebec8f63e0ecc2dc87a8cd4096ac
+  prompts: prompts/course-fizzi-next.yaml
   issues: []
   scores:
     api-signature:
@@ -22,6 +23,7 @@
 - repo: DataDog/datadog-lambda-python
   from: 93d4a07fa61a4d4d2feec08e722505a9e0cc8657
   to: d7763789f262b2da228f8210509e302e6e510d0a
+  prompts: prompts/datadog-lambda-python.yaml
   issues: []
   scores:
     api-signature:
@@ -44,6 +46,7 @@
 - repo: AlaminPu1007/algorithm-visualizer
   from: ca409519ec96a83ec8d6c2ba30f2487f8d601719
   to: 21845e972dd8e2378cbcd16accc5ae8cdd37acb2
+  prompts: prompts/algorithm-visualizer.yaml
   issues: []
   scores:
     api-signature:
 
@@ -16,6 +16,7 @@ const datasetSchema = z.array(
       .regex(/^[^/]+\/[^/]+$/, "repo must follow the format <owner>/<name>."),
     from: z.string().min(1, "from commit SHA is required."),
     to: z.string().min(1, "to commit SHA is required."),
+    prompts: z.string().min(1, "prompts file path is required."),
     issues: z.array(z.number().int()),
     scores: z.record(scoreConfigSchema)
   })
 
@@ -4,7 +4,6 @@ import { request as octokitRequest } from "@octokit/request";
 import type { RequestInterface } from "@octokit/types";
 
 import type { DatasetEval } from "~/lib/dataset.js";
-import type { PlannerCommitDiff } from "~/lib/planner.js";
 
 const DIFF_ACCEPT_HEADER = "application/vnd.github.v3.diff";
 
@@ -64,9 +63,15 @@ export async function fetchComparisonDiff(entry: DatasetEval): Promise<string> {
   return diff;
 }
 
-export async function fetchPlannerCommitDiffs(
+/**
+ * One commit together with its diff; `fetchCommitDiffs` resolves to an array
+ * of these.
+ */
+export interface CommitDiff {
+  /** Commit identifier; also used to name the commit in fetch-failure logs. */
+  sha: string;
+  /** Title of the commit — presumably derived from the commit message; confirm in `fetchCommitDiffs`. */
+  title: string;
+  /** Diff fetched for this commit (exact text format not visible here — TODO confirm). */
+  diff: string;
+}
+
+export async function fetchCommitDiffs(
   entry: DatasetEval,
-): Promise<PlannerCommitDiff[]> {
+): Promise<CommitDiff[]> {
   const client = getRequestClient();
   const { owner, repo } = splitRepo(entry);
 
@@ -124,7 +129,7 @@ export async function fetchPlannerCommitDiffs(
           sha,
           title,
           diff,
-        } satisfies PlannerCommitDiff;
+        } satisfies CommitDiff;
       } catch (error) {
         console.error(
           `Failed to fetch diff for commit ${sha} in ${entry.repo}:`,
@@ -135,5 +140,5 @@ export async function fetchPlannerCommitDiffs(
     }),
   );
 
-  return results.filter((value): value is PlannerCommitDiff => value !== null);
+  return results.filter((value): value is CommitDiff => value !== null);
 }