-#!/usr/bin/env node
+#!/usr/bin/env bun
 import { strict as assert } from "node:assert";
 import process from "node:process";

 import { execSync } from "node:child_process";
 import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
 import { dirname, join } from "node:path";
 import { tmpdir } from "node:os";
-
-import { createOpencode } from "@opencode-ai/sdk";
-import detectPort from "detect-port";
-
 import { getAgent, listAgents } from "~/agents/index.js";
 import type { AgentRegistration } from "~/agents/index.js";
 import { listScores, scores as scoreRegistry } from "~/scores/index.js";
 import { dataset } from "~/lib/dataset.js";
 import type { DatasetEval, ScoreAssignment } from "~/lib/dataset.js";
-import { generatePlannerTasks, type PlannerTask } from "~/lib/planner.js";
+import { generatePromptsForEval } from "~/lib/prompts.js";
 import {
   generateActionsSummary,
   type EpisodeActions,
 } from "~/lib/summarizer.js";
-import { fetchPlannerCommitDiffs } from "~/lib/github.js";
 import { finalizeAgentChanges } from "~/lib/finalizeAgentChanges.js";
+import { loadPromptsFile } from "~/lib/prompts.js";
 import { judges, getJudgeModelId } from "~/judges.js";
-import {
-  aggregateScores,
-  averageJudgeScore,
-} from "~/lib/utils/scoreAggregation.js";
+import { aggregateScores } from "~/lib/utils/scoreAggregation.js";
 import type { Judge } from "~/lib/judgeTypes.js";
 import type {
   AggregationSummary,
@@ -155,6 +148,62 @@ function isHelpRequest(arg: string | undefined): boolean {
   );
 }

+async function handlePrompts(args: string[]): Promise<void> {
+  if (args.length === 0 || args[0] === "--help" || args[0] === "-h") {
+    console.log("Usage: orvl prompts [--eval <repo>]");
+    console.log("");
+    console.log("Options:");
+    console.log(
+      "  --eval <repo>  Generate prompts for a specific evaluation (e.g., DataDog/datadog-lambda-python)",
+    );
+    console.log("");
+    console.log("Examples:");
+    console.log("  orvl prompts --eval DataDog/datadog-lambda-python");
+    return;
+  }
+
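+  // Default: generate prompts for every dataset entry; --eval narrows to one repo.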
+  let generateAll = true;
+  let targetEval: string | undefined;
+
+  for (let i = 0; i < args.length; i++) {
+    const arg = args[i];
+    if (arg === "--eval") {
+      generateAll = false;
+      i++;
+      targetEval = args[i];
+      assert(targetEval, "Option --eval requires a value");
+    } else {
+      console.error(`Unknown option: ${arg}`);
+      process.exitCode = 1;
+      return;
+    }
+  }
+
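+  // Resolve which evaluations to generate prompts for.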
+  let evalsToGenerate: DatasetEval[] = [];
+
+  if (generateAll) {
+    evalsToGenerate = [...dataset];
+  } else if (targetEval) {
+    const evalDef = dataset.find((entry) => entry.repo === targetEval);
+    if (!evalDef) {
+      console.error(`Evaluation not found: ${targetEval}`);
+      console.error("Available evaluations:");
+      dataset.forEach((entry) => console.error(`  - ${entry.repo}`));
+      process.exitCode = 1;
+      return;
+    }
+    evalsToGenerate = [evalDef];
+  }
+
+  console.log(
+    `Generating prompts for ${evalsToGenerate.length} evaluation(s)...\n`,
+  );
+
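+  // Run prompt generation for the selected evaluations concurrently.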
+  await Promise.all(
+    evalsToGenerate.map((evalDef) => generatePromptsForEval(evalDef)),
+  );
+}
+
 async function main(): Promise<void> {
   const args = process.argv.slice(2);
   const agentName = args[0];
@@ -164,6 +213,12 @@ async function main(): Promise<void> {
     return;
   }

+  // Handle special commands
+  if (agentName === "prompts") {
+    await handlePrompts(args.slice(1));
+    return;
+  }
+
   let options: ParsedCliOptions;
   try {
     options = parseOptions(args.slice(1));
@@ -215,34 +270,12 @@ async function main(): Promise<void> {

   const evalId = evalDefinition.repo;

-  let plannerTasks: PlannerTask[] = [];
-
-  try {
-    console.log(`[${evalId} planner] Fetching commit diffs from GitHub...`);
-    const commitDiffs = await fetchPlannerCommitDiffs(evalDefinition);
-
-    assert(
-      commitDiffs.length > 0,
-      `No commits found between ${evalDefinition.from} and ${evalDefinition.to} for ${evalDefinition.repo}.`,
-    );
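+  // Tasks now come from a pre-generated prompts file instead of the planner.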
+  const tasks = loadPromptsFile(evalDefinition.prompts);

-    plannerTasks = await generatePlannerTasks(evalDefinition, commitDiffs);
-
-    assert(
-      plannerTasks.length > 0,
-      `Planner produced no tasks for ${evalDefinition.repo} (${evalDefinition.from}..${evalDefinition.to}).`,
-    );
-  } catch (error) {
-    if (error instanceof Error) {
-      console.error(
-        `Failed to prepare evaluation ${evalId}: ${error.message}`,
-      );
-    } else {
-      console.error("Failed to prepare evaluation", evalId);
-    }
-    process.exitCode = 1;
-    assert(false, "evaluation preparation failed");
-  }
+  assert(
+    tasks.length > 0,
+    `No prompts found in ${evalDefinition.prompts} for ${evalDefinition.repo}.`,
+  );

   const executeCombination = async (): Promise<{
     lines: string[];
@@ -309,7 +342,7 @@ async function main(): Promise<void> {
     let usage: Usage = { input: 0, output: 0 };
     const episodeActions: string[] = [];

-    for (const task of plannerTasks) {
+    for (const task of tasks) {
       const logPrefix = `${prefix} ${task.commit}`;

       try {
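
Note: the diff consumes loadPromptsFile and generatePromptsForEval from ~/lib/prompts.js without showing their implementations. A minimal sketch of the contract the run loop implies, assuming the prompts file is a JSON array and a hypothetical `prompt` field (only `commit` is visible above, via task.commit):

// Hypothetical sketch -- not the actual ~/lib/prompts.js. Assumes a JSON
// prompts file; the real on-disk format is not shown in this diff.
import { readFileSync } from "node:fs";

interface PromptTask {
  commit: string; // used above to build each task's log prefix
  prompt: string; // assumed field name for the task's instructions
}

function loadPromptsFile(path: string): PromptTask[] {
  const parsed: unknown = JSON.parse(readFileSync(path, "utf8"));
  if (!Array.isArray(parsed)) {
    throw new Error(`Expected a JSON array of tasks in ${path}`);
  }
  return parsed as PromptTask[];
}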