1
1
import process from "process" ;
2
2
import { EvalCategorySchema } from "@/types/evals" ;
3
3
4
- // Extract command-line arguments passed to this script.
5
- const args = process . argv . slice ( 2 ) ;
4
/** Options recognised on the command line, plus every argument that was not recognised. */
type ParsedCliArgs = {
  env?: string;
  trials?: number;
  concurrency?: number;
  extractMethod?: string;
  leftover: string[];
};

/**
 * Parse a strictly positive base-10 integer.
 *
 * Unlike a bare `parseInt`, this rejects trailing garbage ("3abc"), signs,
 * decimals and zero, so a mistyped count is never silently truncated into a
 * different number.
 *
 * @param text - The raw text following `=` in a `key=value` argument.
 * @returns The parsed integer, or `undefined` when `text` is not a positive integer.
 */
function parsePositiveInt(text: string): number | undefined {
  if (!/^\d+$/.test(text)) {
    return undefined;
  }
  const value = Number.parseInt(text, 10);
  return value > 0 ? value : undefined;
}

/**
 * Split command-line arguments into recognised `key=value` options and leftovers.
 *
 * Recognised keys are `env`, `trials`, `concurrency` and `--extract-method`.
 * The value is everything after the FIRST `=`, so values that themselves
 * contain `=` are preserved intact. Arguments without `=` or with an unknown
 * key are collected, in order, in `leftover`.
 *
 * @param argv - Arguments to parse (typically `process.argv.slice(2)`).
 * @returns The parsed options; invalid `trials`/`concurrency` values are left unset.
 */
function parseCliArgs(argv: readonly string[]): ParsedCliArgs {
  const parsed: ParsedCliArgs = { leftover: [] };

  for (const arg of argv) {
    const separatorIndex = arg.indexOf("=");
    if (separatorIndex === -1) {
      parsed.leftover.push(arg);
      continue;
    }

    const key = arg.slice(0, separatorIndex);
    const value = arg.slice(separatorIndex + 1);

    if (key === "env") {
      parsed.env = value.toLowerCase();
    } else if (key === "trials" || key === "concurrency") {
      const count = parsePositiveInt(value);
      if (count === undefined) {
        // Recognised option with an unusable value: consume it (do not treat
        // it as an eval name) but tell the user it had no effect.
        console.warn(
          `Warning: ignoring "${arg}"; expected a positive integer.`,
        );
      } else if (key === "trials") {
        parsed.trials = count;
      } else {
        parsed.concurrency = count;
      }
    } else if (key === "--extract-method") {
      parsed.extractMethod = value;
    } else {
      parsed.leftover.push(arg);
    }
  }

  return parsed;
}

// Arguments passed to this script (everything after the node binary and script path).
const rawArgs = process.argv.slice(2);
const parsedArgs = parseCliArgs(rawArgs);

/** Apply environment overrides requested on the command line. */
if (parsedArgs.env === "browserbase") {
  process.env.EVAL_ENV = "BROWSERBASE";
} else if (parsedArgs.env === "local") {
  process.env.EVAL_ENV = "LOCAL";
} else if (parsedArgs.env !== undefined) {
  // EVAL_ENV is left untouched, so surface the typo instead of silently
  // running in whatever environment was already configured.
  console.warn(
    `Warning: ignoring unknown env "${parsedArgs.env}"; expected "browserbase" or "local".`,
  );
}

if (parsedArgs.trials !== undefined) {
  process.env.EVAL_TRIAL_COUNT = String(parsedArgs.trials);
}
if (parsedArgs.concurrency !== undefined) {
  process.env.EVAL_MAX_CONCURRENCY = String(parsedArgs.concurrency);
}

// `||` rather than `??` is deliberate: an empty `--extract-method=` must also
// fall back to the default instead of exporting an empty method name.
const extractMethod = parsedArgs.extractMethod || "domExtract";
process.env.EXTRACT_METHOD = extractMethod;

const useTextExtract = extractMethod === "textExtract";
const useAccessibilityTree = extractMethod === "accessibilityTree";
6
55
7
- /**
8
- * The default categories of evaluations to run if none is specified.
9
- * These categories represent different styles or types of tasks.
10
- */
11
56
const DEFAULT_EVAL_CATEGORIES = process . env . EVAL_CATEGORIES
12
57
? process . env . EVAL_CATEGORIES . split ( "," )
13
58
: [
@@ -19,45 +64,17 @@ const DEFAULT_EVAL_CATEGORIES = process.env.EVAL_CATEGORIES
19
64
"text_extract" ,
20
65
] ;
21
66
22
- /**
23
- * Determine which extraction method to use for tasks that involve extraction.
24
- * By default, "domExtract" is used. However, if a `--extract-method=<method>`
25
- * argument is provided, it will override the default.
26
- */
27
- let extractMethod = "domExtract" ;
28
- const extractMethodArg = args . find ( ( arg ) =>
29
- arg . startsWith ( "--extract-method=" ) ,
30
- ) ;
31
- if ( extractMethodArg ) {
32
- extractMethod = extractMethodArg . split ( "=" ) [ 1 ] ;
33
- }
34
-
35
- // Set the extraction method in the process environment so tasks can reference it.
36
- process . env . EXTRACT_METHOD = extractMethod ;
37
- const useTextExtract = process . env . EXTRACT_METHOD === "textExtract" ;
38
- const useAccessibilityTree = process . env . EXTRACT_METHOD === "accessibilityTree" ;
39
-
40
- /**
41
- * Variables for filtering which tasks to run:
42
- * - `filterByCategory`: if provided, only tasks that belong to this category will be run.
43
- * - `filterByEvalName`: if provided, only the task with this name will be run.
44
- */
67
+ // Finally, interpret the leftover arguments: either "category <name>" or a single eval name.
45
68
let filterByCategory : string | null = null ;
46
69
let filterByEvalName : string | null = null ;
47
70
48
- /**
49
- * Check the first argument:
50
- * - If it is "category", the next argument should be the category name.
51
- * - Otherwise, assume it is a specific evaluation (task) name.
52
- */
53
- if ( args . length > 0 ) {
54
- if ( args [ 0 ] . toLowerCase ( ) === "category" ) {
55
- filterByCategory = args [ 1 ] ;
71
+ if ( parsedArgs . leftover . length > 0 ) {
72
+ if ( parsedArgs . leftover [ 0 ] . toLowerCase ( ) === "category" ) {
73
+ filterByCategory = parsedArgs . leftover [ 1 ] ;
56
74
if ( ! filterByCategory ) {
57
75
console . error ( "Error: Category name not specified." ) ;
58
76
process . exit ( 1 ) ;
59
77
}
60
- // Validate that the category is one of the known ones.
61
78
try {
62
79
EvalCategorySchema . parse ( filterByCategory ) ;
63
80
} catch {
@@ -67,8 +84,8 @@ if (args.length > 0) {
67
84
process . exit ( 1 ) ;
68
85
}
69
86
} else {
70
- // Otherwise, treat it as a filter by evaluation name.
71
- filterByEvalName = args [ 0 ] ;
87
+ // If leftover[0] is not "category", interpret it as a task/eval name
88
+ filterByEvalName = parsedArgs . leftover [ 0 ] ;
72
89
}
73
90
}
74
91
0 commit comments