@@ -74,22 +74,22 @@ def __init__(self, **data):
7474class  GraderSpec (BaseModel ):
7575    """Grader configuration for evaluation.""" 
7676
77-     kind : GraderKind  =  Field (description = "Type of grader (tool  or rubric )" )
77+     kind : GraderKind  =  Field (description = "Type of grader (tool, model_judge,  or letta_judge )" )
7878
7979    # Optional display name for UI/CLI output 
8080    display_name : Optional [str ] =  Field (default = None , description = "Human-friendly name for this metric" )
8181
8282    function : Optional [str ] =  Field (default = None , description = "Name of grading function for tool grader" )
8383
84-     prompt : Optional [str ] =  Field (default = None , description = "Rubric prompt  for LLM  judge" )
85-     prompt_path : Optional [Path ] =  Field (default = None , description = "Path to file containing rubric  prompt" )
86-     model : Optional [str ] =  Field (default = "gpt-4o-mini" , description = "LLM model to use for rubric grading " )
87-     temperature : Optional [float ] =  Field (default = 0.0 , description = "Temperature for LLM  judge" )
88-     provider : Optional [LLMProvider ] =  Field (default = LLMProvider .OPENAI , description = "LLM provider for rubric grading " )
89-     max_retries : Optional [int ] =  Field (default = 5 , description = "Maximum number of retries for rubric grading " )
90-     timeout : Optional [float ] =  Field (default = 120.0 , description = "Timeout for rubric grading  in seconds" )
84+     prompt : Optional [str ] =  Field (default = None , description = "Prompt  for model judge or letta  judge" )
85+     prompt_path : Optional [Path ] =  Field (default = None , description = "Path to file containing prompt" )
86+     model : Optional [str ] =  Field (default = "gpt-4o-mini" , description = "LLM model to use for model judge " )
87+     temperature : Optional [float ] =  Field (default = 0.0 , description = "Temperature for model  judge" )
88+     provider : Optional [LLMProvider ] =  Field (default = LLMProvider .OPENAI , description = "LLM provider for model judge " )
89+     max_retries : Optional [int ] =  Field (default = 5 , description = "Maximum number of retries for model judge " )
90+     timeout : Optional [float ] =  Field (default = 120.0 , description = "Timeout for model judge  in seconds" )
9191    rubric_vars : Optional [List [str ]] =  Field (
92-         default = None , description = "List of required custom variables for rubric  substitution" 
92+         default = None , description = "List of required custom variables for prompt  substitution" 
9393    )
9494
9595    # Agent-based judge fields 
@@ -115,25 +115,13 @@ def __init__(self, **data):
115115            if  not  self .function :
116116                raise  ValueError ("Tool grader requires function name" )
117117            if  self .rubric_vars :
118-                 raise  ValueError ("Tool grader cannot use rubric_vars (only available for rubric graders)" )
119-         elif  self .kind  ==  GraderKind .RUBRIC :
120-             # check if agent-based or LLM-based judge 
121-             if  self .agent_file :
122-                 # agent-based judge validation 
123-                 if  not  self .prompt  and  not  self .prompt_path :
124-                     raise  ValueError ("Agent judge requires either prompt or prompt_path for rubric text" )
125-                 if  self .prompt  and  self .prompt_path :
126-                     raise  ValueError ("Agent judge cannot have both prompt and prompt_path" )
127-                 if  self .model  !=  "gpt-4o-mini"  or  self .temperature  !=  0.0  or  self .provider  !=  LLMProvider .OPENAI :
128-                     raise  ValueError (
129-                         "Agent judge should not specify model/temperature/provider (those are only for LLM judges)" 
130-                     )
131-             else :
132-                 # LLM-based judge validation 
133-                 if  not  self .prompt  and  not  self .prompt_path :
134-                     raise  ValueError ("Rubric grader requires either prompt or prompt_path" )
135-                 if  self .prompt  and  self .prompt_path :
136-                     raise  ValueError ("Rubric grader cannot have both prompt and prompt_path" )
118+                 raise  ValueError ("Tool grader cannot use rubric_vars (only available for model_judge and letta_judge)" )
119+         elif  self .kind  ==  GraderKind .MODEL_JUDGE :
120+             # model judge validation 
121+             if  not  self .prompt  and  not  self .prompt_path :
122+                 raise  ValueError ("Model judge requires either prompt or prompt_path" )
123+             if  self .prompt  and  self .prompt_path :
124+                 raise  ValueError ("Model judge cannot have both prompt and prompt_path" )
137125
138126            # load prompt from file if needed 
139127            if  self .prompt_path :
@@ -142,7 +130,7 @@ def __init__(self, **data):
142130        elif  self .kind  ==  GraderKind .LETTA_JUDGE :
143131            # letta judge validation 
144132            if  not  self .prompt  and  not  self .prompt_path :
145-                 raise  ValueError ("Letta judge requires either prompt or prompt_path for rubric text " )
133+                 raise  ValueError ("Letta judge requires either prompt or prompt_path" )
146134            if  self .prompt  and  self .prompt_path :
147135                raise  ValueError ("Letta judge cannot have both prompt and prompt_path" )
148136
@@ -153,10 +141,10 @@ def __init__(self, **data):
153141                    "To use a custom judge_tool_name, provide a custom agent_file." 
154142                )
155143
156-             # disallow LLM -specific fields for letta judge 
144+             # disallow model -specific fields for letta judge 
157145            if  self .model  !=  "gpt-4o-mini"  or  self .temperature  !=  0.0  or  self .provider  !=  LLMProvider .OPENAI :
158146                raise  ValueError (
159-                     "Letta judge should not specify model/temperature/provider (those are only for LLM  judges)" 
147+                     "Letta judge should not specify model/temperature/provider (those are only for model  judges)" 
160148                )
161149
162150            # load prompt from file if needed 
0 commit comments