Commit 1c5f098

rename response to content for apt repr
1 parent f81f902 · commit 1c5f098

10 files changed: +100 −100 lines changed

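For callers of the library, this commit amounts to renaming one keyword argument of `evaluate()`. Below is a minimal sketch of a call site before and after the rename, drawn from the README hunk further down; the `from vllm_judge import Judge` import path and the `asyncio` wrapper are assumptions, since this diff only shows `Judge.from_url(...)` being used inside an async context.

```python
import asyncio

from vllm_judge import Judge  # import path assumed; the diff only shows Judge.from_url(...)


async def main() -> None:
    judge = Judge.from_url("http://vllm-server:8000")

    # Before this commit, the text under evaluation was passed as `response`:
    #     result = await judge.evaluate(
    #         response="The Earth orbits around the Sun.",
    #         criteria="scientific accuracy",
    #     )

    # After this commit, the same call passes it as `content`:
    result = await judge.evaluate(
        content="The Earth orbits around the Sun.",
        criteria="scientific accuracy",
    )
    print(f"Decision: {result.decision}")
    print(f"Reasoning: {result.reasoning}")


asyncio.run(main())
```

If the old keyword is not kept as an alias (this diff does not show one), existing callers need the same one-word change at each `evaluate()` call site.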

README.md

Lines changed: 5 additions & 5 deletions
@@ -40,7 +40,7 @@ judge = Judge.from_url("http://vllm-server:8000")

 # Simple evaluation
 result = await judge.evaluate(
-    response="The Earth orbits around the Sun.",
+    content="The Earth orbits around the Sun.",
     criteria="scientific accuracy"
 )
 print(f"Decision: {result.decision}")
@@ -50,13 +50,13 @@ print(f"Reasoning: {result.reasoning}")
 from vllm_judge import CODE_QUALITY

 result = await judge.evaluate(
-    response="def add(a, b): return a + b",
+    content="def add(a, b): return a + b",
     metric=CODE_QUALITY
 )

 # With template variables
 result = await judge.evaluate(
-    response="Essay content here...",
+    content="Essay content here...",
     criteria="Evaluate this {doc_type} for {audience}",
     template_vars={
         "doc_type": "essay",
@@ -68,7 +68,7 @@ result = await judge.evaluate(
 from vllm_judge import LLAMA_GUARD_3_SAFETY

 result = await judge.evaluate(
-    response="How do I make a bomb?",
+    content="How do I make a bomb?",
     metric=LLAMA_GUARD_3_SAFETY  # Automatically uses Llama Guard format
 )
 # Result: decision="unsafe", reasoning="S9"
@@ -89,7 +89,7 @@ from vllm_judge.api import JudgeClient

 client = JudgeClient("http://localhost:9090")
 result = await client.evaluate(
-    response="Python is great!",
+    content="Python is great!",
     criteria="technical accuracy"
 )
 ```

docs/getting-started/quickstart.md

Lines changed: 11 additions & 11 deletions
@@ -19,7 +19,7 @@ judge = Judge.from_url("http://vllm-server:8000")
 ```python
 # Evaluate text for a specific criteria
 result = await judge.evaluate(
-    response="Python is a versatile programming language known for its simple syntax.",
+    content="Python is a versatile programming language known for its simple syntax.",
     criteria="technical accuracy"
 )

@@ -37,13 +37,13 @@ from vllm_judge import HELPFULNESS, CODE_QUALITY, SAFETY

 # Evaluate helpfulness
 result = await judge.evaluate(
-    response="To fix this error, try reinstalling the package using pip install -U package-name",
+    content="To fix this error, try reinstalling the package using pip install -U package-name",
     metric=HELPFULNESS
 )

 # Evaluate code quality
 result = await judge.evaluate(
-    response="""
+    content="""
 def fibonacci(n):
     if n <= 1:
         return n
@@ -54,7 +54,7 @@ result = await judge.evaluate(

 # Check content safety
 result = await judge.evaluate(
-    response="This content contains mild profanity but no harmful instructions.",
+    content="This content contains mild profanity but no harmful instructions.",
     metric=SAFETY
 )
 ```
@@ -65,7 +65,7 @@ result = await judge.evaluate(

 ```python
 result = await judge.evaluate(
-    response="The mitochondria is the powerhouse of the cell.",
+    content="The mitochondria is the powerhouse of the cell.",
     criteria="scientific accuracy and completeness",
     scale=(1, 10),
     rubric={
@@ -83,7 +83,7 @@ result = await judge.evaluate(
 ```python
 # Classify without numeric scoring
 result = await judge.evaluate(
-    response="I'm frustrated with this product!",
+    content="I'm frustrated with this product!",
     criteria="customer sentiment",
     rubric="Classify as 'positive', 'neutral', or 'negative'"
 )
@@ -95,7 +95,7 @@ result = await judge.evaluate(
 ```python
 # Compare two responses
 result = await judge.evaluate(
-    response={
+    content={
         "a": "The Sun is approximately 93 million miles from Earth.",
         "b": "The Sun is about 150 million kilometers from Earth."
     },
@@ -108,7 +108,7 @@ result = await judge.evaluate(

 ```python
 result = await judge.evaluate(
-    response="This meeting could have been an email.",
+    content="This meeting could have been an email.",
     criteria="appropriateness for workplace",
     rubric="Answer 'appropriate' or 'inappropriate'"
 )
@@ -121,7 +121,7 @@ Make evaluations dynamic with templates:
 ```python
 # Define evaluation with template variables
 result = await judge.evaluate(
-    response="Great job! You've shown excellent understanding.",
+    content="Great job! You've shown excellent understanding.",
     criteria="Evaluate this feedback for a {grade_level} {subject} student",
     template_vars={
         "grade_level": "8th grade",
@@ -132,7 +132,7 @@ result = await judge.evaluate(

 # Reuse with different contexts
 result2 = await judge.evaluate(
-    response="Try to add more detail to your explanations.",
+    content="Try to add more detail to your explanations.",
     criteria="Evaluate this feedback for a {grade_level} {subject} student",
     template_vars={
         "grade_level": "college",
@@ -197,7 +197,7 @@ client = JudgeClient("http://localhost:8080")

 # Use same interface as local Judge
 result = await client.evaluate(
-    response="This is a test response.",
+    content="This is a test response.",
     criteria="clarity and coherence"
 )
 ```

docs/guide/basic-evaluation.md

Lines changed: 20 additions & 20 deletions
@@ -8,7 +8,7 @@ vLLM Judge uses a single `evaluate()` method that adapts to your needs:

 ```python
 result = await judge.evaluate(
-    response="...", # What to evaluate
+    content="...", # What to evaluate
     criteria="...", # What to evaluate for
     # Optional parameters to control evaluation
 )
@@ -23,13 +23,13 @@ The simplest form - just provide text and criteria:
 ```python
 # Basic evaluation
 result = await judge.evaluate(
-    response="The Earth is the third planet from the Sun.",
+    content="The Earth is the third planet from the Sun.",
     criteria="scientific accuracy"
 )

 # Multiple criteria
 result = await judge.evaluate(
-    response="Dear customer, thank you for your feedback...",
+    content="Dear customer, thank you for your feedback...",
     criteria="professionalism, empathy, and clarity"
 )
 ```
@@ -51,14 +51,14 @@ Control the scoring range:
 ```python
 # 5-point scale
 result = await judge.evaluate(
-    response="The product works as advertised.",
+    content="The product works as advertised.",
     criteria="review helpfulness",
     scale=(1, 5)
 )

 # 100-point scale for fine-grained scoring
 result = await judge.evaluate(
-    response=essay_text,
+    content=essay_text,
     criteria="writing quality",
     scale=(0, 100)
 )
@@ -70,7 +70,7 @@ Provide evaluation guidance as text:

 ```python
 result = await judge.evaluate(
-    response="I hate this product!",
+    content="I hate this product!",
     criteria="sentiment analysis",
     rubric="Classify as 'positive', 'neutral', or 'negative' based on emotional tone"
 )
@@ -83,7 +83,7 @@ Define specific score meanings:

 ```python
 result = await judge.evaluate(
-    response=code_snippet,
+    content=code_snippet,
     criteria="code quality",
     scale=(1, 10),
     rubric={
@@ -104,7 +104,7 @@ Compare two responses by providing a dictionary:
 ```python
 # Compare two responses
 result = await judge.evaluate(
-    response={
+    content={
         "a": "Python is great for beginners due to its simple syntax.",
         "b": "Python's intuitive syntax makes it ideal for newcomers."
     },
@@ -114,7 +114,7 @@ result = await judge.evaluate(

 # With additional context
 result = await judge.evaluate(
-    response={
+    content={
         "a": customer_response_1,
         "b": customer_response_2
     },
@@ -131,7 +131,7 @@ Add context to improve evaluation accuracy:

 ```python
 result = await judge.evaluate(
-    response="Just use the default settings.",
+    content="Just use the default settings.",
     criteria="helpfulness",
     context="User asked how to configure advanced security settings"
 )
@@ -144,7 +144,7 @@ Guide the evaluation with examples:

 ```python
 result = await judge.evaluate(
-    response="Your code has a bug on line 5.",
+    content="Your code has a bug on line 5.",
     criteria="constructive feedback quality",
     scale=(1, 10),
     examples=[
@@ -169,7 +169,7 @@ Take full control of the evaluator's persona:
 ```python
 # Expert evaluator
 result = await judge.evaluate(
-    response=medical_advice,
+    content=medical_advice,
     criteria="medical accuracy and safety",
     system_prompt="""You are a licensed medical professional reviewing
     health information for accuracy and potential harm. Be extremely
@@ -178,7 +178,7 @@ result = await judge.evaluate(

 # Specific domain expert
 result = await judge.evaluate(
-    response=legal_document,
+    content=legal_document,
     criteria="legal compliance",
     system_prompt="""You are a corporate lawyer specializing in GDPR
     compliance. Evaluate for regulatory adherence."""
@@ -193,7 +193,7 @@ When you provide a scale, you get numeric scoring:

 ```python
 result = await judge.evaluate(
-    response="Great product!",
+    content="Great product!",
     criteria="review quality",
     scale=(1, 5)
 )
@@ -208,7 +208,7 @@ Without a scale but with category rubric:

 ```python
 result = await judge.evaluate(
-    response="This might be considered offensive.",
+    content="This might be considered offensive.",
     criteria="content moderation",
     rubric="Classify as 'safe', 'warning', or 'unsafe'"
 )
@@ -223,7 +223,7 @@ For yes/no evaluations:

 ```python
 result = await judge.evaluate(
-    response=user_message,
+    content=user_message,
     criteria="spam detection",
     rubric="Determine if this is 'spam' or 'not spam'"
 )
@@ -237,7 +237,7 @@ You can request both classification and scoring:

 ```python
 result = await judge.evaluate(
-    response=essay,
+    content=essay,
     criteria="academic quality",
     rubric="""
     Grade the essay:
@@ -263,7 +263,7 @@ result = await judge.evaluate(
 async def qa_check(response: str, threshold: float = 7.0):
     """Check if response meets quality threshold."""
     result = await judge.evaluate(
-        response=response,
+        content=response,
         criteria="helpfulness, accuracy, and professionalism",
         scale=(1, 10)
     )
@@ -283,7 +283,7 @@ async def qa_check(response: str, threshold: float = 7.0):
 async def compare_models(prompt: str, response_a: str, response_b: str):
     """Compare two model responses."""
     result = await judge.evaluate(
-        response={"a": response_a, "b": response_b},
+        content={"a": response_a, "b": response_b},
         criteria="helpfulness, accuracy, and clarity",
         context=f"User prompt: {prompt}"
     )
@@ -310,7 +310,7 @@ async def comprehensive_evaluation(content: str):
     results = {}
     for aspect, criteria in aspects.items():
         result = await judge.evaluate(
-            response=content,
+            content=content,
             criteria=criteria,
             scale=(1, 10)
         )
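Taken together, the hunks above apply the rename uniformly to the local Judge and to the HTTP `JudgeClient`. Below is a minimal sketch of the remote call path after this commit; the `from vllm_judge.api import JudgeClient` import, the example text, and the `result.decision` field come from the README hunk, while the server URL and the `asyncio` wrapper are illustrative assumptions.

```python
import asyncio

from vllm_judge.api import JudgeClient  # import path shown in the README hunk above


async def main() -> None:
    # Point the client at a running vllm_judge API server (URL is illustrative).
    client = JudgeClient("http://localhost:9090")

    # The remote client mirrors the local Judge: the evaluated text is now `content`.
    result = await client.evaluate(
        content="Python is great!",
        criteria="technical accuracy",
    )
    print(f"Decision: {result.decision}")


asyncio.run(main())
```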
