Skip to content

Commit d2b2c64

Browse files
committed
Adds usage information and such to the response.
1 parent 5002a51 commit d2b2c64

File tree

4 files changed

+62
-25
lines changed

4 files changed

+62
-25
lines changed

API.md

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,38 @@
2323
* `num-passing-grades`: The number of grades to consider 'passing'. Defaults: 2 (pass fail)
2424
* `temperature`: The 'temperature' value for ChatGPT LLMs.
2525

26-
* **Response**: `application/json`: A list of key concepts, assessment values, and reasons. Example below.
26+
* **Response**: `application/json`: An object with `data` and `metadata` keys. `data` is the list of key concepts, assessment values, and reasons. `metadata` contains the request that was sent to the AI along with some usage information; `metadata.request.n` is the number of responses asked for in the input. Example below.
2727

2828
```
29-
[{
30-
"Key Concept": "Program Development 2",
31-
"Observations": "The program uses whitespace good nami [... snipped for brevity ...]. The code is easily readable.",
32-
"Grade": "Extensive Evidence",
33-
"Reason": "The program code effectively uses whitespace, good naming conventions, indentation and comments to make the code easily readable."
34-
}, {
35-
"Key Concept": "Algorithms and Control Structures",
36-
"Observations": "Sprite interactions occur at lines 48-50 (player touches burger), 52 (sw[... snipped for brevity ...]",
37-
"Grade": "Extensive Evidence",
38-
"Reason": "The game includes multiple different interactions between sprites, responds to multiple types of user input (e.g. different arrow keys)."
39-
}]
29+
{
30+
"metadata": {
31+
"time": 39.43,
32+
"student_id": 1553633,
33+
"usage": {
34+
"prompt_tokens": 454,
35+
"completion_tokens": 1886,
36+
"total_tokens": 2340
37+
},
38+
"request": {
39+
"model": "gpt4",
40+
"temperature": 0.2,
41+
"messages": [ ... ],
42+
"n": 3
43+
}
44+
},
45+
"data": [
46+
{
47+
"Key Concept": "Program Development 2",
48+
"Observations": "The program uses whitespace good nami [... snipped for brevity ...]. The code is easily readable.",
49+
"Grade": "Extensive Evidence",
50+
"Reason": "The program code effectively uses whitespace, good naming conventions, indentation and comments to make the code easily readable."
51+
}, {
52+
"Key Concept": "Algorithms and Control Structures",
53+
"Observations": "Sprite interactions occur at lines 48-50 (player touches burger), 52 (sw[... snipped for brevity ...]",
54+
"Grade": "Extensive Evidence",
55+
"Reason": "The game includes multiple different interactions between sprites, responds to multiple types of user input (e.g. different arrow keys)."
56+
}
57+
]
}
4058
```
4159

4260
`(GET|POST) /test/assessment`: Issue a test rubric assessment to the AI agent and wait for a response.
@@ -48,4 +66,4 @@
4866
* `num-passing-grades`: The number of grades to consider 'passing'. Defaults: 2 (pass fail)
4967
* `temperature`: The 'temperature' value for ChatGPT LLMs.
5068

51-
* **Response**: `application/json`: A list of key concepts, assessment values, and reasons. See above.
69+
* **Response**: `application/json`: An object containing `data` and `metadata`, where `data` is a list of key concepts, assessment values, and reasons. See above.

TESTING.md

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,30 @@
11
# Testing
22

3+
This assumes you have built and are running the container as described in the main `README.md`.
4+
In that case, you have a running server on port 5000.
5+
36
You can issue a "test" rubric assessment using hard-coded content that is found in the
47
`test/data` path by using the `/test/assessment` URL. Here, I'm using `curl` to show
5-
me the headers and send a `POST` to that route (assuming I have the server running):
8+
me the headers and send a `POST` to that route (may take 30 to 50 seconds):
69

710
```
8-
curl localhost:8080/test/assessment -i --header "Content-Type:multipart/form-data" --form "num-responses=3"
11+
curl localhost:5000/test/assessment -i --header "Content-Type:multipart/form-data" --form "num-responses=3"
912
```
1013

1114
This gives me this response:
1215

1316
```
1417
HTTP/1.1 200 OK
15-
Content-Length: 2123
16-
Content-Type: text/html; charset=utf-8
17-
Date: Wed, 27 Sep 2023 10:48:36 GMT
18+
Content-Length: 10883
19+
Content-Type: application/json
20+
Date: Tue, 10 Oct 2023 21:00:48 GMT
1821
Server: waitress
1922
20-
[{"Key Concept": "Program Development 2", "Observations": "The program uses whitespace (e.g., lines 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 63), good naming conventions (e.g., \"player\", \"burger\", \"sword\", \"sword2\"), indentation (e.g., lines 20-22, 24-26, 28-30, 32-34, 36-38, 40-42, 44-46, 48-50, 52-54, 56-58, 60-62), and comments (e.g., lines 2, 16, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62). The code is easily readable.", "Grade": "Extensive Evidence", "Reason": "The program code effectively uses whitespace, good naming conventions, indentation and comments to make the code easily readable."}, {"Key Concept": "Algorithms and Control Structures", "Observations": "Sprite interactions occur at lines 48-50 (player touches burger), 52 (sword displaces player), and 53 (sword2 displaces player). The program responds to user input at lines 24-26 (up key), 28-30 (left key), and 32-34 (right key).", "Grade": "Extensive Evidence", "Reason": "The game includes multiple different interactions between sprites, responds to multiple types of user input (e.g. different arrow keys)."}, {"Key Concept": "Position and Movement", "Observations": "The program generates movement at lines 14 (sword), 15 (sword2), 20 (player falling), 24-26 (player moving up), 28-30 (player moving left), 32-34 (player moving right). The movement involves acceleration at lines 20, 24-26, 28-30, 32-34.", "Grade": "Extensive Evidence", "Reason": "Complex movement such as acceleration, moving in a curve, or jumping is included in multiple places in the program."}, {"Key Concept": "Variables", "Observations": "The program updates sprite properties inside the draw loop at lines 20 (player.velocityY), 24-26 (player.velocityY), 28-30 (player.velocityX), 32-34 (player.velocityX), 48-50 (burger.x, burger.y). 
These updates affect the user's experience of playing the game by controlling the player's movement and the burger's position.", "Grade": "Extensive Evidence", "Reason": "The game includes multiple variables or sprite properties that are updated during the game and affect the user's experience of playing the game."}]
23+
{"data":[{"Grade":"Extensive Evidence","Key Concept":"Program Development 2","Observations":"The program uses whitespace effectively (e.g. lines 18, 24, 30, 36, 42, 48, 54, 60). The program uses good naming conventions (e.g. \"player\", \"burger\", \"sword\", \"sword2\"). The program has good indentation (e.g. lines 20-23, 26-29, 32-35, 38-41, 44-47, 50-53, 56-59). The program has good comments (e.g. lines 1, 2, 18, 24, 30, 36, 42, 48, 54, 60). The code is easily readable.","Reason":"The program code effectively uses whitespace, good naming conventions, indentation and comments to make the code easily readable."},{"Grade":"Extensive Evidence","Key Concept":"Algorithms and Control Structures","Observations":"Sprite interactions occur at lines 50-53 (player touches burger) and lines 56-57 (sword and sword2 displace player). The program responds to user input at lines 32-35 (up key), lines 38-41 (left key), and lines 44-47 (right key).","Reason":"The game includes multiple different interactions between sprites, responds to multiple types of user input (e.g. different arrow keys)."},{"Grade":"Extensive Evidence","Key Concept":"Position and Movement","Observations":"The program generates movement at lines 14-15 (sword and sword2), line 20 (player falling), lines 32-35 (player moves up), lines 38-41 (player moves left), and lines 44-47 (player moves right). The movement involves acceleration at lines 20, 32-35, 38-41, and 44-47.","Reason":"Complex movement such as acceleration, moving in a curve, or jumping is included in multiple places in the program."},{"Grade":"Extensive Evidence","Key Concept":"Variables","Observations":"The program updates sprite properties inside the draw loop at lines 20 (player.velocityY), lines 32-35 (player.velocityY), lines 38-41 (player.velocityX), lines 44-47 (player.velocityX), and lines 50-53 (burger.x and burger.y). 
These updates affect the user's experience of playing the game by controlling the player's movement and the burger's position.","Reason":"The game includes multiple variables or sprite properties that are updated during the game and affect the user's experience of playing the game."}],"metadata":{"request":{"messages":[{"content":"You are a teaching assistant whose job is to assess a student program written in\njavascript based on several Key Concepts. For each Key Concept you will answer by\ngiving the highest grade which accurately describes the student's program:\nExtensive Evidence, Convincing Evidence, Limited Evidence, or No\nEvidence. You will also provide a reason explaining your grade for each\nKey Concept, citing examples from the code to support your decision when possible.\n\nThe student's code should contain a method called `draw()` which will be\nreferred to as the \"draw loop\". Any code outside of the draw loop will be run\nonce, then any code inside the draw loop will be run repeatedly, like this:\n```\n// student's code\n\nwhile (true) {\n draw();\n}\n```\n\nPlease keep in mind that acceleration occurs when the velocity of a sprite is changed incrementally within the draw loop, such as in these examples:\n* `sprite.velocityX += 0.2;`\n* `sprite.velocityY -= 1;`\n* `foo.velocityX = foo.velocityX + 5;`\n* `foo.velocityY = foo.velocityY - 10;`\n\nThe following examples do not count as acceleration, because they set the velocity to a specific value, rather than changing it incrementally:\n* `sprite.velocityX = 5;`\n* `sprite.velocityY = -10;`\n\nThe following does not count as acceleration, because it sets the velocity to a random value, rather than changing it incrementally:\n* `foo.velocityX = randomNumber(-5, 5);`\n\nThe student's code will access an API defined by Code.org's fork of the p5play\nlibrary. 
This API contains methods like createSprite(), background(), and drawSprites(),\nas well as sprite properties like x, y, velocityX and velocityY.\n\nIn order to help you evaluate the student's work, you will be given a rubric in\nCSV format. The first column provides the list of Key Concepts to evaluate,\nthe second column, Instructions, tells you what aspects of the code to consider\nwhen choosing a grade. the next four columns describe what it means for a program\nto be classified as each of the four possible grades.\n\nwhen choosing a grade for each Key Concept, please follow the following steps:\n1. follow the instructions in the Instructions column from the rubric to generate observations about the student's program. Include the result to the Observations column in your response.\n2. based on those observations, determine the highest grade which accurately describes the student's program. Write this result to the Grade column in your response.\n3. write a reason for your grade in the Reason column, citing evidence from the Observations column when possible.\n\nplease provide your evaluation formatted as a TSV table including a header row\nwith column names Key Concept, Observations, Grade, and Reason. There should be one\nnon-header row for each Key Concept.\n\nThe student's work should be evaluated based on what they have added beyond the\nstarter code that was provided to them. 
Here is the starter code:\n```\n// GAME SETUP\n// create player, target, and obstacles\nvar player = createSprite(200, 100);\nplayer.setAnimation(\"fly_bot\");\nplayer.scale = 0.8;\n\n\nfunction draw() {\n background(\"lightblue\");\n\n // FALLING\n\n // LOOPING\n\n\n // PLAYER CONTROLS\n // change the y velocity when the user clicks \"up\"\n\n // decrease the x velocity when user clicks \"left\"\n\n // increase the x velocity when the user clicks \"right\"\n\n // SPRITE INTERACTIONS\n // reset the coin when the player touches it\n\n // make the obstacles push the player\n\n\n // DRAW SPRITES\n drawSprites();\n\n // GAME OVER\n if (player.x < -50 || player.x > 450 || player.y < -50 || player.y > 450) {\n background(\"black\");\n textSize(50);\n fill(\"green\");\n text(\"Game Over!\", 50, 200);\n }\n\n}\n```\n\n\nRubric:\nKey Concept,Instructions,Extensive Evidence,Convincing Evidence,Limited Evidence,No Evidence\nProgram Development 2,(1) does the program effectively use whitespace? (2) does the program use good naming conventions? (3) does the program have good indentation? (4) does the program have good comments? (5) is the code easily readable?,\"The program code effectively uses whitespace, good naming conventions, indentation and comments to make the code easily readable.\",\"The program code makes use of whitespace, indentation, and comments.\",The program code has few comments and does not consistently use formatting such as whitespace and indentation.,The program code does not contain comments and is difficult to read.\nAlgorithms and Control Structures,\"(1) list the line number of each sprite interaction, and note the type of interaction. (2) list the line number of each place the program responds to user input, and note the type of user input (e.g. which key or mouse event).\",\"The game includes multiple different interactions between sprites, responds to multiple types of user input (e.g. 
different arrow keys).\",The game includes at least one type of sprite interaction and responds to user input.,\"The game responds to user input through a conditional, but has no sprite interactions.\",The game includes no conditionals.\nPosition and Movement,\"list the line numbers of each place the program generates movement, and note whether the movement involves acceleration, keeping in mind that acceleration is incremental change to velocity (e.g. `sprite.velocityX = sprite.velocityX + 1` or `sprite.velocityY -= 1`).\",\"Complex movement such as acceleration, moving in a curve, or jumping is included in multiple places in the program.\",\"The program includes some complex movement, such as jumping, acceleration, or moving in a curve.\",\"The program does not include complex movement such as jumping, acceleration or moving in a curve. However, the program does include simple independent movement, such as a straight line, rotation or bouncing.\",\"There is no movement in the program, other than direct user control.\"\nVariables,\"(1) list the line number of every place a variable (including sprite properties) is updated inside the draw loop (2) for each variable or sprite property, describe whether it affects the user's experience of playing the game.\",The game includes multiple variables or sprite properties that are updated during the game and affect the user's experience of playing the game.,The game includes at least one variable or sprite property that is updated during the game and affects the user's experience of playing the game.,There is at least one variable or sprite property updated in the program.,\"There are no variables or sprite properties, or they are not updated.\"\n","role":"system"},{"content":"// GAME SETUP\n// create player, target, and obstacles\nvar player = createSprite(200, 100);\nplayer.setAnimation(\"player\");\nplayer.scale = 0.8;\nvar burger = 
createSprite(randomNumber(0,400),randomNumber(0,400));\nburger.setAnimation(\"burger\");\nburger.scale = 0.2;\nburger.setCollider(\"circle\");\nvar sword = createSprite(-50, randomNumber(0, 400));\nsword.setAnimation(\"sword\");\nsword.scale = 0.5;\nvar sword2 = createSprite(randomNumber(0, 400), -50);\nsword2.setAnimation(\"sword2\");\nsword2.scale = 0.5;\nsword.velocityX = 3;\nsword2.velocityY = 3;\n\n\nfunction draw() {\n background(\"green\");\n \n // FALLING\n player.velocityY+=0.7;\n \n // LOOPING\n if (sword.x>425){\n sword.y=-50;\n sword.x=randomNumber(0, 400);\n }\n if (sword2.x>425){\n sword2.y=-50;\n sword2.x=randomNumber(0, 400);\n }\n \n // PLAYER CONTROLS\n // change the y velocity when the user clicks \"up\"\n if (keyDown(\"up\")) {\n player.velocityY-=1.5;\n }\n \n // decrease the x velocity when user clicks \"left\"\n if (keyDown(\"LEFT\")) {\n player.velocityX-=0.1;\n \n }\n // increase the x velocity when the user clicks \"right\"\n if (keyDown(\"RIGHT\")) {\n player.velocityX+=0.1;\n \n }\n // SPRITE INTERACTIONS\n // reset the coin when the player touches it\n if (player.isTouching(burger)) {\n burger.x=randomNumber(0, 400);\n burger.y=randomNumber(0,400);\n }\n \n // make the obstacles push the player\n sword.displace(player);\n sword2.displace(player);\n // DRAW SPRITES\n drawSprites();\n \n // GAME OVER\n if (player.x < -50 || player.x > 450 || player.y < -50 || player.y > 450) {\n background(\"black\");\n textSize(50);\n fill(\"blue\");\n text(\"Game Over!\", 50, 200);\n }\n \n}\n","role":"user"}],"model":"gpt-4","n":1,"temperature":0.2},"student_id":"student","time":31.761487007141113,"usage":{"completion_tokens":504,"prompt_tokens":1897,"total_tokens":2401}}}
24+
```
25+
26+
If you want a cleaner response, omit the `-i` flag so the headers are not printed, and pipe the output through Python's `json.tool` to pretty-print it:
27+
28+
```
29+
curl localhost:5000/test/assessment --header "Content-Type:multipart/form-data" --form "num-responses=3" | python -m json.tool
2130
```

lib/assessment/grade.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,12 @@ def grade_student_work(self, prompt, rubric, student_code, student_id, examples=
5050
print(f"{student_id} Response body: {response.text}")
5151
return None
5252

53-
tokens = response.json()['usage']['total_tokens']
54-
print(f"{student_id} request succeeded in {(time.time() - start_time):.0f} seconds. {tokens} tokens used.")
53+
info = response.json()
54+
tokens = info['usage']['total_tokens']
55+
elapsed = time.time() - start_time
56+
print(f"{student_id} request succeeded in {elapsed:.0f} seconds. {tokens} tokens used.")
5557

56-
tsv_data_choices = [self.get_tsv_data_if_valid(choice['message']['content'], rubric, student_id, choice_index=index) for index, choice in enumerate(response.json()['choices']) if choice['message']['content']]
58+
tsv_data_choices = [self.get_tsv_data_if_valid(choice['message']['content'], rubric, student_id, choice_index=index) for index, choice in enumerate(info['choices']) if choice['message']['content']]
5759
tsv_data_choices = [choice for choice in tsv_data_choices if choice]
5860

5961
if len(tsv_data_choices) == 0:
@@ -68,7 +70,15 @@ def grade_student_work(self, prompt, rubric, student_code, student_id, examples=
6870
with open(f"cached_responses/{student_id}.json", 'w') as f:
6971
json.dump(tsv_data, f, indent=4)
7072

71-
return tsv_data
73+
return {
74+
'metadata': {
75+
'time': elapsed,
76+
'student_id': student_id,
77+
'usage': info['usage'],
78+
'request': data,
79+
},
80+
'data': tsv_data,
81+
}
7282

7383
def sanitize_code(self, student_code, remove_comments=False):
7484
# Remove comments

src/assessment.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def post_assessment():
4848
except openai.error.InvalidRequestError as e:
4949
return str(e), 400
5050

51-
if not isinstance(grades, list):
51+
if not isinstance(grades, dict) and isinstance(grades.get("data"), list):
5252
return "response from AI or service not valid", 400
5353

5454
return grades
@@ -84,7 +84,7 @@ def test_assessment():
8484
except openai.error.InvalidRequestError as e:
8585
return str(e), 400
8686

87-
if not isinstance(grades, list):
87+
if not isinstance(grades, dict) and isinstance(grades.get("data"), list):
8888
return "response from AI or service not valid", 400
8989

9090
return grades

0 commit comments

Comments
 (0)