From 8890c37ae05d0437cf2c5e8d606bca38db129370 Mon Sep 17 00:00:00 2001
From: Seth Nickell
Date: Mon, 19 Feb 2024 14:16:53 -1000
Subject: [PATCH 1/7] Add a setup.py

---
 Dockerfile                                   |  2 +-
 README.md                                    |  8 +++---
 {lib => aiproxy}/__init__.py                 |  0
 {src => aiproxy/app}/__init__.py             |  6 ++--
 {src => aiproxy/app}/assessment.py           |  8 ++++----
 {src => aiproxy/app}/openai.py               |  0
 {src => aiproxy/app}/test.py                 |  0
 {lib => aiproxy}/assessment/__init__.py      |  0
 {lib => aiproxy}/assessment/assess.py        |  4 +--
 {lib => aiproxy}/assessment/config.py        |  0
 {lib => aiproxy}/assessment/label.py         |  2 +-
 {lib => aiproxy}/assessment/report.py        |  2 +-
 {lib => aiproxy}/assessment/rubric_tester.py | 30 +++++++++++---------
 bin/rubric_tester                            |  7 +++++
 run.py                                       |  8 ++++++
 setup.py                                     | 14 +++++++++
 16 files changed, 61 insertions(+), 30 deletions(-)
 rename {lib => aiproxy}/__init__.py (100%)
 rename {src => aiproxy/app}/__init__.py (92%)
 rename {src => aiproxy/app}/assessment.py (96%)
 rename {src => aiproxy/app}/openai.py (100%)
 rename {src => aiproxy/app}/test.py (100%)
 rename {lib => aiproxy}/assessment/__init__.py (100%)
 rename {lib => aiproxy}/assessment/assess.py (93%)
 rename {lib => aiproxy}/assessment/config.py (100%)
 rename {lib => aiproxy}/assessment/label.py (99%)
 rename {lib => aiproxy}/assessment/report.py (99%)
 rename {lib => aiproxy}/assessment/rubric_tester.py (97%)
 create mode 100755 bin/rubric_tester
 create mode 100755 run.py
 create mode 100644 setup.py

diff --git a/Dockerfile b/Dockerfile
index adbbc2f..5c2e43b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,4 +10,4 @@ COPY ./lib /app/lib
 COPY ./src /app/src
 
 EXPOSE 80
-CMD ["waitress-serve", "--host=0.0.0.0", "--port=80", "--call", "src:create_app"]
+CMD ["waitress-serve", "--host=0.0.0.0", "--port=80", "--call", "app:create_app"]
diff --git a/README.md b/README.md
index 2fdb022..09cde43 100644
--- a/README.md
+++ b/README.md
@@ -122,7 +122,7 @@ Export the following environment variables (or add them once to your shell profi
 * `export PYTHONPATH=`
 
 See rubric tester options with:
-* `python lib/assessment/rubric_tester.py --help`
+* `bin/rubric_tester --help`
 
 ### example usage
 
@@ -132,7 +132,7 @@ GPT 3.5 Turbo is the default because a complete test run with that model costs o
 
 A recommended first run is to use default experiment and dataset, limited to 1 lesson:
 ```
-(.venv) Dave-MBP:~/src/aiproxy (rt-recover-from-bad-llm-responses)$ python ./lib/assessment/rubric_tester.py --lesson-names csd3-2023-L11
+(.venv) Dave-MBP:~/src/aiproxy (rt-recover-from-bad-llm-responses)$ bin/rubric_tester --lesson-names csd3-2023-L11
 2024-02-13 20:15:30,127: INFO: Evaluating lesson csd3-2023-L11 for dataset contractor-grades-batch-1-fall-2023 and experiment ai-rubrics-pilot-gpt-3.5-turbo...
 ```
 
@@ -150,7 +150,7 @@ The report that gets generated will contain a count of how many errors there wer
 In order to rerun only the failed student projects, you can pass the `-c` (`--use-cached`) option:
 
 ```commandline
-(.venv) Dave-MBP:~/src/aiproxy (rt-recover-from-bad-llm-responses)$ python ./lib/assessment/rubric_tester.py --lesson-names csd3-2023-L11 -c
+(.venv) Dave-MBP:~/src/aiproxy (rt-recover-from-bad-llm-responses)$ bin/rubric_tester --lesson-names csd3-2023-L11 -c
 ```
 
 ![Screenshot 2024-02-13 at 8 24 31 PM](https://github.com/code-dot-org/aiproxy/assets/8001765/ff560302-94b9-4966-a5d6-7d9a9fa54892)
@@ -163,7 +163,7 @@ After enough reruns, you'll have a complete accuracy measurement for the lesson.
 experiments run against GPT 4, GPT 4 Turbo and other pricey models should include report html and cached response data. this allows you to quickly view reports for these datasets either by looking directly at the `output/report*html` files or by regenerating the report against cached data via a command like:
 
 ```commandline
-python ./lib/assessment/rubric_tester.py --experiment-name ai-rubrics-pilot-baseline-gpt-4-turbo --use-cached
+bin/rubric_tester --experiment-name ai-rubrics-pilot-baseline-gpt-4-turbo --use-cached
 ```
 
 #### smaller test runs
diff --git a/lib/__init__.py b/aiproxy/__init__.py
similarity index 100%
rename from lib/__init__.py
rename to aiproxy/__init__.py
diff --git a/src/__init__.py b/aiproxy/app/__init__.py
similarity index 92%
rename from src/__init__.py
rename to aiproxy/app/__init__.py
index 0dafb2a..99b16ce 100644
--- a/src/__init__.py
+++ b/aiproxy/app/__init__.py
@@ -5,9 +5,9 @@
 import logging
 
 # Our modules
-from src.test import test_routes
-from src.openai import openai_routes
-from src.assessment import assessment_routes
+from .test import test_routes
+from .openai import openai_routes
+from .assessment import assessment_routes
 
 # Flask
 from flask import Flask
diff --git a/src/assessment.py b/aiproxy/app/assessment.py
similarity index 96%
rename from src/assessment.py
rename to aiproxy/app/assessment.py
index 6735d18..b827454 100644
--- a/src/assessment.py
+++ b/aiproxy/app/assessment.py
@@ -7,12 +7,12 @@
 import openai
 import json
 
-from lib.assessment.config import DEFAULT_MODEL
+from aiproxy.assessment.config import DEFAULT_MODEL
 
 # Our assessment code
-from lib.assessment import assess
-from lib.assessment.assess import KeyConceptError
-from lib.assessment.label import InvalidResponseError
+from aiproxy.assessment import assess
+from aiproxy.assessment.assess import KeyConceptError
+from aiproxy.assessment.label import InvalidResponseError
 
 assessment_routes = Blueprint('assessment_routes', __name__)
diff --git a/src/openai.py b/aiproxy/app/openai.py
similarity index 100%
rename from src/openai.py
rename to aiproxy/app/openai.py
diff --git a/src/test.py b/aiproxy/app/test.py
similarity index 100%
rename from src/test.py
rename to aiproxy/app/test.py
diff --git a/lib/assessment/__init__.py b/aiproxy/assessment/__init__.py
similarity index 100%
rename from lib/assessment/__init__.py
rename to aiproxy/assessment/__init__.py
diff --git a/lib/assessment/assess.py b/aiproxy/assessment/assess.py
similarity index 93%
rename from lib/assessment/assess.py
rename to aiproxy/assessment/assess.py
index 2909ced..ad93bb0 100644
--- a/lib/assessment/assess.py
+++ b/aiproxy/assessment/assess.py
@@ -7,8 +7,8 @@
 import logging
 
 # Import our support classes
-from lib.assessment.config import SUPPORTED_MODELS, DEFAULT_MODEL, VALID_LABELS
-from lib.assessment.label import Label
+from .config import SUPPORTED_MODELS, DEFAULT_MODEL, VALID_LABELS
+from .label import Label
 
 class KeyConceptError(Exception):
     pass
diff --git a/lib/assessment/config.py b/aiproxy/assessment/config.py
similarity index 100%
rename from lib/assessment/config.py
rename to aiproxy/assessment/config.py
diff --git a/lib/assessment/label.py b/aiproxy/assessment/label.py
similarity index 99%
rename from lib/assessment/label.py
rename to aiproxy/assessment/label.py
index b0dd2f1..8476084 100644
--- a/lib/assessment/label.py
+++ b/aiproxy/assessment/label.py
@@ -7,7 +7,7 @@
 import logging
 
 from typing import List, Dict, Any
-from lib.assessment.config import VALID_LABELS
+from .config import VALID_LABELS
 
 from io import StringIO
 
diff --git a/lib/assessment/report.py b/aiproxy/assessment/report.py
similarity index 99%
rename from lib/assessment/report.py
rename to aiproxy/assessment/report.py
index 5a68c8d..99a6d4b 100644
--- a/lib/assessment/report.py
+++ b/aiproxy/assessment/report.py
@@ -4,7 +4,7 @@ import json
 import math
 
 from typing import List, Dict, Any
-from lib.assessment.config import VALID_LABELS
+from .config import VALID_LABELS
 
 class Report:
     def _compute_pass_fail_cell_color(self, actual, predicted, passing_labels):
diff --git a/lib/assessment/rubric_tester.py b/aiproxy/assessment/rubric_tester.py
similarity index 97%
rename from lib/assessment/rubric_tester.py
rename to aiproxy/assessment/rubric_tester.py
index 4a085e3..4a005db 100644
--- a/lib/assessment/rubric_tester.py
+++ b/aiproxy/assessment/rubric_tester.py
@@ -1,28 +1,30 @@
-#!/usr/bin/env python
-
-# Make sure the caller sees a helpful error message if they try to run this script with Python 2
-f"This script requires {'Python 3'}. Please be sure to activate your virtual environment via `source .venv/bin/activate`."
+#!/usr/bin/env python3
 
 import argparse
+import boto3
+import concurrent.futures
 import csv
 import glob
-import json
-import time
-import os
-from multiprocessing import Pool
-import concurrent.futures
 import io
+import json
 import logging
+import os
 import pprint
-import boto3
 import subprocess
+import sys
+import time
 
-from sklearn.metrics import accuracy_score, confusion_matrix
+from multiprocessing import Pool
 from collections import defaultdict
-from lib.assessment.config import SUPPORTED_MODELS, DEFAULT_MODEL, VALID_LABELS, LESSONS, DEFAULT_DATASET_NAME, DEFAULT_EXPERIMENT_NAME
-from lib.assessment.label import Label, InvalidResponseError
-from lib.assessment.report import Report
+from sklearn.metrics import accuracy_score, confusion_matrix
+
+from .config import SUPPORTED_MODELS, DEFAULT_MODEL, VALID_LABELS, LESSONS, DEFAULT_DATASET_NAME, DEFAULT_EXPERIMENT_NAME
+from .label import Label, InvalidResponseError
+from .report import Report
+
+if 'OPENAI_API_KEY' not in os.environ:
+    print("Warning: OPENAI_API_KEY environment variable is not set.", file=sys.stderr)
 
 #globals
 prompt_file = 'system_prompt.txt'
diff --git a/bin/rubric_tester b/bin/rubric_tester
new file mode 100755
index 0000000..2844bc7
--- /dev/null
+++ b/bin/rubric_tester
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+# Set current working dir to the repo root
+cd "$(dirname "$0")"/..
+ +source .venv/bin/activate +python3 -m aiproxy.assessment.rubric_tester "$@" diff --git a/run.py b/run.py new file mode 100755 index 0000000..54f485e --- /dev/null +++ b/run.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python3 + +from aiproxy.app import create_app + +app = create_app() + +if __name__ == '__main__': + app.run(debug=True) \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..ad888f7 --- /dev/null +++ b/setup.py @@ -0,0 +1,14 @@ +from setuptools import setup, find_packages + +setup( + name='aiproxy', + version='0.1', + packages=find_packages(), + install_requires=[line.strip() for line in open('requirements.txt')], + entry_points={ + 'console_scripts': [ + 'rubric_tester=aiproxy.assessment.rubric_tester:main', + 'aiproxy=aiproxy.app:create_app', + ] + }, +) \ No newline at end of file From 7fec2d6a7b5da9c182816f18e87d05f598c3bc8d Mon Sep 17 00:00:00 2001 From: Seth Nickell Date: Mon, 19 Feb 2024 14:17:09 -1000 Subject: [PATCH 2/7] On macOS /bin/env doesn't exist --- bin/assessment-test.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/assessment-test.rb b/bin/assessment-test.rb index 2c98e15..3385e7a 100755 --- a/bin/assessment-test.rb +++ b/bin/assessment-test.rb @@ -1,4 +1,4 @@ -#!/bin/env ruby +#!/usr/bin/env ruby require 'net/http' require 'uri' From 3c23c26d5109e7874b60f07b5b538a09b019ef6b Mon Sep 17 00:00:00 2001 From: Seth Nickell Date: Mon, 19 Feb 2024 14:33:37 -1000 Subject: [PATCH 3/7] No longer need to set PYTHONPATH --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 09cde43..eae4cc0 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,6 @@ Install requirements to the virtual environment with pip: Export the following environment variables (or add them once to your shell profile) * `export OPENAI_API_KEY=` -* `export PYTHONPATH=` See rubric tester options with: * `bin/rubric_tester --help` From 7ebca5f51281afb147af8bd1c7d2ee6577ce0c56 Mon Sep 17 00:00:00 2001 From: Seth Nickell Date: Mon, 19 Feb 2024 14:40:18 -1000 Subject: [PATCH 4/7] Newline at end of run.py --- run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run.py b/run.py index 54f485e..8c243b0 100755 --- a/run.py +++ b/run.py @@ -5,4 +5,4 @@ app = create_app() if __name__ == '__main__': - app.run(debug=True) \ No newline at end of file + app.run(debug=True) From ed2ce5c25dcbe75d00317c296267951775424909 Mon Sep 17 00:00:00 2001 From: Seth Nickell Date: Mon, 19 Feb 2024 14:40:31 -1000 Subject: [PATCH 5/7] Newline at end of setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ad888f7..e8b7f92 100644 --- a/setup.py +++ b/setup.py @@ -11,4 +11,4 @@ 'aiproxy=aiproxy.app:create_app', ] }, -) \ No newline at end of file +) From 81b53d75d07def4833f874b9e3616d1b16668d06 Mon Sep 17 00:00:00 2001 From: Seth Nickell Date: Thu, 29 Feb 2024 10:35:36 -1000 Subject: [PATCH 6/7] Use new module name in waitress-serve command --- Dockerfile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5c2e43b..305e171 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,9 +5,7 @@ COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt -COPY ./tests /app/tests -COPY ./lib /app/lib -COPY ./src /app/src +COPY . . 
EXPOSE 80 -CMD ["waitress-serve", "--host=0.0.0.0", "--port=80", "--call", "app:create_app"] +CMD ["waitress-serve", "--host=0.0.0.0", "--port=80", "--call", "aiproxy.app:create_app"] From fe136cb45764fe0454f8f04d195b787219d118f1 Mon Sep 17 00:00:00 2001 From: Seth Nickell Date: Thu, 29 Feb 2024 10:41:06 -1000 Subject: [PATCH 7/7] Update lib. to aiproxy. in tests --- tests/accuracy/test_accuracy.py | 2 +- tests/conftest.py | 2 +- tests/routes/test_assessment_routes.py | 30 ++++++++++----------- tests/unit/assessment/test_assessment.py | 14 +++++----- tests/unit/assessment/test_label.py | 8 +++--- tests/unit/assessment/test_report.py | 2 +- tests/unit/assessment/test_rubric_tester.py | 8 +++--- 7 files changed, 33 insertions(+), 33 deletions(-) diff --git a/tests/accuracy/test_accuracy.py b/tests/accuracy/test_accuracy.py index 4da7f73..52f9ebb 100644 --- a/tests/accuracy/test_accuracy.py +++ b/tests/accuracy/test_accuracy.py @@ -3,7 +3,7 @@ from unittest import mock -from lib.assessment.rubric_tester import ( +from aiproxy.assessment.rubric_tester import ( main, ) diff --git a/tests/conftest.py b/tests/conftest.py index f459c5f..89d87e5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,7 +4,7 @@ import pytest -from src import create_app +from aiproxy.app import create_app import contextlib import os diff --git a/tests/routes/test_assessment_routes.py b/tests/routes/test_assessment_routes.py index 5fd726e..1a8c590 100644 --- a/tests/routes/test_assessment_routes.py +++ b/tests/routes/test_assessment_routes.py @@ -45,7 +45,7 @@ def test_should_return_400_when_no_rubric(self, client, randomstring): assert response.status_code == 400 def test_should_return_400_on_openai_error(self, mocker, client, randomstring): - mocker.patch('lib.assessment.assess.label').side_effect = openai.error.InvalidRequestError('', '') + mocker.patch('aiproxy.assessment.assess.label').side_effect = openai.error.InvalidRequestError('', '') response = client.post('/assessment', data={ "code": randomstring(10), "prompt": randomstring(10), @@ -88,7 +88,7 @@ def test_should_return_400_when_passing_not_a_number_to_temperature(self, client assert response.status_code == 400 def test_should_return_400_when_the_label_function_does_not_return_data(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') label_mock.return_value = [] response = client.post('/assessment', data={ @@ -106,7 +106,7 @@ def test_should_return_400_when_the_label_function_does_not_return_data(self, mo assert response.status_code == 400 def test_should_return_400_when_the_label_function_does_not_return_the_right_structure(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') label_mock.return_value = { 'metadata': {}, 'data': {} @@ -127,7 +127,7 @@ def test_should_return_400_when_the_label_function_does_not_return_the_right_str assert response.status_code == 400 def test_should_pass_arguments_to_label_function(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') data = { "code": randomstring(10), "prompt": randomstring(10), @@ -155,7 +155,7 @@ def test_should_pass_arguments_to_label_function(self, mocker, client, randomstr ) def test_should_return_the_result_from_label_function_when_valid(self, mocker, client, randomstring): - label_mock 
= mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') label_mock.return_value = { 'metadata': {}, 'data': [ @@ -190,7 +190,7 @@ class TestPostTestAssessment: """ def test_should_return_400_on_openai_error(self, mocker, client, randomstring): - mocker.patch('lib.assessment.assess.label').side_effect = openai.error.InvalidRequestError('', '') + mocker.patch('aiproxy.assessment.assess.label').side_effect = openai.error.InvalidRequestError('', '') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) response = client.post('/test/assessment', data={ @@ -236,7 +236,7 @@ def test_should_return_400_when_passing_not_a_number_to_temperature(self, mocker assert response.status_code == 400 def test_should_return_400_when_the_label_function_does_not_return_data(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) label_mock.return_value = [] @@ -255,7 +255,7 @@ def test_should_return_400_when_the_label_function_does_not_return_data(self, mo assert response.status_code == 400 def test_should_return_400_when_the_label_function_does_not_return_the_right_structure(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) label_mock.return_value = { @@ -277,7 +277,7 @@ def test_should_return_400_when_the_label_function_does_not_return_the_right_str assert response.status_code == 400 def test_should_pass_arguments_to_label_function(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) data = { @@ -305,7 +305,7 @@ def test_should_pass_arguments_to_label_function(self, mocker, client, randomstr ) def test_should_return_the_result_from_label_function_when_valid(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) label_mock.return_value = { @@ -341,7 +341,7 @@ class TestPostBlankAssessment: """ def test_should_return_400_on_openai_error(self, mocker, client, randomstring): - mocker.patch('lib.assessment.assess.label').side_effect = openai.error.InvalidRequestError('', '') + mocker.patch('aiproxy.assessment.assess.label').side_effect = openai.error.InvalidRequestError('', '') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) response = client.post('/test/assessment/blank', data={ @@ -384,7 +384,7 @@ def test_should_return_400_when_passing_not_a_number_to_temperature(self, mocker assert response.status_code == 400 def test_should_return_400_when_the_label_function_does_not_return_data(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', 
mock_open) label_mock.return_value = [] @@ -402,7 +402,7 @@ def test_should_return_400_when_the_label_function_does_not_return_data(self, mo assert response.status_code == 400 def test_should_return_400_when_the_label_function_does_not_return_the_right_structure(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) label_mock.return_value = { @@ -423,7 +423,7 @@ def test_should_return_400_when_the_label_function_does_not_return_the_right_str assert response.status_code == 400 def test_should_pass_arguments_including_blank_code_to_label_function(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) data = { @@ -450,7 +450,7 @@ def test_should_pass_arguments_including_blank_code_to_label_function(self, mock ) def test_should_return_the_result_from_label_function_when_valid(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) label_mock.return_value = { diff --git a/tests/unit/assessment/test_assessment.py b/tests/unit/assessment/test_assessment.py index c2cc7df..69704f6 100644 --- a/tests/unit/assessment/test_assessment.py +++ b/tests/unit/assessment/test_assessment.py @@ -2,14 +2,14 @@ import pytest -from lib.assessment.label import Label -from lib.assessment.assess import label, KeyConceptError +from aiproxy.assessment.label import Label +from aiproxy.assessment.assess import label, KeyConceptError def test_label_should_pass_arguments_along( mocker, code, prompt, rubric, examples, openai_api_key, llm_model, num_responses, temperature, remove_comments): - """ Tests lib.assessment.assess.label() + """ Tests aiproxy.assessment.assess.label() """ # import test data @@ -52,7 +52,7 @@ def test_label_should_pass_arguments_along( def test_label_should_set_api_key_in_env_var( mocker, code, prompt, rubric, examples, openai_api_key, llm_model, num_responses, temperature, remove_comments): - """ Tests lib.assessment.assess.label() + """ Tests aiproxy.assessment.assess.label() """ # Mock the Label() class @@ -75,7 +75,7 @@ def test_label_should_set_api_key_in_env_var( def test_label_should_return_empty_result_when_no_api_key( mocker, code, prompt, rubric, examples, llm_model, num_responses, temperature, remove_comments): - """ Tests lib.assessment.assess.label() (without an api-key) + """ Tests aiproxy.assessment.assess.label() (without an api-key) """ # Mock the Label() class @@ -97,7 +97,7 @@ def test_label_should_return_empty_result_when_no_api_key( def test_label_should_return_empty_result_when_example_and_rubric_key_concepts_mismatch( mocker, code, prompt, rubric, examples, openai_api_key, llm_model, num_responses, temperature, remove_comments): - """ Tests lib.assessment.assess.label() (without an api-key) + """ Tests aiproxy.assessment.assess.label() (without an api-key) """ # Mock the Label() class label_student_work = mocker.patch.object(Label, 'label_student_work') @@ -122,7 +122,7 @@ def test_label_should_return_empty_result_when_example_and_rubric_key_concepts_m def 
test_label_should_call_label_student_work_with_api_key_in_env_var( mocker, code, prompt, rubric, examples, openai_api_key, llm_model, num_responses, temperature, remove_comments): - """ Tests lib.assessment.assess.label() (without an api-key) + """ Tests aiproxy.assessment.assess.label() (without an api-key) """ # Set the environment variable diff --git a/tests/unit/assessment/test_label.py b/tests/unit/assessment/test_label.py index d3a66fc..a99b0ea 100644 --- a/tests/unit/assessment/test_label.py +++ b/tests/unit/assessment/test_label.py @@ -9,7 +9,7 @@ import requests import pytest -from lib.assessment.label import Label, InvalidResponseError +from aiproxy.assessment.label import Label, InvalidResponseError @pytest.fixture @@ -439,7 +439,7 @@ def test_should_pass_arguments_to_openai(self, requests_mock, mocker, openai_gpt assert requests_mock.last_request.json()['messages'] == messages def test_should_raise_timeout(self, mocker, label, prompt, rubric, code, student_id, examples, num_responses, temperature, llm_model): - mocker.patch('lib.assessment.label.requests.post', side_effect = requests.exceptions.ReadTimeout()) + mocker.patch('aiproxy.assessment.label.requests.post', side_effect = requests.exceptions.ReadTimeout()) # Mock out compute_messages compute_messages = mocker.patch.object(Label, 'compute_messages') @@ -530,7 +530,7 @@ def test_should_open_cached_responses_when_asked_and_they_exist(self, mocker, la mock_file = mocker.patch('builtins.open', mock_open) # Mock the file exists - exists_mock = mocker.patch('lib.assessment.label.os.path.exists', return_value=True) + exists_mock = mocker.patch('aiproxy.assessment.label.os.path.exists', return_value=True) result = label.label_student_work( prompt, rubric, code, student_id, @@ -555,7 +555,7 @@ def test_should_write_cached_responses_when_asked(self, mocker, label, assessmen mock_file = mocker.patch('builtins.open', mock_open) # Mock the file so it does not exist - exists_mock = mocker.patch('lib.assessment.label.os.path.exists', return_value=False) + exists_mock = mocker.patch('aiproxy.assessment.label.os.path.exists', return_value=False) # Get mocks statically_label_student_work_mock = mocker.patch.object( diff --git a/tests/unit/assessment/test_report.py b/tests/unit/assessment/test_report.py index b4079b9..1ef6f0b 100644 --- a/tests/unit/assessment/test_report.py +++ b/tests/unit/assessment/test_report.py @@ -4,7 +4,7 @@ import pytest import random -from lib.assessment.report import Report +from aiproxy.assessment.report import Report @pytest.fixture diff --git a/tests/unit/assessment/test_rubric_tester.py b/tests/unit/assessment/test_rubric_tester.py index 321df79..692d363 100644 --- a/tests/unit/assessment/test_rubric_tester.py +++ b/tests/unit/assessment/test_rubric_tester.py @@ -5,7 +5,7 @@ from unittest import mock from types import SimpleNamespace -from lib.assessment.rubric_tester import ( +from aiproxy.assessment.rubric_tester import ( read_and_label_student_work, get_passing_labels, read_inputs, @@ -19,7 +19,7 @@ get_examples, ) -from lib.assessment.label import Label, InvalidResponseError +from aiproxy.assessment.label import Label, InvalidResponseError class TestReadAndLabelStudentWork: @@ -321,8 +321,8 @@ class TestMain: class TestInit: def test_should_call_main_when_running_by_itself(self, mocker): - main_mock = mocker.patch('lib.assessment.rubric_tester.main') - mocker.patch('lib.assessment.rubric_tester.__name__', '__main__') + main_mock = mocker.patch('aiproxy.assessment.rubric_tester.main') + 
mocker.patch('aiproxy.assessment.rubric_tester.__name__', '__main__') init()
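
With the series applied, the new package layout can be sanity-checked end to end. A minimal sketch, assuming a virtualenv with the requirements installed is active: `pip install -e .` uses the new setup.py, whose `entry_points` provide the `rubric_tester` console script, and the checked-in `bin/rubric_tester` wrapper (which runs `python3 -m aiproxy.assessment.rubric_tester`) works without installing at all.

```commandline
pip install -e .
rubric_tester --help
python3 -m aiproxy.assessment.rubric_tester --help
```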