From 8890c37ae05d0437cf2c5e8d606bca38db129370 Mon Sep 17 00:00:00 2001
From: Seth Nickell
Date: Mon, 19 Feb 2024 14:16:53 -1000
Subject: [PATCH 1/7] Add a setup.py

---
 Dockerfile                                   |  2 +-
 README.md                                    |  8 +++---
 {lib => aiproxy}/__init__.py                 |  0
 {src => aiproxy/app}/__init__.py             |  6 ++--
 {src => aiproxy/app}/assessment.py           |  8 ++++----
 {src => aiproxy/app}/openai.py               |  0
 {src => aiproxy/app}/test.py                 |  0
 {lib => aiproxy}/assessment/__init__.py      |  0
 {lib => aiproxy}/assessment/assess.py        |  4 +--
 {lib => aiproxy}/assessment/config.py        |  0
 {lib => aiproxy}/assessment/label.py         |  2 +-
 {lib => aiproxy}/assessment/report.py        |  2 +-
 {lib => aiproxy}/assessment/rubric_tester.py | 30 +++++++++++---------
 bin/rubric_tester                            |  7 +++++
 run.py                                       |  8 ++++++
 setup.py                                     | 14 +++++++++
 16 files changed, 61 insertions(+), 30 deletions(-)
 rename {lib => aiproxy}/__init__.py (100%)
 rename {src => aiproxy/app}/__init__.py (92%)
 rename {src => aiproxy/app}/assessment.py (96%)
 rename {src => aiproxy/app}/openai.py (100%)
 rename {src => aiproxy/app}/test.py (100%)
 rename {lib => aiproxy}/assessment/__init__.py (100%)
 rename {lib => aiproxy}/assessment/assess.py (93%)
 rename {lib => aiproxy}/assessment/config.py (100%)
 rename {lib => aiproxy}/assessment/label.py (99%)
 rename {lib => aiproxy}/assessment/report.py (99%)
 rename {lib => aiproxy}/assessment/rubric_tester.py (97%)
 create mode 100755 bin/rubric_tester
 create mode 100755 run.py
 create mode 100644 setup.py

diff --git a/Dockerfile b/Dockerfile
index adbbc2f..5c2e43b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,4 +10,4 @@ COPY ./lib /app/lib
 COPY ./src /app/src
 
 EXPOSE 80
-CMD ["waitress-serve", "--host=0.0.0.0", "--port=80", "--call", "src:create_app"]
+CMD ["waitress-serve", "--host=0.0.0.0", "--port=80", "--call", "app:create_app"]
diff --git a/README.md b/README.md
index 2fdb022..09cde43 100644
--- a/README.md
+++ b/README.md
@@ -122,7 +122,7 @@ Export the following environment variables (or add them once to your shell profi
 * `export PYTHONPATH=`
 
 See rubric tester options with:
-* `python lib/assessment/rubric_tester.py --help`
+* `bin/rubric_tester --help`
 
 ### example usage
 
@@ -132,7 +132,7 @@ GPT 3.5 Turbo is the default because a complete test run with that model costs o
 
 A recommended first run is to use default experiment and dataset, limited to 1 lesson:
 ```
-(.venv) Dave-MBP:~/src/aiproxy (rt-recover-from-bad-llm-responses)$ python ./lib/assessment/rubric_tester.py --lesson-names csd3-2023-L11
+(.venv) Dave-MBP:~/src/aiproxy (rt-recover-from-bad-llm-responses)$ bin/rubric_tester --lesson-names csd3-2023-L11
 2024-02-13 20:15:30,127: INFO: Evaluating lesson csd3-2023-L11 for dataset contractor-grades-batch-1-fall-2023 and experiment ai-rubrics-pilot-gpt-3.5-turbo...
 ```
 
@@ -150,7 +150,7 @@ The report that gets generated will contain a count of how many errors there wer
 In order to rerun only the failed student projects, you can pass the `-c` (`--use-cached`) option:
 
 ```commandline
-(.venv) Dave-MBP:~/src/aiproxy (rt-recover-from-bad-llm-responses)$ python ./lib/assessment/rubric_tester.py --lesson-names csd3-2023-L11 -c
+(.venv) Dave-MBP:~/src/aiproxy (rt-recover-from-bad-llm-responses)$ bin/rubric_tester --lesson-names csd3-2023-L11 -c
 ```
 
 ![Screenshot 2024-02-13 at 8 24 31 PM](https://github.com/code-dot-org/aiproxy/assets/8001765/ff560302-94b9-4966-a5d6-7d9a9fa54892)
@@ -163,7 +163,7 @@ After enough reruns, you'll have a complete accuracy measurement for the lesson.
 experiments run against GPT 4, GPT 4 Turbo and other pricey models should include report html and cached response data. this allows you to quickly view reports for these datasets either by looking directly at the `output/report*html` files or by regenerating the report against cached data via a command like:
 
 ```commandline
-python ./lib/assessment/rubric_tester.py --experiment-name ai-rubrics-pilot-baseline-gpt-4-turbo --use-cached
+bin/rubric_tester --experiment-name ai-rubrics-pilot-baseline-gpt-4-turbo --use-cached
 ```
 
 #### smaller test runs
diff --git a/lib/__init__.py b/aiproxy/__init__.py
similarity index 100%
rename from lib/__init__.py
rename to aiproxy/__init__.py
diff --git a/src/__init__.py b/aiproxy/app/__init__.py
similarity index 92%
rename from src/__init__.py
rename to aiproxy/app/__init__.py
index 0dafb2a..99b16ce 100644
--- a/src/__init__.py
+++ b/aiproxy/app/__init__.py
@@ -5,9 +5,9 @@
 import logging
 
 # Our modules
-from src.test import test_routes
-from src.openai import openai_routes
-from src.assessment import assessment_routes
+from .test import test_routes
+from .openai import openai_routes
+from .assessment import assessment_routes
 
 # Flask
 from flask import Flask
diff --git a/src/assessment.py b/aiproxy/app/assessment.py
similarity index 96%
rename from src/assessment.py
rename to aiproxy/app/assessment.py
index 6735d18..b827454 100644
--- a/src/assessment.py
+++ b/aiproxy/app/assessment.py
@@ -7,12 +7,12 @@
 import openai
 import json
 
-from lib.assessment.config import DEFAULT_MODEL
+from aiproxy.assessment.config import DEFAULT_MODEL
 
 # Our assessment code
-from lib.assessment import assess
-from lib.assessment.assess import KeyConceptError
-from lib.assessment.label import InvalidResponseError
+from aiproxy.assessment import assess
+from aiproxy.assessment.assess import KeyConceptError
+from aiproxy.assessment.label import InvalidResponseError
 
 assessment_routes = Blueprint('assessment_routes', __name__)
diff --git a/src/openai.py b/aiproxy/app/openai.py
similarity index 100%
rename from src/openai.py
rename to aiproxy/app/openai.py
diff --git a/src/test.py b/aiproxy/app/test.py
similarity index 100%
rename from src/test.py
rename to aiproxy/app/test.py
diff --git a/lib/assessment/__init__.py b/aiproxy/assessment/__init__.py
similarity index 100%
rename from lib/assessment/__init__.py
rename to aiproxy/assessment/__init__.py
diff --git a/lib/assessment/assess.py b/aiproxy/assessment/assess.py
similarity index 93%
rename from lib/assessment/assess.py
rename to aiproxy/assessment/assess.py
index 2909ced..ad93bb0 100644
--- a/lib/assessment/assess.py
+++ b/aiproxy/assessment/assess.py
@@ -7,8 +7,8 @@
 import logging
 
 # Import our support classes
-from lib.assessment.config import SUPPORTED_MODELS, DEFAULT_MODEL, VALID_LABELS
-from lib.assessment.label import Label
+from .config import SUPPORTED_MODELS, DEFAULT_MODEL, VALID_LABELS
+from .label import Label
 
 class KeyConceptError(Exception):
     pass
diff --git a/lib/assessment/config.py b/aiproxy/assessment/config.py
similarity index 100%
rename from lib/assessment/config.py
rename to aiproxy/assessment/config.py
diff --git a/lib/assessment/label.py b/aiproxy/assessment/label.py
similarity index 99%
rename from lib/assessment/label.py
rename to aiproxy/assessment/label.py
index b0dd2f1..8476084 100644
--- a/lib/assessment/label.py
+++ b/aiproxy/assessment/label.py
@@ -7,7 +7,7 @@
 import logging
 
 from typing import List, Dict, Any
-from lib.assessment.config import VALID_LABELS
+from .config import VALID_LABELS
 
 from io import StringIO
 
diff --git a/lib/assessment/report.py b/aiproxy/assessment/report.py
similarity index 99%
rename from lib/assessment/report.py
rename to aiproxy/assessment/report.py
index 5a68c8d..99a6d4b 100644
--- a/lib/assessment/report.py
+++ b/aiproxy/assessment/report.py
@@ -4,7 +4,7 @@ import json
 import math
 
 from typing import List, Dict, Any
-from lib.assessment.config import VALID_LABELS
+from .config import VALID_LABELS
 
 class Report:
     def _compute_pass_fail_cell_color(self, actual, predicted, passing_labels):
diff --git a/lib/assessment/rubric_tester.py b/aiproxy/assessment/rubric_tester.py
similarity index 97%
rename from lib/assessment/rubric_tester.py
rename to aiproxy/assessment/rubric_tester.py
index 4a085e3..4a005db 100644
--- a/lib/assessment/rubric_tester.py
+++ b/aiproxy/assessment/rubric_tester.py
@@ -1,28 +1,30 @@
-#!/usr/bin/env python
-
-# Make sure the caller sees a helpful error message if they try to run this script with Python 2
-f"This script requires {'Python 3'}. Please be sure to activate your virtual environment via `source .venv/bin/activate`."
+#!/usr/bin/env python3
 
 import argparse
+import boto3
+import concurrent.futures
 import csv
 import glob
-import json
-import time
-import os
-from multiprocessing import Pool
-import concurrent.futures
 import io
+import json
 import logging
+import os
 import pprint
-import boto3
 import subprocess
+import sys
+import time
 
-from sklearn.metrics import accuracy_score, confusion_matrix
+from multiprocessing import Pool
 from collections import defaultdict
-from lib.assessment.config import SUPPORTED_MODELS, DEFAULT_MODEL, VALID_LABELS, LESSONS, DEFAULT_DATASET_NAME, DEFAULT_EXPERIMENT_NAME
-from lib.assessment.label import Label, InvalidResponseError
-from lib.assessment.report import Report
+from sklearn.metrics import accuracy_score, confusion_matrix
+
+from .config import SUPPORTED_MODELS, DEFAULT_MODEL, VALID_LABELS, LESSONS, DEFAULT_DATASET_NAME, DEFAULT_EXPERIMENT_NAME
+from .label import Label, InvalidResponseError
+from .report import Report
+
+if 'OPENAI_API_KEY' not in os.environ:
+    print("Warning: OPENAI_API_KEY environment variable is not set.", file=sys.stderr)
 
 #globals
 prompt_file = 'system_prompt.txt'
diff --git a/bin/rubric_tester b/bin/rubric_tester
new file mode 100755
index 0000000..2844bc7
--- /dev/null
+++ b/bin/rubric_tester
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+# Set current working dir to the repo root
+cd "$(dirname "$0")"/..
+ +source .venv/bin/activate +python3 -m aiproxy.assessment.rubric_tester "$@" diff --git a/run.py b/run.py new file mode 100755 index 0000000..54f485e --- /dev/null +++ b/run.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python3 + +from aiproxy.app import create_app + +app = create_app() + +if __name__ == '__main__': + app.run(debug=True) \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..ad888f7 --- /dev/null +++ b/setup.py @@ -0,0 +1,14 @@ +from setuptools import setup, find_packages + +setup( + name='aiproxy', + version='0.1', + packages=find_packages(), + install_requires=[line.strip() for line in open('requirements.txt')], + entry_points={ + 'console_scripts': [ + 'rubric_tester=aiproxy.assessment.rubric_tester:main', + 'aiproxy=aiproxy.app:create_app', + ] + }, +) \ No newline at end of file From 7fec2d6a7b5da9c182816f18e87d05f598c3bc8d Mon Sep 17 00:00:00 2001 From: Seth Nickell Date: Mon, 19 Feb 2024 14:17:09 -1000 Subject: [PATCH 2/7] On macOS /bin/env doesn't exist --- bin/assessment-test.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/assessment-test.rb b/bin/assessment-test.rb index 2c98e15..3385e7a 100755 --- a/bin/assessment-test.rb +++ b/bin/assessment-test.rb @@ -1,4 +1,4 @@ -#!/bin/env ruby +#!/usr/bin/env ruby require 'net/http' require 'uri' From 3c23c26d5109e7874b60f07b5b538a09b019ef6b Mon Sep 17 00:00:00 2001 From: Seth Nickell Date: Mon, 19 Feb 2024 14:33:37 -1000 Subject: [PATCH 3/7] No longer need to set PYTHONPATH --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 09cde43..eae4cc0 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,6 @@ Install requirements to the virtual environment with pip: Export the following environment variables (or add them once to your shell profile) * `export OPENAI_API_KEY=` -* `export PYTHONPATH=` See rubric tester options with: * `bin/rubric_tester --help` From 7ebca5f51281afb147af8bd1c7d2ee6577ce0c56 Mon Sep 17 00:00:00 2001 From: Seth Nickell Date: Mon, 19 Feb 2024 14:40:18 -1000 Subject: [PATCH 4/7] Newline at end of run.py --- run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run.py b/run.py index 54f485e..8c243b0 100755 --- a/run.py +++ b/run.py @@ -5,4 +5,4 @@ app = create_app() if __name__ == '__main__': - app.run(debug=True) \ No newline at end of file + app.run(debug=True) From ed2ce5c25dcbe75d00317c296267951775424909 Mon Sep 17 00:00:00 2001 From: Seth Nickell Date: Mon, 19 Feb 2024 14:40:31 -1000 Subject: [PATCH 5/7] Newline at end of setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ad888f7..e8b7f92 100644 --- a/setup.py +++ b/setup.py @@ -11,4 +11,4 @@ 'aiproxy=aiproxy.app:create_app', ] }, -) \ No newline at end of file +) From 81b53d75d07def4833f874b9e3616d1b16668d06 Mon Sep 17 00:00:00 2001 From: Seth Nickell Date: Thu, 29 Feb 2024 10:35:36 -1000 Subject: [PATCH 6/7] Use new module name in waitress-serve command --- Dockerfile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5c2e43b..305e171 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,9 +5,7 @@ COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt -COPY ./tests /app/tests -COPY ./lib /app/lib -COPY ./src /app/src +COPY . . 
EXPOSE 80 -CMD ["waitress-serve", "--host=0.0.0.0", "--port=80", "--call", "app:create_app"] +CMD ["waitress-serve", "--host=0.0.0.0", "--port=80", "--call", "aiproxy.app:create_app"] From fe136cb45764fe0454f8f04d195b787219d118f1 Mon Sep 17 00:00:00 2001 From: Seth Nickell Date: Thu, 29 Feb 2024 10:41:06 -1000 Subject: [PATCH 7/7] Update lib. to aiproxy. in tests --- tests/accuracy/test_accuracy.py | 2 +- tests/conftest.py | 2 +- tests/routes/test_assessment_routes.py | 30 ++++++++++----------- tests/unit/assessment/test_assessment.py | 14 +++++----- tests/unit/assessment/test_label.py | 8 +++--- tests/unit/assessment/test_report.py | 2 +- tests/unit/assessment/test_rubric_tester.py | 8 +++--- 7 files changed, 33 insertions(+), 33 deletions(-) diff --git a/tests/accuracy/test_accuracy.py b/tests/accuracy/test_accuracy.py index 4da7f73..52f9ebb 100644 --- a/tests/accuracy/test_accuracy.py +++ b/tests/accuracy/test_accuracy.py @@ -3,7 +3,7 @@ from unittest import mock -from lib.assessment.rubric_tester import ( +from aiproxy.assessment.rubric_tester import ( main, ) diff --git a/tests/conftest.py b/tests/conftest.py index f459c5f..89d87e5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,7 +4,7 @@ import pytest -from src import create_app +from aiproxy.app import create_app import contextlib import os diff --git a/tests/routes/test_assessment_routes.py b/tests/routes/test_assessment_routes.py index 5fd726e..1a8c590 100644 --- a/tests/routes/test_assessment_routes.py +++ b/tests/routes/test_assessment_routes.py @@ -45,7 +45,7 @@ def test_should_return_400_when_no_rubric(self, client, randomstring): assert response.status_code == 400 def test_should_return_400_on_openai_error(self, mocker, client, randomstring): - mocker.patch('lib.assessment.assess.label').side_effect = openai.error.InvalidRequestError('', '') + mocker.patch('aiproxy.assessment.assess.label').side_effect = openai.error.InvalidRequestError('', '') response = client.post('/assessment', data={ "code": randomstring(10), "prompt": randomstring(10), @@ -88,7 +88,7 @@ def test_should_return_400_when_passing_not_a_number_to_temperature(self, client assert response.status_code == 400 def test_should_return_400_when_the_label_function_does_not_return_data(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') label_mock.return_value = [] response = client.post('/assessment', data={ @@ -106,7 +106,7 @@ def test_should_return_400_when_the_label_function_does_not_return_data(self, mo assert response.status_code == 400 def test_should_return_400_when_the_label_function_does_not_return_the_right_structure(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') label_mock.return_value = { 'metadata': {}, 'data': {} @@ -127,7 +127,7 @@ def test_should_return_400_when_the_label_function_does_not_return_the_right_str assert response.status_code == 400 def test_should_pass_arguments_to_label_function(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') data = { "code": randomstring(10), "prompt": randomstring(10), @@ -155,7 +155,7 @@ def test_should_pass_arguments_to_label_function(self, mocker, client, randomstr ) def test_should_return_the_result_from_label_function_when_valid(self, mocker, client, randomstring): - label_mock 
= mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') label_mock.return_value = { 'metadata': {}, 'data': [ @@ -190,7 +190,7 @@ class TestPostTestAssessment: """ def test_should_return_400_on_openai_error(self, mocker, client, randomstring): - mocker.patch('lib.assessment.assess.label').side_effect = openai.error.InvalidRequestError('', '') + mocker.patch('aiproxy.assessment.assess.label').side_effect = openai.error.InvalidRequestError('', '') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) response = client.post('/test/assessment', data={ @@ -236,7 +236,7 @@ def test_should_return_400_when_passing_not_a_number_to_temperature(self, mocker assert response.status_code == 400 def test_should_return_400_when_the_label_function_does_not_return_data(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) label_mock.return_value = [] @@ -255,7 +255,7 @@ def test_should_return_400_when_the_label_function_does_not_return_data(self, mo assert response.status_code == 400 def test_should_return_400_when_the_label_function_does_not_return_the_right_structure(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) label_mock.return_value = { @@ -277,7 +277,7 @@ def test_should_return_400_when_the_label_function_does_not_return_the_right_str assert response.status_code == 400 def test_should_pass_arguments_to_label_function(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) data = { @@ -305,7 +305,7 @@ def test_should_pass_arguments_to_label_function(self, mocker, client, randomstr ) def test_should_return_the_result_from_label_function_when_valid(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) label_mock.return_value = { @@ -341,7 +341,7 @@ class TestPostBlankAssessment: """ def test_should_return_400_on_openai_error(self, mocker, client, randomstring): - mocker.patch('lib.assessment.assess.label').side_effect = openai.error.InvalidRequestError('', '') + mocker.patch('aiproxy.assessment.assess.label').side_effect = openai.error.InvalidRequestError('', '') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) response = client.post('/test/assessment/blank', data={ @@ -384,7 +384,7 @@ def test_should_return_400_when_passing_not_a_number_to_temperature(self, mocker assert response.status_code == 400 def test_should_return_400_when_the_label_function_does_not_return_data(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', 
mock_open) label_mock.return_value = [] @@ -402,7 +402,7 @@ def test_should_return_400_when_the_label_function_does_not_return_data(self, mo assert response.status_code == 400 def test_should_return_400_when_the_label_function_does_not_return_the_right_structure(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) label_mock.return_value = { @@ -423,7 +423,7 @@ def test_should_return_400_when_the_label_function_does_not_return_the_right_str assert response.status_code == 400 def test_should_pass_arguments_including_blank_code_to_label_function(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) data = { @@ -450,7 +450,7 @@ def test_should_pass_arguments_including_blank_code_to_label_function(self, mock ) def test_should_return_the_result_from_label_function_when_valid(self, mocker, client, randomstring): - label_mock = mocker.patch('lib.assessment.assess.label') + label_mock = mocker.patch('aiproxy.assessment.assess.label') mock_open = mocker.mock_open(read_data='file data') mock_file = mocker.patch('builtins.open', mock_open) label_mock.return_value = { diff --git a/tests/unit/assessment/test_assessment.py b/tests/unit/assessment/test_assessment.py index c2cc7df..69704f6 100644 --- a/tests/unit/assessment/test_assessment.py +++ b/tests/unit/assessment/test_assessment.py @@ -2,14 +2,14 @@ import pytest -from lib.assessment.label import Label -from lib.assessment.assess import label, KeyConceptError +from aiproxy.assessment.label import Label +from aiproxy.assessment.assess import label, KeyConceptError def test_label_should_pass_arguments_along( mocker, code, prompt, rubric, examples, openai_api_key, llm_model, num_responses, temperature, remove_comments): - """ Tests lib.assessment.assess.label() + """ Tests aiproxy.assessment.assess.label() """ # import test data @@ -52,7 +52,7 @@ def test_label_should_pass_arguments_along( def test_label_should_set_api_key_in_env_var( mocker, code, prompt, rubric, examples, openai_api_key, llm_model, num_responses, temperature, remove_comments): - """ Tests lib.assessment.assess.label() + """ Tests aiproxy.assessment.assess.label() """ # Mock the Label() class @@ -75,7 +75,7 @@ def test_label_should_set_api_key_in_env_var( def test_label_should_return_empty_result_when_no_api_key( mocker, code, prompt, rubric, examples, llm_model, num_responses, temperature, remove_comments): - """ Tests lib.assessment.assess.label() (without an api-key) + """ Tests aiproxy.assessment.assess.label() (without an api-key) """ # Mock the Label() class @@ -97,7 +97,7 @@ def test_label_should_return_empty_result_when_no_api_key( def test_label_should_return_empty_result_when_example_and_rubric_key_concepts_mismatch( mocker, code, prompt, rubric, examples, openai_api_key, llm_model, num_responses, temperature, remove_comments): - """ Tests lib.assessment.assess.label() (without an api-key) + """ Tests aiproxy.assessment.assess.label() (without an api-key) """ # Mock the Label() class label_student_work = mocker.patch.object(Label, 'label_student_work') @@ -122,7 +122,7 @@ def test_label_should_return_empty_result_when_example_and_rubric_key_concepts_m def 
test_label_should_call_label_student_work_with_api_key_in_env_var( mocker, code, prompt, rubric, examples, openai_api_key, llm_model, num_responses, temperature, remove_comments): - """ Tests lib.assessment.assess.label() (without an api-key) + """ Tests aiproxy.assessment.assess.label() (without an api-key) """ # Set the environment variable diff --git a/tests/unit/assessment/test_label.py b/tests/unit/assessment/test_label.py index d3a66fc..a99b0ea 100644 --- a/tests/unit/assessment/test_label.py +++ b/tests/unit/assessment/test_label.py @@ -9,7 +9,7 @@ import requests import pytest -from lib.assessment.label import Label, InvalidResponseError +from aiproxy.assessment.label import Label, InvalidResponseError @pytest.fixture @@ -439,7 +439,7 @@ def test_should_pass_arguments_to_openai(self, requests_mock, mocker, openai_gpt assert requests_mock.last_request.json()['messages'] == messages def test_should_raise_timeout(self, mocker, label, prompt, rubric, code, student_id, examples, num_responses, temperature, llm_model): - mocker.patch('lib.assessment.label.requests.post', side_effect = requests.exceptions.ReadTimeout()) + mocker.patch('aiproxy.assessment.label.requests.post', side_effect = requests.exceptions.ReadTimeout()) # Mock out compute_messages compute_messages = mocker.patch.object(Label, 'compute_messages') @@ -530,7 +530,7 @@ def test_should_open_cached_responses_when_asked_and_they_exist(self, mocker, la mock_file = mocker.patch('builtins.open', mock_open) # Mock the file exists - exists_mock = mocker.patch('lib.assessment.label.os.path.exists', return_value=True) + exists_mock = mocker.patch('aiproxy.assessment.label.os.path.exists', return_value=True) result = label.label_student_work( prompt, rubric, code, student_id, @@ -555,7 +555,7 @@ def test_should_write_cached_responses_when_asked(self, mocker, label, assessmen mock_file = mocker.patch('builtins.open', mock_open) # Mock the file so it does not exist - exists_mock = mocker.patch('lib.assessment.label.os.path.exists', return_value=False) + exists_mock = mocker.patch('aiproxy.assessment.label.os.path.exists', return_value=False) # Get mocks statically_label_student_work_mock = mocker.patch.object( diff --git a/tests/unit/assessment/test_report.py b/tests/unit/assessment/test_report.py index b4079b9..1ef6f0b 100644 --- a/tests/unit/assessment/test_report.py +++ b/tests/unit/assessment/test_report.py @@ -4,7 +4,7 @@ import pytest import random -from lib.assessment.report import Report +from aiproxy.assessment.report import Report @pytest.fixture diff --git a/tests/unit/assessment/test_rubric_tester.py b/tests/unit/assessment/test_rubric_tester.py index 321df79..692d363 100644 --- a/tests/unit/assessment/test_rubric_tester.py +++ b/tests/unit/assessment/test_rubric_tester.py @@ -5,7 +5,7 @@ from unittest import mock from types import SimpleNamespace -from lib.assessment.rubric_tester import ( +from aiproxy.assessment.rubric_tester import ( read_and_label_student_work, get_passing_labels, read_inputs, @@ -19,7 +19,7 @@ get_examples, ) -from lib.assessment.label import Label, InvalidResponseError +from aiproxy.assessment.label import Label, InvalidResponseError class TestReadAndLabelStudentWork: @@ -321,8 +321,8 @@ class TestMain: class TestInit: def test_should_call_main_when_running_by_itself(self, mocker): - main_mock = mocker.patch('lib.assessment.rubric_tester.main') - mocker.patch('lib.assessment.rubric_tester.__name__', '__main__') + main_mock = mocker.patch('aiproxy.assessment.rubric_tester.main') + 
mocker.patch('aiproxy.assessment.rubric_tester.__name__', '__main__') init()
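
With the series applied, the new package layout can be sanity-checked end to end. A minimal sketch, assuming a virtualenv with the requirements installed is active: `pip install -e .` uses the new setup.py, whose `entry_points` provide the `rubric_tester` console script, and the checked-in `bin/rubric_tester` wrapper (which runs `python3 -m aiproxy.assessment.rubric_tester`) works without installing at all.

```commandline
pip install -e .
rubric_tester --help
python3 -m aiproxy.assessment.rubric_tester --help
```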