Updates rubric tester to generate CSV for the results data. #100

Draft · wants to merge 1 commit into base: main

2 changes: 1 addition & 1 deletion bin/prepare.rb
@@ -34,7 +34,7 @@
project_link = row.first.last

# Get the project id
project_link = project_link.match(/\/([^\/]+)\/view/)[1]
project_link = project_link.match(/\/([^\/]+)(?:\/view|\/?$)/)[1]
tokens << project_link
new_row = [project_link]

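For reference, a minimal sketch of what the broadened pattern accepts, shown in Python with the equivalent regex (the rest of this PR is Python). The new alternative `(?:\/view|\/?$)` lets the project id be extracted from links ending in `/view`, in the bare id, or in a trailing slash; the URLs below are hypothetical examples.

```python
import re

# Equivalent of the updated pattern in bin/prepare.rb: capture the last path
# segment whether the link ends in "/view", the bare id, or a trailing slash.
PROJECT_ID_PATTERN = re.compile(r'/([^/]+)(?:/view|/?$)')

# Hypothetical example links; the real ones come from the input CSV.
links = [
    "https://studio.code.org/projects/applab/abc123/view",
    "https://studio.code.org/projects/applab/abc123",
    "https://studio.code.org/projects/applab/abc123/",
]

for link in links:
    match = PROJECT_ID_PATTERN.search(link)
    print(match.group(1) if match else None)  # "abc123" in all three cases
```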
132 changes: 131 additions & 1 deletion lib/assessment/report.py
@@ -1,8 +1,12 @@
import os
import csv
import hashlib
import io
import json
import math
import os
import re
from datetime import datetime
from itertools import product, chain
from typing import List, Dict, Any
from lib.assessment.config import VALID_LABELS, PASSING_LABELS

@@ -83,6 +87,132 @@ def _generate_confusion_table(self, confusion_matrix, labels):
        confusion_table += '</table>'
        return confusion_table

    def _label_to_number(self, label):
        if 'Extensive' in label:
            return 3
        elif 'Convincing' in label:
            return 2
        elif 'Limited' in label:
            return 1

        return 0

    def _key_concept_to_filename(self, key_concept):
        ret = re.sub(r'[^\w\s]', '', key_concept)
        ret = re.sub(r'\s+', '-', ret)
        ret = re.sub(r'^-+|-+$', '', ret)
        return ret.lower()

    def generate_csv_output(self, output_path, prompt, rubric, accuracy=None, predicted_labels=None, actual_labels=None, is_pass_fail=False, accuracy_by_criteria=None, errors=[], input_params={}, confusion_by_criteria=None, overall_confusion=None, label_names=None):
        # Note: CSV files are opened with newline='' as required by the csv module

        # Keeps track of the rubric changes
        rubric_hash = hashlib.sha1(rubric.encode('utf-8')).hexdigest()

        # Keeps track of prompt changes
        prompt_hash = hashlib.sha1(prompt.encode('utf-8')).hexdigest()

        # String to append based on pass/fail vs. exact match
        matching_type = "partial" if is_pass_fail else "exact"

        # Get the list of key concepts
        key_concepts = list(map(lambda label: label['Key Concept'], list(predicted_labels.values())[0]))

        # Convert accuracy to a writable string
        accuracy = 'NaN' if accuracy is None or math.isnan(accuracy) else str(accuracy)

        output_file = os.path.join(output_path, f"{input_params['lesson_name']}-{matching_type}-metadata.csv")
        with open(output_file, 'w+', newline='') as file:
            csv_writer = csv.writer(file)

            # Write Header
            csv_writer.writerow(["RUBRIC_HASH", "PROMPT_HASH", "DATE", "IS_PASS_FAIL", "ERRORS", "ACCURACY"])

            # Write data
            is_pass_fail_value = "TRUE" if is_pass_fail else "FALSE"
            csv_writer.writerow([rubric_hash, prompt_hash, datetime.now().isoformat(), is_pass_fail_value, ';'.join(errors), accuracy])

        # Write sample report and aggregate
        output_file = os.path.join(output_path, f"{input_params['lesson_name']}-sample-accuracy.csv")
        with open(output_file, 'w+', newline='') as file:
            csv_writer = csv.writer(file)

            # Write Header
            csv_writer.writerow(["STUDENT_ID", "LEARNING_GOAL", "ACTUAL", "PREDICTED", "PASS_FAIL_DIFF", "DIFF"])

            # Go through each student and each label
            for student_id, labels in predicted_labels.items():
                for label in labels:
                    criteria = label['Key Concept']
                    actual = actual_labels[student_id][criteria]
                    predicted = label['Label']
                    diff = self._label_to_number(predicted) - self._label_to_number(actual)
                    pass_fail_diff = (self._label_to_number(predicted) // 2) - (self._label_to_number(actual) // 2)
                    csv_writer.writerow([student_id, criteria, actual, predicted, pass_fail_diff, diff])

        output_file = os.path.join(output_path, f"{input_params['lesson_name']}-{matching_type}-accuracy.csv")
        with open(output_file, 'w+', newline='') as file:
            csv_writer = csv.writer(file)

            # Write Header
            csv_writer.writerow(["LEARNING_GOAL", "ACCURACY"])

            # Write the overall accuracy (repeated from the metadata report)
            csv_writer.writerow(["OVERALL", accuracy])

            # For each learning goal, print the accuracy
            for key_concept in key_concepts:
                cur_accuracy = accuracy_by_criteria.get(key_concept)
                cur_accuracy = 'NaN' if cur_accuracy is None or math.isnan(cur_accuracy) else str(cur_accuracy)
                csv_writer.writerow([key_concept, cur_accuracy])

        labels = ["EXTENSIVE", "CONVINCING", "LIMITED", "NO"]

        # First write confusion matrix for all goals
        output_file = os.path.join(output_path, f"{input_params['lesson_name']}-{matching_type}-confusion.csv")
        with open(output_file, 'w+', newline='') as file:
            csv_writer = csv.writer(file)

            # Write Header
            if is_pass_fail:
                csv_writer.writerow(["KEY_CONCEPT", "TRUE_POSITIVE", "FALSE_NEGATIVE", "FALSE_POSITIVE", "TRUE_NEGATIVE"])
            else:
                # Take the Cartesian product of the labels with themselves to form the header
                items = list(map(lambda lst: '/'.join(lst), product(labels, labels)))
                csv_writer.writerow(["KEY_CONCEPT", *items])

            # chain() flattens the 2D matrix into a row-ordered list
            # Write all the values in the overall matrix into the CSV
            csv_writer.writerow(["OVERALL", *list(chain(*overall_confusion))])

            # Write a row for each concept as well
            for key_concept, confusion_matrix in confusion_by_criteria.items():
                csv_writer.writerow([key_concept, *list(chain(*confusion_matrix))])

        # Write learning goal accuracy reports
        for key_concept in key_concepts:
            slug = self._key_concept_to_filename(key_concept)
            output_file = os.path.join(output_path, f"{input_params['lesson_name']}-sample-accuracy-{slug}.csv")

            with open(output_file, 'w+', newline='') as file:
                csv_writer = csv.writer(file)

                # Write Header
                csv_writer.writerow(["STUDENT_ID", "ACTUAL", "PREDICTED", "PASS_FAIL_DIFF", "DIFF"])

                # Search the report data for just info relevant to this key concept
                for student_id, labels in predicted_labels.items():
                    for label in labels:
                        criteria = label['Key Concept']
                        if criteria != key_concept:
                            continue

                        actual = actual_labels[student_id][criteria]
                        predicted = label['Label']
                        diff = self._label_to_number(predicted) - self._label_to_number(actual)
                        pass_fail_diff = (self._label_to_number(predicted) // 2) - (self._label_to_number(actual) // 2)
                        csv_writer.writerow([student_id, actual, predicted, pass_fail_diff, diff])

    def generate_html_output(self, output_file, prompt, rubric, accuracy=None, predicted_labels=None, actual_labels=None, is_pass_fail=False, accuracy_by_criteria=None, errors=[], input_params={}, confusion_by_criteria=None, overall_confusion=None, label_names=None, prefix='sample_code'):
        link_base_url = f'file://{os.getcwd()}/{prefix}'
        title_suffix = 'pass-fail' if is_pass_fail else 'exact-match'
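A minimal usage sketch of the new method with toy data, assuming Report is importable from lib.assessment.report. The argument shapes mirror what rubric_tester.py passes in, but the student, concept, and accuracy values below are made up.

```python
from lib.assessment.report import Report  # assumed import path

# Made-up inputs shaped like the real ones from rubric_tester.py.
predicted = {
    "student-1": [{"Key Concept": "Program Development", "Label": "Extensive Evidence"}],
}
actual = {
    "student-1": {"Program Development": "Convincing Evidence"},
}

report = Report()
report.generate_csv_output(
    "reports",                      # output directory; must already exist
    "example prompt text",          # hashed into PROMPT_HASH
    "example rubric text",          # hashed into RUBRIC_HASH
    accuracy=50.0,
    predicted_labels=predicted,
    actual_labels=actual,
    is_pass_fail=True,
    accuracy_by_criteria={"Program Development": 50.0},
    errors=[],
    input_params={"lesson_name": "example-lesson"},
    confusion_by_criteria={"Program Development": [[0, 1], [0, 0]]},
    overall_confusion=[[0, 1], [0, 0]],
    label_names=["Extensive Evidence", "Convincing Evidence"],
)
# Writes example-lesson-partial-metadata.csv, example-lesson-sample-accuracy.csv,
# example-lesson-partial-accuracy.csv, example-lesson-partial-confusion.csv, and
# example-lesson-sample-accuracy-program-development.csv into reports/.
```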
17 changes: 17 additions & 0 deletions lib/assessment/rubric_tester.py
@@ -32,6 +32,7 @@
standard_rubric_file = 'standard_rubric.csv'
actual_labels_file = 'actual_labels.csv'
output_dir_name = 'output'
report_dir_name = 'reports'
datasets_dir = 'datasets'
cache_dir_name = 'cached_responses'
accuracy_threshold_file = 'accuracy_thresholds.json'
@@ -363,6 +364,7 @@ def main():

    # set up output and cache directories
    os.makedirs(os.path.join(params_lesson_prefix, output_dir_name), exist_ok=True)
    os.makedirs(os.path.join(params_lesson_prefix, report_dir_name), exist_ok=True)
    os.makedirs(os.path.join(params_lesson_prefix, cache_dir_name), exist_ok=True)
    if not options.use_cached:
        for file in glob.glob(f'{os.path.join(params_lesson_prefix, cache_dir_name)}/*'):
@@ -402,6 +404,21 @@ def main():
        }
    }
    report = Report()
    report.generate_csv_output(
        os.path.join(params_lesson_prefix, report_dir_name),
        prompt,
        rubric,
        accuracy=overall_accuracy_percent,
        predicted_labels=predicted_labels,
        actual_labels=actual_labels,
        is_pass_fail=is_pass_fail,
        accuracy_by_criteria=accuracy_by_criteria_percent,
        errors=errors,
        input_params=input_params,
        confusion_by_criteria=confusion_by_criteria,
        overall_confusion=overall_confusion,
        label_names=label_names,
    )
    report.generate_html_output(
        output_file,
        prompt,
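The DIFF and PASS_FAIL_DIFF columns in the sample-accuracy CSVs come from the numeric label mapping added in report.py; below is a small standalone sketch of that arithmetic (mirroring _label_to_number and the row writers), with a made-up predicted/actual pair.

```python
# Standalone sketch of the DIFF / PASS_FAIL_DIFF arithmetic used by the
# sample-accuracy reports (mirrors Report._label_to_number).
def label_to_number(label):
    if 'Extensive' in label:
        return 3
    if 'Convincing' in label:
        return 2
    if 'Limited' in label:
        return 1
    return 0

predicted, actual = "Extensive Evidence", "Limited Evidence"  # made-up pair
diff = label_to_number(predicted) - label_to_number(actual)                      # 3 - 1 = 2
pass_fail_diff = label_to_number(predicted) // 2 - label_to_number(actual) // 2  # 1 - 0 = 1
print(diff, pass_fail_diff)  # 2 1: over-graded by two levels and across the pass/fail boundary
```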