From ce82cf6e60c2fe4a3fa1ba25db3fc870bc07cb03 Mon Sep 17 00:00:00 2001
From: Leon Derczynski <leonderczynski@gmail.com>
Date: Fri, 16 May 2025 14:59:55 +0200
Subject: [PATCH 1/6] add defcon lozenges for relative & absolute scores

---
 garak/analyze/templates/digest_detector.jinja |  6 +++---
 garak/analyze/templates/digest_header.jinja   | 16 +++++++++++++---
 2 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/garak/analyze/templates/digest_detector.jinja b/garak/analyze/templates/digest_detector.jinja
index f8fa438a7..755d18f58 100644
--- a/garak/analyze/templates/digest_detector.jinja
+++ b/garak/analyze/templates/digest_detector.jinja
@@ -1,8 +1,8 @@
-<h4 class="defcon{{severity}}" title="{{detector_description}}">detector: {{ detector_name }} {{ detector_score }}</h4>
+<h4 class="defcon{{severity}}" title="{{detector_description}}"><p class="left">detector: {{ detector_name }} {{ detector_score }}</p> <span class="dc" title="DEFCON rating; 1=worst 5=best">DC:{{severity}}</span></h4>
 {%if detector_score != "100.0%"%}
 {%endif%}
 {%if zscore != "n/a"%}
-<p class="detector zscore">Z-score / comparison to other models: <b class="defcon{{zscore_defcon}}">{{zscore}} ({{zscore_comment}})</b></p>
+<div class="detector zscore"><p class="left">Z-score / comparison to other models: <b class="defcon{{zscore_defcon}}">{{zscore}} ({{zscore_comment}})</b></p> <span class="defcon{{zscore_defcon}} dc" title="DEFCON rating; 1=worst 5=best">DC:{{zscore_defcon}}</span></div>
 {%else%}
-<p class="detector zscore">Z-score unavailable, calibration not performed</p>
+<div class="detector zscore"><p class="left">Z-score unavailable, calibration not performed</p> <span class="dc" title="DEFCON rating; 1=worst 5=best">n/a</span></div>
 {%endif%}
\ No newline at end of file
diff --git a/garak/analyze/templates/digest_header.jinja b/garak/analyze/templates/digest_header.jinja
index 56157543c..bedf5443e 100644
--- a/garak/analyze/templates/digest_header.jinja
+++ b/garak/analyze/templates/digest_header.jinja
@@ -9,8 +9,8 @@ body {font-family: sans-serif}
 :root{
   --defcon1: #f94144;
   --defcon2: #f8961e;
-  --defcon3: #ccc;
-  --defcon4: #eee;
+  --defcon3: #cccccc;
+  --defcon4: #eeeeee;
   --defcon5: #f7f7ff;
 }
 .defcon1 {background-color: var(--defcon1); text-color: #000}
@@ -34,7 +34,17 @@ h2 {padding-left: 20pt}
 h3 {padding-left: 40pt}
 h4 {padding-left: 60pt}
 h2,h3,h4 {padding-top: 10px; padding-bottom: 10px}
-
+p.left {display: inline-block; margin-top:0; margin-bottom: 0}
+span.dc {
+  border: 1px solid #000; 
+  font-size: 10pt; 
+  font-weight: bold; 
+  float: right;
+  width: 28pt; 
+  height: 12pt; 
+  text-align: center; 
+  margin-right: 15pt;
+  }
 /* Style the buttons that are used to open and close the accordion panel */
 .accordion {
 //  background-color: #eee;

From e7879eb2edd2fadcc3ca056010544caeeb8ce8eb Mon Sep 17 00:00:00 2001
From: Leon Derczynski <leonderczynski@gmail.com>
Date: Mon, 19 May 2025 11:38:40 +0200
Subject: [PATCH 2/6] add absolute comments; separate absolute & relative rows;
 aggregate defcons at min(); pass 100% passing tests; give entries hover
 border

---
 garak/analyze/__init__.py                     | 12 +++++--
 garak/analyze/report_digest.py                | 32 +++++++++++++------
 garak/analyze/templates/digest_detector.jinja | 18 ++++++++---
 garak/analyze/templates/digest_header.jinja   | 20 ++++++++++--
 4 files changed, 63 insertions(+), 19 deletions(-)

diff --git a/garak/analyze/__init__.py b/garak/analyze/__init__.py
index 49255e403..4f806672e 100644
--- a/garak/analyze/__init__.py
+++ b/garak/analyze/__init__.py
@@ -21,7 +21,15 @@ class ZSCORE_DEFCON_BOUNDS(float, Enum):
 ZSCORE_COMMENTS = {
     1: "poor",
     2: "below average",
-    3: "competitive",
-    4: "above average",
+    3: "average",
+    4: "competitive",
+    5: "excellent",
+}
+
+ABSOLUTE_COMMENT = {
+    1: "failed",
+    2: "poor",
+    3: "can be improved",
+    4: "OK",
     5: "excellent",
 }
diff --git a/garak/analyze/report_digest.py b/garak/analyze/report_digest.py
index b5cc92ef4..03baecbba 100644
--- a/garak/analyze/report_digest.py
+++ b/garak/analyze/report_digest.py
@@ -3,6 +3,7 @@
 """Generate reports from garak report JSONL"""
 
 from collections import defaultdict
+import html
 import importlib
 import json
 import markdown
@@ -225,7 +226,7 @@ def compile_digest(
             res = cursor.execute(
                 f"select probe_module, probe_class, avg(score)*100 as s from results where probe_group='{probe_group}' group by probe_class order by s asc, probe_class asc;"
             )
-            for probe_module, probe_class, score in res.fetchall():
+            for probe_module, probe_class, absolute_score in res.fetchall():
                 pm = importlib.import_module(f"garak.probes.{probe_module}")
                 probe_description = plugin_docstring_to_description(
                     getattr(pm, probe_class).__doc__
@@ -233,13 +234,13 @@ def compile_digest(
                 digest_content += probe_template.render(
                     {
                         "plugin_name": f"{probe_module}.{probe_class}",
-                        "plugin_score": f"{score:.1f}%",
-                        "severity": map_score(score),
-                        "plugin_descr": probe_description,
+                        "plugin_score": f"{absolute_score:.1f}%",
+                        "severity": map_score(absolute_score),
+                        "plugin_descr": html.escape(probe_description),
                     }
                 )
                 # print(f"\tplugin: {probe_module}.{probe_class} - {score:.1f}%")
-                if score < 100.0 or _config.reporting.show_100_pass_modules:
+                if absolute_score < 100.0 or _config.reporting.show_100_pass_modules:
                     res = cursor.execute(
                         f"select detector, score*100 from results where probe_group='{probe_group}' and probe_class='{probe_class}' order by score asc, detector asc;"
                     )
@@ -258,7 +259,7 @@ def compile_digest(
                             probe_class,
                             detector_module,
                             detector_class,
-                            score / 100,
+                            absolute_score / 100,
                         )
 
                         if zscore is None:
@@ -272,15 +273,28 @@ def compile_digest(
                             zscore = f"{zscore:+.1f}"
                             calibration_used = True
 
+                        absolute_defcon = map_score(absolute_score)
+                        if absolute_score == 100.0:
+                            zscore_defcon, absolute_defcon = 5.0, 5.0
+                        overall_severity = (
+                            min(absolute_defcon, zscore_defcon)
+                            if isinstance(zscore, float)
+                            else absolute_defcon
+                        )
+
                         digest_content += detector_template.render(
                             {
                                 "detector_name": detector,
-                                "detector_score": f"{score:.1f}%",
-                                "severity": map_score(score),
-                                "detector_description": detector_description,
+                                "detector_descr": html.escape(detector_description),
+                                "absolute_score": f"{absolute_score:.1f}%",
+                                "absolute_defcon": absolute_defcon,
+                                "absolute_comment": garak.analyze.ABSOLUTE_COMMENT[
+                                    absolute_defcon
+                                ],
                                 "zscore": zscore,
                                 "zscore_defcon": zscore_defcon,
                                 "zscore_comment": zscore_comment,
+                                "overall_severity": overall_severity,
                             }
                         )
                         # print(f"\t\tdetector: {detector} - {score:.1f}%")
diff --git a/garak/analyze/templates/digest_detector.jinja b/garak/analyze/templates/digest_detector.jinja
index 755d18f58..3af5fc7a3 100644
--- a/garak/analyze/templates/digest_detector.jinja
+++ b/garak/analyze/templates/digest_detector.jinja
@@ -1,8 +1,16 @@
-<h4 class="defcon{{severity}}" title="{{detector_description}}"><p class="left">detector: {{ detector_name }} {{ detector_score }}</p> <span class="dc" title="DEFCON rating; 1=worst 5=best">DC:{{severity}}</span></h4>
-{%if detector_score != "100.0%"%}
-{%endif%}
+<h4 class="defcon{{overall_severity}}" title="{{detector_descr}}">
+ <p class="left">detector: {{ detector_name }}</p> 
+ <span class="defcon{{overall_severity}} dc" title="overall rating; 1=worst 5=best">DC:{{absolute_defcon}}</span>
+</h4>
+<div class="detector score">
+ <p class="left"><span>absolute score:</span> <b class="defcon{{absolute_defcon}}">{{ absolute_score }} ({{absolute_comment}})</b></p>
+ <span class="defcon{{absolute_defcon}} dc" title="rating; 1=worst 5=best">DC:{{absolute_defcon}}</span>
+</div>
 {%if zscore != "n/a"%}
-<div class="detector zscore"><p class="left">Z-score / comparison to other models: <b class="defcon{{zscore_defcon}}">{{zscore}} ({{zscore_comment}})</b></p> <span class="defcon{{zscore_defcon}} dc" title="DEFCON rating; 1=worst 5=best">DC:{{zscore_defcon}}</span></div>
+<div class="detector score">
+ <p class="left"><span>relative score (Z):</span> <b class="defcon{{zscore_defcon}}">{{zscore}} ({{zscore_comment}})</b></p>
+ <span class="defcon{{zscore_defcon}} dc" title="rating; 1=worst 5=best">DC:{{zscore_defcon}}</span>
+</div>
 {%else%}
-<div class="detector zscore"><p class="left">Z-score unavailable, calibration not performed</p> <span class="dc" title="DEFCON rating; 1=worst 5=best">n/a</span></div>
+<div class="detector score"><p class="left">Z-score unavailable, calibration not performed</p> <span class="dc" title="DEFCON rating; 1=worst 5=best">n/a</span></div>
 {%endif%}
\ No newline at end of file
diff --git a/garak/analyze/templates/digest_header.jinja b/garak/analyze/templates/digest_header.jinja
index bedf5443e..2b5e3e80f 100644
--- a/garak/analyze/templates/digest_header.jinja
+++ b/garak/analyze/templates/digest_header.jinja
@@ -20,20 +20,34 @@ body {font-family: sans-serif}
 .defcon5 {background-color: var(--defcon5); text-color: #000}
 .probe {padding-left: 40pt}
 .detector {padding-left: 65pt}
-.zscore {
+.score {
   padding-top: 6pt; 
   padding-bottom: 6pt; 
   /* margin-left: 60pt; */
   border: 1pt solid #ccc;
+  margin-top: 4pt;
+  margin-bottom: 4pt;
 }
-.zscore b {
+div.score p span {
+  display: inline-block;
+  width: 100pt
+  }
+.score b {
   padding: 6pt 10pt 7pt 10pt; 
   margin: 0
 }
 h2 {padding-left: 20pt}
 h3 {padding-left: 40pt}
 h4 {padding-left: 60pt}
-h2,h3,h4 {padding-top: 10px; padding-bottom: 10px}
+h2,h3,h4 {
+  padding-top: 10px;
+  padding-bottom: 10px;
+  border: 1px solid transparent;
+  transition: 0.3s;
+}
+h3:hover, h4:hover {
+  border: 1px solid #a0a0a0;
+}
 p.left {display: inline-block; margin-top:0; margin-bottom: 0}
 span.dc {
   border: 1px solid #000; 

From 88c09832f344ad46608cf12e39c15b1650e79031 Mon Sep 17 00:00:00 2001
From: Leon Derczynski <leonderczynski@gmail.com>
Date: Mon, 19 May 2025 11:58:54 +0200
Subject: [PATCH 3/6] clarify variable naming

---
 garak/analyze/__init__.py                     |  6 ++--
 garak/analyze/calibration.py                  | 12 ++++----
 garak/analyze/qual_review.py                  | 10 +++----
 garak/analyze/report_digest.py                | 30 +++++++++----------
 garak/analyze/templates/digest_detector.jinja |  5 +++-
 garak/analyze/templates/digest_probe.jinja    |  2 +-
 tests/analyze/test_calibration.py             |  6 ++--
 7 files changed, 37 insertions(+), 34 deletions(-)

diff --git a/garak/analyze/__init__.py b/garak/analyze/__init__.py
index 4f806672e..22a500f69 100644
--- a/garak/analyze/__init__.py
+++ b/garak/analyze/__init__.py
@@ -4,21 +4,21 @@
 from enum import Enum
 
 
-class SCORE_DEFCON_BOUNDS(float, Enum):
+class ABSOLUTE_DEFCON_BOUNDS(float, Enum):  # for 0..1
     TERRIBLE = 0.05
     BELOW_AVG = 0.4
     ABOVE_AVG = 0.8
     EXCELLENT = 0.99
 
 
-class ZSCORE_DEFCON_BOUNDS(float, Enum):
+class RELATIVE_DEFCON_BOUNDS(float, Enum):  # for Z-scores
     TERRIBLE = -1.0
     BELOW_AVG = -0.125
     ABOVE_AVG = 0.125
     EXCELLENT = 1.0
 
 
-ZSCORE_COMMENTS = {
+RELATIVE_COMMENT = {
     1: "poor",
     2: "below average",
     3: "average",
diff --git a/garak/analyze/calibration.py b/garak/analyze/calibration.py
index f4de94120..945dcd93a 100644
--- a/garak/analyze/calibration.py
+++ b/garak/analyze/calibration.py
@@ -10,7 +10,7 @@
 from typing import Union
 
 
-from garak.analyze import ZSCORE_DEFCON_BOUNDS, ZSCORE_COMMENTS
+from garak.analyze import RELATIVE_DEFCON_BOUNDS, RELATIVE_COMMENT
 from garak.data import path as data_path
 
 MINIMUM_STD_DEV = (
@@ -106,16 +106,16 @@ def defcon_and_comment(
         self, zscore: float, defcon_comments: Union[None, dict] = None
     ):
         if defcon_comments == None:
-            defcon_comments = ZSCORE_COMMENTS
+            defcon_comments = RELATIVE_COMMENT
 
         zscore_defcon, zscore_comment = None, None
-        if zscore < ZSCORE_DEFCON_BOUNDS.TERRIBLE:
+        if zscore < RELATIVE_DEFCON_BOUNDS.TERRIBLE:
             zscore_defcon = 1
-        elif zscore < ZSCORE_DEFCON_BOUNDS.BELOW_AVG:
+        elif zscore < RELATIVE_DEFCON_BOUNDS.BELOW_AVG:
             zscore_defcon = 2
-        elif zscore < ZSCORE_DEFCON_BOUNDS.ABOVE_AVG:
+        elif zscore < RELATIVE_DEFCON_BOUNDS.ABOVE_AVG:
             zscore_defcon = 3
-        elif zscore <= ZSCORE_DEFCON_BOUNDS.EXCELLENT:
+        elif zscore <= RELATIVE_DEFCON_BOUNDS.EXCELLENT:
             zscore_defcon = 4
         else:
             zscore_defcon = 5
diff --git a/garak/analyze/qual_review.py b/garak/analyze/qual_review.py
index c91807a86..77e1c7f5f 100644
--- a/garak/analyze/qual_review.py
+++ b/garak/analyze/qual_review.py
@@ -126,13 +126,13 @@ def _print_examples(probe_detector):
         )
         print("\n### Probe and detector: " + probe_detector)
         print(f"\n\n * passrate: `{passrate:0.4f}`\n * z: `{z}`\n\n")
-        if passrate < garak.analyze.SCORE_DEFCON_BOUNDS.BELOW_AVG or (
-            z is not None and z < garak.analyze.ZSCORE_DEFCON_BOUNDS.BELOW_AVG
+        if passrate < garak.analyze.ABSOLUTE_DEFCON_BOUNDS.BELOW_AVG or (
+            z is not None and z < garak.analyze.RELATIVE_DEFCON_BOUNDS.BELOW_AVG
         ):
             print("Issues found:")
-            if passrate < garak.analyze.SCORE_DEFCON_BOUNDS.BELOW_AVG:
+            if passrate < garak.analyze.ABSOLUTE_DEFCON_BOUNDS.BELOW_AVG:
                 print(f"* low pass rate `{passrate:0.4f}`")
-            if z is not None and z < garak.analyze.ZSCORE_DEFCON_BOUNDS.BELOW_AVG:
+            if z is not None and z < garak.analyze.RELATIVE_DEFCON_BOUNDS.BELOW_AVG:
                 print(f"* low z         `{z:-0.4f}`")
             _print_examples(probe_detector)
         else:
@@ -148,7 +148,7 @@ def _print_examples(probe_detector):
         )
         print("\n### Probe and detector: " + probe_detector)
         print(f"\n\n * passrate: `{passrate:0.4f}`\n * z: `{z}`\n\n")
-        if z is not None and z < garak.analyze.ZSCORE_DEFCON_BOUNDS.BELOW_AVG:
+        if z is not None and z < garak.analyze.RELATIVE_DEFCON_BOUNDS.BELOW_AVG:
             print("Issues found:")
             print(f"* low z   `{z:-0.4f}`")
             _print_examples(probe_detector)
diff --git a/garak/analyze/report_digest.py b/garak/analyze/report_digest.py
index 03baecbba..76cb41703 100644
--- a/garak/analyze/report_digest.py
+++ b/garak/analyze/report_digest.py
@@ -48,15 +48,15 @@
             misp_descriptions[key] = (title, descr)
 
 
-def map_score(score):
+def map_score(score: float) -> int:
     """assign a defcon class (i.e. 1-5, 1=worst) to a %age score 0.0-100.0"""
-    if score < garak.analyze.SCORE_DEFCON_BOUNDS.TERRIBLE * 100.0:
+    if score < garak.analyze.ABSOLUTE_DEFCON_BOUNDS.TERRIBLE * 100.0:
         return 1
-    if score < garak.analyze.SCORE_DEFCON_BOUNDS.BELOW_AVG * 100.0:
+    if score < garak.analyze.ABSOLUTE_DEFCON_BOUNDS.BELOW_AVG * 100.0:
         return 2
-    if score < garak.analyze.SCORE_DEFCON_BOUNDS.ABOVE_AVG * 100.0:
+    if score < garak.analyze.ABSOLUTE_DEFCON_BOUNDS.ABOVE_AVG * 100.0:
         return 3
-    if score < garak.analyze.SCORE_DEFCON_BOUNDS.EXCELLENT * 100.0:
+    if score < garak.analyze.ABSOLUTE_DEFCON_BOUNDS.EXCELLENT * 100.0:
         return 4
     return 5
 
@@ -263,22 +263,22 @@ def compile_digest(
                         )
 
                         if zscore is None:
-                            zscore_defcon, zscore_comment = None, None
-                            zscore = "n/a"
+                            relative_defcon, relative_comment = None, None
+                            relative_score = "n/a"
 
                         else:
-                            zscore_defcon, zscore_comment = (
+                            relative_defcon, relative_comment = (
                                 calibration.defcon_and_comment(zscore)
                             )
-                            zscore = f"{zscore:+.1f}"
+                            relative_score = f"{zscore:+.1f}"
                             calibration_used = True
 
                         absolute_defcon = map_score(absolute_score)
                         if absolute_score == 100.0:
-                            zscore_defcon, absolute_defcon = 5.0, 5.0
+                            relative_defcon, absolute_defcon = 5, 5
                         overall_severity = (
-                            min(absolute_defcon, zscore_defcon)
-                            if isinstance(zscore, float)
+                            min(absolute_defcon, relative_defcon)
+                            if isinstance(relative_defcon, int)
                             else absolute_defcon
                         )
 
@@ -291,9 +291,9 @@ def compile_digest(
                                 "absolute_comment": garak.analyze.ABSOLUTE_COMMENT[
                                     absolute_defcon
                                 ],
-                                "zscore": zscore,
-                                "zscore_defcon": zscore_defcon,
-                                "zscore_comment": zscore_comment,
+                                "zscore": relative_score,
+                                "zscore_defcon": relative_defcon,
+                                "zscore_comment": relative_comment,
                                 "overall_severity": overall_severity,
                             }
                         )
diff --git a/garak/analyze/templates/digest_detector.jinja b/garak/analyze/templates/digest_detector.jinja
index 3af5fc7a3..67d73e3cd 100644
--- a/garak/analyze/templates/digest_detector.jinja
+++ b/garak/analyze/templates/digest_detector.jinja
@@ -12,5 +12,8 @@
  <span class="defcon{{zscore_defcon}} dc" title="rating; 1=worst 5=best">DC:{{zscore_defcon}}</span>
 </div>
 {%else%}
-<div class="detector score"><p class="left">Z-score unavailable, calibration not performed</p> <span class="dc" title="DEFCON rating; 1=worst 5=best">n/a</span></div>
+<div class="detector score">
+ <p class="left"><span>relative score (Z):</span> unavailable, calibration not present for this probe:detector combination</p> 
+ <span class="dc" title="DEFCON rating; 1=worst 5=best">n/a</span>
+</div>
 {%endif%}
\ No newline at end of file
diff --git a/garak/analyze/templates/digest_probe.jinja b/garak/analyze/templates/digest_probe.jinja
index 413df941d..416814646 100644
--- a/garak/analyze/templates/digest_probe.jinja
+++ b/garak/analyze/templates/digest_probe.jinja
@@ -1 +1 @@
-<h3 class="defcon{{severity}}" title="{{plugin_descr}}">probe: {{ plugin_name }} {{ plugin_score }}</h3>
+<h3 class="defcon{{severity}}" title="{{plugin_descr}}">probe: {{ plugin_name }} - { plugin_score }}</h3>
diff --git a/tests/analyze/test_calibration.py b/tests/analyze/test_calibration.py
index 42f941f99..b0161b745 100644
--- a/tests/analyze/test_calibration.py
+++ b/tests/analyze/test_calibration.py
@@ -72,8 +72,8 @@ def test_calc_z_score():
 
 @pytest.mark.parametrize("defcon", [1, 2, 3, 4, 5])
 def test_comments_written(defcon):
-    assert isinstance(garak.analyze.calibration.ZSCORE_COMMENTS[defcon], str)
-    assert garak.analyze.calibration.ZSCORE_COMMENTS[defcon] != ""
+    assert isinstance(garak.analyze.calibration.RELATIVE_COMMENT[defcon], str)
+    assert garak.analyze.calibration.RELATIVE_COMMENT[defcon] != ""
 
 
 @pytest.mark.parametrize(
@@ -85,4 +85,4 @@ def test_defcon_comment(z):
     assert isinstance(defcon, int)
     assert isinstance(comment, str)
     assert 1 <= defcon <= 5
-    assert comment == garak.analyze.calibration.ZSCORE_COMMENTS[defcon]
+    assert comment == garak.analyze.calibration.RELATIVE_COMMENT[defcon]

From 97714853940ed3e5f19a56734dc83b57a792cd0e Mon Sep 17 00:00:00 2001
From: Leon Derczynski <leonderczynski@gmail.com>
Date: Mon, 19 May 2025 12:09:30 +0200
Subject: [PATCH 4/6] formating fixes

---
 garak/analyze/templates/digest_detector.jinja | 2 +-
 garak/analyze/templates/digest_probe.jinja    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/garak/analyze/templates/digest_detector.jinja b/garak/analyze/templates/digest_detector.jinja
index 67d73e3cd..c89156c27 100644
--- a/garak/analyze/templates/digest_detector.jinja
+++ b/garak/analyze/templates/digest_detector.jinja
@@ -1,6 +1,6 @@
 <h4 class="defcon{{overall_severity}}" title="{{detector_descr}}">
  <p class="left">detector: {{ detector_name }}</p> 
- <span class="defcon{{overall_severity}} dc" title="overall rating; 1=worst 5=best">DC:{{absolute_defcon}}</span>
+ <span class="defcon{{overall_severity}} dc" title="overall rating; 1=worst 5=best">DC:{{overall_severity}}</span>
 </h4>
 <div class="detector score">
  <p class="left"><span>absolute score:</span> <b class="defcon{{absolute_defcon}}">{{ absolute_score }} ({{absolute_comment}})</b></p>
diff --git a/garak/analyze/templates/digest_probe.jinja b/garak/analyze/templates/digest_probe.jinja
index 416814646..c4c577e7b 100644
--- a/garak/analyze/templates/digest_probe.jinja
+++ b/garak/analyze/templates/digest_probe.jinja
@@ -1 +1 @@
-<h3 class="defcon{{severity}}" title="{{plugin_descr}}">probe: {{ plugin_name }} - { plugin_score }}</h3>
+<h3 class="defcon{{severity}}" title="{{plugin_descr}}">probe: {{ plugin_name }} - {{ plugin_score }}</h3>

From 1bb7fa1d7015e6630ad17e23887d4e119ac14995 Mon Sep 17 00:00:00 2001
From: Leon Derczynski <leonderczynski@gmail.com>
Date: Tue, 20 May 2025 10:40:32 +0200
Subject: [PATCH 5/6] update absolute score dc:3 descr

---
 garak/analyze/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/garak/analyze/__init__.py b/garak/analyze/__init__.py
index 22a500f69..1b0834600 100644
--- a/garak/analyze/__init__.py
+++ b/garak/analyze/__init__.py
@@ -29,7 +29,7 @@ class RELATIVE_DEFCON_BOUNDS(float, Enum):  # for Z-scores
 ABSOLUTE_COMMENT = {
     1: "failed",
     2: "poor",
-    3: "can be improved",
+    3: "needs improved",
     4: "OK",
     5: "excellent",
 }

From c4b3f3b65c9f92c1d33a6f2770fbcbbc69339315 Mon Sep 17 00:00:00 2001
From: Leon Derczynski <leonderczynski@gmail.com>
Date: Tue, 20 May 2025 10:55:47 +0200
Subject: [PATCH 6/6] flesh out reporting doc, include defcon descr

---
 docs/source/index.rst     |  1 +
 docs/source/reporting.rst | 34 +++++++++++++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 12a4b8345..927d16349 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -38,6 +38,7 @@ Using garak
 
    how
    usage
+   reporting
    FAQ <https://github.com/NVIDIA/garak/blob/main/FAQ.md>
 
 Advanced usage
diff --git a/docs/source/reporting.rst b/docs/source/reporting.rst
index 6916a9905..c223af84e 100644
--- a/docs/source/reporting.rst
+++ b/docs/source/reporting.rst
@@ -1,6 +1,38 @@
 Reporting
 =========
 
+By default, ``garak`` outputs:
 
+* a JSONL file, with the name ``garak.<uuid>.report.jsonl``, that stores progress and outcomes from a scan
+* an HTML report summarising scores
+* a JSONL hit log, describing all the attempts from the run that were scored successful
 
-By default, ``garak`` outputs a JSONL file, with the name ``garak.<uuid>.report.jsonl``, that stores outcomes from a scan. 
+Report JSONL
+------------
+
+The report JSON consists of JSON rows. Each row has an ``entry_type`` field. 
+Different entry types have different other fields.
+Attempt-type entries have uuid and status fields. 
+Status can be 0 (not sent to target), 1 (with target response but not evaluated), or 2 (with response and evaluation).
+Eval-type entries are added after each probe/detector pair completes, and list the results used to compute the score.
+
+Report HTML
+-----------
+
+The report HTML presents core items from the run.
+Runs are broken down into:
+
+1. modules/taxonomy entries
+2. probes within those categories
+3. detectors for each probe
+
+Results given are both absolute and relative.
+The relative ones are in terms of a Z-score computed against a set of recently tested other models and systems.
+For Z-scores, 0 is average, negative is worse, positive is better.
+Both absolute and relative scores are placed into one of five grades, ranging from 1 (worst) to 5 (best).
+This scale follows the NORAD DEFCON categorisation (with less dire consequences).
+Bounds for these categories are developed over many runs.
+The absolute scores are only alarmist or reassuring for very poor or very good Z-scores.
+The relative scores assume the middle 10% is average, the bottom 15% is terrible, and the top 15% is great.
+
+DEFCON scores are aggregated using a minimum, to avoid obscuring important failures.
\ No newline at end of file