From c207f5b2a05985390421626c37bd6ef2d79e6c88 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Wed, 19 Mar 2025 08:30:17 +0100
Subject: [PATCH 1/3] add missing metadata

---
 src/metrics/average_batch_r2/config.vsh.yaml | 34 ++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/src/metrics/average_batch_r2/config.vsh.yaml b/src/metrics/average_batch_r2/config.vsh.yaml
index 348cded..0d7d25b 100644
--- a/src/metrics/average_batch_r2/config.vsh.yaml
+++ b/src/metrics/average_batch_r2/config.vsh.yaml
@@ -8,9 +8,9 @@ info:
   metrics:
       # A unique identifier for your metric (required).
       # Can contain only lowercase letters or underscores.
-    - name: average_batch_r2
+    - name: average_batch_r2_global
       # A relatively short label, used when rendering visualisarions (required)
-      label: Average Batch R-squared ($\overline{R^2_B}$)
+      label: Average Batch R-squared Global
       # A one sentence summary of how this metric works (required). Used when 
       # rendering summary tables.
       summary: "The average batch R-squared quantifies, on average, how strongly the batch variable B explains the variance in the data."
@@ -56,6 +56,36 @@ info:
       max: 1
       # Whether a higher value represents a 'better' solution (required)
       maximize: false
+    - name: average_batch_r2_ct
+      # A relatively short label, used when rendering visualisarions (required)
+      label: Average Batch R-squared Cell Type
+      # A one sentence summary of how this metric works (required). Used when 
+      # rendering summary tables.
+      summary: "TODO: add a summary for the average_batch_r2_ct metric"
+      # A multi-line description of how this component works (required). Used
+      # when rendering reference documentation.
+      description: |
+        TODO: add a description for the average_batch_r2_ct metric
+      references:
+        bibtex:
+          - |
+            @book{draper1998applied,
+            title={Applied regression analysis},
+            author={Draper, Norman R and Smith, Harry},
+            publisher={John Wiley \& Sons}
+            }
+      links:
+        # URL to the documentation for this metric (required).
+        documentation: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
+        # URL to the code repository for this metric (required).
+        repository: https://github.com/scikit-learn/scikit-learn
+      # The minimum possible value for this metric (required)
+      min: -0.001
+      # The maximum possible value for this metric (required)
+      max: 1
+      # Whether a higher value represents a 'better' solution (required)
+      maximize: false
+
 
 # Component-specific parameters (optional)
 # arguments:

From 79d01a29260516a09a64fd19d98bb1ea9c11c685 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Wed, 19 Mar 2025 08:30:24 +0100
Subject: [PATCH 2/3] fix max

---
 src/metrics/n_inconsistent_peaks/config.vsh.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/metrics/n_inconsistent_peaks/config.vsh.yaml b/src/metrics/n_inconsistent_peaks/config.vsh.yaml
index e417211..544e017 100644
--- a/src/metrics/n_inconsistent_peaks/config.vsh.yaml
+++ b/src/metrics/n_inconsistent_peaks/config.vsh.yaml
@@ -31,7 +31,7 @@ info:
       # The minimum possible value for this metric (required)
       min: 0
       # The maximum possible value for this metric (required)
-      max: inf
+      max: +.inf
       # Whether a higher value represents a 'better' solution (required)
       maximize: false
 

From 8ddf79a2f89a5407af88ec37d5fa97de6f1d0eb4 Mon Sep 17 00:00:00 2001
From: Luqui12 <luca.leomazzi@gmail.com>
Date: Wed, 19 Mar 2025 11:58:23 +0100
Subject: [PATCH 3/3] Updated description of n_peaks and avg R-squared metrics.

---
 src/metrics/average_batch_r2/config.vsh.yaml  | 39 ++++++++++++-------
 .../n_inconsistent_peaks/config.vsh.yaml      | 33 ++++++++++++++--
 2 files changed, 55 insertions(+), 17 deletions(-)

diff --git a/src/metrics/average_batch_r2/config.vsh.yaml b/src/metrics/average_batch_r2/config.vsh.yaml
index 0d7d25b..3acdfd3 100644
--- a/src/metrics/average_batch_r2/config.vsh.yaml
+++ b/src/metrics/average_batch_r2/config.vsh.yaml
@@ -17,26 +17,20 @@ info:
       # A multi-line description of how this component works (required). Used
       # when rendering reference documentation.
       description: |
-        First, a simple linear model `sklearn.linear_model.LinearRegression` is fitted for each paired sample, marker (and cell type) to determine the fraction of variance (R^2) explained by the batch covariate B. |
-        The average batch R_squared is then computed as the average of the $R^2$ values across all paired samples, markers (and cell types). |
-        As a result, $\overline{R^2_B}$ quantifies how much of the total variability in the data is driven by batch effects. Consequently, a lower values are desirable. |
+        First, a simple linear model `sklearn.linear_model.LinearRegression` is fitted for each paired sample and marker to determine the fraction of variance (R^2) explained by the batch covariate B. |
+        The average batch R_squared is then computed as the average of the $R^2$ values across all paired samples and markers. |
+        As a result, $\overline{R^2_B}_{global}$ quantifies how much of the total variability in the data is driven by batch effects. Consequently, lower values are desirable. |
 
-        $\overline{R^2_B} \text{} = \frac{1}{N*C*M}\sum_{\substack{(x_{\mathrm{int}},\,x_{\mathrm{val}})\\ \text{paired samples}}}^{N} \sum_{j=1}^{C} \sum_{i=1}^{M}\,R^2\!\bigl(\mathrm{marker}_i \mid B\bigr)$
+        $\overline{R^2_B}_{global} = \frac{1}{N*M}\sum_{\substack{(x_{\mathrm{int}},\,x_{\mathrm{val}})\\ \text{paired samples}}}^{N} \sum_{i=1}^{M} \,R^2\!\bigl(\mathrm{marker}_i \mid B\bigr)$
 
         Where:
         - $N$ is the number of paired samples, where x_{\mathrm{int}} is the replicate that has been batch-corrected and x_{\mathrm{val}} is replicate used for validation. Paired samples belong to different batches.
-        - $C$ is the number of cell types
         - $M$ is the number of markers
         - $B$ is the batch covariate
 
-        The $\overline{Rˆ2_B}_{global}$ is a variation of the latter metric, where the average is computed across paired samples and markers only, without taking into account the cell types. |
+        A higher value of $\overline{R^2_B}_{global}$ indicates that the batch variable explains more of the variance in the data, which indicates a higher level of batch effects. |
 
-        $\overline{R^2_B}_{global} = \frac{1}{N*M}\sum_{\substack{(x_{\mathrm{int}},\,x_{\mathrm{val}})\\ \text{paired samples}}}^{N} \sum_{i=1}^{M} \,R^2\!\bigl(\mathrm{marker}_i \mid B\bigr)$
 
-        A higher value of $\overline{R^2_B}$ indicates that the batch variable explains more of the variance in the data, which indicates a higher level of batch effects. |
-        
-        A good performance on $\overline{R^2_B}_{global} but not on $\overline{R^2_B}$ might indicate that the batch effect correction is discarding cell type specific batch effects. |
-      
       references:
         bibtex:
           - |
@@ -56,16 +50,35 @@ info:
       max: 1
       # Whether a higher value represents a 'better' solution (required)
       maximize: false
+
+
     - name: average_batch_r2_ct
       # A relatively short label, used when rendering visualisarions (required)
       label: Average Batch R-squared Cell Type
       # A one sentence summary of how this metric works (required). Used when 
       # rendering summary tables.
-      summary: "TODO: add a summary for the average_batch_r2_ct metric"
+      summary: "The average batch R-squared Cell Type quantifies, on average, how strongly the batch variable B explains the variance in the data (by taking into account cell type effect)."
       # A multi-line description of how this component works (required). Used
       # when rendering reference documentation.
       description: |
-        TODO: add a description for the average_batch_r2_ct metric
+        First, a simple linear model `sklearn.linear_model.LinearRegression` is fitted for each paired sample, marker and cell type to determine the fraction of variance (R^2) explained by the batch covariate B. |
+        The average batch R_squared is then computed as the average of the $R^2$ values across all paired samples, markers and cell types. |
+        As a result, $\overline{R^2_B}_{cell\ type}$ quantifies how much of the total variability in the data is driven by batch effects. Consequently, lower values are desirable. |
+
+        $\overline{R^2_B}_{cell\ type} = \frac{1}{N*C*M}\sum_{\substack{(x_{\mathrm{int}},\,x_{\mathrm{val}})\\ \text{paired samples}}}^{N} \sum_{j=1}^{C} \sum_{i=1}^{M}\,R^2\!\bigl(\mathrm{marker}_i \mid B\bigr)$
+
+        Where:
+        - $N$ is the number of paired samples, where $x_{\mathrm{int}}$ is the replicate that has been batch-corrected and $x_{\mathrm{val}}$ is the replicate used for validation. Paired samples belong to different batches.
+        - $C$ is the number of cell types
+        - $M$ is the number of markers
+        - $B$ is the batch covariate
+
+        The $\overline{R^2_B}_{global}$ is a variation of the latter metric, where the average is computed across paired samples and markers only, without taking into account the cell types. |
+
+        A higher value of $\overline{R^2_B}_{global}$ or $\overline{R^2_B}_{cell\ type}$ indicates that the batch variable explains more of the variance in the data, which indicates a higher level of batch effects. |
+        
+        A good performance on $\overline{R^2_B}_{global}$ but not on $\overline{R^2_B}_{cell\ type}$ might indicate that the batch effect correction is discarding cell type specific batch effects. |
+      
       references:
         bibtex:
           - |
diff --git a/src/metrics/n_inconsistent_peaks/config.vsh.yaml b/src/metrics/n_inconsistent_peaks/config.vsh.yaml
index 544e017..e42a05c 100644
--- a/src/metrics/n_inconsistent_peaks/config.vsh.yaml
+++ b/src/metrics/n_inconsistent_peaks/config.vsh.yaml
@@ -7,17 +7,42 @@ name: n_inconsistent_peaks
 info:
   metrics:
     - name: n_inconsistent_peaks
-      label: Number of inconsistent peaks
+      label: Number of inconsistent peaks Global
       # A one sentence summary of how this metric works (required). Used when 
       # rendering summary tables.
-      summary: "Compare the number of marker-expression peaks between validation and batch-normalized data."
+      summary: "Comparison of the number of marker-expression peaks between validation and batch-normalized data."
       # A multi-line description of how this component works (required). Used
       # when rendering reference documentation.
       description: |
-        The metric compares the number of marker-expression peaks between the validation and batch-normalized data. 
+        The metric compares the number of marker expression peaks between the validation and batch-normalized data. 
         The number of peaks is calculated using the `scipy.signal.find_peaks` function. 
         The metric is calculated as the absolute difference between the number of peaks in the validation and batch-normalized data.
-        The marker-expression profiles are first smoothed using kernel density estimation (KDE) (`scipy.stats.gaussian_kde`),
+        The marker expression profiles are first smoothed using kernel density estimation (KDE) (`scipy.stats.gaussian_kde`),
+        and then peaks are identified using the `scipy.signal.find_peaks` function.
+        For peak calling, the `prominence` parameter is set to 0.1 and the `height` parameter is set to 0.05*max_density.
+      references:
+        doi: 
+          - 10.1038/s41592-019-0686-2
+      links:
+        # URL to the documentation for this metric (required).
+        documentation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html#scipy.signal.find_peaks
+        # URL to the code repository for this metric (required).
+        repository: https://github.com/scipy/scipy/blob/v1.15.2/scipy/signal/_peak_finding.py#L0-L1
+      # The minimum possible value for this metric (required)
+      min: 0
+      # The maximum possible value for this metric (required)
+      max: +.inf
+      # Whether a higher value represents a 'better' solution (required)
+      maximize: false
+
+    - name: n_inconsistent_peaks_ct
+      label: Number of inconsistent peaks (Cell Type)
+      summary: "Comparison of the number of cell-type marker-expression peaks between validation and batch-normalized data."
+      description: |
+        The metric compares the number of cell type specific marker expression peaks between the validation and batch-normalized data. 
+        The number of peaks is calculated using the `scipy.signal.find_peaks` function. 
+        The metric is calculated as the absolute difference between the number of peaks in the validation and batch-normalized data.
+        The (cell type) marker expression profiles are first smoothed using kernel density estimation (KDE) (`scipy.stats.gaussian_kde`),
         and then peaks are then identified using the `scipy.signal.find_peaks` function.
         For peak calling, the `prominence` parameter is set to 0.1 and the `height` parameter is set to 0.05*max_density.
       references: