def bin_targets(y, n_bins=10):
    """Bin continuous targets into equal-width classes for stratified splitting.

    Parameters
    ----------
    y : array-like
        Continuous target values.
    n_bins : int, optional
        Number of equal-width bins spanning ``[min(y), max(y)]`` (default: 10).

    Returns
    -------
    np.ndarray
        Integer bin index in ``[0, n_bins - 1]`` for every element of ``y``,
        with the same shape as ``y``.

    Raises
    ------
    ValueError
        If ``n_bins`` is smaller than 1.
    """
    if n_bins < 1:
        # With fewer than one bin the index arithmetic below goes negative.
        raise ValueError("n_bins must be a positive integer")
    y = np.asarray(y)
    if y.size == 0:
        # Nothing to bin; np.min / np.max cannot reduce an empty array.
        return np.zeros(y.shape, dtype=np.intp)
    edges = np.linspace(np.min(y), np.max(y), n_bins + 1)
    # np.digitize is 1-based and treats the right edge as open, so the maximum
    # lands one past the last bin: shift to 0-based and fold it back in.
    return np.clip(np.digitize(y, edges) - 1, 0, n_bins - 1)
class UnifiedConformalCV(BaseEstimator):
    """Split-conformal wrapper around a single classifier or regressor.

    The wrapped estimator is cloned and handed to crepes' ``WrapClassifier``
    or ``WrapRegressor``. Call ``fit`` on the training data and ``calibrate``
    on a held-out calibration set before requesting prediction sets,
    p-values or intervals.

    Parameters
    ----------
    estimator : sklearn-like estimator
        Model (or pipeline) to wrap; it is cloned in ``fit``.
    mondrian : bool, callable or MondrianCategorizer, optional
        ``True`` enables Mondrian calibration: class-conditional for
        classifiers, binned on the model's own predictions for regressors.
        A callable or ``MondrianCategorizer`` is passed to crepes as ``mc``.
    confidence_level : float, optional
        Default confidence for prediction sets and intervals (default: 0.9).
    estimator_type : {'classifier', 'regressor'}, optional
        Kind of model being wrapped.
    nonconformity : callable, optional
        Nonconformity function for classification; ``hinge`` when ``None``.
    difficulty_estimator : callable or DifficultyEstimator, optional
        Regression only: passed to crepes as ``de`` during calibration.
    binning : int or callable, optional
        Regression only, used when ``mondrian`` is ``True``: the number of
        bins for the prediction-based categorizer, or a callable that is used
        directly as the categorizer.
    n_jobs : int, optional
        Stored on the instance; not used by this class itself.
    kwargs : dict
        Extra keyword arguments, stored as ``self.kwargs``; not forwarded by
        this class.
    """

    def __init__(
        self,
        estimator,
        mondrian=False,
        confidence_level=0.9,
        estimator_type="classifier",
        nonconformity=None,
        difficulty_estimator=None,
        binning=None,
        n_jobs=1,
        **kwargs
    ):
        self.estimator = estimator
        self.mondrian = mondrian
        self.confidence_level = confidence_level
        self.estimator_type = estimator_type
        self.nonconformity = nonconformity
        self.difficulty_estimator = difficulty_estimator
        self.binning = binning
        self.n_jobs = n_jobs
        self.kwargs = kwargs

    def fit(self, X, y, **fit_params):
        """Clone the estimator, wrap it with crepes and fit it on ``(X, y)``.

        Raises
        ------
        ValueError
            If ``estimator_type`` is neither 'classifier' nor 'regressor'.
        """
        if self.estimator_type == "classifier":
            self._conformal = WrapClassifier(clone(self.estimator))
        elif self.estimator_type == "regressor":
            self._conformal = WrapRegressor(clone(self.estimator))
        else:
            raise ValueError("estimator_type must be 'classifier' or 'regressor'")
        self._conformal.fit(X, y, **fit_params)
        self.fitted_ = True
        return self

    def calibrate(self, X_calib, y_calib, **calib_params):
        """Calibrate the fitted wrapper on a held-out calibration set.

        Extra ``calib_params`` are forwarded to the crepes ``calibrate`` call.
        Returns ``self`` so that ``fit(...).calibrate(...)`` can be chained.

        Raises
        ------
        ValueError
            If ``estimator_type`` is neither 'classifier' nor 'regressor'.
        """
        mondrian = self.mondrian
        custom_mc = isinstance(mondrian, MondrianCategorizer) or callable(mondrian)
        if self.estimator_type == "classifier":
            nc = self.nonconformity if self.nonconformity is not None else hinge
            if custom_mc:
                self._conformal.calibrate(
                    X_calib, y_calib, nc=nc, mc=mondrian, **calib_params
                )
            else:
                # Only a literal True switches on class-conditional calibration.
                self._conformal.calibrate(
                    X_calib, y_calib, nc=nc, class_cond=mondrian is True, **calib_params
                )
        elif self.estimator_type == "regressor":
            if custom_mc:
                mc = mondrian
            elif mondrian is True:
                # Previously ``mondrian=True`` was silently ignored here and
                # ``binning`` was never used.
                mc = self._prediction_categorizer(X_calib)
            else:
                mc = None
            self._conformal.calibrate(
                X_calib, y_calib, de=self.difficulty_estimator, mc=mc, **calib_params
            )
        else:
            raise ValueError("estimator_type must be 'classifier' or 'regressor'")
        return self

    def _prediction_categorizer(self, X_calib):
        """Build the categorizer used when ``mondrian=True`` for regression."""
        if callable(self.binning):
            return self.binning
        categorizer = MondrianCategorizer()
        # Categories are bins of the wrapped model's point predictions, so the
        # same categorizer can be applied to unseen objects at prediction time.
        if self.binning is None:
            categorizer.fit(X_calib, f=self._conformal.predict)
        else:
            categorizer.fit(X_calib, f=self._conformal.predict, no_bins=self.binning)
        return categorizer

    def predict(self, X):
        """Return the wrapped model's point predictions for ``X``."""
        return self._conformal.predict(X)

    def predict_proba(self, X):
        """Return the wrapped classifier's class probabilities for ``X``.

        Raises
        ------
        NotImplementedError
            If the wrapped model is not a classifier.
        """
        if self.estimator_type != "classifier":
            raise NotImplementedError("predict_proba is for classifiers only.")
        return self._conformal.predict_proba(X)

    def predict_conformal_set(self, X, confidence=None):
        """Return crepes prediction sets for ``X`` (classification only).

        ``confidence`` falls back to ``self.confidence_level`` when ``None``.

        Raises
        ------
        NotImplementedError
            If the wrapped model is not a classifier.
        """
        if self.estimator_type != "classifier":
            raise NotImplementedError("predict_conformal_set is only for classification.")
        conf = confidence if confidence is not None else self.confidence_level
        return self._conformal.predict_set(X, confidence=conf)

    def predict_p(self, X, **kwargs):
        """Return crepes p-values for ``X`` (classification only).

        Raises
        ------
        NotImplementedError
            If the wrapped model is not a classifier.
        """
        if self.estimator_type != "classifier":
            raise NotImplementedError("predict_p is only for classification.")
        return self._conformal.predict_p(X, **kwargs)

    def predict_int(self, X, confidence=None):
        """Return crepes prediction intervals for ``X`` (regression only).

        ``confidence`` falls back to ``self.confidence_level`` when ``None``.

        Raises
        ------
        NotImplementedError
            If the wrapped model is not a regressor.
        """
        if self.estimator_type != "regressor":
            # The message used to name a non-existent ``predict_interval``.
            raise NotImplementedError("predict_int is only for regression.")
        conf = confidence if confidence is not None else self.confidence_level
        return self._conformal.predict_int(X, confidence=conf)
class CrossConformalCV(BaseEstimator):
    """Cross-conformal prediction for classifiers and regressors.

    The data passed to ``fit`` is split into ``n_folds`` folds. For every
    fold a clone of ``estimator`` is wrapped in crepes' ``WrapClassifier`` or
    ``WrapRegressor``, trained on the remaining folds and calibrated on the
    held-out fold. Predictions aggregate the per-fold models.

    Parameters
    ----------
    estimator : sklearn-like estimator
        Model (or pipeline) to wrap; one clone is fitted per fold.
    n_folds : int, optional
        Number of cross-validation folds.
    confidence_level : float, optional
        Default confidence for prediction sets.
    mondrian : bool, callable or MondrianCategorizer, optional
        Truthy non-callable values enable Mondrian calibration:
        class-conditional for classifiers, binned on each fold model's
        predictions for regressors. A callable or ``MondrianCategorizer`` is
        passed to crepes as ``mc``.
    nonconformity : callable, optional
        Nonconformity function for classification; ``hinge`` when ``None``.
    binning : int or callable, optional
        Regression only, used with Mondrian calibration: number of bins for
        the prediction-based categorizer, or a callable used directly as the
        categorizer.
    estimator_type : {'classifier', 'regressor'}, optional
        Kind of model being wrapped.
    n_bins : int, optional
        Number of target bins used to stratify the folds in regression.
    difficulty_estimator : callable or DifficultyEstimator, optional
        Regression only: passed to crepes as ``de`` during calibration.
    random_state : int, optional
        Seed for the shuffled fold split (default: 42).
    kwargs : dict
        Extra keyword arguments, stored as ``self.kwargs``; not forwarded by
        this class.
    """

    def __init__(
        self,
        estimator,
        n_folds=5,
        confidence_level=0.9,
        mondrian=False,
        nonconformity=None,
        binning=None,
        estimator_type="classifier",
        n_bins=10,
        difficulty_estimator=None,
        random_state=42,
        **kwargs
    ):
        self.estimator = estimator
        self.n_folds = n_folds
        self.confidence_level = confidence_level
        self.mondrian = mondrian
        self.nonconformity = nonconformity
        self.binning = binning
        self.estimator_type = estimator_type
        self.n_bins = n_bins
        self.difficulty_estimator = difficulty_estimator
        self.random_state = random_state
        self.kwargs = kwargs

    def fit(self, X, y, **fit_params):
        """Fit and calibrate one wrapped model per cross-validation fold.

        ``fit_params`` is accepted for API compatibility but is not forwarded
        to the per-fold models.

        Raises
        ------
        ValueError
            If ``estimator_type`` is neither 'classifier' nor 'regressor'.
        """
        if self.estimator_type not in ("classifier", "regressor"):
            raise ValueError("estimator_type must be 'classifier' or 'regressor'")
        X = np.asarray(X)
        y = np.asarray(y)
        is_classifier = self.estimator_type == "classifier"
        wrapper_cls = WrapClassifier if is_classifier else WrapRegressor
        splitter, y_split = self._make_splitter(y)
        if is_classifier:
            # Sorted unique labels: the column order of per-class outputs.
            self.classes_ = np.unique(y)
        self.models_ = []
        for train_idx, calib_idx in splitter.split(X, y_split):
            model = wrapper_cls(clone(self.estimator))
            model.fit(X[train_idx], y[train_idx])
            self._calibrate_fold(model, X[calib_idx], y[calib_idx])
            self.models_.append(model)
        return self

    def _make_splitter(self, y):
        """Return the fold splitter and the labels it should stratify on."""
        fold_kwargs = {
            "n_splits": self.n_folds,
            "shuffle": True,
            "random_state": self.random_state,
        }
        if self.estimator_type == "classifier":
            return StratifiedKFold(**fold_kwargs), y
        y_binned = bin_targets(y, n_bins=self.n_bins)
        if y_binned.ndim == 1 and y_binned.size:
            occupied = np.bincount(y_binned)
            occupied = occupied[occupied > 0]
            # KFold ignores the labels passed to split(), so stratifying on
            # the binned targets needs StratifiedKFold. Only use it when every
            # occupied bin can contribute a member to each fold.
            if occupied.min() >= self.n_folds:
                return StratifiedKFold(**fold_kwargs), y_binned
        return KFold(**fold_kwargs), y_binned

    def _calibrate_fold(self, model, X_calib, y_calib):
        """Calibrate one fold model on its held-out calibration data."""
        mondrian = self.mondrian
        custom_mc = isinstance(mondrian, MondrianCategorizer) or callable(mondrian)
        if self.estimator_type == "classifier":
            nc = self.nonconformity if self.nonconformity is not None else hinge
            if custom_mc:
                model.calibrate(X_calib, y_calib, nc=nc, mc=mondrian)
            else:
                model.calibrate(X_calib, y_calib, nc=nc, class_cond=bool(mondrian))
            return
        calib_kwargs = {}
        if self.difficulty_estimator is not None:
            calib_kwargs["de"] = self.difficulty_estimator
        if custom_mc:
            calib_kwargs["mc"] = mondrian
        elif mondrian:
            calib_kwargs["mc"] = self._prediction_categorizer(model, X_calib)
        model.calibrate(X_calib, y_calib, **calib_kwargs)

    def _prediction_categorizer(self, model, X_calib):
        """Build a Mondrian categorizer that bins on the fold model's predictions."""
        if callable(self.binning):
            return self.binning
        categorizer = MondrianCategorizer()
        # The categorizer must be a function of the objects it is given. The
        # earlier ``lambda X: y[calib_idx]`` ignored its argument, returned the
        # calibration targets and late-bound the loop index, so it could not
        # categorize unseen objects at prediction time.
        if self.binning is None:
            categorizer.fit(X_calib, f=model.predict)
        else:
            categorizer.fit(X_calib, f=model.predict, no_bins=self.binning)
        return categorizer

    def predict(self, X):
        """Aggregate point predictions of the fold models.

        Classifiers use a majority vote; regressors use the mean, because the
        mode of continuous outputs is not a meaningful aggregate.
        """
        per_model = np.asarray([np.ravel(m.predict(X)) for m in self.models_])
        if self.estimator_type == "regressor":
            return per_model.mean(axis=0)
        return np.ravel(mode(per_model, axis=0, keepdims=False).mode)

    def predict_proba(self, X):
        """Average the class probabilities of the fold models.

        Raises
        ------
        NotImplementedError
            If the wrapped models are not classifiers.
        """
        if self.estimator_type != "classifier":
            raise NotImplementedError("predict_proba is for classifiers only.")
        per_model = np.asarray([m.predict_proba(X) for m in self.models_])
        if per_model.ndim == 2 and per_model.shape[1] == 2 and len(X) == 1:
            # Single sample, binary case: restore the sample axis.
            per_model = per_model[:, np.newaxis, :]
        proba = np.atleast_2d(np.mean(per_model, axis=0))
        if proba.shape[0] != len(X):
            # NOTE(review): best-effort fallback kept from the original code;
            # a row-count mismatch is reported as NaN rather than raised.
            proba = np.full((len(X), proba.shape[1]), np.nan)
        return proba

    def predict_conformal_set(self, X, confidence=None):
        """Return, per sample, the union of the fold models' prediction sets.

        ``confidence`` falls back to ``self.confidence_level`` when ``None``.
        Each fold's ``predict_set`` output is treated as a 0/1 indicator matrix
        over the class columns. The indicators are OR-ed across folds and
        mapped back to class labels.

        Returns
        -------
        list of list
            For every sample, the labels contained in at least one fold's set.

        Raises
        ------
        NotImplementedError
            If the wrapped models are not classifiers.
        """
        if self.estimator_type != "classifier":
            raise NotImplementedError("predict_conformal_set is only for classification.")
        conf = confidence if confidence is not None else self.confidence_level
        indicators = np.asarray(
            [np.atleast_2d(m.predict_set(X, confidence=conf)) for m in self.models_]
        )
        in_union = indicators.astype(bool).any(axis=0)
        return [self.classes_[row].tolist() for row in in_union]
All steps are NaN-safe and ready for direct use.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ab2b079b", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "## 1. Import Required Libraries and Define Utility Functions\n", + "import numpy as np\n", + "import pandas as pd\n", + "from molpipeline.any2mol import SmilesToMol\n", + "from molpipeline.error_handling import ErrorFilter, FilterReinserter\n", + "from molpipeline.mol2any.mol2morgan_fingerprint import MolToMorganFP\n", + "from molpipeline.pipeline import Pipeline\n", + "from molpipeline.post_prediction import PostPredictionWrapper\n", + "from molpipeline.experimental.uncertainty.conformal import UnifiedConformalCV, CrossConformalCV\n", + "from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor\n", + "from sklearn.model_selection import train_test_split, StratifiedKFold, KFold\n", + "from sklearn.metrics import (\n", + " log_loss, brier_score_loss, balanced_accuracy_score, roc_auc_score,\n", + " average_precision_score, f1_score, matthews_corrcoef\n", + ")\n", + "import matplotlib.pyplot as plt\n", + "\n", + "def compute_ece(y_true, probs, n_bins=10):\n", + " bins = np.linspace(0, 1, n_bins + 1)\n", + " binids = np.digitize(probs, bins) - 1\n", + " ece = 0.0\n", + " for i in range(n_bins):\n", + " mask = binids == i\n", + " if np.any(mask):\n", + " acc = np.mean(y_true[mask] == (probs[mask] >= 0.5))\n", + " conf = np.mean(probs[mask])\n", + " ece += np.abs(acc - conf) * np.sum(mask) / len(y_true)\n", + " return ece\n", + "\n", + "def compute_uncertainty_error_corr(y_true, probs):\n", + " eps = 1e-12\n", + " entropy = -probs * np.log(probs + eps) - (1 - probs) * np.log(1 - probs + eps)\n", + " error = np.abs(y_true - (probs >= 0.5))\n", + " return np.corrcoef(entropy, error)[0, 1]\n", + "\n", + "def compute_sharpness(probs):\n", + " eps = 1e-12\n", + " entropy = -probs * np.log(probs + eps) - (1 - probs) * np.log(1 - probs + eps)\n", + " return 
np.mean(entropy)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c2281174", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape of X=(138, 256), y_class=(138,), y_reg=(138,)\n" + ] + } + ], + "source": [ + "\n", + "\n", + "## 2. Data Loading, Cleaning, and Featurization\n", + "# Load real data\n", + "df = pd.read_csv(\"example_data/renin_harren.csv\")\n", + "smiles = df[\"pubchem_smiles\"].values\n", + "y_reg = df[\"pIC50\"].values\n", + "\n", + "# Binarize for classification: top 20% as 'active'\n", + "threshold = np.nanquantile(y_reg, 0.8)\n", + "y_class = (y_reg >= threshold).astype(int)\n", + "\n", + "# Featurization pipeline (NaN-safe)\n", + "error_filter = ErrorFilter(filter_everything=True)\n", + "error_replacer = FilterReinserter.from_error_filter(error_filter, fill_value=np.nan)\n", + "featurizer = Pipeline([\n", + " (\"smi2mol\", SmilesToMol()),\n", + " (\"error_filter\", error_filter),\n", + " (\"morgan\", MolToMorganFP(radius=2, n_bits=256, return_as=\"dense\")),\n", + " (\"error_replacer\", PostPredictionWrapper(error_replacer)),\n", + "], n_jobs=1)\n", + "X_feat = featurizer.transform(smiles)\n", + "\n", + "print(f\"Shape of X={X_feat.shape}, y_class={y_class.shape}, y_reg={y_reg.shape}\")\n", + "\n", + "\n", + "\n", + "## 3. 
Classification: Splitting, Model Benchmarking, and Conformal Prediction\n", + "# Train/test split for classification\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X_feat, y_class, test_size=0.3, random_state=42, stratify=y_class\n", + ")\n", + "skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)\n", + "\n", + "# Split for conformal pipeline (use SMILES)\n", + "smiles_train, smiles_test, y_train_cp, y_test_cp = train_test_split(\n", + " smiles, y_class, test_size=0.3, random_state=42, stratify=y_class\n", + ")\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "e4b28946", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "object", + "type": "string" + }, + { + "name": "ensemble_xgb", + "rawType": "float64", + "type": "float" + }, + { + "name": "CrossConformalCV", + "rawType": "float64", + "type": "float" + } + ], + "ref": "2a093951-f074-4655-86ae-6a19ffe410e7", + "rows": [ + [ + "NLL", + "0.6005578592752531", + "0.4523152437780237" + ], + [ + "ECE", + "0.6421230924094008", + "0.5534285714285716" + ], + [ + "Brier", + "0.19658344924590318", + "0.150788" + ], + [ + "Uncertainty Error Correlation", + "0.20310534876453837", + "0.3572542579666429" + ], + [ + "Sharpness", + "0.3291429281234741", + "0.4779567203583455" + ], + [ + "Balanced Accuracy", + "0.6868686868686869", + "0.6212121212121212" + ], + [ + "AUROC", + "0.7255892255892256", + "0.771043771043771" + ], + [ + "AUPRC", + "0.3703203415170961", + "0.4412237544590486" + ], + [ + "F1 Score", + "0.5", + "0.4" + ], + [ + "MCC", + "0.34879284277296124", + "0.2842676218074806" + ] + ], + "shape": { + "columns": 2, + "rows": 10 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Modelensemble_xgbCrossConformalCV
NLL0.6005580.452315
ECE0.6421230.553429
Brier0.1965830.150788
Uncertainty Error Correlation0.2031050.357254
Sharpness0.3291430.477957
Balanced Accuracy0.6868690.621212
AUROC0.7255890.771044
AUPRC0.3703200.441224
F1 Score0.5000000.400000
MCC0.3487930.284268
\n", + "
" + ], + "text/plain": [ + "Model ensemble_xgb CrossConformalCV\n", + "NLL 0.600558 0.452315\n", + "ECE 0.642123 0.553429\n", + "Brier 0.196583 0.150788\n", + "Uncertainty Error Correlation 0.203105 0.357254\n", + "Sharpness 0.329143 0.477957\n", + "Balanced Accuracy 0.686869 0.621212\n", + "AUROC 0.725589 0.771044\n", + "AUPRC 0.370320 0.441224\n", + "F1 Score 0.500000 0.400000\n", + "MCC 0.348793 0.284268" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "### 3.1 Benchmarking Standard Models\n", + "\n", + "from xgboost import XGBClassifier\n", + "\n", + "model_dict = {\n", + " \"ensemble_xgb\": XGBClassifier(eval_metric='logloss', random_state=42),\n", + "}\n", + "metrics_list = [\n", + " \"NLL\", \"ECE\", \"Brier\", \"Uncertainty Error Correlation\", \"Sharpness\",\n", + " \"Balanced Accuracy\", \"AUROC\", \"AUPRC\", \"F1 Score\", \"MCC\"\n", + "]\n", + "results = []\n", + "\n", + "for model_name, model in model_dict.items():\n", + " probs = []\n", + " preds = []\n", + " for train_idx, _ in skf.split(X_train, y_train):\n", + " model.fit(X_train[train_idx], y_train[train_idx])\n", + " prob = model.predict_proba(X_test)\n", + " pred = model.predict(X_test)\n", + " probs.append(prob)\n", + " preds.append(pred)\n", + " probs = np.stack(probs)\n", + " preds = np.stack(preds)\n", + " mean_probs = probs.mean(axis=0)\n", + " mean_pred = np.round(mean_probs[:, 1]).astype(int)\n", + " y_true = y_test\n", + " p1 = mean_probs[:, 1]\n", + " metrics = {\n", + " \"Model\": model_name,\n", + " \"NLL\": log_loss(y_true, p1),\n", + " \"ECE\": compute_ece(y_true, p1),\n", + " \"Brier\": brier_score_loss(y_true, p1),\n", + " \"Uncertainty Error Correlation\": compute_uncertainty_error_corr(y_true, p1),\n", + " \"Sharpness\": compute_sharpness(p1),\n", + " \"Balanced Accuracy\": balanced_accuracy_score(y_true, mean_pred),\n", + " \"AUROC\": roc_auc_score(y_true, p1),\n", + " \"AUPRC\": average_precision_score(y_true, p1),\n", + " \"F1 
Score\": f1_score(y_true, mean_pred),\n", + " \"MCC\": matthews_corrcoef(y_true, mean_pred)\n", + " }\n", + " results.append(metrics)\n", + "\n", + "\n", + "\n", + "### 3.2 Conformal Prediction (CrossConformalCV)\n", + "\n", + "rf = RandomForestClassifier(n_estimators=100, random_state=42)\n", + "rf_pipeline = Pipeline([\n", + " (\"featurizer\", featurizer),\n", + " (\"rf\", rf)\n", + "], n_jobs=1)\n", + "cc_clf = CrossConformalCV(\n", + " estimator=rf_pipeline,\n", + " n_folds=5,\n", + " confidence_level=0.9,\n", + " estimator_type=\"classifier\"\n", + ")\n", + "cc_clf.fit(smiles_train, y_train_cp)\n", + "probs_cp_ensemble = np.mean([m.predict_proba(smiles_test) for m in cc_clf.models_], axis=0)\n", + "mean_pred_cp = np.argmax(probs_cp_ensemble, axis=1)\n", + "y_true_cp = y_test_cp\n", + "p1_cp = probs_cp_ensemble[:, 1]\n", + "p1_cp = p1_cp / (p1_cp + (1 - p1_cp)) # Normalize to [0, 1]\n", + "metrics_cp = {\n", + " \"Model\": \"CrossConformalCV\",\n", + " \"NLL\": log_loss(y_true_cp, p1_cp),\n", + " \"ECE\": compute_ece(y_true_cp, p1_cp),\n", + " \"Brier\": brier_score_loss(y_true_cp, p1_cp),\n", + " \"Uncertainty Error Correlation\": compute_uncertainty_error_corr(y_true_cp, p1_cp),\n", + " \"Sharpness\": compute_sharpness(p1_cp),\n", + " \"Balanced Accuracy\": balanced_accuracy_score(y_true_cp, mean_pred_cp),\n", + " \"AUROC\": roc_auc_score(y_true_cp, p1_cp),\n", + " \"AUPRC\": average_precision_score(y_true_cp, p1_cp),\n", + " \"F1 Score\": f1_score(y_true_cp, mean_pred_cp),\n", + " \"MCC\": matthews_corrcoef(y_true_cp, mean_pred_cp)\n", + "}\n", + "results.append(metrics_cp)\n", + "\n", + "results_df = pd.DataFrame(results).set_index(\"Model\").T\n", + "display(results_df)\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2bcaf7d7", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAq4AAAGJCAYAAABLvrEVAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAWkdJREFUeJzt3Xlcjen/P/DXaTvtRVpJRQnZ920sI7IvY5R1iswY+zIM5mOJkDGYLMkwKoyRbZiZkIlhhuxLltFkC419hgrRev3+8Dv319Gicup083o+HufxcO/v+75OenWd676PQgghQERERERUxulouwAiIiIiosJgcCUiIiIiWWBwJSIiIiJZYHAlIiIiIllgcCUiIiIiWWBwJSIiIiJZYHAlIiIiIllgcCUiIiIiWWBwJSIiIiJZYHAles85OzvDz89Pmj548CAUCgUOHjyotZpe93qN2ubn5wdTU1ON7lOhUGD06NFvXC8iIgIKhQI3btyQ5rVt2xZt27aVpm/cuAGFQoGIiIhCHzsgIKBoBZey0nxfBgQEQKFQqM0rbPtoQl5tTEQvMbgSaZHqF5TqZWhoiGrVqmH06NG4f/++tssrkt27d2s9/Lx6LXV0dODg4ICOHTuWqRCuLWWhfVRUwVr10tfXR4UKFdCiRQt89dVXuHXrlsaONX/+fOzcuVNj+9OkslwbUVnF4EpUBsyZMwcbNmzAihUr0KJFC4SGhqJ58+ZIS0sr9Vpat26N58+fo3Xr1kXabvfu3Zg9e3YJVVV4HTp0wIYNG7Bu3Tp8/vnnOH/+PD788EPs2bNH26VpxODBg/H8+XM4OTnlu46TkxOeP3+OwYMHS/MKap/nz59j+vTpGq/1Tfr3748NGzZg7dq1mDFjBqpUqYLg4GDUqFEDkZGRausW931ZnHA4ffp0PH/+vEjbFEd+tRWmjYneV3raLoCIgM6dO6NRo0YAgGHDhsHKygpLlizBzz//jP79++e5zbNnz2BiYqLxWnR0dGBoaKjx/ZaWatWqYdCgQdJ07969UadOHQQHB6Nz5855bvPixQsYGBhAR6fs/y2vq6sLXV3dAtdR9d4Xlrbau0GDBmptBQA3b95Ex44d4evrixo1aqBu3boASud9qfqZ0tPTg56e9n49FqaNid5XZf9/aaL30IcffggASExMBPB/YyqvXbuGLl26wMzMDAMHDgQA5OTkIDg4GB4eHjA0NIStrS2GDx+Ox48fq+1TCIG5c+eiUqVKMDY2Rrt27fDXX3/lOnZ+YwmPHz+OLl26oFy5cjAxMUGdOnWwdOlSqb6QkBAA6h/Xq2i6xqKoXbs2KlSoIF1L1flFRkZi+vTpqFixIoyNjZGamgoA2Lp1Kxo2bAgjIyNUqFABgwYNwu3bt/Pc9/Xr1+Hl5QUTExM4ODhgzpw5EEKorbNo0SK0aNECVlZWMDIyQsOGDbFt27Z86924cSPc3d1haGiIhg0b4s8//1RbXpjxj6+PcX1T++Q1xvX27dsYOnQobG1toVQq4eHhgbCwsFzHWr58OTw8PGBsbIxy5cqhUaNG+PHHH/Ot7U2cnJwQERGBjIwMLFy4UJqf1/vyypUr6NOnD+zs7GBoaIhKlSqhX79+SElJkc7r2bNnWLdunXTOqrHSqnGsly5dwoABA1CuXDm0atVKbVle3tQ+fn5+cHZ2zrXd6/ssqLb82njlypXw8PCAUqmEg4MDRo0aheTkZLV12rZti1q1auHSpUto164djI2NUbFiRbVrSSRn7HElKoOuXbsGALCyspLmZWVlwcvLC61atcKiRYtgbGwMABg+fDgiIiIwZMgQjB07FomJiVixYgXOnj2L2NhY6OvrAwBmzpyJuXPnokuXLujSpQvOnDmDjh07IiMj4431xMTEoFu3brC3t8e4ceNgZ2eH+Ph4REVFYdy4cRg+fDju3LmDmJgYbNiwIdf2pVFjfh4/fozHjx/D1dVVbX5gYCAMDAwwadIkpKenw8DAQKqxceP
GCAoKwv3797F06VLExsbi7NmzsLS0lLbPzs5Gp06d0KxZMyxcuBDR0dGYNWsWsrKyMGfOHGm9pUuXokePHhg4cCAyMjIQGRmJvn37IioqCl27dlWr6Y8//sDmzZsxduxYKJVKrFy5Ep06dcKJEydQq1atYl+DN7XP6+7fv49mzZpJNyRZW1tjz5498Pf3R2pqKsaPHw8AWLNmDcaOHYuPP/4Y48aNw4sXL3D+/HkcP34cAwYMKHa9zZs3R9WqVRETE5PvOhkZGfDy8kJ6ejrGjBkDOzs73L59G1FRUUhOToaFhQU2bNiAYcOGoUmTJvjss88AAFWrVlXbT9++feHm5ob58+fn+qPjdZpsn8LU9qqAgADMnj0bnp6eGDFiBBISEhAaGoqTJ0+q/QwBL9/znTp1wkcffQRvb29s27YNU6ZMQe3atfP91IFINgQRaU14eLgAIPbt2ycePnwokpKSRGRkpLCyshJGRkbin3/+EUII4evrKwCIqVOnqm1/6NAhAUBs3LhRbX50dLTa/AcPHggDAwPRtWtXkZOTI6331VdfCQDC19dXmnfgwAEBQBw4cEAIIURWVpZwcXERTk5O4vHjx2rHeXVfo0aNEnn9l1ISNeYHgPD39xcPHz4UDx48EMePHxft27cXAMTixYvVzq9KlSoiLS1N2jYjI0PY2NiIWrVqiefPn0vzo6KiBAAxc+ZMaZ6qPcaMGaN2Lbp27SoMDAzEw4cPpfmvHkN1nFq1aokPP/wwV+0AxKlTp6R5N2/eFIaGhqJ3797SPNV7JjExUZrXpk0b0aZNG2k6MTFRABDh4eHSvPzaR3XsWbNmSdP+/v7C3t5e/Pvvv2rr9evXT1hYWEjn1LNnT+Hh4ZHnPguiqu+bb77Jd52ePXsKACIlJUUIkft9efbsWQFAbN26tcBjmZiY5PnemTVrlgAg+vfvn++yVxW2fXx9fYWTk1Oh9plfba+3sepno2PHjiI7O1tab8WKFQKACAsLk+a1adNGABDr16+X5qWnpws7OzvRp0+fXMcikhsOFSAqAzw9PWFtbQ1HR0f069cPpqam2LFjBypWrKi23ogRI9Smt27dCgsLC3To0AH//vuv9GrYsCFMTU1x4MABAMC+ffuQkZGBMWPGqH1cqeo5K8jZs2eRmJiI8ePHq/U4Asj349TSrvFVa9euhbW1NWxsbNC0aVPExsZi4sSJufbj6+sLIyMjafrUqVN48OABRo4cqTaWsmvXrqhevTp27dqV61ivPh5J1TuZkZGBffv2SfNfPcbjx4+RkpKCDz74AGfOnMm1v+bNm6Nhw4bSdOXKldGzZ0/s3bsX2dnZRboOxSWEwPbt29G9e3cIIdTazMvLCykpKVLtlpaW+Oeff3Dy5EmN16F63NiTJ0/yXG5hYQEA2Lt371vdxPj5558Xel1ttY/qZ2P8+PFq47A//fRTmJub53pvmpqaqo0dNjAwQJMmTXD9+vUSq5GotHCoAFEZEBISgmrVqkFPTw+2trZwd3fPdaOQnp4eKlWqpDbvypUrSElJgY2NTZ77ffDgAYCXN7wAgJubm9pya2trlCtXrsDaVMMWivtRdWnU+KqePXti9OjRUCgUMDMzg4eHR543sbm4uKhNq47v7u6ea93q1avj8OHDavN0dHRQpUoVtXnVqlUDALWxiVFRUZg7dy7i4uKQnp4uzc8r9L9+7qp9pqWl4eHDh7Czs8u1XNMePnyI5ORkrF69GqtXr85zHVWbTZkyBfv27UOTJk3g6uqKjh07YsCAAWjZsuVb1/H06VMAgJmZWZ7LXVxcMHHiRCxZsgQbN27EBx98gB49emDQoEFSqC2M198HBdFW++T33jQwMECVKlWk5SqVKlXK9f4qV64czp8/XyL1EZUmBleiMqBJkybSUwXyo1Qqc4XZnJwc2NjYYOPGjXluY21trbEai6u0a6xUqRI8PT3fuN6rPaEl5dChQ+jRowdat26NlStXwt7eHvr6+ggPD3+
rG5hKUk5ODgBg0KBB8PX1zXOdOnXqAABq1KiBhIQEREVFITo6Gtu3b8fKlSsxc+bMt3402sWLF2FjYwNzc/N811m8eDH8/Pzw888/47fffsPYsWMRFBSEY8eO5fojLz+afh/k9ylEafWYA8j3iQTiDWN4ieSAwZVIxqpWrYp9+/ahZcuWBf4CVj0P8sqVK2q9hA8fPsx1Z39exwBeBomCAmF+v7BLo0ZNUB0/ISFBeqqDSkJCQq5naubk5OD69etSLysAXL58GQCku8q3b98OQ0ND7N27F0qlUlovPDw8zxquXLmSa97ly5dhbGz81gG/MMM6gJd/SJiZmSE7O7tQfwCYmJjAx8cHPj4+yMjIwEcffYR58+Zh2rRpxX581dGjR3Ht2rVcj8rKS+3atVG7dm1Mnz4dR44cQcuWLbFq1SrMnTsXQOHPuzAK0z7lypXLdac/gFy9okWp7dX35qs/GxkZGUhMTCxUOxG9KzjGlUjGvL29kZ2djcDAwFzLsrKypF+gnp6e0NfXx/Lly9V6XYKDg994jAYNGsDFxQXBwcG5fiG/ui/Vx/Gvr1MaNWpCo0aNYGNjg1WrVql9pL9nzx7Ex8fnegIAAKxYsUL6txACK1asgL6+Ptq3bw/gZc+XQqFQ6227ceNGvg/EP3r0qNrY16SkJPz888/o2LHjWz/XM7/2eZ2uri769OmD7du34+LFi7mWP3z4UPr3f//9p7bMwMAANWvWhBACmZmZxarz5s2b8PPzg4GBASZPnpzveqmpqcjKylKbV7t2bejo6Ki1n4mJyRvPubAK0z5Vq1ZFSkqK2sfyd+/exY4dO3Ltr7C1eXp6wsDAAMuWLVP72Vi7di1SUlLyfG8SvavY40okY23atMHw4cMRFBSEuLg4dOzYEfr6+rhy5Qq2bt2KpUuX4uOPP4a1tTUmTZqEoKAgdOvWDV26dMHZs2exZ88eVKhQocBj6OjoIDQ0FN27d0e9evUwZMgQ2Nvb4++//8Zff/2FvXv3AoB008rYsWPh5eUFXV1d9OvXr1Rq1AR9fX18/fXXGDJkCNq0aYP+/ftLj8NydnbGhAkT1NY3NDREdHQ0fH190bRpU+zZswe7du3CV199JfW+de3aFUuWLEGnTp0wYMAAPHjwACEhIXB1dc1zvGGtWrXg5eWl9rglABr5RrL82icvCxYswIEDB9C0aVN8+umnqFmzJh49eoQzZ85g3759ePToEQCgY8eOsLOzQ8uWLWFra4v4+HisWLECXbt2zXds6qvOnDmDH374ATk5OUhOTsbJkyexfft2KBQKbNiwQRqSkJfff/8do0ePRt++fVGtWjVkZWVhw4YNUvB+9bz37duHJUuWwMHBAS4uLmjatGlRLp2kMO3Tr18/TJkyBb1798bYsWORlpaG0NBQVKtWLdcNeYWtzdraGtOmTcPs2bPRqVMn9OjRAwkJCVi5ciUaN25cqJ5poneG1p5nQETSY29OnjxZ4Hq+vr7CxMQk3+WrV68WDRs2FEZGRsLMzEzUrl1bfPnll+LOnTvSOtnZ2WL27NnC3t5eGBkZibZt24qLFy8KJyenAh+HpXL48GHRoUMHYWZmJkxMTESdOnXE8uXLpeVZWVlizJgxwtraWigUilyP/tFkjfkBIEaNGlXgOqrzy+8xSps3bxb169cXSqVSlC9fXgwcOFB6LJmKqj2uXbsmOnbsKIyNjYWtra2YNWuW2uOKhBBi7dq1ws3NTSiVSlG9enURHh6e7+OWRo0aJX744Qdp/fr16+dqh+I+Dqug9sFrj8MSQoj79++LUaNGCUdHR6Gvry/s7OxE+/btxerVq6V1vvvuO9G6dWthZWUllEqlqFq1qpg8ebL0CKv8qOpTvfT09ET58uVF06ZNxbRp08TNmzdzbfP6+/L69eti6NChomrVqsLQ0FCUL19etGvXTuzbt09tu7///lu0bt1aGBkZqT1WTdUGrz66TOVt2kcIIX777TdRq1YtYWBgINz
d3cUPP/yQ5z7zqy2vNhbi5eOvqlevLvT19YWtra0YMWJErkfUtWnTJs9HlOX3mC4iuVEIwdHaRERERFT2cYwrEREREckCgysRERERyQKDKxERERHJAoMrEREREckCgysRERERyQKDKxERERHJwjv/BQQ5OTm4c+cOzMzMNPrVf0RERESkGUIIPHnyBA4ODtDRyb9f9Z0Prnfu3IGjo6O2yyAiIiKiN0hKSkKlSpXyXf7OB1fV1w4mJSXB3Nxcy9UQERER0etSU1Ph6Oj4xq+LfueDq2p4gLm5OYMrERERURn2pmGdvDmLiIiIiGSBwZWIiIiIZIHBlYiIiIhk4Z0f40pEROqEEMjKykJ2dra2SyGi94Suri709PTe+tGkDK5ERO+RjIwM3L17F2lpadouhYjeM8bGxrC3t4eBgUGx98HgSkT0nsjJyUFiYiJ0dXXh4OAAAwMDfjELEZU4IQQyMjLw8OFDJCYmws3NrcAvGSgIgysR0XsiIyMDOTk5cHR0hLGxsbbLIaL3iJGREfT19XHz5k1kZGTA0NCwWPvhzVlERO+Z4vZ0EBG9DU3838P/vYiIiIhIFhhciYiIiEgWGFyJiIhkQAiBzz77DOXLl4dCoUBcXJy2S8rFz88PvXr10nYZb02hUGDnzp1vtY+AgADUq1evwHVev15t27bF+PHjpWlnZ2cEBwe/9XHeJbw5qyT86FO6xxuwuXSPR0TvHP+Ik6V6vLV+jYu8zb179zBv3jzs2rULt2/fho2NDerVq4fx48ejffv2JVBl2aorOjoaEREROHjwIKpUqYIKFSpobN8lqaDr88EHH8DBwQGTJk3C1KlTc20bGBiIFStW4J9//oG+vn6u5a8+FcPc3By1atVCYGAgPvzwwxI9J01ZunQphBD5Lj958iRMTEykaYVCgR07dqiF3UmTJmHMmDElWWaZwh5XIiIq827cuIGGDRvi999/xzfffIMLFy4gOjoa7dq1w6hRo/LdLjMzs0zWVRzXrl2Dvb09WrRoATs7O+jpFb3vSfXlE6XlTdfHwMAAgwYNQnh4eJ61RkRE4JNPPskztKqEh4fj7t27iI2NRYUKFdCtWzdcv349z3VL+v1QVBYWFrC0tMx3ubW19RufAGJqagorKysNV1Z2MbgSEVGZN3LkSCgUCpw4cQJ9+vRBtWrV4OHhgYkTJ+LYsWPSegqFAqGhoejRowdMTEwwb948AEBoaCiqVq0KAwMDuLu7Y8OGDdI2QggEBASgcuXKUCqVcHBwwNixY6XlK1euhJubGwwNDWFra4uPP/64yHXdunULPXv2hKmpKczNzeHt7Y379+9Ly1Uf927YsAHOzs6wsLBAv3798OTJEwAvP1IeM2YMbt26BYVCAWdnZwBAeno6xo4dCxsbGxgaGqJVq1Y4efL/es8PHjwIhUKBPXv2oGHDhlAqlTh8+DDatm2LMWPGYPz48ShXrhxsbW2xZs0aPHv2DEOGDIGZmRlcXV2xZ88eaV/Z2dnw9/eHi4sLjIyM4O7ujqVLl751u/n7++Py5cs4fPiw2rZ//PEHrl+/Dn9//wKPYWlpCTs7O9SqVQuhoaF4/vw5YmJiABTv/aBy9+5ddO7cGUZGRqhSpQq2bdumtnzKlCmoVq0ajI2NUaVKFcyYMSPPYPzdd99Jj6Dz9vZGSkqKtOxNQyteHSqgavPevXurvQfyGirw/fffo0aNGjA0NET16tWxcuVKaVlGRgZGjx4Ne3t7GBoawsnJCUFBQfnWUNYwuBIRUZn26NEjREdHY9SoUWofm6q83mMVEBCA3r1748KFCxg6dCh27NiBcePG4YsvvsDFixcxfPhwDBkyBAcOHAAAbN++Hd9++y2+++47XLlyBTt37kTt2rUBAKdOncLYsWMxZ84cJCQkIDo6Gq1bty5SXTk5OejZsycePXqEP/74AzExMbh+/Tp8fNSHlV27dg07d+5EVFQUoqKi8Mcff2DBggUAXn6kPGfOHFSqVAl
3796VwumXX36J7du3Y926dThz5gxcXV3h5eWFR48eqe176tSpWLBgAeLj41GnTh0AwLp161ChQgWcOHECY8aMwYgRI9C3b1+0aNECZ86cQceOHTF48GDpW9ZycnJQqVIlbN26FZcuXcLMmTPx1VdfYcuWLW/VbrVr10bjxo0RFhamtjw8PBwtWrRA9erV89x/XoyMjAC8DGcqRX0/qMyYMQN9+vTBuXPnMHDgQPTr1w/x8fHScjMzM0RERODSpUtYunQp1qxZg2+//VZtH1evXsWWLVvw66+/Ijo6GmfPnsXIkSMLfT6vUrW5qof51T9QXrVx40bMnDkT8+bNQ3x8PObPn48ZM2Zg3bp1AIBly5bhl19+wZYtW5CQkICNGzdKIVgOOMaViIjKtKtXr0IIUegAM2DAAAwZMkSa7t+/P/z8/KTAoOrtW7RoEdq1a4dbt27Bzs4Onp6e0NfXR+XKldGkSRMAL3tKTUxM0K1bN5iZmcHJyQn169cvUl379+/HhQsXkJiYCEdHRwDA+vXr4eHhgZMnT6Jx45fjfXNychAREQEzMzMAwODBg7F//37MmzcPFhYWMDMzg66uLuzs7AAAz549Q2hoKCIiItC5c2cAwJo1axATE4O1a9di8uTJUg1z5sxBhw4d1OqqW7cupk+fDgCYNm0aFixYgAoVKuDTTz8FAMycOROhoaE4f/48mjVrBn19fcyePVva3sXFBUePHsWWLVvg7e2d67yL0m7+/v6YNGkSli1bBlNTUzx58gTbtm3DsmXL3ritSlpaGqZPnw5dXV20adNGml/U94NK3759MWzYMAAvx9rGxMRg+fLlUu+l6toBL3tDJ02ahMjISHz55ZfS/BcvXmD9+vWoWLEiAGD58uXo2rUrFi9eLLVjYVlbWwP4vx7m/MyaNQuLFy/GRx99BOBlO126dAnfffcdfH19cevWLbi5uaFVq1ZQKBRwcnIqUh3axh5XIiIq0wq6eSUvjRo1UpuOj49Hy5Yt1ea1bNlS6j3r27cvnj9/jipVquDTTz/Fjh07pHGgHTp0gJOTE6pUqYLBgwdj48aNUg9kYeuKj4+Ho6OjFFoBoGbNmrC0tFTrwXN2dpZCKwDY29vjwYMH+e732rVryMzMVDs3fX19NGnSRG2/eV0TAFLPKwDo6urCyspK6mkGAFtbWwBQqyEkJAQNGzaEtbU1TE1NsXr1aty6dSvP+orSbv3790d2drbUe7t582bo6Ojk6pXOb1tTU1OYmZlh+/btWLt2rdq5FfX9oNK8efNc06+us3nzZrRs2RJ2dnYwNTXF9OnTc12LypUrS6FVtY+cnBwkJCS88byK49mzZ7h27Rr8/f1hamoqvebOnYtr164BeDk8IS4uDu7u7hg7dix+++23EqmlpDC4EhFRmebm5gaFQoG///67UOvn9bF0QRwdHZGQkICVK1fCyMgII0eOROvWrZGZmQkzMzOcOXMGmzZtgr29PWbOnIm6desiOTm5yHW9yes3ICkUCuTk5Ghk33ldk7yO9+o81R37qhoiIyMxadIk+Pv747fffkNcXByGDBmi9rH8q4pyfczNzfHxxx9LN2mFh4fD29sbpqamb9z222+/RVxcHO7du4d79+7B19dXbXlR3w+FcfToUQwcOBBdunRBVFQUzp49i//973/5XovS8vTpUwAve97j4uKk18WLF6UxxQ0aNEBiYiICAwPx/PlzeHt7q43bLusYXImIqEwrX748vLy8EBISgmfPnuVanpycXOD2NWrUQGxsrNq82NhY1KxZU5o2MjJC9+7dsWzZMhw8eBBHjx7FhQsXAAB6enrw9PTEwoULcf78edy4cQO///57oeuqUaMGkpKSkJSUJC27dOkSkpOT1WooKtXNRa+eW2ZmJk6ePPlW+81PbGwsWrRogZEjR6J+/fpwdXWVevHyUtR28/f3x+HDhxEVFYUjR4688aYsFTs7O7i6ukofpb9JYd4PANRurlNN16hRAwB
w5MgRODk54X//+x8aNWoENzc33Lx5M9exbt26hTt37qjtQ0dHB+7u7oWq9XX6+vrIzs7Od7mtrS0cHBxw/fp1uLq6qr1cXFyk9czNzeHj44M1a9Zg8+bN2L59e65x0WUVx7gSEVGZFxISgpYtW6JJkyaYM2cO6tSpg6ysLMTExCA0NDTXx7yvmjx5Mry9vVG/fn14enri119/xU8//YR9+/YBACIiIpCdnY2mTZvC2NgYP/zwA4yMjODk5ISoqChcv34drVu3Rrly5bB7927k5ORIwaMwdXl6eqJ27doYOHAggoODkZWVhZEjR6JNmzZ5foRfWCYmJhgxYgQmT56M8uXLo3Llyli4cCHS0tIKHfqKws3NDevXr8fevXvh4uKCDRs24OTJk2qB6HVFabfWrVvD1dUVn3zyCapXr44WLVpo/ByAN78fVLZu3YpGjRqhVatW2LhxI06cOIG1a9dK1+LWrVuIjIxE48aNsWvXLuzYsSPXsQwNDeHr64tFixYhNTUVY8eOhbe3d5HHt6o4Oztj//79aNmyJZRKJcqVK5drndmzZ2Ps2LGwsLBAp06dkJ6ejlOnTuHx48eYOHEilixZAnt7e9SvXx86OjrYunUr7OzsCnwsV1nC4EpERMX6QoDSVKVKFZw5cwbz5s3DF198gbt378La2hoNGzZEaGhogdv26tULS5cuxaJFizBu3Di4uLggPDwcbdu2BfDyZpcFCxZg4sSJyM7ORu3atfHrr7/CysoKlpaW+OmnnxAQEIAXL17Azc0NmzZtgoeHR6HrUigU+PnnnzFmzBi0bt0aOjo66NSpE5YvX/7W12XBggXIycnB4MGD8eTJEzRq1Ah79+7NM9C8reHDh+Ps2bPw8fGBQqFA//79MXLkSLVHZr2uKO2mUCgwdOhQfPXVV5g2bZrG61d50/tBZfbs2YiMjMTIkSNhb2+PTZs2Sb2yPXr0wIQJEzB69Gikp6eja9eumDFjBgICAtT24erqio8++ghdunTBo0eP0K1bN7VHUxXV4sWLMXHiRKxZswYVK1bEjRs3cq0zbNgwGBsb45tvvsHkyZNhYmKC2rVrS9/IZWZmhoULF+LKlSvQ1dVF48aNsXv3bujoyONDeIUo6qh3mUlNTYWFhQVSUlJgbm5eOgflN2cRURn04sULJCYmwsXFBYaGhtouh4jeMwX9H1TYvCaPeE1ERERE7z0GVyIiIiKSBQZXIiIiIpIFBlciIiIikgUGVyIiIiKSBQZXIiIiIpIFBlciIiIikgUGVyIiIiKSBa0G1z///BPdu3eHg4MDFAoFdu7cKS3LzMzElClTULt2bZiYmMDBwQGffPKJ2nf+EhEREdH7Q6tf+frs2TPUrVsXQ4cOxUcffaS2LC0tDWfOnMGMGTNQt25dPH78GOPGjUOPHj1w6tQpLVVMRPSO4jf+vRcUCgV27NiBXr165bn8xo0bcHFxwdmzZ1GvXr1Sra2onJ2dMX78eOmrTN8kICAAO3fuRFxcXInW9TpNXVM/Pz8kJyerdfK9rm3btqhXrx6Cg4MB5L5Gb2r/wh5Hm7Ta49q5c2fMnTsXvXv3zrXMwsICMTEx8Pb2hru7O5o1a4YVK1bg9OnTuHXrlhaqJSIibfHz84NCoZBeVlZW6NSpE86fP6+xYwQEBBQqWAQEBKjVonpVr15dY7XI2cGDB6FQKFCuXDm8ePFCbdnJkyel61XW3LhxI9d7rGPHjjh79qy2Syu0n376CYGBgfkuv3v3Ljp37gzg/8739SC/dOlSRERElGCVb0dWY1xTUlKgUChgaWmZ7zrp6elITU1VexERkfx16tQJd+/exd27d7F//37o6emhW7duWqnFw8NDqkX1Onz4sFZqKavMzMywY8cOtXlr165F5cqVtVRR4ezbtw93797F3r178fTpU3Tu3BnJycl5rpuZmVm6xb1B+fLlYWZmlu9yOzs7KJXKAvdhYWFRYM7SNtkE1xcvXmDKlCno378/zM3
N810vKCgIFhYW0svR0bEUqyQiopKiVCphZ2cHOzs71KtXD1OnTkVSUhIePnworZOUlARvb29YWlqifPny6NmzJ27cuCEtP3jwIJo0aQITExNYWlqiZcuWuHnzJiIiIjB79mycO3dO6nErqNdJT09PqkX1qlChgrTc2dkZ8+fPx9ChQ2FmZobKlStj9erV0vKMjAyMHj0a9vb2MDQ0hJOTE4KCgqTlycnJGDZsGKytrWFubo4PP/wQ586dk5areofDwsJQuXJlmJqaYuTIkcjOzsbChQthZ2cHGxsbzJs3L1ftql43IyMjVKlSBdu2bSvwul+8eBGdO3eGqakpbG1tMXjwYPz7778FbgMAvr6+CAsLk6afP3+OyMhI+Pr65lp3+/bt8PDwgFKphLOzMxYvXqy2/MGDB+jevTuMjIzg4uKCjRs35trHm65ZYVlZWcHOzg6NGjXCokWLcP/+fRw/flzqody8eTPatGkDQ0NDbNy4ETk5OZgzZw4qVaoEpVKJevXqITo6Otd+//77b7Ro0QKGhoaoVasW/vjjD2lZdnY2/P394eLiAiMjI7i7u2Pp0qV51jd79mzpHD///HNkZGRIy9q2bVvg0IlX7ydycXEBANSvXx8KhQJt27YF8PLTjVeHEuTk5CAoKEiqrW7dumrvmcePH2PgwIGwtraGkZER3NzcEB4e/qbLXGyyCK6ZmZnw9vaGEAKhoaEFrjtt2jSkpKRIr6SkpFKqkoiISsvTp0/xww8/wNXVFVZWVgBe/q7w8vKCmZkZDh06hNjYWJiamqJTp07IyMhAVlYWevXqhTZt2uD8+fM4evQoPvvsMygUCvj4+OCLL75Q60n18Xm7cb+LFy9Go0aNcPbsWYwcORIjRoxAQkICAGDZsmX45ZdfsGXLFiQkJGDjxo1wdnaWtu3bty8ePHiAPXv24PTp02jQoAHat2+PR48eSetcu3YNe/bsQXR0NDZt2oS1a9eia9eu+Oeff/DHH3/g66+/xvTp03H8+HG1umbMmIE+ffrg3LlzGDhwIPr164f4+Pg8zyE5ORkffvgh6tevj1OnTiE6Ohr379+Ht7f3G89/8ODBOHTokDS8b/v27XB2dkaDBg3U1jt9+jS8vb3Rr18/XLhwAQEBAZgxY4baHw5+fn5ISkrCgQMHsG3bNqxcuRIPHjxQ209hrllRGRkZAYBaOJw6dSrGjRuH+Ph4eHl5YenSpVi8eDEWLVqE8+fPw8vLCz169MCVK1fU9jV58mR88cUXOHv2LJo3b47u3bvjv//+A/AyHFaqVAlbt27FpUuXMHPmTHz11VfYsmWL2j7279+P+Ph4HDx4EJs2bcJPP/2E2bNnF+vcTpw4AeD/eph/+umnPNcLCgrC+vXrsWrVKvz111+YMGECBg0aJAXvGTNm4NKlS9izZw/i4+MRGhqq9kecpmn15qzCUIXWmzdv4vfffy+wtxV4+Rf5m7rBiYhIfqKiomBqagrg5c299vb2iIqKgo7Oyz6YzZs3IycnB99//700hjI8PByWlpY4ePAgGjVqhJSUFHTr1g1Vq1YFANSoUUPav6mpqdST+iYXLlyQalEZNGgQVq1aJU136dIFI0eOBABMmTIF3377LQ4cOAB3d3fcunULbm5uaNWqFRQKBZycnKTtDh8+jBMnTuDBgwfS77NFixZh586d2LZtGz777DMAL8NOWFgYzMzMULNmTbRr1w4JCQnYvXs3dHR04O7ujq+//hoHDhxA06ZNpf337dsXw4YNAwAEBgYiJiYGy5cvx8qVK3Od54oVK1C/fn3Mnz9fmhcWFgZHR0dcvnwZ1apVy/ca2djYoHPnzoiIiMDMmTMRFhaGoUOH5lpvyZIlaN++PWbMmAEAqFatGi5duoRvvvkGfn5+uHz5Mvbs2YMTJ06gcePGAF4OOXi17Qp7zYoiOTkZgYGBMDU1RZMmTfD8+XMAwPjx49VuKF+0aBGmTJmCfv36AYB0zYODgxESEiKtN3r
0aPTp0wcAEBoaiujoaKxduxZffvkl9PX11QKoi4sLjh49ii1btqj9kWBgYICwsDAYGxvDw8MDc+bMweTJkxEYGCj9HBSWtbU1gP/rYc5Leno65s+fj3379qF58+YAgCpVquDw4cP47rvv0KZNG9y6dQv169dHo0aNAEDtD7CSUKaDqyq0XrlyBQcOHJD+qiYiovdPu3btpE/dHj9+jJUrV6Jz5844ceIEnJyccO7cOVy9ejXXGL8XL17g2rVr6NixI/z8/ODl5YUOHTrA09MT3t7esLe3L3It7u7u+OWXX9Tmvd6xUqdOHenfCoUCdnZ2Ui+hn58fOnToAHd3d3Tq1AndunVDx44dAQDnzp3D06dPc/3Oe/78Oa5duyZNOzs7q52rra0tdHV11QKMra1trp5JVQB5dTq/O+3PnTuHAwcO5ArpwMse34KCKwAMHToU48aNw6BBg3D06FFs3boVhw4dUlsnPj4ePXv2VJvXsmVLBAcHIzs7G/Hx8dDT00PDhg2l5dWrV1cbh1nYa1YYLVq0gI6ODp49e4YqVapg8+bNsLW1lYacqAIaAKSmpuLOnTto2bJlrvpfH6bw6nXX09NDo0aN1Hq6Q0JCEBYWhlu3buH58+fIyMjIdbNg3bp1YWxsrLbPp0+fIikpSe2PH025evUq0tLS0KFDB7X5GRkZqF+/PgBgxIgR6NOnD86cOYOOHTuiV69eaNGihcZrUdFqcH369CmuXr0qTScmJiIuLg7ly5eHvb09Pv74Y5w5cwZRUVHIzs7GvXv3ALwcfGxgYKCtsomISAtMTEzg6uoqTX///fewsLDAmjVrMHfuXDx9+hQNGzbMc/yjqncpPDwcY8eORXR0NDZv3ozp06cjJiYGzZo1K1ItBgYGarXkRV9fX21aoVAgJycHANCgQQMkJiZiz5492LdvH7y9veHp6Ylt27bh6dOnsLe3x8GDB3Pt89Wwltf+CzpmcTx9+hTdu3fH119/nWtZYQJ/586d8dlnn8Hf3x/du3cvsQ6owl6zwti8eTNq1qwJKyurPLc1MTEpXpEFiIyMxKRJk7B48WI0b94cZmZm+Oabb3IN8yhtT58+BQDs2rULFStWVFum6tnu3Lkzbt68id27dyMmJgbt27fHqFGjsGjRohKpSavB9dSpU2jXrp00PXHiRAAvB3QHBARIf82+/hfHgQMHpEHERET0flIoFNDR0ZE+wm3QoAE2b94MGxubAoeV1a9fH/Xr18e0adPQvHlz/Pjjj2jWrBkMDAyQnZ1dWuXD3NwcPj4+8PHxwccff4xOnTrh0aNHaNCgAe7duwc9Pb0S+dj12LFj+OSTT9SmVb1nr2vQoIE0NlVPr+iRQU9PD5988gkWLlyIPXv25LlOjRo1EBsbqzYvNjYW1apVg66uLqpXr46srCycPn1aGiqQkJCgdqe/Jq+Zo6OjNJTkTczNzeHg4IDY2Fi0adNGrf4mTZqorXvs2DG0bt0aAKTzGT16tLR+ixYtpKElAPLsKT537hyeP38ujb09duwYTE1Ni3UjuqoDsKD3fM2aNaFUKnHr1i2183udtbU1fH194evriw8++ACTJ09+N4Nr27ZtIYTId3lBy4iI6P2Snp4uffL2+PFjrFixQuoRBICBAwfim2++Qc+ePaW7vG/evImffvoJX375JTIzM7F69Wr06NEDDg4OSEhIwJUrV6QQ5+zsLH3yV6lSJZiZmeV7z0RWVpZUi4pCoYCtrW2hzmXJkiWwt7dH/fr1oaOjg61bt8LOzg6Wlpbw9PRE8+bN0atXLyxcuBDVqlXDnTt3sGvXLvTu3Vvto+ri2Lp1Kxo1aoRWrVph48aNOHHiBNauXZvnuqNGjcKaNWvQv39/fPnllyhfvjyuXr2KyMhIfP/999DV1X3j8QIDAzF58uR8e1u/+OILNG7cGIGBgfDx8cHRo0exYsUKacytajjF8OHDERoaCj09PYwfP14KbwBK/JoVZPLkyZg1axaqVq2
KevXqITw8HHFxcbl6/kNCQuDm5oYaNWrg22+/xePHj6Uxv25ubli/fj327t0LFxcXbNiwASdPnpTu/FfJyMiAv78/pk+fjhs3bmDWrFkYPXp0kce3Ai/HIBsZGSE6OhqVKlWCoaEhLCws1NYxMzPDpEmTMGHCBOTk5KBVq1ZISUlBbGwszM3N4evri5kzZ6Jhw4bw8PBAeno6oqKi1MYfa1qZHuNKRESlRAbfZBUdHS19PG1mZobq1atj69at0idwxsbG+PPPPzFlyhR89NFHePLkCSpWrIj27dvD3Nwcz58/x99//41169bhv//+g729PUaNGoXhw4cDAPr06YOffvoJ7dq1Q3JyMsLDw+Hn55dnLX/99Veuj8qVSmWuB+7nx8zMDAsXLsSVK1egq6uLxo0bSzdVAcDu3bvxv//9D0OGDMHDhw9hZ2eH1q1bFzoYF2T27NmIjIzEyJEjYW9vj02bNqFmzZp5rqvqTZwyZQo6duyI9PR0ODk5oVOnToUOSwYGBgXeZd6gQQNs2bIFM2fORGBgIOzt7TFnzhy1ax8eHo5hw4ahTZs2sLW1xdy5c6WbuYCXfzSU5DUryNixY5GSkoIvvvgCDx48QM2aNfHLL7/Azc1Nbb0FCxZgwYIFiIuLg6urK3755RfpugwfPhxnz56Fj48PFAoF+vfvj5EjR+bqpW7fvj3c3NzQunVrpKeno3///ggICChW3Xp6eli2bBnmzJmDmTNn4oMPPshzqEVgYCCsra0RFBSE69evw9LSEg0aNMBXX30F4GX7Tps2DTdu3ICRkRE++OADREZGFqumwlCId7xbMzU1FRYWFkhJSXnjEwk0hl+dSERl0IsXL5CYmAgXFxcYGhpquxwies8U9H9QYfOaLJ7jSkRERETE4EpEREREssDgSkRERESywOBKRERERLLA4EpE9J55x+/JJaIyShP/9zC4EhG9J1TfqpSWlqblSojofaT6v+f1b3grCj7HlYjoPaGrqwtLS0vpu+uNjY2hUCi0XBURveuEEEhLS8ODBw9gaWlZqC+uyA+DKxHRe8TOzg4ApPBKRFRaLC0tpf+DiovBlYjoPaJQKGBvbw8bGxtkZmZquxwiek/o6+u/VU+rCoMrEdF7SFdXVyO/RIiIShNvziIiIiIiWWBwJSIiIiJZYHAlIiIiIllgcCUiIiIiWWBwJSIiIiJZYHAlIiIiIllgcCUiIiIiWWBwJSIiIiJZYHAlIiIiIllgcCUiIiIiWWBwJSIiIiJZYHAlIiIiIllgcCUiIiIiWWBwJSIiIiJZYHAlIiIiIllgcCUiIiIiWWBwJSIiIiJZYHAlIiIiIllgcCUiIiIiWWBwJSIiIiJZYHAlIiIiIllgcCUiIiIiWdBqcP3zzz/RvXt3ODg4QKFQYOfOnWrLhRCYOXMm7O3tYWRkBE9PT1y5ckU7xRIRERGRVmk1uD579gx169ZFSEhInssXLlyIZcuWYdWqVTh+/DhMTEzg5eWFFy9elHKlRERERKRteto8eOfOndG5c+c8lwkhEBwcjOnTp6Nnz54AgPXr18PW1hY7d+5Ev379SrNUIiIiItKyMjvGNTExEffu3YOnp6c0z8LCAk2bNsXRo0fz3S49PR2pqalqLyIiIiKSvzIbXO/duwcAsLW1VZtva2srLctLUFAQLCwspJejo2OJ1klEREREpaPMBtfimjZtGlJSUqRXUlKStksiIiIiIg0os8HVzs4OAHD//n21+ffv35eW5UWpVMLc3FztRURERETyV2aDq4uLC+zs7LB//35pXmpqKo4fP47mzZtrsTIiIiIi0gatPlXg6dOnuHr1qjSdmJiIuLg4lC9fHpUrV8b48eMxd+5cuLm5wcXFBTNmzICDgwN69eqlvaKJiIiISCu0GlxPnTqFdu3aSdMTJ04EAPj6+iIiIgJffvklnj17hs8++wzJyclo1aoVoqOjYWhoqK2SiYiIiEhLFEIIoe0iSlJqaiosLCyQkpJ
SeuNdf/QpneOoDNhcuscjIiIi0qDC5rUyO8aViIiIiOhVDK5EREREJAsMrkREREQkCwyuRERERCQLDK5EREREJAsMrkREREQkCwyuRERERCQLDK5EREREJAsMrkREREQkCwyuRERERCQLDK5EREREJAsMrkREREQkCwyuRERERCQLDK5EREREJAsMrkREREQkCwyuRERERCQLDK5EREREJAsMrkREREQkCwyuRERERCQLDK5EREREJAsMrkREREQkCwyuRERERCQLDK5EREREJAsMrkREREQkCwyuRERERCQLDK5EREREJAsMrkREREQkCwyuRERERCQLDK5EREREJAsMrkREREQkCwyuRERERCQLDK5EREREJAsMrkREREQkCwyuRERERCQLZTq4ZmdnY8aMGXBxcYGRkRGqVq2KwMBACCG0XRoRERERlTI9bRdQkK+//hqhoaFYt24dPDw8cOrUKQwZMgQWFhYYO3astssjIiIiolJUpoPrkSNH0LNnT3Tt2hUA4OzsjE2bNuHEiRNaroyIiIiISluZHirQokUL7N+/H5cvXwYAnDt3DocPH0bnzp3z3SY9PR2pqalqLyIiIiKSvzLd4zp16lSkpqaievXq0NXVRXZ2NubNm4eBAwfmu01QUBBmz55dilWWAT/6lO7xBmwu1mb+ESc1WsZav8Ya3R8RERGVbWW6x3XLli3YuHEjfvzxR5w5cwbr1q3DokWLsG7duny3mTZtGlJSUqRXUlJSKVZMRERERCWlTPe4Tp48GVOnTkW/fv0AALVr18bNmzcRFBQEX1/fPLdRKpVQKpWlWSYRERERlYIy3eOalpYGHR31EnV1dZGTk6OlioiIiIhIW8p0j2v37t0xb948VK5cGR4eHjh79iyWLFmCoUOHars0IiIiIiplZTq4Ll++HDNmzMDIkSPx4MEDODg4YPjw4Zg5c6a2SyMiIiKiUlamg6uZmRmCg4MRHBys7VKIiIiISMvK9BhXIiIiIiIVBlciIiIikoViBdcqVargv//+yzU/OTkZVapUeeuiiIiIiIheV6zgeuPGDWRnZ+ean56ejtu3b791UUREREREryvSzVm//PKL9O+9e/fCwsJCms7Ozsb+/fvh7OysseKIiIiIiFSKFFx79eoFAFAoFLm+uUpfXx/Ozs5YvHixxoojIiIiIlIpUnBVfWOVi4sLTp48iQoVKpRIUUREREREryvWc1wTExM1XQcRERERUYGK/QUE+/fvx/79+/HgwQOpJ1YlLCzsrQsjIiIiInpVsYLr7NmzMWfOHDRq1Aj29vZQKBSarouIiIiISE2xguuqVasQERGBwYMHa7oeIiIiIqI8Fes5rhkZGWjRooWmayEiIiIiylexguuwYcPw448/aroWIiIiIqJ8FWuowIsXL7B69Wrs27cPderUgb6+vtryJUuWaKQ4IiIiIiKVYgXX8+fPo169egCAixcvqi3jjVpEREREVBKKFVwPHDig6TqIiIiIiApUrDGuRERERESlrVg9ru3atStwSMDvv/9e7IKIiIiIiPJSrOCqGt+qkpmZibi4OFy8eBG+vr6aqIuIiIiISE2xguu3336b5/yAgAA8ffr0rQoiIiIiIsqLRse4Dho0CGFhYZrcJRERERERAA0H16NHj8LQ0FCTuyQiIiIiAlDMoQIfffSR2rQQAnfv3sWpU6cwY8YMjRRGRERERPSqYgVXCwsLtWkdHR24u7tjzpw56Nixo0YKIyIiIiJ6VbGCa3h4uKbrICIiIiIqULGCq8rp06cRHx8PAPDw8ED9+vU1UhQRERER0euKFVwfPHiAfv364eDBg7C0tAQAJCcno127doiMjIS1tbUmayQiIiIiKt5TBcaMGYMnT57gr7/+wqNHj/Do0SNcvHgRqampGDt2rKZrJCIiIiIqXo9rdHQ09u3bhxo1akjzatasiZCQEN6cRUREREQlolg9rjk5OdDX1881X19fHzk5OW9dFBERERH
R64oVXD/88EOMGzcOd+7ckebdvn0bEyZMQPv27TVWHBERERGRSrGC64oVK5CamgpnZ2dUrVoVVatWhYuLC1JTU7F8+XJN10hEREREVLwxro6Ojjhz5gz27duHv//+GwBQo0YNeHp6arQ4IiIiIiKVIvW4/v7776hZsyZSU1OhUCjQoUMHjBkzBmPGjEHjxo3h4eGBQ4cOlVStRERERPQeK1JwDQ4Oxqeffgpzc/NcyywsLDB8+HAsWbJEY8UBL8fODho0CFZWVjAyMkLt2rVx6tQpjR6DiIiIiMq+IgXXc+fOoVOnTvku79ixI06fPv3WRak8fvwYLVu2hL6+Pvbs2YNLly5h8eLFKFeunMaOQURERETyUKQxrvfv38/zMVjSzvT08PDhw7cuSuXrr7+Go6MjwsPDpXkuLi4a2z8RERERyUeRelwrVqyIixcv5rv8/PnzsLe3f+uiVH755Rc0atQIffv2hY2NDerXr481a9YUuE16ejpSU1PVXkREREQkf0UKrl26dMGMGTPw4sWLXMueP3+OWbNmoVu3bhor7vr16wgNDYWbmxv27t2LESNGYOzYsVi3bl2+2wQFBcHCwkJ6OTo6aqweIiIiItIehRBCFHbl+/fvo0GDBtDV1cXo0aPh7u4OAPj7778REhKC7OxsnDlzBra2thopzsDAAI0aNcKRI0ekeWPHjsXJkydx9OjRPLdJT09Henq6NJ2amgpHR0ekpKTkeVNZifjRp3SOoy0DNhdrM/+IkxotY61fY43uj4iIiLQjNTUVFhYWb8xrRRrjamtriyNHjmDEiBGYNm0aVJlXoVDAy8sLISEhGgutAGBvb4+aNWuqzatRowa2b9+e7zZKpRJKpVJjNRARERFR2VDkLyBwcnLC7t278fjxY1y9ehVCCLi5uZXInf4tW7ZEQkKC2rzLly/DyclJ48ciIiIiorKtWN+cBQDlypVD48Yl+1HthAkT0KJFC8yfPx/e3t44ceIEVq9ejdWrV5focYmIiIio7CnSzVmlrXHjxtixYwc2bdqEWrVqITAwEMHBwRg4cKC2SyMiIiKiUlbsHtfS0q1bN40+qYCIiIiI5KlM97gSEREREakwuBIRERGRLDC4EhEREZEsMLgSERERkSwwuBIRERGRLDC4EhEREZEsMLgSERERkSwwuBIRERGRLDC4EhEREZEsMLgSERERkSwwuBIRERGRLDC4EhEREZEsMLgSERERkSwwuBIRERGRLDC4EhEREZEsMLgSERERkSzoabsAentxScka32c9R0uN75OIiIjobbDHlYiIiIhkgcGViIiIiGSBwZWIiIiIZIHBlYiIiIhkgcGViIiIiGSBwZWIiIiIZIHBlYiIiIhkgcGViIiIiGSBwZWIiIiIZIHBlYiIiIhkgcGViIiIiGSBwZWIiIiIZIHBlYiIiIhkgcGViIiIiGSBwZWIiIiIZIHBlYiIiIhkgcGViIiIiGSBwZWIiIiIZEFWwXXBggVQKBQYP368tkshIiIiolImm+B68uRJfPfdd6hTp462SyEiIiIiLZBFcH369CkGDhyINWvWoFy5cgWum56ejtTUVLUXEREREcmfnrYLKIxRo0aha9eu8PT0xNy5cwtcNygoCLNnzy6lyqi0jLk/PffMHy1L7oADNpfcvqlM8Y84qdH9rfVrrNH9ERHR/ynzPa6RkZE4c+YMgoKCCrX+tGnTkJKSIr2SkpJKuEIiIiIiKg1lusc1KSkJ48aNQ0xMDAwNDQu1jVKphFKpLOHKiIiIiKi0lengevr0aTx48AANGjSQ5mVnZ+PPP//EihUrkJ6eDl1dXS1WSERERESlpUwH1/bt2+PChQtq84YMGYLq1atjypQpDK1ERERE75EyHVzNzMxQq1YttXkmJiawsrLKNZ+IiIiI3m1l/uYsIiIiIiKgjPe45uXgwYPaLoGIiIiItIA9rkREREQkCwyuRERERCQLDK5EREREJAsMrkREREQkCwyuRER
ERCQLDK5EREREJAsMrkREREQkCwyuRERERCQLDK5EREREJAsMrkREREQkCwyuRERERCQLDK5EREREJAsMrkREREQkCwyuRERERCQLDK5EREREJAt62i7gfROXlKztEgqloDqXR5wsvUJKmeq8NXmOa/0aa2xfJcW/BNpUDudNRETywh5XIiIiIpIFBlciIiIikgUGVyIiIiKSBQZXIiIiIpIFBlciIiIikgUGVyIiIiKSBQZXIiIiIpIFBlciIiIikgUGVyIiIiKSBQZXIiIiIpIFBlciIiIikgUGVyIiIiKSBQZXIiIiIpIFBlciIiIikgUGVyIiIiKSBQZXIiIiIpIFBlciIiIikgUGVyIiIiKShTIdXIOCgtC4cWOYmZnBxsYGvXr1QkJCgrbLIiIiIiItKNPB9Y8//sCoUaNw7NgxxMTEIDMzEx07dsSzZ8+0XRoRERERlTI9bRdQkOjoaLXpiIgI2NjY4PTp02jdurWWqiIiIiIibSjTwfV1KSkpAIDy5cvnu056ejrS09Ol6dTU1BKvi4iIiIhKnmyCa05ODsaPH4+WLVuiVq1a+a4XFBSE2bNnl2JlRKXPP+KktkugfJRW24y5P/2ttq/naFm0DQZsfqvjlRX5tc/bXM8iX0uVd+Sa5utHn9I9Xmlfz9I+P20og+/RMj3G9VWjRo3CxYsXERkZWeB606ZNQ0pKivRKSkoqpQqJiIiIqCTJosd19OjRiIqKwp9//olKlSoVuK5SqYRSqSylyoiIiIiotJTp4CqEwJgxY7Bjxw4cPHgQLi4u2i6JiIiIiLSkTAfXUaNG4ccff8TPP/8MMzMz3Lt3DwBgYWEBIyMjLVdHRERERKWpTI9xDQ0NRUpKCtq2bQt7e3vptXlz2RssTEREREQlq0z3uAohtF0CEREREZURZbrHlYiIiIhIhcGViIiIiGSBwZWIiIiIZIHBlYiIiIhkgcGViIiIiGSBwZWIiIiIZIHBlYiIiIhkgcGViIiIiGSBwZWIiIiIZIHBlYiIiIhkgcGViIiIiGSBwZWIiIiIZIHBlYiIiIhkgcGViIiIiGSBwZWIiIiIZEFP2wWQ/Iy5P13bJQAA4pKSS2zfGj3HHy01t6//b8z95LfafrntXM0UUlg/+pTu8QBgwObSP+a77A1tWJI/j5o0pgT2WdxzXx5xUrOFFGCtX2Pt/BwSaRh7XImIiIhIFhhciYiIiEgWGFyJiIiISBYYXImIiIhIFhhciYiIiEgWGFyJiIiISBYYXImIiIhIFhhciYiIiEgWGFyJiIiISBYYXImIiIhIFhhciYiIiEgWGFyJiIiISBYYXImIiIhIFhhciYiIiEgWGFyJiIiISBYYXImIiIhIFhhciYiIiEgWGFyJiIiISBZkEVxDQkLg7OwMQ0NDNG3aFCdOnNB2SURERERUysp8cN28eTMmTpyIWbNm4cyZM6hbty68vLzw4MEDbZdGRERERKWozAfXJUuW4NNPP8WQIUNQs2ZNrFq1CsbGxggLC9N2aURERERUivS0XUBBMjIycPr0aUybNk2ap6OjA09PTxw9ejTPbdLT05Geni5Np6SkAABSU1NLtthXpWXmu+jpi6zSq4PKhNQC3g/F9bbvo4znTzVUSf7UfuZK4BoUooBCrVYa16IkvO17QNPvS/7fVnSl+d5LTU3Vzs9haSrN3/PAu389gVK9pqrfGUKIglcUZdjt27cFAHHkyBG1+ZMnTxZNmjTJc5tZs2YJAHzxxRdffPHFF198yeyVlJRUYDYs0z2uxTFt2jRMnDhRms7JycGjR49gZWUFhUJRosdOTU2Fo6MjkpKSYG5uXqLHIu1gG7/72MbvPrbxu49tLD9CCDx58gQODg4Frlemg2uFChWgq6uL+/fvq82/f/8+7Ozs8txGqVRCqVSqzbO0tCypEvNkbm7OH5R3HNv43cc2fvexjd99bGN5sbCweOM6ZfrmLAMDAzRs2BD79++X5uXk5GD//v1o3ry
5FisjIiIiotJWpntcAWDixInw9fVFo0aN0KRJEwQHB+PZs2cYMmSItksjIiIiolJU5oOrj48PHj58iJkzZ+LevXuoV68eoqOjYWtrq+3SclEqlZg1a1auoQr07mAbv/vYxu8+tvG7j2387lII8abnDhARERERaV+ZHuNKRERERKTC4EpEREREssDgSkRERESywOBKRERERLLA4FpEISEhcHZ2hqGhIZo2bYoTJ04UuP7WrVtRvXp1GBoaonbt2ti9e3cpVUrFVZQ2XrNmDT744AOUK1cO5cqVg6en5xvfE6R9Rf05VomMjIRCoUCvXr1KtkB6a0Vt4+TkZIwaNQr29vZQKpWoVq0a/78u44raxsHBwXB3d4eRkREcHR0xYcIEvHjxopSqJY0p8AthSU1kZKQwMDAQYWFh4q+//hKffvqpsLS0FPfv389z/djYWKGrqysWLlwoLl26JKZPny709fXFhQsXSrlyKqyitvGAAQNESEiIOHv2rIiPjxd+fn7CwsJC/PPPP6VcORVWUdtYJTExUVSsWFF88MEHomfPnqVTLBVLUds4PT1dNGrUSHTp0kUcPnxYJCYmioMHD4q4uLhSrpwKq6htvHHjRqFUKsXGjRtFYmKi2Lt3r7C3txcTJkwo5crpbTG4FkGTJk3EqFGjpOns7Gzh4OAggoKC8lzf29tbdO3aVW1e06ZNxfDhw0u0Tiq+orbx67KysoSZmZlYt25dSZVIb6k4bZyVlSVatGghvv/+e+Hr68vgWsYVtY1DQ0NFlSpVREZGRmmVSG+pqG08atQo8eGHH6rNmzhxomjZsmWJ1kmax6EChZSRkYHTp0/D09NTmqejowNPT08cPXo0z22OHj2qtj4AeHl55bs+aVdx2vh1aWlpyMzMRPny5UuqTHoLxW3jOXPmwMbGBv7+/qVRJr2F4rTxL7/8gubNm2PUqFGwtbVFrVq1MH/+fGRnZ5dW2VQExWnjFi1a4PTp09JwguvXr2P37t3o0qVLqdRMmlPmvzmrrPj333+RnZ2d6xu7bG1t8ffff+e5zb179/Jc/969eyVWJxVfcdr4dVOmTIGDg0OuP1iobChOGx8+fBhr165FXFxcKVRIb6s4bXz9+nX8/vvvGDhwIHbv3o2rV69i5MiRyMzMxKxZs0qjbCqC4rTxgAED8O+//6JVq1YQQiArKwuff/45vvrqq9IomTSIPa5EGrJgwQJERkZix44dMDQ01HY5pAFPnjzB4MGDsWbNGlSoUEHb5VAJycnJgY2NDVavXo2GDRvCx8cH//vf/7Bq1Sptl0YacvDgQcyfPx8rV67EmTNn8NNPP2HXrl0IDAzUdmlUROxxLaQKFSpAV1cX9+/fV5t///592NnZ5bmNnZ1dkdYn7SpOG6ssWrQICxYswL59+1CnTp2SLJPeQlHb+Nq1a7hx4wa6d+8uzcvJyQEA6OnpISEhAVWrVi3ZoqlIivNzbG9vD319fejq6krzatSogXv37iEjIwMGBgYlWjMVTXHaeMaMGRg8eDCGDRsGAKhduzaePXuGzz77DP/73/+go8N+PLlgSxWSgYEBGjZsiP3790vzcnJysH//fjRv3jzPbZo3b662PgDExMTkuz5pV3HaGAAWLlyIwMBAREdHo1GjRqVRKhVTUdu4evXquHDhAuLi4qRXjx490K5dO8TFxcHR0bE0y6dCKM7PccuWLXH16lXpjxIAuHz5Muzt7Rlay6DitHFaWlqucKr6Q0UIUXLFkuZp++4wOYmMjBRKpVJERESIS5cuic8++0xYWlqKe/fuCSGEGDx4sJg6daq0fmxsrNDT0xOLFi0S8fHxYtasWXwcVhlX1DZesGCBMDAwENu2bRN3796VXk+ePNHWKdAbFLWNX8enCpR9RW3jW7duCTMzMzF69GiRkJAgoqKihI2NjZg7d662ToHeoKhtPGvWLGFmZiY2bdokrl+/Ln777TdRtWpV4e3tra1ToGJicC2i5cuXi8qVKws
DAwPRpEkTcezYMWlZmzZthK+vr9r6W7ZsEdWqVRMGBgbCw8ND7Nq1q5QrpqIqShs7OTkJALles2bNKv3CqdCK+nP8KgZXeShqGx85ckQ0bdpUKJVKUaVKFTFv3jyRlZVVylVTURSljTMzM0VAQICoWrWqMDQ0FI6OjmLkyJHi8ePHpV84vRWFEOwjJyIiIqKyj2NciYiIiEgWGFyJiIiISBYYXImIiIhIFhhciYiIiEgWGFyJiIiISBYYXImIiIhIFhhciYiIiEgWGFyJiIiISBYYXImozPHz80OvXr2k6bZt22L8+PGlXsfBgwehUCiQnJxc6sd2dnZGcHDwW+0jIiIClpaWBa4TEBCAevXqSdOlee3/++8/2NjY4MaNGyWy/1dp4nq+6tKlS6hUqRKePXumsX0S0ZsxuBJRofj5+UGhUEChUMDAwACurq6YM2cOsrKySvzYP/30EwIDAwu1bmmHTWdnZ+m6mJiYoEGDBti6dWupHFsTJk2ahP379+e7/PVrr8kAOG/ePPTs2RPOzs65lnl5eUFXVxcnT54s0j7zC+snT57EZ599VsxKc6tZsyaaNWuGJUuWaGyfRPRmDK5EVGidOnXC3bt3ceXKFXzxxRcICAjAN998k+e6GRkZGjtu+fLlYWZmprH9adqcOXNw9+5dnD17Fo0bN4aPjw+OHDmS57qavC6aYGpqCisrq3yXl9S1T0tLw9q1a+Hv759r2a1bt3DkyBGMHj0aYWFhGjmetbU1jI2NNbIvlSFDhiA0NLRU/ngjopcYXImo0JRKJezs7ODk5IQRI0bA09MTv/zyC4D/+4h53rx5cHBwgLu7OwAgKSkJ3t7esLS0RPny5dGzZ0+1j4azs7MxceJEWFpawsrKCl9++SWEEGrHff3j6vT0dEyZMgWOjo5QKpVwdXXF2rVrcePGDbRr1w4AUK5cOSgUCvj5+QEAcnJyEBQUBBcXFxgZGaFu3brYtm2b2nF2796NatWqwcjICO3atSv0R9hmZmaws7NDtWrVEBISAiMjI/z6668AXvZQBgYG4pNPPoG5ubnU67d9+3Z4eHhAqVTC2dkZixcvzrXfJ0+eoH///jAxMUHFihUREhKitnzJkiWoXbs2TExM4OjoiJEjR+Lp06e59rNz5064ubnB0NAQXl5eSEpKkpa9PlTgda9e+7Zt2+LmzZuYMGGC1Mv87NkzmJub57qWO3fuhImJCZ48eZLnfnfv3g2lUolmzZrlWhYeHo5u3bphxIgR2LRpE54/f662PDk5GcOHD4etrS0MDQ1Rq1YtREVF4eDBgxgyZAhSUlKk+gICAgCo9xQPGDAAPj4+avvMzMxEhQoVsH79egCFe7906NABjx49wh9//JHv9SMizWJwJaJiMzIyUutB3L9/PxISEhATE4OoqChkZmbCy8sLZmZmOHToEGJjY2FqaopOnTpJ2y1evBgREREICwvD4cOH8ejRI+zYsaPA437yySfYtGkTli1bhvj4eHz33XcwNTWFo6Mjtm/fDgBISEjA3bt3sXTpUgBAUFAQ1q9fj1WrVuGvv/7ChAkTMGjQICl0JCUl4aOPPkL37t0RFxeHYcOGYerUqUW+Jnp6etDX11e7LosWLULdunVx9uxZzJgxA6dPn4a3tzf69euHCxcuICAgADNmzEBERITavr755htpu6lTp2LcuHGIiYmRluvo6GDZsmX466+/sG7dOvz+++/48ssv1faRlpaGefPmYf369YiNjUVycjL69etX5PMCXg4bqFSpktTDfPfuXZiYmKBfv34IDw9XWzc8PBwff/xxvr21hw4dQsOGDXPNF0IgPDwcgwYNQvXq1eHq6qoWGHNyctC5c2fExsbihx9+wKVLl7BgwQLo6uqiRYsWCA4Ohrm5uVTfpEmTch1j4MCB+PXXX9VC/t69e5GWlobevXsDePP7BQAMDAxQr149HDp0qGgXkoiKTxARFYKvr6/o2bOnEEKInJwcERMTI5RKpZg
0aZK03NbWVqSnp0vbbNiwQbi7u4ucnBxpXnp6ujAyMhJ79+4VQghhb28vFi5cKC3PzMwUlSpVko4lhBBt2rQR48aNE0IIkZCQIACImJiYPOs8cOCAACAeP34szXvx4oUwNjYWR44cUVvX399f9O/fXwghxLRp00TNmjXVlk+ZMiXXvl7n5OQkvv32W+nc5s+fLwCIqKgoaXmvXr3UthkwYIDo0KGD2rzJkyerHd/JyUl06tRJbR0fHx/RuXPnfGvZunWrsLKykqbDw8MFAHHs2DFpXnx8vAAgjh8/LoQQYtasWaJu3brS8lfbWQj1a//6+aocP35c6Orqijt37gghhLh//77Q09MTBw8ezLfWnj17iqFDh+aa/9tvvwlra2uRmZkphBDi22+/FW3atJGW7927V+jo6IiEhIQ89xseHi4sLCxyzX+17szMTFGhQgWxfv16aXn//v2Fj4+PEKJw7xeV3r17Cz8/v3zPk4g0iz2uRFRoUVFRMDU1haGhITp37gwfHx/po1gAqF27NgwMDKTpc+fO4erVqzAzM4OpqSlMTU1Rvnx5vHjxAteuXUNKSgru3r2Lpk2bStvo6emhUaNG+dYQFxcHXV1dtGnTptB1X716FWlpaejQoYNUh6mpKdavX49r164BAOLj49XqAIDmzZsXav9TpkyBqakpjI2N8fXXX2PBggXo2rWrtPz184mPj0fLli3V5rVs2RJXrlxBdnZ2vsdv3rw54uPjpel9+/ahffv2qFixIszMzDB48GD8999/SEtLk9bR09ND48aNpenq1avD0tJSbT9vq0mTJvDw8MC6desAAD/88AOcnJzQunXrfLd5/vw5DA0Nc80PCwuDj48P9PT0AAD9+/dHbGys1E5xcXGoVKkSqlWrVux69fT04O3tjY0bNwIAnj17hp9//hkDBw4EULj3i4qRkZHa9SaikqWn7QKISD7atWuH0NBQGBgYwMHBQQoXKiYmJmrTT58+RcOGDaWA8Cpra+ti1WBkZFTkbVQfCe/atQsVK1ZUW6ZUKotVx6smT54MPz8/mJqawtbWFgqFQm3569dFE27cuCGNA503bx7Kly+Pw4cPw9/fHxkZGRq/EelNhg0bhpCQEEydOhXh4eEYMmRIruvwqgoVKuDx48dq81TDRDIzMxEaGirNz87ORlhYGObNm1es9s/LwIED0aZNGzx48AAxMTEwMjJCp06dABTt/fLo0SNUrVpVIzUR0Zuxx5WICs3ExASurq6oXLlyrtCalwYNGuDKlSuwsbGBq6ur2svCwgIWFhawt7fH8ePHpW2ysrJw+vTpfPdZu3Zt5OTk5HtDjKrH99Wey5o1a0KpVOLWrVu56nB0dAQA1KhRAydOnFDb17Fjx954jsDLEObq6go7O7sCw5pKjRo1EBsbqzYvNjYW1apVg66ubr7HP3bsGGrUqAEAOH36NHJycrB48WI0a9YM1apVw507d3IdKysrC6dOnZKmExISkJycLO2nqAwMDNSurcqgQYNw8+ZNLFu2DJcuXYKvr2+B+6lfvz4uXbqkNm/jxo2oVKkSzp07h7i4OOmlGgednZ2NOnXq4J9//sHly5eLVN/rWrRoAUdHR2zevBkbN25E3759oa+vD6Bw7xeVixcvon79+m88HhFpBoMrEZWYgQMHokKFCujZsycOHTqExMREHDx4EGPHjsU///wDABg3bhwWLFiAnTt34u+//8bIkSMLfAars7MzfH19MXToUOzcuVPa55YtWwAATk5OUCgUiIqKwsOHD/H06VOYmZlh0qRJmDBhAtatW4dr167hzJkzWL58ufTx9ueff44rV65g8uTJSEhIwI8//pjrZilN+eKLL7B//34EBgbi8uXLWLduHVasWJHrRqLY2FgsXLgQly9fRkhICLZu3Ypx48YBAFxdXZGZmYnly5fj+vXr2LBhA1atWpXrWPr6+hgzZgyOHz+O06dPw8/PD82aNUOTJk2KVbuzszP+/PNP3L5
9G//++680v1y5cvjoo48wefJkdOzYEZUqVSpwP15eXvjrr7/Uel3Xrl2Ljz/+GLVq1VJ7+fv7499//0V0dDTatGmD1q1bo0+fPoiJiUFiYiL27NmD6Ohoqb6nT59i//79+Pfffwv8GH/AgAFYtWoVYmJipGECAAr1fgFe9nrfvn0bnp6eRb6ORFRM2h5kS0Ty8PpNO4VdfvfuXfHJJ5+IChUqCKVSKapUqSI+/fRTkZKSIoR4eaPMuHHjhLm5ubC0tBQTJ04Un3zySYE3CD1//lxMmDBB2NvbCwMDA+Hq6irCwsKk5XPmzBF2dnZCoVAIX19fIcTLG8qCg4OFu7u70NfXF9bW1sLLy0v88ccf0na//vqrcHV1FUqlUnzwwQciLCysSDdnFWX5tm3bRM2aNYW+vr6oXLmy+Oabb3JtN3v2bNG3b19hbGws7OzsxNKlS9XWWbJkibC3txdGRkbCy8tLrF+/Xq1e1Y1K27dvF1WqVBFKpVJ4enqKmzdvSvso6s1ZR48eFXXq1BFKpVK8/itk//79AoDYsmVLvtfjVU2aNBGrVq0SQghx6tQpAUCcOHEiz3U7d+4sevfuLYQQ4r///hNDhgwRVlZWwtDQUNSqVUu6GU4IIT7//HNhZWUlAIhZs2ZJ1/P1drh06ZIAIJycnNRuIBSicO+X+fPnCy8vr0KdKxFphkKI1x6YSEREVAwbNmzAhAkTcOfOHbWb9PKza9cuTJ48GRcvXoSOjrw+AMzIyICbmxt+/PHHXDfaEVHJ4c1ZRET0VtLS0nD37l0sWLAAw4cPL1RoBYCuXbviypUruH37dq6xo2XdrVu38NVXXzG0EpUy9rgSEdFbCQgIwLx589C6dWv8/PPPMDU11XZJRPSOYnAlIiIiIlmQ16AiIiIiInpvMbgSERERkSwwuBIRERGRLDC4EhEREZEsMLgSERERkSwwuBIRERGRLDC4EhEREZEsMLgSERERkSz8P1IafbLbJuCtAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "SMILES", + "rawType": "object", + "type": "string" + }, + { + "name": "p0", + "rawType": "float64", + "type": "float" + }, + { + "name": "p1", + "rawType": "float64", + "type": "float" + }, + { + "name": "p1_norm", + "rawType": "float64", + "type": "float" + }, + { + "name": "conformal_set", + "rawType": "object", + "type": "unknown" + }, + { + "name": "true_label", + "rawType": "int64", + "type": "integer" + } + ], + "ref": "c11a7bec-d070-4065-a38e-793ba02a5c5b", + "rows": [ + [ + "0", + "CC1=CC=CC=C1OC2=C(C3=C(N2C4=CC=CC=C4)N=CC=C3)C(=O)N5CCNCC5", + "0.882058574975068", + "0.04655439540710138", + "0.05013325991762946", + "[0, 1]", + "0" + ], + [ + "1", + "C1CCN(CC1)C2=C(C3=CC=CC=C3N2C4=CC=CC=C4)C(=O)N5CCNCC5", + "0.6254642414170489", + "0.04320167593384251", + "0.06460876023849627", + "[0, 1]", + "0" + ], + [ + "2", + "CC1=C(C=CC=C1F)CC2=C(C3=C(N2C4=CC=CC=C4)C=C(C=C3)O)C(=O)N5CCNCC5", + "0.22385245244687813", + "0.30252834643235366", + "0.5747328684403408", + "[1]", + "1" + ], + [ + "3", + "C1CN(CCN1)C(=O)C2=C(N(C3=C2N=CC=C3)C4=CC=CC=C4)CC5=C(C(=CC=C5)F)F", + "0.32348499449082535", + "0.23406706926178478", + "0.4198120399482948", + "[1]", + "0" + ], + [ + "4", + "CC1=C(C=CC=C1F)CC2=C(C3=CN=C(C=C3N2C4CCCCC4)OC)C(=O)N5CCNC(C5)CO", + "0.644347075681564", + "0.05727451601737295", + "0.08163163262778775", + "[0, 1]", + "0" + ] + ], + "shape": { + "columns": 6, + "rows": 5 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SMILESp0p1p1_normconformal_settrue_label
0CC1=CC=CC=C1OC2=C(C3=C(N2C4=CC=CC=C4)N=CC=C3)C...0.8820590.0465540.050133[0, 1]0
1C1CCN(CC1)C2=C(C3=CC=CC=C3N2C4=CC=CC=C4)C(=O)N...0.6254640.0432020.064609[0, 1]0
2CC1=C(C=CC=C1F)CC2=C(C3=C(N2C4=CC=CC=C4)C=C(C=...0.2238520.3025280.574733[1]1
3C1CN(CCN1)C(=O)C2=C(N(C3=C2N=CC=C3)C4=CC=CC=C4...0.3234850.2340670.419812[1]0
4CC1=C(C=CC=C1F)CC2=C(C3=CN=C(C=C3N2C4CCCCC4)OC...0.6443470.0572750.081632[0, 1]0
\n", + "
" + ], + "text/plain": [ + " SMILES p0 p1 \\\n", + "0 CC1=CC=CC=C1OC2=C(C3=C(N2C4=CC=CC=C4)N=CC=C3)C... 0.882059 0.046554 \n", + "1 C1CCN(CC1)C2=C(C3=CC=CC=C3N2C4=CC=CC=C4)C(=O)N... 0.625464 0.043202 \n", + "2 CC1=C(C=CC=C1F)CC2=C(C3=C(N2C4=CC=CC=C4)C=C(C=... 0.223852 0.302528 \n", + "3 C1CN(CCN1)C(=O)C2=C(N(C3=C2N=CC=C3)C4=CC=CC=C4... 0.323485 0.234067 \n", + "4 CC1=C(C=CC=C1F)CC2=C(C3=CN=C(C=C3N2C4CCCCC4)OC... 0.644347 0.057275 \n", + "\n", + " p1_norm conformal_set true_label \n", + "0 0.050133 [0, 1] 0 \n", + "1 0.064609 [0, 1] 0 \n", + "2 0.574733 [1] 1 \n", + "3 0.419812 [1] 0 \n", + "4 0.081632 [0, 1] 0 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Conformal set coverage: 0.833\n", + "Conformal set average size: 1.690\n", + "Conformal set error: 0.167\n", + "Fraction of empty sets: 0.000\n", + "NLL: 0.4487081892488713\n", + "Brier: 0.14933955252658113\n", + "AUROC: 0.771043771043771\n", + "F1: 0.42857142857142855\n", + "MCC: 0.34555798270379956\n" + ] + } + ], + "source": [ + "### 3.3 Visualizing Uncertainty and Prediction Sets\n", + "\n", + "plt.figure(figsize=(8, 4))\n", + "plt.hist(p1_cp, bins=20, alpha=0.7, label=\"CrossConformalCV Probabilities\")\n", + "plt.hist(p1, bins=20, alpha=0.7, label=\"Best Ensemble Model Probabilities\")\n", + "plt.xlabel(\"Predicted Probability (Active)\")\n", + "plt.ylabel(\"Count\")\n", + "plt.legend()\n", + "plt.title(\"Predicted Probabilities Distribution\")\n", + "plt.show()\n", + "\n", + "\n", + "\n", + "# Get conformal prediction sets (list of sets per sample)\n", + "conf_pred_sets = cc_clf.predict_conformal_set(smiles_test, confidence=0.9)\n", + "\n", + "# Get p-values for each class (p0, p1)\n", + "p_vals = cc_clf.models_[0].predict_p(smiles_test)\n", + "if hasattr(cc_clf, \"models_\") and len(cc_clf.models_) > 1:\n", + " p_vals = np.mean([m.predict_p(smiles_test) for m in cc_clf.models_], axis=0)\n", + "\n", + "p0 = p_vals[:, 
0]\n", + "p1 = p_vals[:, 1]\n", + "p1_norm = p1 / (p0 + p1 + 1e-12)\n", + "\n", + "df_cp_class = pd.DataFrame({\n", + " \"SMILES\": smiles_test,\n", + " \"p0\": p0,\n", + " \"p1\": p1,\n", + " \"p1_norm\": p1_norm,\n", + " \"conformal_set\": conf_pred_sets,\n", + " \"true_label\": y_test_cp\n", + "})\n", + "display(df_cp_class.head())\n", + "\n", + "def coverage_and_set_size(y_true, conf_sets):\n", + " covered = [y in s for y, s in zip(y_true, conf_sets)]\n", + " avg_size = np.mean([len(s) for s in conf_sets])\n", + " return np.mean(covered), avg_size\n", + "\n", + "coverage, avg_set_size = coverage_and_set_size(y_test_cp, conf_pred_sets)\n", + "error = 1 - coverage\n", + "empty = np.mean([len(s) == 0 for s in conf_pred_sets])\n", + "\n", + "print(f\"Conformal set coverage: {coverage:.3f}\")\n", + "print(f\"Conformal set average size: {avg_set_size:.3f}\")\n", + "print(f\"Conformal set error: {error:.3f}\")\n", + "print(f\"Fraction of empty sets: {empty:.3f}\")\n", + "print(\"NLL:\", log_loss(y_test_cp, p1_norm))\n", + "print(\"Brier:\", brier_score_loss(y_test_cp, p1_norm))\n", + "print(\"AUROC:\", roc_auc_score(y_test_cp, p1_norm))\n", + "print(\"F1:\", f1_score(y_test_cp, (p1_norm >= 0.5).astype(int)))\n", + "print(\"MCC:\", matthews_corrcoef(y_test_cp, (p1_norm >= 0.5).astype(int)))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "6cd8a8da", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "pubchem_smiles", + "rawType": "object", + "type": "string" + }, + { + "name": "pIC50", + "rawType": "float64", + "type": "float" + }, + { + "name": "pred_lower", + "rawType": "float64", + "type": "float" + }, + { + "name": "pred_upper", + "rawType": "float64", + "type": "float" + }, + { + "name": "point_pred", + "rawType": "float64", + "type": "float" + } + ], + "ref": 
"f965cae9-1066-4502-88ff-4d9ec7c9226a", + "rows": [ + [ + "0", + "CC1=C(C=C(C=C1)F)OC2=C(C3=C(N2C4=CC=CC=C4)N=CC=C3)C(=O)N5CCNCC5", + "6.4023", + "4.701805199999997", + "8.831095599999994", + "6.766450399999995" + ], + [ + "1", + "C1CN(CCN1)C(=O)C2=C(N(C3=C2C=CN=C3)C4=CC=CC=C4)CC5=CC=CC=C5", + "6.1186", + "3.9571802", + "8.086470599999998", + "6.021825399999999" + ], + [ + "2", + "CC1=C(C=CC=C1F)CC2=C(C3=C(N2C4=CC=CC=C4)N=CC(=C3)O)C(=O)N5CCNCC5", + "8.2218", + "5.641988400000004", + "9.771278800000003", + "7.7066336000000035" + ], + [ + "3", + "C1CN(CCN1)C(=O)C2=C(N(C3=CC=CC=C32)C4=CC=CC=C4)CC5=C(C=CC=C5Cl)F", + "7.7447", + "4.515626999999999", + "8.644917399999997", + "6.580272199999999" + ], + [ + "4", + "CC1=C(C=CC=C1F)CC2=C(C3=CNC(=O)C=C3N2C4CCCCC4)C(=O)N5CCNCC5", + "6.9355", + "4.9534574", + "9.082747799999998", + "7.018102599999999" + ] + ], + "shape": { + "columns": 5, + "rows": 5 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pubchem_smilespIC50pred_lowerpred_upperpoint_pred
0CC1=C(C=C(C=C1)F)OC2=C(C3=C(N2C4=CC=CC=C4)N=CC...6.40234.7018058.8310966.766450
1C1CN(CCN1)C(=O)C2=C(N(C3=C2C=CN=C3)C4=CC=CC=C4...6.11863.9571808.0864716.021825
2CC1=C(C=CC=C1F)CC2=C(C3=C(N2C4=CC=CC=C4)N=CC(=...8.22185.6419889.7712797.706634
3C1CN(CCN1)C(=O)C2=C(N(C3=CC=CC=C32)C4=CC=CC=C4...7.74474.5156278.6449176.580272
4CC1=C(C=CC=C1F)CC2=C(C3=CNC(=O)C=C3N2C4CCCCC4)...6.93554.9534579.0827487.018103
\n", + "
" + ], + "text/plain": [ + " pubchem_smiles pIC50 pred_lower \\\n", + "0 CC1=C(C=C(C=C1)F)OC2=C(C3=C(N2C4=CC=CC=C4)N=CC... 6.4023 4.701805 \n", + "1 C1CN(CCN1)C(=O)C2=C(N(C3=C2C=CN=C3)C4=CC=CC=C4... 6.1186 3.957180 \n", + "2 CC1=C(C=CC=C1F)CC2=C(C3=C(N2C4=CC=CC=C4)N=CC(=... 8.2218 5.641988 \n", + "3 C1CN(CCN1)C(=O)C2=C(N(C3=CC=CC=C32)C4=CC=CC=C4... 7.7447 4.515627 \n", + "4 CC1=C(C=CC=C1F)CC2=C(C3=CNC(=O)C=C3N2C4CCCCC4)... 6.9355 4.953457 \n", + "\n", + " pred_upper point_pred \n", + "0 8.831096 6.766450 \n", + "1 8.086471 6.021825 \n", + "2 9.771279 7.706634 \n", + "3 8.644917 6.580272 \n", + "4 9.082748 7.018103 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Interval coverage: 1.000\n", + "Average interval width: 4.129\n", + "MAE (point prediction): 0.662\n" + ] + } + ], + "source": [ + "\n", + "## 4. Regression: Conformal Prediction and Interval Evaluation\n", + "\n", + "\n", + "\n", + "# --- Prepare regression data (filter NaNs as before) ---\n", + "mask_reg = ~np.isnan(X_feat).any(axis=1) & ~np.isnan(y_reg)\n", + "X_feat_reg = X_feat[mask_reg]\n", + "y_reg_clean = y_reg[mask_reg]\n", + "smiles_reg = np.array(smiles)[mask_reg]\n", + "\n", + "# Split for regression\n", + "X_train_reg, X_test_reg, y_train_reg, y_test_reg, smiles_train_reg, smiles_test_reg = train_test_split(\n", + " X_feat_reg, y_reg_clean, smiles_reg, test_size=0.3, random_state=42\n", + ")\n", + "\n", + "# --- Wrap regressor with CrossConformalCV ---\n", + "rf_reg = RandomForestRegressor(n_estimators=100, random_state=42)\n", + "rf_reg_pipeline = Pipeline([\n", + " (\"rf\", rf_reg)\n", + "], n_jobs=1)\n", + "\n", + "cc_reg = CrossConformalCV(\n", + " estimator=rf_reg_pipeline,\n", + " n_folds=5,\n", + " confidence_level=0.95,\n", + " estimator_type=\"regressor\"\n", + ")\n", + "cc_reg.fit(X_train_reg, y_train_reg)\n", + "\n", + "# --- Predict intervals and point predictions ---\n", + "intervals = 
np.array([m.predict_int(X_test_reg) for m in cc_reg.models_])\n", + "intervals_mean = intervals.mean(axis=0)\n", + "lower = intervals_mean[:, 0]\n", + "upper = intervals_mean[:, 1]\n", + "point_pred = np.mean([m.predict(X_test_reg) for m in cc_reg.models_], axis=0)\n", + "\n", + "df_cp_reg = pd.DataFrame({\n", + " \"pubchem_smiles\": smiles_test_reg,\n", + " \"pIC50\": y_test_reg,\n", + " \"pred_lower\": lower,\n", + " \"pred_upper\": upper,\n", + " \"point_pred\": point_pred\n", + "})\n", + "display(df_cp_reg.head())\n", + "\n", + "# --- Regression: Evaluate coverage and interval width ---\n", + "coverage_reg = np.mean((y_test_reg >= lower) & (y_test_reg <= upper))\n", + "avg_width = np.mean(upper - lower)\n", + "mae = np.mean(np.abs(point_pred - y_test_reg))\n", + "\n", + "print(f\"Interval coverage: {coverage_reg:.3f}\")\n", + "print(f\"Average interval width: {avg_width:.3f}\")\n", + "print(f\"MAE (point prediction): {mae:.3f}\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index 2eb94220..1da88569 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ authors = [ description = "Integration of rdkit functionality into sklearn pipelines." 
readme = "README.md" dependencies = [ + "crepes>=0.8.0", "joblib>=1.3.0", "loguru>=0.7.3", "matplotlib>=3.10.1", @@ -203,6 +204,9 @@ exclude = ["tests", "docs"] [tool.setuptools.package-data] "molpipeline" = ["py.typed"] +[tool.uv.sources] +molpipeline = { workspace = true } + [dependency-groups] dev = [ "bandit>=1.8.3", @@ -212,6 +216,7 @@ dev = [ "flake8>=7.2.0", "interrogate>=1.7.0", "isort>=6.0.1", + "molpipeline[chemprop]", "mypy>=1.15.0", "pydocstyle>=6.3.0", "pylint>=3.3.6", diff --git a/tests/test_experimental/test_uncertainty/__init__.py b/tests/test_experimental/test_uncertainty/__init__.py new file mode 100644 index 00000000..4d9cb3a5 --- /dev/null +++ b/tests/test_experimental/test_uncertainty/__init__.py @@ -0,0 +1 @@ +"Uncertainty test module" \ No newline at end of file diff --git a/tests/test_experimental/test_uncertainty/test_conformal.py b/tests/test_experimental/test_uncertainty/test_conformal.py new file mode 100644 index 00000000..d6cdec67 --- /dev/null +++ b/tests/test_experimental/test_uncertainty/test_conformal.py @@ -0,0 +1,59 @@ +import unittest +import numpy as np +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.datasets import make_classification, make_regression +from sklearn.model_selection import train_test_split + +from molpipeline.experimental.uncertainty.conformal import UnifiedConformalCV, CrossConformalCV + +class TestConformalCV(unittest.TestCase): + def test_unified_conformal_classifier(self): + X, y = make_classification(n_samples=100, n_features=10, random_state=42) + X_train, X_calib, y_train, y_calib = train_test_split(X, y, test_size=0.2, random_state=42) + clf = RandomForestClassifier(random_state=42) + cp = UnifiedConformalCV(clf, estimator_type="classifier") + cp.fit(X_train, y_train) + cp.calibrate(X_calib, y_calib) + preds = cp.predict(X_calib) + probs = cp.predict_proba(X_calib) + sets = cp.predict_conformal_set(X_calib) + self.assertEqual(len(preds), len(y_calib)) + 
self.assertEqual(probs.shape[0], len(y_calib)) + self.assertEqual(len(sets), len(y_calib)) + + def test_unified_conformal_regressor(self): + X, y = make_regression(n_samples=100, n_features=10, random_state=42) + X_train, X_calib, y_train, y_calib = train_test_split(X, y, test_size=0.2, random_state=42) + reg = RandomForestRegressor(random_state=42) + cp = UnifiedConformalCV(reg, estimator_type="regressor") + cp.fit(X_train, y_train) + cp.calibrate(X_calib, y_calib) + intervals = cp.predict_int(X_calib) + self.assertEqual(intervals.shape[0], len(y_calib)) + self.assertEqual(intervals.shape[1], 2) + + def test_cross_conformal_classifier(self): + X, y = make_classification(n_samples=100, n_features=10, random_state=42) + clf = RandomForestClassifier(random_state=42) + ccp = CrossConformalCV(clf, estimator_type="classifier", n_folds=3) + ccp.fit(X, y) + preds = ccp.predict(X) + probs = ccp.predict_proba(X) + sets = ccp.predict_conformal_set(X) + self.assertEqual(len(preds), len(y)) + self.assertEqual(probs.shape[0], len(y)) + self.assertEqual(len(sets), len(y)) + + def test_cross_conformal_regressor(self): + X, y = make_regression(n_samples=100, n_features=10, random_state=42) + reg = RandomForestRegressor(random_state=42) + ccp = CrossConformalCV(reg, estimator_type="regressor", n_folds=3) + ccp.fit(X, y) + # Each model should produce intervals for all samples + for model in ccp.models_: + intervals = model.predict_int(X) + self.assertEqual(intervals.shape[0], len(y)) + self.assertEqual(intervals.shape[1], 2) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 84eb6ae4..7ac91bf4 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -376,6 +376,48 @@ def test_calibrated_classifier(self) -> None: self.assertEqual(predicted_value_array.shape, (len(TEST_SMILES),)) self.assertEqual(predicted_proba_array.shape, (len(TEST_SMILES), 2)) + def 
test_conformal_pipeline_classifier(self): + """Test conformal prediction with a pipeline on SMILES data.""" + from molpipeline.experimental.uncertainty.conformal import UnifiedConformalCV, CrossConformalCV + + # Use the global test data + smiles = TEST_SMILES + y = np.array(CONTAINS_OX) + + # Build a pipeline: SMILES -> Mol -> MorganFP -> RF + smi2mol = SmilesToMol() + mol2morgan = MolToMorganFP(radius=2, n_bits=128) + rf = RandomForestClassifier(n_estimators=10, random_state=42) + pipeline = Pipeline([ + ("smi2mol", smi2mol), + ("morgan", mol2morgan), + ("rf", rf) + ]) + + # Split data + from sklearn.model_selection import train_test_split + X_train, X_calib, y_train, y_calib = train_test_split(smiles, y, test_size=0.3, random_state=42) + + # UnifiedConformalCV + cp = UnifiedConformalCV(pipeline, estimator_type="classifier") + cp.fit(X_train, y_train) + cp.calibrate(X_calib, y_calib) + preds = cp.predict(X_calib) + probs = cp.predict_proba(X_calib) + sets = cp.predict_conformal_set(X_calib) + self.assertEqual(len(preds), len(y_calib)) + self.assertEqual(probs.shape[0], len(y_calib)) + self.assertEqual(len(sets), len(y_calib)) + + # CrossConformalCV + ccp = CrossConformalCV(pipeline, estimator_type="classifier", n_folds=3) + ccp.fit(smiles, y) + preds_ccp = ccp.predict(smiles) + probs_ccp = ccp.predict_proba(smiles) + sets_ccp = ccp.predict_conformal_set(smiles) + self.assertEqual(len(preds_ccp), len(y)) + self.assertEqual(probs_ccp.shape[0], len(y)) + self.assertEqual(len(sets_ccp), len(y)) if __name__ == "__main__": unittest.main() From 6de0e48c98681b909d0dfd032535c9ea44d43d96 Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Wed, 2 Jul 2025 12:49:20 +0200 Subject: [PATCH 02/20] ruffed it --- .../experimental/uncertainty/__init__.py | 7 +- .../experimental/uncertainty/conformal.py | 440 ++++++++++++++---- .../advanced_04_conformal_prediction.ipynb | 392 ++++++++-------- .../test_uncertainty/test_conformal.py | 74 +-- tests/test_pipeline.py | 83 ++-- 5 files 
changed, 652 insertions(+), 344 deletions(-) diff --git a/molpipeline/experimental/uncertainty/__init__.py b/molpipeline/experimental/uncertainty/__init__.py index 664bb2aa..27bb2ba4 100644 --- a/molpipeline/experimental/uncertainty/__init__.py +++ b/molpipeline/experimental/uncertainty/__init__.py @@ -1,3 +1,6 @@ -from molpipeline.experimental.uncertainty.conformal import UnifiedConformalCV, CrossConformalCV +from molpipeline.experimental.uncertainty.conformal import ( + CrossConformalCV, + UnifiedConformalCV, +) -__all__ = ["UnifiedConformalCV", "CrossConformalCV"] \ No newline at end of file +__all__ = ["CrossConformalCV", "UnifiedConformalCV"] diff --git a/molpipeline/experimental/uncertainty/conformal.py b/molpipeline/experimental/uncertainty/conformal.py index c8b8a68c..3d802003 100644 --- a/molpipeline/experimental/uncertainty/conformal.py +++ b/molpipeline/experimental/uncertainty/conformal.py @@ -1,13 +1,33 @@ -from crepes import WrapClassifier, WrapRegressor -from sklearn.model_selection import StratifiedKFold, KFold -from crepes.extras import hinge, margin, MondrianCategorizer +"""Conformal prediction wrappers for classification and regression using crepes. + +Provides unified and cross-conformal prediction with Mondrian and nonconformity options. +""" + +from typing import Any, cast + import numpy as np -from sklearn.base import BaseEstimator, clone +from crepes import WrapClassifier, WrapRegressor +from crepes.extras import MondrianCategorizer from scipy.stats import mode +from sklearn.base import BaseEstimator, clone +from sklearn.model_selection import KFold, StratifiedKFold + + +def bin_targets(y: np.ndarray, n_bins: int = 10) -> np.ndarray: + """Bin continuous targets for stratified splitting in regression. + + Parameters + ---------- + y : np.ndarray + Target values. + n_bins : int, optional + Number of bins (default: 10). + + Returns + ------- + np.ndarray + Binned targets. 
-def bin_targets(y, n_bins=10): - """ - Bin continuous targets for stratified splitting in regression. """ y = np.asarray(y) bins = np.linspace(np.min(y), np.max(y), n_bins + 1) @@ -15,9 +35,11 @@ def bin_targets(y, n_bins=10): y_binned[y_binned == n_bins] = n_bins - 1 # edge case return y_binned + class UnifiedConformalCV(BaseEstimator): - """ - One wrapper to rule them all: conformal prediction for both classifiers and regressors. + """One wrapper to rule them all: conformal prediction for both classifiers and + regressors. + Uses crepes under the hood, so you know it's sweet. Parameters @@ -25,7 +47,8 @@ class UnifiedConformalCV(BaseEstimator): estimator : sklearn-like estimator Your favorite model (or pipeline). mondrian : bool/callable/MondrianCategorizer, optional - If True, use class-conditional (Mondrian) calibration. If callable or MondrianCategorizer, use as custom group function/categorizer. + If True, use class-conditional (Mondrian) calibration. If callable or + MondrianCategorizer, use as custom group function/categorizer. confidence_level : float, optional How confident should we be? (default: 0.9) estimator_type : {'classifier', 'regressor'}, optional @@ -40,19 +63,22 @@ class UnifiedConformalCV(BaseEstimator): Parallelize all the things. kwargs : dict Extra toppings for crepes. 
+ """ + def __init__( self, - estimator, - mondrian=False, - confidence_level=0.9, - estimator_type="classifier", - nonconformity=None, - difficulty_estimator=None, - binning=None, - n_jobs=1, - **kwargs - ): + estimator: Any, + mondrian: Any = False, + confidence_level: float = 0.9, + estimator_type: str = "classifier", + nonconformity: Any | None = None, + difficulty_estimator: Any | None = None, + binning: Any | None = None, + n_jobs: int = 1, + **kwargs: Any, + ) -> None: + """Initialize UnifiedConformalCV.""" self.estimator = estimator self.mondrian = mondrian self.confidence_level = confidence_level @@ -63,76 +89,204 @@ def __init__( self.n_jobs = n_jobs self.kwargs = kwargs - def fit(self, X, y, **fit_params): + def fit(self, x: np.ndarray, y: np.ndarray) -> "UnifiedConformalCV": + """Fit the conformal predictor. + + Parameters + ---------- + x : np.ndarray + Training features. + y : np.ndarray + Training targets. + + Returns + ------- + UnifiedConformalCV + Self. + + Raises + ------ + ValueError + If estimator_type is not 'classifier' or 'regressor'. + + """ if self.estimator_type == "classifier": self._conformal = WrapClassifier(clone(self.estimator)) elif self.estimator_type == "regressor": self._conformal = WrapRegressor(clone(self.estimator)) else: raise ValueError("estimator_type must be 'classifier' or 'regressor'") - self._conformal.fit(X, y, **fit_params) + self._conformal.fit(x, y) self.fitted_ = True return self - def calibrate(self, X_calib, y_calib, **calib_params): - # --- Classification --- + def calibrate( + self, x_calib: np.ndarray, y_calib: np.ndarray, **calib_params: Any, + ) -> None: + """Calibrate the conformal predictor. + + Parameters + ---------- + x_calib : np.ndarray + Calibration features. + y_calib : np.ndarray + Calibration targets. + calib_params : dict + Additional calibration parameters. + + Raises + ------ + ValueError + If estimator_type is not 'classifier' or 'regressor'. 
+ + """ if self.estimator_type == "classifier": - nc = self.nonconformity if self.nonconformity is not None else hinge mondrian = self.mondrian - if isinstance(mondrian, MondrianCategorizer): - mc = mondrian - self._conformal.calibrate(X_calib, y_calib, nc=nc, mc=mc, **calib_params) - elif callable(mondrian): - mc = mondrian - self._conformal.calibrate(X_calib, y_calib, nc=nc, mc=mc, **calib_params) + if isinstance(mondrian, MondrianCategorizer) or callable(mondrian): + self._conformal.calibrate(x_calib, y_calib, mc=mondrian, **calib_params) elif mondrian is True: - self._conformal.calibrate(X_calib, y_calib, nc=nc, class_cond=True, **calib_params) + # Use class labels as Mondrian categories + self._conformal.calibrate(x_calib, y_calib, mc=y_calib, **calib_params) else: - self._conformal.calibrate(X_calib, y_calib, nc=nc, class_cond=False, **calib_params) - # --- Regression --- + self._conformal.calibrate(x_calib, y_calib, **calib_params) elif self.estimator_type == "regressor": - de = self.difficulty_estimator mondrian = self.mondrian if isinstance(mondrian, MondrianCategorizer) or callable(mondrian): mc = mondrian else: mc = None - bin_opt = self.binning - self._conformal.calibrate( - X_calib, y_calib, de=de, mc=mc, **calib_params - ) + self._conformal.calibrate(x_calib, y_calib, mc=mc, **calib_params) else: raise ValueError("estimator_type must be 'classifier' or 'regressor'") - def predict(self, X): - return self._conformal.predict(X) + def predict(self, x: np.ndarray) -> np.ndarray: + """Predict using the conformal predictor. + + Parameters + ---------- + x : np.ndarray + Features to predict. + + Returns + ------- + np.ndarray + Predictions. + + """ + return self._conformal.predict(x) + + def predict_proba(self, x: np.ndarray) -> np.ndarray: + """Predict probabilities using the conformal predictor. - def predict_proba(self, X): + Parameters + ---------- + x : np.ndarray + Features to predict. + + Returns + ------- + np.ndarray + Predicted probabilities. 
+ + Raises + ------ + NotImplementedError + If called for a regressor. + + """ if self.estimator_type != "classifier": raise NotImplementedError("predict_proba is for classifiers only.") - return self._conformal.predict_proba(X) + conformal = cast("WrapClassifier", self._conformal) + return conformal.predict_proba(x) + + def predict_conformal_set( + self, x: np.ndarray, confidence: float | None = None, + ) -> Any: + """Predict conformal sets. + + Parameters + ---------- + x : np.ndarray + Features to predict. + confidence : float, optional + Confidence level. - def predict_conformal_set(self, X, confidence=None): + Returns + ------- + Any + Conformal prediction sets. + + Raises + ------ + NotImplementedError + If called for a regressor. + + """ if self.estimator_type != "classifier": - raise NotImplementedError("predict_conformal_set is only for classification.") + raise NotImplementedError( + "predict_conformal_set is only for classification.", + ) conf = confidence if confidence is not None else self.confidence_level - return self._conformal.predict_set(X, confidence=conf) + conformal = cast("WrapClassifier", self._conformal) + return conformal.predict_set(x, confidence=conf) + + def predict_p(self, x: np.ndarray, **kwargs: Any) -> Any: + """Predict p-values. + + Parameters + ---------- + x : np.ndarray + Features to predict. + kwargs : dict + Additional parameters. - def predict_p(self, X, **kwargs): + Returns + ------- + Any + p-values. + + Raises + ------ + NotImplementedError + If called for a regressor. + + """ if self.estimator_type != "classifier": raise NotImplementedError("predict_p is only for classification.") - return self._conformal.predict_p(X, **kwargs) + return self._conformal.predict_p(x, **kwargs) + + def predict_int(self, x: np.ndarray, confidence: float | None = None) -> Any: + """Predict intervals. + + Parameters + ---------- + x : np.ndarray + Features to predict. + confidence : float, optional + Confidence level. 
+ + Returns + ------- + Any + Prediction intervals. - def predict_int(self, X, confidence=None): + Raises + ------ + NotImplementedError + If called for a classifier. + + """ if self.estimator_type != "regressor": raise NotImplementedError("predict_interval is only for regression.") conf = confidence if confidence is not None else self.confidence_level - return self._conformal.predict_int(X, confidence=conf) + conformal = cast("WrapRegressor", self._conformal) + return conformal.predict_int(x, confidence=conf) class CrossConformalCV(BaseEstimator): - """ - Cross-conformal prediction for both classifiers and regressors using WrapClassifier/WrapRegressor. + """Cross-conformal prediction for both classifiers and regressors using + WrapClassifier/WrapRegressor. + Handles Mondrian (class_cond) logic as described. Parameters @@ -159,8 +313,22 @@ class CrossConformalCV(BaseEstimator): Parallelize all the things. kwargs : dict Extra toppings for crepes. + """ - def __init__(self, estimator, n_folds=5, confidence_level=0.9, mondrian=False, nonconformity=None, binning=None, estimator_type="classifier", n_bins=10, **kwargs): + + def __init__( + self, + estimator: Any, + n_folds: int = 5, + confidence_level: float = 0.9, + mondrian: Any = False, + nonconformity: Any | None = None, + binning: Any | None = None, + estimator_type: str = "classifier", + n_bins: int = 10, + **kwargs: Any, + ) -> None: + """Initialize CrossConformalCV.""" self.estimator = estimator self.n_folds = n_folds self.confidence_level = confidence_level @@ -171,69 +339,165 @@ def __init__(self, estimator, n_folds=5, confidence_level=0.9, mondrian=False, n self.n_bins = n_bins self.kwargs = kwargs - def fit(self, X, y, **fit_params): - X = np.array(X) + def fit( + self, + x: np.ndarray, + y: np.ndarray, + ) -> "CrossConformalCV": + """Fit the cross-conformal predictor. + + Parameters + ---------- + x : np.ndarray + Training features. + y : np.ndarray + Training targets. 
+ + Returns + ------- + CrossConformalCV + Self. + + Raises + ------ + ValueError + If estimator_type is not 'classifier' or 'regressor'. + + """ + x = np.array(x) y = np.array(y) self.models_ = [] if self.estimator_type == "classifier": - splitter = StratifiedKFold(n_splits=self.n_folds, shuffle=True, random_state=42) + splitter = StratifiedKFold( + n_splits=self.n_folds, shuffle=True, random_state=42, + ) y_split = y elif self.estimator_type == "regressor": splitter = KFold(n_splits=self.n_folds, shuffle=True, random_state=42) y_split = bin_targets(y, n_bins=self.n_bins) else: raise ValueError("estimator_type must be 'classifier' or 'regressor'") - for train_idx, calib_idx in splitter.split(X, y_split): + for train_idx, calib_idx in splitter.split(x, y_split): if self.estimator_type == "classifier": model = WrapClassifier(clone(self.estimator)) - model.fit(X[train_idx], y[train_idx]) - # Mondrian logic: only use class_cond=True if mondrian is True - if self.mondrian: - model.calibrate(X[calib_idx], y[calib_idx], nc=self.nonconformity or hinge, class_cond=True) + model.fit(x[train_idx], y[train_idx]) + mondrian = self.mondrian + if isinstance(mondrian, MondrianCategorizer) or callable(mondrian): + model.calibrate(x[calib_idx], y[calib_idx], mc=mondrian) + elif mondrian is True: + model.calibrate(x[calib_idx], y[calib_idx], mc=y[calib_idx]) else: - model.calibrate(X[calib_idx], y[calib_idx], nc=self.nonconformity or hinge, class_cond=False) + model.calibrate(x[calib_idx], y[calib_idx]) else: model = WrapRegressor(clone(self.estimator)) - model.fit(X[train_idx], y[train_idx]) - # Mondrian logic: use MondrianCategorizer with binning if mondrian - if self.mondrian: - if self.binning is not None: - mc = MondrianCategorizer() - mc.fit(X[calib_idx], f=lambda X: y[calib_idx], no_bins=self.binning) - else: - mc = MondrianCategorizer() - mc.fit(X[calib_idx], f=lambda X: y[calib_idx]) - model.calibrate(X[calib_idx], y[calib_idx], mc=mc) + model.fit(x[train_idx], 
y[train_idx]) + mondrian = self.mondrian + if isinstance(mondrian, MondrianCategorizer) or callable(mondrian): + mc = mondrian else: - model.calibrate(X[calib_idx], y[calib_idx]) + mc = None + if self.binning is not None: + mc_obj = MondrianCategorizer() + calib_idx_val = calib_idx + + def _bin_func( + _: Any, calib_idx_val: Any = calib_idx_val, + ) -> Any: + return y[calib_idx_val] + + mc_obj.fit(x[calib_idx], f=_bin_func, no_bins=self.binning) + mc = mc_obj + model.calibrate(x[calib_idx], y[calib_idx], mc=mc) self.models_.append(model) return self - def predict(self, X): - # Majority vote - result = np.array([m.predict(X) for m in self.models_]) + def predict(self, x: np.ndarray) -> np.ndarray: + """Predict using the cross-conformal predictor. + + Parameters + ---------- + x : np.ndarray + Features to predict. + + Returns + ------- + np.ndarray + Predictions (majority vote). + + """ + result = np.array([m.predict(x) for m in self.models_]) result = np.asarray(result) if result.shape == (): - result = np.full((len(self.models_), len(X)), result) - if result.ndim == 1 and len(X) == 1: + result = np.full((len(self.models_), len(x)), result) + if result.ndim == 1 and len(x) == 1: result = result[:, np.newaxis] pred_mode = mode(result, axis=0, keepdims=False) return np.ravel(pred_mode.mode) - def predict_proba(self, X): - # Average probabilities - result = np.array([m.predict_proba(X) for m in self.models_]) - if result.ndim == 2 and result.shape[1] == 2 and len(X) == 1: + def predict_proba(self, x: np.ndarray) -> np.ndarray: + """Predict probabilities using the cross-conformal predictor. + + Parameters + ---------- + x : np.ndarray + Features to predict. + + Returns + ------- + np.ndarray + Predicted probabilities (averaged). + + Raises + ------ + NotImplementedError + If called for a regressor. 
+ + """ + if self.estimator_type != "classifier": + raise NotImplementedError("predict_proba is for classifiers only.") + binary_class_dim = 2 + result = np.array([m.predict_proba(x) for m in self.models_]) + if ( + result.ndim == binary_class_dim + and result.shape[1] == binary_class_dim + and len(x) == 1 + ): result = result[:, np.newaxis, :] proba = np.atleast_2d(np.mean(result, axis=0)) - if proba.shape[0] != len(X): - proba = np.full((len(X), proba.shape[1]), np.nan) + if proba.shape[0] != len(x): + proba = np.full((len(x), proba.shape[1]), np.nan) return proba - def predict_conformal_set(self, X, confidence=None): - # Union of conformal sets from all folds. - sets = [m.predict_set(X, confidence) for m in self.models_] - n = len(X) + def predict_conformal_set( + self, x: np.ndarray, confidence: float | None = None, + ) -> list[list[Any]]: + """Predict conformal sets using the cross-conformal predictor. + + Parameters + ---------- + x : np.ndarray + Features to predict. + confidence : float, optional + Confidence level. + + Returns + ------- + list[list[Any]] + Union of conformal sets from all folds. + + Raises + ------ + NotImplementedError + If called for a regressor. 
+ + """ + if self.estimator_type != "classifier": + raise NotImplementedError( + "predict_conformal_set is only for classification.", + ) + conf = confidence if confidence is not None else self.confidence_level + sets = [m.predict_set(x, confidence=conf) for m in self.models_] + n = len(x) union_sets = [] for i in range(n): union = set() diff --git a/notebooks/advanced_04_conformal_prediction.ipynb b/notebooks/advanced_04_conformal_prediction.ipynb index 70b6e062..c379a0eb 100644 --- a/notebooks/advanced_04_conformal_prediction.ipynb +++ b/notebooks/advanced_04_conformal_prediction.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "ab2b079b", "metadata": {}, "outputs": [], @@ -63,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "c2281174", "metadata": {}, "outputs": [ @@ -103,27 +103,49 @@ "\n", "\n", "\n", - "## 3. Classification: Splitting, Model Benchmarking, and Conformal Prediction\n", - "# Train/test split for classification\n", - "X_train, X_test, y_train, y_test = train_test_split(\n", - " X_feat, y_class, test_size=0.3, random_state=42, stratify=y_class\n", - ")\n", - "skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)\n", + "# ## 3. 
Classification: Splitting, Model Benchmarking, and Conformal Prediction\n", + "# # Train/test split for classification\n", + "# X_train, X_test, y_train, y_test = train_test_split(\n", + "# X_feat, y_class, test_size=0.3, random_state=42, stratify=y_class\n", + "# )\n", + "# skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)\n", + "\n", + "# # Split for conformal pipeline (use SMILES)\n", + "# smiles_train, smiles_test, y_train_cp, y_test_cp = train_test_split(\n", + "# smiles, y_class, test_size=0.3, random_state=42, stratify=y_class\n", + "# )\n", + "from sklearn.model_selection import train_test_split\n", "\n", - "# Split for conformal pipeline (use SMILES)\n", - "smiles_train, smiles_test, y_train_cp, y_test_cp = train_test_split(\n", - " smiles, y_class, test_size=0.3, random_state=42, stratify=y_class\n", + "# Generate indices for a single split\n", + "indices = np.arange(len(y_class))\n", + "train_idx, test_idx = train_test_split(\n", + " indices, test_size=0.3, random_state=42, stratify=y_class\n", ")\n", "\n", + "# Use these indices for all splits\n", + "X_train, X_test = X_feat[train_idx], X_feat[test_idx]\n", + "y_train, y_test = y_class[train_idx], y_class[test_idx]\n", + "smiles_train, smiles_test = smiles[train_idx], smiles[test_idx]\n", "\n" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "e4b28946", "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fold 1\n", + "Fold 2\n", + "Fold 3\n", + "Fold 4\n", + "Fold 5\n" + ] + }, { "data": { "application/vnd.microsoft.datawrangler.viewer.v0+json": { @@ -134,67 +156,67 @@ "type": "string" }, { - "name": "ensemble_xgb", + "name": "ensemble_xgb (OOF)", "rawType": "float64", "type": "float" }, { - "name": "CrossConformalCV", + "name": "CrossConformalCV (OOF)", "rawType": "float64", "type": "float" } ], - "ref": "2a093951-f074-4655-86ae-6a19ffe410e7", + "ref": "3e1f2e2b-8668-4240-9252-e16c5bbdb434", "rows": [ [ "NLL", 
- "0.6005578592752531", - "0.4523152437780237" + "0.65271811198485", + "0.484719057953362" ], [ "ECE", - "0.6421230924094008", - "0.5534285714285716" + "0.707331120872065", + "0.6036250000000001" ], [ "Brier", - "0.19658344924590318", - "0.150788" + "0.19066274454865353", + "0.16170891666666665" ], [ "Uncertainty Error Correlation", - "0.20310534876453837", - "0.3572542579666429" + "0.18735090090238612", + "0.40508529649564395" ], [ "Sharpness", - "0.3291429281234741", - "0.4779567203583455" + "0.2463051521032824", + "0.44035714473370763" ], [ "Balanced Accuracy", - "0.6868686868686869", - "0.6212121212121212" + "0.6059466848940533", + "0.507177033492823" ], [ "AUROC", - "0.7255892255892256", - "0.771043771043771" + "0.6903622693096377", + "0.7084757347915241" ], [ "AUPRC", - "0.3703203415170961", - "0.4412237544590486" + "0.33035817923550315", + "0.3133013787846372" ], [ "F1 Score", - "0.5", - "0.4" + "0.36363636363636365", + "0.14285714285714285" ], [ "MCC", - "0.34879284277296124", - "0.2842676218074806" + "0.2392040820868914", + "0.019620779205386296" ] ], "shape": { @@ -221,77 +243,77 @@ " \n", " \n", " Model\n", - " ensemble_xgb\n", - " CrossConformalCV\n", + " ensemble_xgb (OOF)\n", + " CrossConformalCV (OOF)\n", " \n", " \n", " \n", " \n", " NLL\n", - " 0.600558\n", - " 0.452315\n", + " 0.652718\n", + " 0.484719\n", " \n", " \n", " ECE\n", - " 0.642123\n", - " 0.553429\n", + " 0.707331\n", + " 0.603625\n", " \n", " \n", " Brier\n", - " 0.196583\n", - " 0.150788\n", + " 0.190663\n", + " 0.161709\n", " \n", " \n", " Uncertainty Error Correlation\n", - " 0.203105\n", - " 0.357254\n", + " 0.187351\n", + " 0.405085\n", " \n", " \n", " Sharpness\n", - " 0.329143\n", - " 0.477957\n", + " 0.246305\n", + " 0.440357\n", " \n", " \n", " Balanced Accuracy\n", - " 0.686869\n", - " 0.621212\n", + " 0.605947\n", + " 0.507177\n", " \n", " \n", " AUROC\n", - " 0.725589\n", - " 0.771044\n", + " 0.690362\n", + " 0.708476\n", " \n", " \n", " AUPRC\n", - " 0.370320\n", - " 
0.441224\n", + " 0.330358\n", + " 0.313301\n", " \n", " \n", " F1 Score\n", - " 0.500000\n", - " 0.400000\n", + " 0.363636\n", + " 0.142857\n", " \n", " \n", " MCC\n", - " 0.348793\n", - " 0.284268\n", + " 0.239204\n", + " 0.019621\n", " \n", " \n", "\n", "" ], "text/plain": [ - "Model ensemble_xgb CrossConformalCV\n", - "NLL 0.600558 0.452315\n", - "ECE 0.642123 0.553429\n", - "Brier 0.196583 0.150788\n", - "Uncertainty Error Correlation 0.203105 0.357254\n", - "Sharpness 0.329143 0.477957\n", - "Balanced Accuracy 0.686869 0.621212\n", - "AUROC 0.725589 0.771044\n", - "AUPRC 0.370320 0.441224\n", - "F1 Score 0.500000 0.400000\n", - "MCC 0.348793 0.284268" + "Model ensemble_xgb (OOF) CrossConformalCV (OOF)\n", + "NLL 0.652718 0.484719\n", + "ECE 0.707331 0.603625\n", + "Brier 0.190663 0.161709\n", + "Uncertainty Error Correlation 0.187351 0.405085\n", + "Sharpness 0.246305 0.440357\n", + "Balanced Accuracy 0.605947 0.507177\n", + "AUROC 0.690362 0.708476\n", + "AUPRC 0.330358 0.313301\n", + "F1 Score 0.363636 0.142857\n", + "MCC 0.239204 0.019621" ] }, "metadata": {}, @@ -299,11 +321,13 @@ } ], "source": [ - "\n", - "### 3.1 Benchmarking Standard Models\n", + "### 3.1 Cross-Validation Benchmarking: Standard Models and Conformal Prediction\n", "\n", "from xgboost import XGBClassifier\n", "\n", + "# Use StratifiedKFold on the training set\n", + "skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)\n", + "\n", "model_dict = {\n", " \"ensemble_xgb\": XGBClassifier(eval_metric='logloss', random_state=42),\n", "}\n", @@ -312,84 +336,82 @@ " \"Balanced Accuracy\", \"AUROC\", \"AUPRC\", \"F1 Score\", \"MCC\"\n", "]\n", "results = []\n", + "results_cp = []\n", + "\n", + "# Arrays to collect out-of-fold predictions\n", + "oof_preds = np.zeros_like(y_train, dtype=float)\n", + "oof_preds_cp = np.zeros_like(y_train, dtype=float)\n", + "\n", + "for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, y_train)):\n", + " print(f\"Fold {fold+1}\")\n", + " X_tr, 
X_val = X_train[train_idx], X_train[val_idx]\n", + " y_tr, y_val = y_train[train_idx], y_train[val_idx]\n", + " smiles_tr, smiles_val = smiles_train[train_idx], smiles_train[val_idx]\n", + "\n", + " # --- Standard Model ---\n", + " for model_name, model in model_dict.items():\n", + " model.fit(X_tr, y_tr)\n", + " prob = model.predict_proba(X_val)[:, 1]\n", + " oof_preds[val_idx] = prob\n", + "\n", + " # --- Conformal Prediction (CrossConformalCV) ---\n", + " rf = RandomForestClassifier(n_estimators=100, random_state=42)\n", + " rf_pipeline = Pipeline([\n", + " (\"featurizer\", featurizer),\n", + " (\"rf\", rf)\n", + " ], n_jobs=1)\n", + " cc_clf = CrossConformalCV(\n", + " estimator=rf_pipeline,\n", + " n_folds=5,\n", + " confidence_level=0.9,\n", + " estimator_type=\"classifier\"\n", + " )\n", + " cc_clf.fit(smiles_tr, y_tr)\n", + " # Average ensemble probabilities for the validation fold\n", + " probs_cp_ensemble = np.mean([m.predict_conformal_proba(smiles_val) for m in cc_clf.models_], axis=0)\n", + " oof_preds_cp[val_idx] = probs_cp_ensemble[:, 1]\n", + "\n", + "# Compute metrics for out-of-fold predictions (standard model)\n", + "mean_pred = (oof_preds >= 0.5).astype(int)\n", + "metrics = {\n", + " \"Model\": \"ensemble_xgb (OOF)\",\n", + " \"NLL\": log_loss(y_train, oof_preds),\n", + " \"ECE\": compute_ece(y_train, oof_preds),\n", + " \"Brier\": brier_score_loss(y_train, oof_preds),\n", + " \"Uncertainty Error Correlation\": compute_uncertainty_error_corr(y_train, oof_preds),\n", + " \"Sharpness\": compute_sharpness(oof_preds),\n", + " \"Balanced Accuracy\": balanced_accuracy_score(y_train, mean_pred),\n", + " \"AUROC\": roc_auc_score(y_train, oof_preds),\n", + " \"AUPRC\": average_precision_score(y_train, oof_preds),\n", + " \"F1 Score\": f1_score(y_train, mean_pred),\n", + " \"MCC\": matthews_corrcoef(y_train, mean_pred)\n", + "}\n", + "results.append(metrics)\n", "\n", - "for model_name, model in model_dict.items():\n", - " probs = []\n", - " preds = 
[]\n", - " for train_idx, _ in skf.split(X_train, y_train):\n", - " model.fit(X_train[train_idx], y_train[train_idx])\n", - " prob = model.predict_proba(X_test)\n", - " pred = model.predict(X_test)\n", - " probs.append(prob)\n", - " preds.append(pred)\n", - " probs = np.stack(probs)\n", - " preds = np.stack(preds)\n", - " mean_probs = probs.mean(axis=0)\n", - " mean_pred = np.round(mean_probs[:, 1]).astype(int)\n", - " y_true = y_test\n", - " p1 = mean_probs[:, 1]\n", - " metrics = {\n", - " \"Model\": model_name,\n", - " \"NLL\": log_loss(y_true, p1),\n", - " \"ECE\": compute_ece(y_true, p1),\n", - " \"Brier\": brier_score_loss(y_true, p1),\n", - " \"Uncertainty Error Correlation\": compute_uncertainty_error_corr(y_true, p1),\n", - " \"Sharpness\": compute_sharpness(p1),\n", - " \"Balanced Accuracy\": balanced_accuracy_score(y_true, mean_pred),\n", - " \"AUROC\": roc_auc_score(y_true, p1),\n", - " \"AUPRC\": average_precision_score(y_true, p1),\n", - " \"F1 Score\": f1_score(y_true, mean_pred),\n", - " \"MCC\": matthews_corrcoef(y_true, mean_pred)\n", - " }\n", - " results.append(metrics)\n", - "\n", - "\n", - "\n", - "### 3.2 Conformal Prediction (CrossConformalCV)\n", - "\n", - "rf = RandomForestClassifier(n_estimators=100, random_state=42)\n", - "rf_pipeline = Pipeline([\n", - " (\"featurizer\", featurizer),\n", - " (\"rf\", rf)\n", - "], n_jobs=1)\n", - "cc_clf = CrossConformalCV(\n", - " estimator=rf_pipeline,\n", - " n_folds=5,\n", - " confidence_level=0.9,\n", - " estimator_type=\"classifier\"\n", - ")\n", - "cc_clf.fit(smiles_train, y_train_cp)\n", - "probs_cp_ensemble = np.mean([m.predict_proba(smiles_test) for m in cc_clf.models_], axis=0)\n", - "mean_pred_cp = np.argmax(probs_cp_ensemble, axis=1)\n", - "y_true_cp = y_test_cp\n", - "p1_cp = probs_cp_ensemble[:, 1]\n", - "p1_cp = p1_cp / (p1_cp + (1 - p1_cp)) # Normalize to [0, 1]\n", + "# Compute metrics for out-of-fold predictions (conformal)\n", + "mean_pred_cp = (oof_preds_cp >= 0.5).astype(int)\n", 
"metrics_cp = {\n", - " \"Model\": \"CrossConformalCV\",\n", - " \"NLL\": log_loss(y_true_cp, p1_cp),\n", - " \"ECE\": compute_ece(y_true_cp, p1_cp),\n", - " \"Brier\": brier_score_loss(y_true_cp, p1_cp),\n", - " \"Uncertainty Error Correlation\": compute_uncertainty_error_corr(y_true_cp, p1_cp),\n", - " \"Sharpness\": compute_sharpness(p1_cp),\n", - " \"Balanced Accuracy\": balanced_accuracy_score(y_true_cp, mean_pred_cp),\n", - " \"AUROC\": roc_auc_score(y_true_cp, p1_cp),\n", - " \"AUPRC\": average_precision_score(y_true_cp, p1_cp),\n", - " \"F1 Score\": f1_score(y_true_cp, mean_pred_cp),\n", - " \"MCC\": matthews_corrcoef(y_true_cp, mean_pred_cp)\n", + " \"Model\": \"CrossConformalCV (OOF)\",\n", + " \"NLL\": log_loss(y_train, oof_preds_cp),\n", + " \"ECE\": compute_ece(y_train, oof_preds_cp),\n", + " \"Brier\": brier_score_loss(y_train, oof_preds_cp),\n", + " \"Uncertainty Error Correlation\": compute_uncertainty_error_corr(y_train, oof_preds_cp),\n", + " \"Sharpness\": compute_sharpness(oof_preds_cp),\n", + " \"Balanced Accuracy\": balanced_accuracy_score(y_train, mean_pred_cp),\n", + " \"AUROC\": roc_auc_score(y_train, oof_preds_cp),\n", + " \"AUPRC\": average_precision_score(y_train, oof_preds_cp),\n", + " \"F1 Score\": f1_score(y_train, mean_pred_cp),\n", + " \"MCC\": matthews_corrcoef(y_train, mean_pred_cp)\n", "}\n", - "results.append(metrics_cp)\n", - "\n", - "results_df = pd.DataFrame(results).set_index(\"Model\").T\n", - "display(results_df)\n", - "\n", + "results_cp.append(metrics_cp)\n", "\n", - "\n", - "\n" + "results_df = pd.DataFrame(results + results_cp).set_index(\"Model\").T\n", + "display(results_df)\n" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "2bcaf7d7", "metadata": {}, "outputs": [ @@ -443,50 +465,50 @@ "type": "integer" } ], - "ref": "c11a7bec-d070-4065-a38e-793ba02a5c5b", + "ref": "c2eec36b-ea3e-40f3-94f4-dd7ee17654c1", "rows": [ [ "0", "CC1=CC=CC=C1OC2=C(C3=C(N2C4=CC=CC=C4)N=CC=C3)C(=O)N5CCNCC5", 
- "0.882058574975068", - "0.04655439540710138", - "0.05013325991762946", + "0.8524618705697028", + "0.034328378006639994", + "0.038710820356574985", "[0, 1]", "0" ], [ "1", "C1CCN(CC1)C2=C(C3=CC=CC=C3N2C4=CC=CC=C4)C(=O)N5CCNCC5", - "0.6254642414170489", - "0.04320167593384251", - "0.06460876023849627", + "0.6324145223238109", + "0.06271762947875074", + "0.09022403771142816", "[0, 1]", "0" ], [ "2", "CC1=C(C=CC=C1F)CC2=C(C3=C(N2C4=CC=CC=C4)C=C(C=C3)O)C(=O)N5CCNCC5", - "0.22385245244687813", - "0.30252834643235366", - "0.5747328684403408", + "0.21540320870091612", + "0.3071940554024495", + "0.5878217826664724", "[1]", "1" ], [ "3", "C1CN(CCN1)C(=O)C2=C(N(C3=C2N=CC=C3)C4=CC=CC=C4)CC5=C(C(=CC=C5)F)F", - "0.32348499449082535", - "0.23406706926178478", - "0.4198120399482948", + "0.3248329740540647", + "0.24656932448754013", + "0.43151615790911185", "[1]", "0" ], [ "4", "CC1=C(C=CC=C1F)CC2=C(C3=CN=C(C=C3N2C4CCCCC4)OC)C(=O)N5CCNC(C5)CO", - "0.644347075681564", - "0.05727451601737295", - "0.08163163262778775", + "0.6475921388608785", + "0.059184034522427174", + "0.08373801601012118", "[0, 1]", "0" ] @@ -527,45 +549,45 @@ " \n", " 0\n", " CC1=CC=CC=C1OC2=C(C3=C(N2C4=CC=CC=C4)N=CC=C3)C...\n", - " 0.882059\n", - " 0.046554\n", - " 0.050133\n", + " 0.852462\n", + " 0.034328\n", + " 0.038711\n", " [0, 1]\n", " 0\n", " \n", " \n", " 1\n", " C1CCN(CC1)C2=C(C3=CC=CC=C3N2C4=CC=CC=C4)C(=O)N...\n", - " 0.625464\n", - " 0.043202\n", - " 0.064609\n", + " 0.632415\n", + " 0.062718\n", + " 0.090224\n", " [0, 1]\n", " 0\n", " \n", " \n", " 2\n", " CC1=C(C=CC=C1F)CC2=C(C3=C(N2C4=CC=CC=C4)C=C(C=...\n", - " 0.223852\n", - " 0.302528\n", - " 0.574733\n", + " 0.215403\n", + " 0.307194\n", + " 0.587822\n", " [1]\n", " 1\n", " \n", " \n", " 3\n", " C1CN(CCN1)C(=O)C2=C(N(C3=C2N=CC=C3)C4=CC=CC=C4...\n", - " 0.323485\n", - " 0.234067\n", - " 0.419812\n", + " 0.324833\n", + " 0.246569\n", + " 0.431516\n", " [1]\n", " 0\n", " \n", " \n", " 4\n", " CC1=C(C=CC=C1F)CC2=C(C3=CN=C(C=C3N2C4CCCCC4)OC...\n", 
- " 0.644347\n", - " 0.057275\n", - " 0.081632\n", + " 0.647592\n", + " 0.059184\n", + " 0.083738\n", " [0, 1]\n", " 0\n", " \n", @@ -575,18 +597,18 @@ ], "text/plain": [ " SMILES p0 p1 \\\n", - "0 CC1=CC=CC=C1OC2=C(C3=C(N2C4=CC=CC=C4)N=CC=C3)C... 0.882059 0.046554 \n", - "1 C1CCN(CC1)C2=C(C3=CC=CC=C3N2C4=CC=CC=C4)C(=O)N... 0.625464 0.043202 \n", - "2 CC1=C(C=CC=C1F)CC2=C(C3=C(N2C4=CC=CC=C4)C=C(C=... 0.223852 0.302528 \n", - "3 C1CN(CCN1)C(=O)C2=C(N(C3=C2N=CC=C3)C4=CC=CC=C4... 0.323485 0.234067 \n", - "4 CC1=C(C=CC=C1F)CC2=C(C3=CN=C(C=C3N2C4CCCCC4)OC... 0.644347 0.057275 \n", + "0 CC1=CC=CC=C1OC2=C(C3=C(N2C4=CC=CC=C4)N=CC=C3)C... 0.852462 0.034328 \n", + "1 C1CCN(CC1)C2=C(C3=CC=CC=C3N2C4=CC=CC=C4)C(=O)N... 0.632415 0.062718 \n", + "2 CC1=C(C=CC=C1F)CC2=C(C3=C(N2C4=CC=CC=C4)C=C(C=... 0.215403 0.307194 \n", + "3 C1CN(CCN1)C(=O)C2=C(N(C3=C2N=CC=C3)C4=CC=CC=C4... 0.324833 0.246569 \n", + "4 CC1=C(C=CC=C1F)CC2=C(C3=CN=C(C=C3N2C4CCCCC4)OC... 0.647592 0.059184 \n", "\n", " p1_norm conformal_set true_label \n", - "0 0.050133 [0, 1] 0 \n", - "1 0.064609 [0, 1] 0 \n", - "2 0.574733 [1] 1 \n", - "3 0.419812 [1] 0 \n", - "4 0.081632 [0, 1] 0 " + "0 0.038711 [0, 1] 0 \n", + "1 0.090224 [0, 1] 0 \n", + "2 0.587822 [1] 1 \n", + "3 0.431516 [1] 0 \n", + "4 0.083738 [0, 1] 0 " ] }, "metadata": {}, @@ -597,12 +619,12 @@ "output_type": "stream", "text": [ "Conformal set coverage: 0.833\n", - "Conformal set average size: 1.690\n", + "Conformal set average size: 1.667\n", "Conformal set error: 0.167\n", "Fraction of empty sets: 0.000\n", - "NLL: 0.4487081892488713\n", - "Brier: 0.14933955252658113\n", - "AUROC: 0.771043771043771\n", + "NLL: 0.4559115484789948\n", + "Brier: 0.15137191487078414\n", + "AUROC: 0.7643097643097643\n", "F1: 0.42857142857142855\n", "MCC: 0.34555798270379956\n" ] diff --git a/tests/test_experimental/test_uncertainty/test_conformal.py b/tests/test_experimental/test_uncertainty/test_conformal.py index d6cdec67..87cbc420 100644 --- 
a/tests/test_experimental/test_uncertainty/test_conformal.py +++ b/tests/test_experimental/test_uncertainty/test_conformal.py @@ -1,59 +1,77 @@ +"""Unit tests for conformal prediction wrappers in +molpipeline.experimental.uncertainty.conformal. +""" import unittest -import numpy as np -from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor + from sklearn.datasets import make_classification, make_regression +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor from sklearn.model_selection import train_test_split -from molpipeline.experimental.uncertainty.conformal import UnifiedConformalCV, CrossConformalCV +from molpipeline.experimental.uncertainty.conformal import ( + CrossConformalCV, + UnifiedConformalCV, +) + class TestConformalCV(unittest.TestCase): - def test_unified_conformal_classifier(self): - X, y = make_classification(n_samples=100, n_features=10, random_state=42) - X_train, X_calib, y_train, y_calib = train_test_split(X, y, test_size=0.2, random_state=42) + """Unit tests for UnifiedConformalCV and CrossConformalCV wrappers.""" + + def test_unified_conformal_classifier(self) -> None: + """Test UnifiedConformalCV with a classifier.""" + x, y = make_classification(n_samples=100, n_features=10, random_state=42) + x_train, x_calib, y_train, y_calib = train_test_split( + x, y, test_size=0.2, random_state=42, + ) clf = RandomForestClassifier(random_state=42) cp = UnifiedConformalCV(clf, estimator_type="classifier") - cp.fit(X_train, y_train) - cp.calibrate(X_calib, y_calib) - preds = cp.predict(X_calib) - probs = cp.predict_proba(X_calib) - sets = cp.predict_conformal_set(X_calib) + cp.fit(x_train, y_train) + cp.calibrate(x_calib, y_calib) + preds = cp.predict(x_calib) + probs = cp.predict_proba(x_calib) + sets = cp.predict_conformal_set(x_calib) self.assertEqual(len(preds), len(y_calib)) self.assertEqual(probs.shape[0], len(y_calib)) self.assertEqual(len(sets), len(y_calib)) - def 
test_unified_conformal_regressor(self): - X, y = make_regression(n_samples=100, n_features=10, random_state=42) - X_train, X_calib, y_train, y_calib = train_test_split(X, y, test_size=0.2, random_state=42) + def test_unified_conformal_regressor(self) -> None: + """Test UnifiedConformalCV with a regressor.""" + x, y = make_regression(n_samples=100, n_features=10, random_state=42) + x_train, x_calib, y_train, y_calib = train_test_split( + x, y, test_size=0.2, random_state=42, + ) reg = RandomForestRegressor(random_state=42) cp = UnifiedConformalCV(reg, estimator_type="regressor") - cp.fit(X_train, y_train) - cp.calibrate(X_calib, y_calib) - intervals = cp.predict_int(X_calib) + cp.fit(x_train, y_train) + cp.calibrate(x_calib, y_calib) + intervals = cp.predict_int(x_calib) self.assertEqual(intervals.shape[0], len(y_calib)) self.assertEqual(intervals.shape[1], 2) - def test_cross_conformal_classifier(self): - X, y = make_classification(n_samples=100, n_features=10, random_state=42) + def test_cross_conformal_classifier(self) -> None: + """Test CrossConformalCV with a classifier.""" + x, y = make_classification(n_samples=100, n_features=10, random_state=42) clf = RandomForestClassifier(random_state=42) ccp = CrossConformalCV(clf, estimator_type="classifier", n_folds=3) - ccp.fit(X, y) - preds = ccp.predict(X) - probs = ccp.predict_proba(X) - sets = ccp.predict_conformal_set(X) + ccp.fit(x, y) + preds = ccp.predict(x) + probs = ccp.predict_proba(x) + sets = ccp.predict_conformal_set(x) self.assertEqual(len(preds), len(y)) self.assertEqual(probs.shape[0], len(y)) self.assertEqual(len(sets), len(y)) - def test_cross_conformal_regressor(self): - X, y = make_regression(n_samples=100, n_features=10, random_state=42) + def test_cross_conformal_regressor(self) -> None: + """Test CrossConformalCV with a regressor.""" + x, y = make_regression(n_samples=100, n_features=10, random_state=42) reg = RandomForestRegressor(random_state=42) ccp = CrossConformalCV(reg, 
estimator_type="regressor", n_folds=3) - ccp.fit(X, y) + ccp.fit(x, y) # Each model should produce intervals for all samples for model in ccp.models_: - intervals = model.predict_int(X) + intervals = model.predict_int(x) self.assertEqual(intervals.shape[0], len(y)) self.assertEqual(intervals.shape[1], 2) + if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 7ac91bf4..4b54ae10 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -376,48 +376,49 @@ def test_calibrated_classifier(self) -> None: self.assertEqual(predicted_value_array.shape, (len(TEST_SMILES),)) self.assertEqual(predicted_proba_array.shape, (len(TEST_SMILES), 2)) - def test_conformal_pipeline_classifier(self): - """Test conformal prediction with a pipeline on SMILES data.""" - from molpipeline.experimental.uncertainty.conformal import UnifiedConformalCV, CrossConformalCV +def test_conformal_pipeline_classifier(self): + """Test conformal prediction with a pipeline on SMILES data.""" + from molpipeline.experimental.uncertainty.conformal import UnifiedConformalCV, CrossConformalCV + + # Use the global test data + smiles = TEST_SMILES + y = np.array(CONTAINS_OX) + + # Build a pipeline: SMILES -> Mol -> MorganFP -> RF + smi2mol = SmilesToMol() + mol2morgan = MolToMorganFP(radius=2, n_bits=128) + rf = RandomForestClassifier(n_estimators=10, random_state=42) + pipeline = Pipeline([ + ("smi2mol", smi2mol), + ("morgan", mol2morgan), + ("rf", rf) + ]) + + # Split data + from sklearn.model_selection import train_test_split + X_train, X_calib, y_train, y_calib = train_test_split(smiles, y, test_size=0.3, random_state=42) + + # UnifiedConformalCV + cp = UnifiedConformalCV(pipeline, estimator_type="classifier") + cp.fit(X_train, y_train) + cp.calibrate(X_calib, y_calib) + preds = cp.predict(X_calib) + probs = cp.predict_proba(X_calib) + sets = cp.predict_conformal_set(X_calib) + 
self.assertEqual(len(preds), len(y_calib)) + self.assertEqual(probs.shape[0], len(y_calib)) + self.assertEqual(len(sets), len(y_calib)) + + # CrossConformalCV + ccp = CrossConformalCV(pipeline, estimator_type="classifier", n_folds=3) + ccp.fit(smiles, y) + preds_ccp = ccp.predict(smiles) + probs_ccp = ccp.predict_proba(smiles) + sets_ccp = ccp.predict_conformal_set(smiles) + self.assertEqual(len(preds_ccp), len(y)) + self.assertEqual(probs_ccp.shape[0], len(y)) + self.assertEqual(len(sets_ccp), len(y)) - # Use the global test data - smiles = TEST_SMILES - y = np.array(CONTAINS_OX) - - # Build a pipeline: SMILES -> Mol -> MorganFP -> RF - smi2mol = SmilesToMol() - mol2morgan = MolToMorganFP(radius=2, n_bits=128) - rf = RandomForestClassifier(n_estimators=10, random_state=42) - pipeline = Pipeline([ - ("smi2mol", smi2mol), - ("morgan", mol2morgan), - ("rf", rf) - ]) - - # Split data - from sklearn.model_selection import train_test_split - X_train, X_calib, y_train, y_calib = train_test_split(smiles, y, test_size=0.3, random_state=42) - - # UnifiedConformalCV - cp = UnifiedConformalCV(pipeline, estimator_type="classifier") - cp.fit(X_train, y_train) - cp.calibrate(X_calib, y_calib) - preds = cp.predict(X_calib) - probs = cp.predict_proba(X_calib) - sets = cp.predict_conformal_set(X_calib) - self.assertEqual(len(preds), len(y_calib)) - self.assertEqual(probs.shape[0], len(y_calib)) - self.assertEqual(len(sets), len(y_calib)) - - # CrossConformalCV - ccp = CrossConformalCV(pipeline, estimator_type="classifier", n_folds=3) - ccp.fit(smiles, y) - preds_ccp = ccp.predict(smiles) - probs_ccp = ccp.predict_proba(smiles) - sets_ccp = ccp.predict_conformal_set(smiles) - self.assertEqual(len(preds_ccp), len(y)) - self.assertEqual(probs_ccp.shape[0], len(y)) - self.assertEqual(len(sets_ccp), len(y)) if __name__ == "__main__": unittest.main() From aedc290d8361b498833e41a40a1619e8b0714eb8 Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Wed, 2 Jul 2025 13:36:04 +0200 Subject: 
[PATCH 03/20] mypy docstyle etc --- .../experimental/uncertainty/__init__.py | 5 ++ .../experimental/uncertainty/conformal.py | 56 ++++++++++-- .../test_uncertainty/__init__.py | 3 + tests/test_pipeline.py | 87 ++++++++++--------- 4 files changed, 102 insertions(+), 49 deletions(-) diff --git a/molpipeline/experimental/uncertainty/__init__.py b/molpipeline/experimental/uncertainty/__init__.py index 27bb2ba4..1dbfef58 100644 --- a/molpipeline/experimental/uncertainty/__init__.py +++ b/molpipeline/experimental/uncertainty/__init__.py @@ -1,3 +1,8 @@ +"""Experimental uncertainty wrappers for conformal prediction in MolPipeline. + +Provides CrossConformalCV and UnifiedConformalCV for robust uncertainty quantification. +""" + from molpipeline.experimental.uncertainty.conformal import ( CrossConformalCV, UnifiedConformalCV, diff --git a/molpipeline/experimental/uncertainty/conformal.py b/molpipeline/experimental/uncertainty/conformal.py index 3d802003..0ecb6669 100644 --- a/molpipeline/experimental/uncertainty/conformal.py +++ b/molpipeline/experimental/uncertainty/conformal.py @@ -37,8 +37,7 @@ def bin_targets(y: np.ndarray, n_bins: int = 10) -> np.ndarray: class UnifiedConformalCV(BaseEstimator): - """One wrapper to rule them all: conformal prediction for both classifiers and - regressors. + """One wrapper to rule them all: conformal prediction for both classifiers and regressors. Uses crepes under the hood, so you know it's sweet. @@ -78,7 +77,29 @@ def __init__( n_jobs: int = 1, **kwargs: Any, ) -> None: - """Initialize UnifiedConformalCV.""" + """Initialize UnifiedConformalCV. + + Parameters + ---------- + estimator : Any + The base estimator or pipeline to wrap. + mondrian : Any, optional + Mondrian calibration/grouping (default: False). + confidence_level : float, optional + Confidence level for prediction sets/intervals (default: 0.9). + estimator_type : str, optional + Type of estimator: 'classifier' or 'regressor' (default: 'classifier'). 
+ nonconformity : Any, optional + Nonconformity function for classification. + difficulty_estimator : Any, optional + Difficulty estimator for normalized conformal prediction (regression). + binning : Any, optional + Number of bins or binning function for Mondrian calibration (regression). + n_jobs : int, optional + Number of parallel jobs (default: 1). + **kwargs : Any + Additional keyword arguments for crepes. + """ self.estimator = estimator self.mondrian = mondrian self.confidence_level = confidence_level @@ -284,8 +305,7 @@ def predict_int(self, x: np.ndarray, confidence: float | None = None) -> Any: class CrossConformalCV(BaseEstimator): - """Cross-conformal prediction for both classifiers and regressors using - WrapClassifier/WrapRegressor. + """Cross-conformal prediction for both classifiers and regressors using WrapClassifier/WrapRegressor. Handles Mondrian (class_cond) logic as described. @@ -315,7 +335,7 @@ class CrossConformalCV(BaseEstimator): Extra toppings for crepes. """ - + def __init__( self, estimator: Any, @@ -328,7 +348,29 @@ def __init__( n_bins: int = 10, **kwargs: Any, ) -> None: - """Initialize CrossConformalCV.""" + """Initialize CrossConformalCV. + + Parameters + ---------- + estimator : Any + The base estimator or pipeline to wrap. + n_folds : int, optional + Number of cross-validation folds (default: 5). + confidence_level : float, optional + Confidence level for prediction sets/intervals (default: 0.9). + mondrian : Any, optional + Mondrian calibration/grouping (default: False). + nonconformity : Any, optional + Nonconformity function for classification. + binning : Any, optional + Number of bins or binning function for Mondrian calibration (regression). + estimator_type : str, optional + Type of estimator: 'classifier' or 'regressor' (default: 'classifier'). + n_bins : int, optional + Number of bins for stratified splitting in regression (default: 10). + **kwargs : Any + Additional keyword arguments for crepes. 
+ """ self.estimator = estimator self.n_folds = n_folds self.confidence_level = confidence_level diff --git a/tests/test_experimental/test_uncertainty/__init__.py b/tests/test_experimental/test_uncertainty/__init__.py index 4d9cb3a5..b5f6abf0 100644 --- a/tests/test_experimental/test_uncertainty/__init__.py +++ b/tests/test_experimental/test_uncertainty/__init__.py @@ -1 +1,4 @@ +"""Unit tests for conformal prediction wrappers in molpipeline.experimental.uncertainty.conformal. +""" + "Uncertainty test module" \ No newline at end of file diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 4b54ae10..9fd85a94 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -376,48 +376,51 @@ def test_calibrated_classifier(self) -> None: self.assertEqual(predicted_value_array.shape, (len(TEST_SMILES),)) self.assertEqual(predicted_proba_array.shape, (len(TEST_SMILES), 2)) -def test_conformal_pipeline_classifier(self): - """Test conformal prediction with a pipeline on SMILES data.""" - from molpipeline.experimental.uncertainty.conformal import UnifiedConformalCV, CrossConformalCV - - # Use the global test data - smiles = TEST_SMILES - y = np.array(CONTAINS_OX) - - # Build a pipeline: SMILES -> Mol -> MorganFP -> RF - smi2mol = SmilesToMol() - mol2morgan = MolToMorganFP(radius=2, n_bits=128) - rf = RandomForestClassifier(n_estimators=10, random_state=42) - pipeline = Pipeline([ - ("smi2mol", smi2mol), - ("morgan", mol2morgan), - ("rf", rf) - ]) - - # Split data - from sklearn.model_selection import train_test_split - X_train, X_calib, y_train, y_calib = train_test_split(smiles, y, test_size=0.3, random_state=42) - - # UnifiedConformalCV - cp = UnifiedConformalCV(pipeline, estimator_type="classifier") - cp.fit(X_train, y_train) - cp.calibrate(X_calib, y_calib) - preds = cp.predict(X_calib) - probs = cp.predict_proba(X_calib) - sets = cp.predict_conformal_set(X_calib) - self.assertEqual(len(preds), len(y_calib)) - self.assertEqual(probs.shape[0], 
len(y_calib)) - self.assertEqual(len(sets), len(y_calib)) - - # CrossConformalCV - ccp = CrossConformalCV(pipeline, estimator_type="classifier", n_folds=3) - ccp.fit(smiles, y) - preds_ccp = ccp.predict(smiles) - probs_ccp = ccp.predict_proba(smiles) - sets_ccp = ccp.predict_conformal_set(smiles) - self.assertEqual(len(preds_ccp), len(y)) - self.assertEqual(probs_ccp.shape[0], len(y)) - self.assertEqual(len(sets_ccp), len(y)) + def test_conformal_pipeline_classifier(self) -> None: + """Test conformal prediction with a pipeline on SMILES data. + + This test does not take any parameters and does not return a value. + """ + from molpipeline.experimental.uncertainty.conformal import UnifiedConformalCV, CrossConformalCV + + # Use the global test data + smiles = TEST_SMILES + y = np.array(CONTAINS_OX) + + # Build a pipeline: SMILES -> Mol -> MorganFP -> RF + smi2mol = SmilesToMol() + mol2morgan = MolToMorganFP(radius=2, n_bits=128) + rf = RandomForestClassifier(n_estimators=10, random_state=42) + pipeline = Pipeline([ + ("smi2mol", smi2mol), + ("morgan", mol2morgan), + ("rf", rf) + ]) + + # Split data + from sklearn.model_selection import train_test_split + X_train, X_calib, y_train, y_calib = train_test_split(smiles, y, test_size=0.3, random_state=42) + + # UnifiedConformalCV + cp = UnifiedConformalCV(pipeline, estimator_type="classifier") + cp.fit(X_train, y_train) + cp.calibrate(X_calib, y_calib) + preds = cp.predict(X_calib) + probs = cp.predict_proba(X_calib) + sets = cp.predict_conformal_set(X_calib) + self.assertEqual(len(preds), len(y_calib)) + self.assertEqual(probs.shape[0], len(y_calib)) + self.assertEqual(len(sets), len(y_calib)) + + # CrossConformalCV + ccp = CrossConformalCV(pipeline, estimator_type="classifier", n_folds=3) + ccp.fit(smiles, y) + preds_ccp = ccp.predict(smiles) + probs_ccp = ccp.predict_proba(smiles) + sets_ccp = ccp.predict_conformal_set(smiles) + self.assertEqual(len(preds_ccp), len(y)) + self.assertEqual(probs_ccp.shape[0], len(y)) + 
self.assertEqual(len(sets_ccp), len(y)) if __name__ == "__main__": From 6947efe2d0e9fcde1b576dd4b1717c2052b995a6 Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Mon, 7 Jul 2025 11:03:26 +0200 Subject: [PATCH 04/20] pull first --- .../experimental/uncertainty/conformal.py | 35 ++- .../advanced_04_conformal_prediction.ipynb | 297 +++++++++++++----- pyproject.toml | 6 +- .../test_uncertainty/__init__.py | 5 +- .../test_uncertainty/test_conformal.py | 4 +- tests/test_pipeline.py | 9 +- 6 files changed, 247 insertions(+), 109 deletions(-) diff --git a/molpipeline/experimental/uncertainty/conformal.py b/molpipeline/experimental/uncertainty/conformal.py index 0ecb6669..9f948ede 100644 --- a/molpipeline/experimental/uncertainty/conformal.py +++ b/molpipeline/experimental/uncertainty/conformal.py @@ -3,18 +3,22 @@ Provides unified and cross-conformal prediction with Mondrian and nonconformity options. """ +# pylint: disable=too-many-instance-attributes, attribute-defined-outside-init + from typing import Any, cast import numpy as np from crepes import WrapClassifier, WrapRegressor from crepes.extras import MondrianCategorizer +from numpy.typing import NDArray from scipy.stats import mode from sklearn.base import BaseEstimator, clone from sklearn.model_selection import KFold, StratifiedKFold -def bin_targets(y: np.ndarray, n_bins: int = 10) -> np.ndarray: - """Bin continuous targets for stratified splitting in regression. +def bin_targets(y: NDArray[Any], n_bins: int = 10) -> NDArray[np.int_]: + """ + Bin continuous targets for stratified splitting in regression. Parameters ---------- @@ -27,7 +31,6 @@ def bin_targets(y: np.ndarray, n_bins: int = 10) -> np.ndarray: ------- np.ndarray Binned targets. - """ y = np.asarray(y) bins = np.linspace(np.min(y), np.max(y), n_bins + 1) @@ -99,6 +102,7 @@ def __init__( Number of parallel jobs (default: 1). **kwargs : Any Additional keyword arguments for crepes. 
+ """ self.estimator = estimator self.mondrian = mondrian @@ -110,7 +114,7 @@ def __init__( self.n_jobs = n_jobs self.kwargs = kwargs - def fit(self, x: np.ndarray, y: np.ndarray) -> "UnifiedConformalCV": + def fit(self, x: NDArray[Any], y: NDArray[Any]) -> "UnifiedConformalCV": """Fit the conformal predictor. Parameters @@ -142,7 +146,7 @@ def fit(self, x: np.ndarray, y: np.ndarray) -> "UnifiedConformalCV": return self def calibrate( - self, x_calib: np.ndarray, y_calib: np.ndarray, **calib_params: Any, + self, x_calib: NDArray[Any], y_calib: NDArray[Any], **calib_params: Any, ) -> None: """Calibrate the conformal predictor. @@ -180,7 +184,7 @@ def calibrate( else: raise ValueError("estimator_type must be 'classifier' or 'regressor'") - def predict(self, x: np.ndarray) -> np.ndarray: + def predict(self, x: NDArray[Any]) -> NDArray[Any]: """Predict using the conformal predictor. Parameters @@ -196,7 +200,7 @@ def predict(self, x: np.ndarray) -> np.ndarray: """ return self._conformal.predict(x) - def predict_proba(self, x: np.ndarray) -> np.ndarray: + def predict_proba(self, x: NDArray[Any]) -> NDArray[Any]: """Predict probabilities using the conformal predictor. Parameters @@ -221,7 +225,7 @@ def predict_proba(self, x: np.ndarray) -> np.ndarray: return conformal.predict_proba(x) def predict_conformal_set( - self, x: np.ndarray, confidence: float | None = None, + self, x: NDArray[Any], confidence: float | None = None, ) -> Any: """Predict conformal sets. @@ -251,7 +255,7 @@ def predict_conformal_set( conformal = cast("WrapClassifier", self._conformal) return conformal.predict_set(x, confidence=conf) - def predict_p(self, x: np.ndarray, **kwargs: Any) -> Any: + def predict_p(self, x: NDArray[Any], **kwargs: Any) -> Any: """Predict p-values. 
Parameters @@ -276,7 +280,7 @@ def predict_p(self, x: np.ndarray, **kwargs: Any) -> Any: raise NotImplementedError("predict_p is only for classification.") return self._conformal.predict_p(x, **kwargs) - def predict_int(self, x: np.ndarray, confidence: float | None = None) -> Any: + def predict_int(self, x: NDArray[Any], confidence: float | None = None) -> Any: """Predict intervals. Parameters @@ -370,6 +374,7 @@ def __init__( Number of bins for stratified splitting in regression (default: 10). **kwargs : Any Additional keyword arguments for crepes. + """ self.estimator = estimator self.n_folds = n_folds @@ -383,8 +388,8 @@ def __init__( def fit( self, - x: np.ndarray, - y: np.ndarray, + x: NDArray[Any], + y: NDArray[Any], ) -> "CrossConformalCV": """Fit the cross-conformal predictor. @@ -453,7 +458,7 @@ def _bin_func( self.models_.append(model) return self - def predict(self, x: np.ndarray) -> np.ndarray: + def predict(self, x: NDArray[Any]) -> NDArray[Any]: """Predict using the cross-conformal predictor. Parameters @@ -476,7 +481,7 @@ def predict(self, x: np.ndarray) -> np.ndarray: pred_mode = mode(result, axis=0, keepdims=False) return np.ravel(pred_mode.mode) - def predict_proba(self, x: np.ndarray) -> np.ndarray: + def predict_proba(self, x: NDArray[Any]) -> NDArray[Any]: """Predict probabilities using the cross-conformal predictor. Parameters @@ -511,7 +516,7 @@ def predict_proba(self, x: np.ndarray) -> np.ndarray: return proba def predict_conformal_set( - self, x: np.ndarray, confidence: float | None = None, + self, x: NDArray[Any], confidence: float | None = None, ) -> list[list[Any]]: """Predict conformal sets using the cross-conformal predictor. 
diff --git a/notebooks/advanced_04_conformal_prediction.ipynb b/notebooks/advanced_04_conformal_prediction.ipynb index c379a0eb..2afd872d 100644 --- a/notebooks/advanced_04_conformal_prediction.ipynb +++ b/notebooks/advanced_04_conformal_prediction.ipynb @@ -19,22 +19,31 @@ "outputs": [], "source": [ "\n", - "## 1. Import Required Libraries and Define Utility Functions\n", + "# 1. Import Required Libraries and Define Utility Functions\n", + "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", + "from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor\n", + "from sklearn.metrics import (\n", + " average_precision_score,\n", + " balanced_accuracy_score,\n", + " brier_score_loss,\n", + " f1_score,\n", + " log_loss,\n", + " matthews_corrcoef,\n", + " roc_auc_score,\n", + ")\n", + "from sklearn.model_selection import StratifiedKFold, train_test_split\n", + "\n", "from molpipeline.any2mol import SmilesToMol\n", "from molpipeline.error_handling import ErrorFilter, FilterReinserter\n", + "from molpipeline.experimental.uncertainty.conformal import (\n", + " CrossConformalCV,\n", + ")\n", "from molpipeline.mol2any.mol2morgan_fingerprint import MolToMorganFP\n", "from molpipeline.pipeline import Pipeline\n", "from molpipeline.post_prediction import PostPredictionWrapper\n", - "from molpipeline.experimental.uncertainty.conformal import UnifiedConformalCV, CrossConformalCV\n", - "from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor\n", - "from sklearn.model_selection import train_test_split, StratifiedKFold, KFold\n", - "from sklearn.metrics import (\n", - " log_loss, brier_score_loss, balanced_accuracy_score, roc_auc_score,\n", - " average_precision_score, f1_score, matthews_corrcoef\n", - ")\n", - "import matplotlib.pyplot as plt\n", + "\n", "\n", "def compute_ece(y_true, probs, n_bins=10):\n", " bins = np.linspace(0, 1, n_bins + 1)\n", @@ -48,17 +57,18 @@ " ece += np.abs(acc - conf) * np.sum(mask) / 
len(y_true)\n", " return ece\n", "\n", + "\n", "def compute_uncertainty_error_corr(y_true, probs):\n", " eps = 1e-12\n", " entropy = -probs * np.log(probs + eps) - (1 - probs) * np.log(1 - probs + eps)\n", " error = np.abs(y_true - (probs >= 0.5))\n", " return np.corrcoef(entropy, error)[0, 1]\n", "\n", + "\n", "def compute_sharpness(probs):\n", " eps = 1e-12\n", " entropy = -probs * np.log(probs + eps) - (1 - probs) * np.log(1 - probs + eps)\n", - " return np.mean(entropy)\n", - "\n" + " return np.mean(entropy)\n" ] }, { @@ -78,7 +88,7 @@ "source": [ "\n", "\n", - "## 2. Data Loading, Cleaning, and Featurization\n", + "# 2. Data Loading, Cleaning, and Featurization\n", "# Load real data\n", "df = pd.read_csv(\"example_data/renin_harren.csv\")\n", "smiles = df[\"pubchem_smiles\"].values\n", @@ -102,7 +112,6 @@ "print(f\"Shape of X={X_feat.shape}, y_class={y_class.shape}, y_reg={y_reg.shape}\")\n", "\n", "\n", - "\n", "# ## 3. Classification: Splitting, Model Benchmarking, and Conformal Prediction\n", "# # Train/test split for classification\n", "# X_train, X_test, y_train, y_test = train_test_split(\n", @@ -114,7 +123,6 @@ "# smiles_train, smiles_test, y_train_cp, y_test_cp = train_test_split(\n", "# smiles, y_class, test_size=0.3, random_state=42, stratify=y_class\n", "# )\n", - "from sklearn.model_selection import train_test_split\n", "\n", "# Generate indices for a single split\n", "indices = np.arange(len(y_class))\n", @@ -125,13 +133,12 @@ "# Use these indices for all splits\n", "X_train, X_test = X_feat[train_idx], X_feat[test_idx]\n", "y_train, y_test = y_class[train_idx], y_class[test_idx]\n", - "smiles_train, smiles_test = smiles[train_idx], smiles[test_idx]\n", - "\n" + "smiles_train, smiles_test = smiles[train_idx], smiles[test_idx]\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "e4b28946", "metadata": {}, "outputs": [ @@ -161,66 +168,81 @@ "type": "float" }, { - "name": "CrossConformalCV (OOF)", + "name": 
"CrossConformalCV (OOF, norm)", + "rawType": "float64", + "type": "float" + }, + { + "name": "CrossConformalCV (OOF, raw)", "rawType": "float64", "type": "float" } ], - "ref": "3e1f2e2b-8668-4240-9252-e16c5bbdb434", + "ref": "19a2d863-8031-48e1-86bd-f6970dfaed4f", "rows": [ [ "NLL", - "0.65271811198485", + "0.5082971245539286", + "0.4948980269013652", "0.484719057953362" ], [ "ECE", - "0.707331120872065", + "0.6230208333333332", + "0.6428021891094953", "0.6036250000000001" ], [ "Brier", - "0.19066274454865353", + "0.17212395833333335", + "0.16448031114059614", "0.16170891666666665" ], [ "Uncertainty Error Correlation", - "0.18735090090238612", + "0.4139304487779154", + "0.45798897710100606", "0.40508529649564395" ], [ "Sharpness", - "0.2463051521032824", + "0.4164086587962599", + "0.4037236852211876", "0.44035714473370763" ], [ "Balanced Accuracy", - "0.6059466848940533", + "0.49419002050580996", + "0.5006835269993164", "0.507177033492823" ], [ "AUROC", - "0.6903622693096377", + "0.7053998632946001", + "0.7006151742993848", "0.7084757347915241" ], [ "AUPRC", - "0.33035817923550315", + "0.306106679300729", + "0.3094994417942496", "0.3133013787846372" ], [ "F1 Score", - "0.36363636363636365", + "0.13333333333333333", + "0.13793103448275862", "0.14285714285714285" ], [ "MCC", - "0.2392040820868914", + "-0.014535198024344553", + "0.0017830298218644615", "0.019620779205386296" ] ], "shape": { - "columns": 2, + "columns": 3, "rows": 10 } }, @@ -244,58 +266,69 @@ " \n", " Model\n", " ensemble_xgb (OOF)\n", - " CrossConformalCV (OOF)\n", + " CrossConformalCV (OOF, norm)\n", + " CrossConformalCV (OOF, raw)\n", " \n", " \n", " \n", " \n", " NLL\n", - " 0.652718\n", + " 0.508297\n", + " 0.494898\n", " 0.484719\n", " \n", " \n", " ECE\n", - " 0.707331\n", + " 0.623021\n", + " 0.642802\n", " 0.603625\n", " \n", " \n", " Brier\n", - " 0.190663\n", + " 0.172124\n", + " 0.164480\n", " 0.161709\n", " \n", " \n", " Uncertainty Error Correlation\n", - " 0.187351\n", + " 0.413930\n", 
+ " 0.457989\n", " 0.405085\n", " \n", " \n", " Sharpness\n", - " 0.246305\n", + " 0.416409\n", + " 0.403724\n", " 0.440357\n", " \n", " \n", " Balanced Accuracy\n", - " 0.605947\n", + " 0.494190\n", + " 0.500684\n", " 0.507177\n", " \n", " \n", " AUROC\n", - " 0.690362\n", + " 0.705400\n", + " 0.700615\n", " 0.708476\n", " \n", " \n", " AUPRC\n", - " 0.330358\n", + " 0.306107\n", + " 0.309499\n", " 0.313301\n", " \n", " \n", " F1 Score\n", - " 0.363636\n", + " 0.133333\n", + " 0.137931\n", " 0.142857\n", " \n", " \n", " MCC\n", - " 0.239204\n", + " -0.014535\n", + " 0.001783\n", " 0.019621\n", " \n", " \n", @@ -303,17 +336,41 @@ "" ], "text/plain": [ - "Model ensemble_xgb (OOF) CrossConformalCV (OOF)\n", - "NLL 0.652718 0.484719\n", - "ECE 0.707331 0.603625\n", - "Brier 0.190663 0.161709\n", - "Uncertainty Error Correlation 0.187351 0.405085\n", - "Sharpness 0.246305 0.440357\n", - "Balanced Accuracy 0.605947 0.507177\n", - "AUROC 0.690362 0.708476\n", - "AUPRC 0.330358 0.313301\n", - "F1 Score 0.363636 0.142857\n", - "MCC 0.239204 0.019621" + "Model ensemble_xgb (OOF) \\\n", + "NLL 0.508297 \n", + "ECE 0.623021 \n", + "Brier 0.172124 \n", + "Uncertainty Error Correlation 0.413930 \n", + "Sharpness 0.416409 \n", + "Balanced Accuracy 0.494190 \n", + "AUROC 0.705400 \n", + "AUPRC 0.306107 \n", + "F1 Score 0.133333 \n", + "MCC -0.014535 \n", + "\n", + "Model CrossConformalCV (OOF, norm) \\\n", + "NLL 0.494898 \n", + "ECE 0.642802 \n", + "Brier 0.164480 \n", + "Uncertainty Error Correlation 0.457989 \n", + "Sharpness 0.403724 \n", + "Balanced Accuracy 0.500684 \n", + "AUROC 0.700615 \n", + "AUPRC 0.309499 \n", + "F1 Score 0.137931 \n", + "MCC 0.001783 \n", + "\n", + "Model CrossConformalCV (OOF, raw) \n", + "NLL 0.484719 \n", + "ECE 0.603625 \n", + "Brier 0.161709 \n", + "Uncertainty Error Correlation 0.405085 \n", + "Sharpness 0.440357 \n", + "Balanced Accuracy 0.507177 \n", + "AUROC 0.708476 \n", + "AUPRC 0.313301 \n", + "F1 Score 0.142857 \n", + "MCC 0.019621 " ] 
}, "metadata": {}, @@ -321,15 +378,15 @@ } ], "source": [ - "### 3.1 Cross-Validation Benchmarking: Standard Models and Conformal Prediction\n", + "# 3.1 Cross-Validation Benchmarking: Standard Models and Conformal Prediction\n", "\n", - "from xgboost import XGBClassifier\n", "\n", "# Use StratifiedKFold on the training set\n", "skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)\n", "\n", "model_dict = {\n", - " \"ensemble_xgb\": XGBClassifier(eval_metric='logloss', random_state=42),\n", + " # \"ensemble_xgb\": XGBClassifier(eval_metric='logloss', random_state=42),\n", + " \"ensemble_rf\": RandomForestClassifier(n_estimators=100, random_state=42)\n", "}\n", "metrics_list = [\n", " \"NLL\", \"ECE\", \"Brier\", \"Uncertainty Error Correlation\", \"Sharpness\",\n", @@ -337,13 +394,15 @@ "]\n", "results = []\n", "results_cp = []\n", + "# ...existing code...\n", "\n", "# Arrays to collect out-of-fold predictions\n", "oof_preds = np.zeros_like(y_train, dtype=float)\n", - "oof_preds_cp = np.zeros_like(y_train, dtype=float)\n", + "oof_preds_cp_norm = np.zeros_like(y_train, dtype=float)\n", + "oof_preds_cp_raw = np.zeros_like(y_train, dtype=float)\n", "\n", "for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, y_train)):\n", - " print(f\"Fold {fold+1}\")\n", + " print(f\"Fold {fold + 1}\")\n", " X_tr, X_val = X_train[train_idx], X_train[val_idx]\n", " y_tr, y_val = y_train[train_idx], y_train[val_idx]\n", " smiles_tr, smiles_val = smiles_train[train_idx], smiles_train[val_idx]\n", @@ -368,8 +427,22 @@ " )\n", " cc_clf.fit(smiles_tr, y_tr)\n", " # Average ensemble probabilities for the validation fold\n", - " probs_cp_ensemble = np.mean([m.predict_conformal_proba(smiles_val) for m in cc_clf.models_], axis=0)\n", - " oof_preds_cp[val_idx] = probs_cp_ensemble[:, 1]\n", + " probs_cp_ensemble = np.mean([m.predict_p(smiles_val) for m in cc_clf.models_], axis=0)\n", + " probs_cp_ensemble_raw = np.mean([m.predict_proba(smiles_val) for m in cc_clf.models_], 
axis=0)\n", + " p0 = probs_cp_ensemble[:, 0]\n", + " p1 = probs_cp_ensemble[:, 1]\n", + " p1_norm = p1 / (p0 + p1 + 1e-12)\n", + " oof_preds_cp_norm[val_idx] = p1_norm\n", + " oof_preds_cp_raw[val_idx] = probs_cp_ensemble_raw[:, 1]\n", + "\n", + "# Create a DataFrame to compare raw and normalized conformal probabilities\n", + "df_oof_compare = pd.DataFrame({\n", + " \"y_true\": y_train,\n", + " \"StandardModel\": oof_preds,\n", + " \"ConformalRaw\": oof_preds_cp_raw,\n", + " \"ConformalNorm\": oof_preds_cp_norm\n", + "})\n", + "# display(df_oof_compare.head())\n", "\n", "# Compute metrics for out-of-fold predictions (standard model)\n", "mean_pred = (oof_preds >= 0.5).astype(int)\n", @@ -388,25 +461,88 @@ "}\n", "results.append(metrics)\n", "\n", - "# Compute metrics for out-of-fold predictions (conformal)\n", - "mean_pred_cp = (oof_preds_cp >= 0.5).astype(int)\n", - "metrics_cp = {\n", - " \"Model\": \"CrossConformalCV (OOF)\",\n", - " \"NLL\": log_loss(y_train, oof_preds_cp),\n", - " \"ECE\": compute_ece(y_train, oof_preds_cp),\n", - " \"Brier\": brier_score_loss(y_train, oof_preds_cp),\n", - " \"Uncertainty Error Correlation\": compute_uncertainty_error_corr(y_train, oof_preds_cp),\n", - " \"Sharpness\": compute_sharpness(oof_preds_cp),\n", - " \"Balanced Accuracy\": balanced_accuracy_score(y_train, mean_pred_cp),\n", - " \"AUROC\": roc_auc_score(y_train, oof_preds_cp),\n", - " \"AUPRC\": average_precision_score(y_train, oof_preds_cp),\n", - " \"F1 Score\": f1_score(y_train, mean_pred_cp),\n", - " \"MCC\": matthews_corrcoef(y_train, mean_pred_cp)\n", + "# Compute metrics for out-of-fold predictions (conformal, both raw and norm)\n", + "mean_pred_cp_norm = (oof_preds_cp_norm >= 0.5).astype(int)\n", + "metrics_cp_norm = {\n", + " \"Model\": \"CrossConformalCV (OOF, norm)\",\n", + " \"NLL\": log_loss(y_train, oof_preds_cp_norm),\n", + " \"ECE\": compute_ece(y_train, oof_preds_cp_norm),\n", + " \"Brier\": brier_score_loss(y_train, oof_preds_cp_norm),\n", + " 
\"Uncertainty Error Correlation\": compute_uncertainty_error_corr(y_train, oof_preds_cp_norm),\n", + " \"Sharpness\": compute_sharpness(oof_preds_cp_norm),\n", + " \"Balanced Accuracy\": balanced_accuracy_score(y_train, mean_pred_cp_norm),\n", + " \"AUROC\": roc_auc_score(y_train, oof_preds_cp_norm),\n", + " \"AUPRC\": average_precision_score(y_train, oof_preds_cp_norm),\n", + " \"F1 Score\": f1_score(y_train, mean_pred_cp_norm),\n", + " \"MCC\": matthews_corrcoef(y_train, mean_pred_cp_norm)\n", + "}\n", + "results_cp.append(metrics_cp_norm)\n", + "\n", + "mean_pred_cp_raw = (oof_preds_cp_raw >= 0.5).astype(int)\n", + "metrics_cp_raw = {\n", + " \"Model\": \"CrossConformalCV (OOF, raw)\",\n", + " \"NLL\": log_loss(y_train, oof_preds_cp_raw),\n", + " \"ECE\": compute_ece(y_train, oof_preds_cp_raw),\n", + " \"Brier\": brier_score_loss(y_train, oof_preds_cp_raw),\n", + " \"Uncertainty Error Correlation\": compute_uncertainty_error_corr(y_train, oof_preds_cp_raw),\n", + " \"Sharpness\": compute_sharpness(oof_preds_cp_raw),\n", + " \"Balanced Accuracy\": balanced_accuracy_score(y_train, mean_pred_cp_raw),\n", + " \"AUROC\": roc_auc_score(y_train, oof_preds_cp_raw),\n", + " \"AUPRC\": average_precision_score(y_train, oof_preds_cp_raw),\n", + " \"F1 Score\": f1_score(y_train, mean_pred_cp_raw),\n", + " \"MCC\": matthews_corrcoef(y_train, mean_pred_cp_raw)\n", "}\n", - "results_cp.append(metrics_cp)\n", + "results_cp.append(metrics_cp_raw)\n", "\n", "results_df = pd.DataFrame(results + results_cp).set_index(\"Model\").T\n", - "display(results_df)\n" + "display(results_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "ad5d684e", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAxYAAAHqCAYAAACZcdjsAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAA4ENJREFUeJzs3Xd4FNXXwPHvpvdKGhCSACF0CEiXjvSiIoiIFBERKSIWxJ+KqLyAHRVRLERERBFsKCAgICC9I6GFkgAJgfSQvjvvH0OWbOom2U3jfJ4nD7OzM3NPdpKwZ+8992oURVEQQgghhBBCiHKwqOwAhBBCCCGEENWfJBZCCCGEEEKIcpPEQgghhBBCCFFuklgIIYQQQgghyk0SCyGEEEIIIUS5SWIhhBBCCCGEKDdJLIQQQgghhBDlJomFEEIIIYQQotwksRBCCCGEEEKUmyQWQohqZePGjbRu3Ro7Ozs0Gg2JiYmVHVKRAgMDGT9+vP7x9u3b0Wg0bN++vdJiyi9/jJVt/PjxODk5mfSaGo2GadOmlXhcWFgYGo2GS5cu6ff16NGDHj166B9funQJjUZDWFiY0W2//vrrpQu4HEobX1WV+7vy008/VUh75rpPAwcOZNKkSSa/rql07NiRF198sbLDEDWIJBbirpX7JiL3y87OjkaNGjFt2jSuX79e2eGV26lTp3j99dcN3iRVd3FxcYwcORJ7e3uWLFnCt99+i6OjY6HH1qT7++eff1bom9PC5H0tLSwsqF27Nn379q1SSVJlqQr3x5x69OhhcP/zfjVu3LiywytU/t9/jUaDt7c3PXv2ZMOGDRUSw+7du/nrr7+YPXt2geciIyN56qmnCAwMxNbWFm9vb+6//352795d5PVKc05uYlbY16hRo/THzZ49myVLlhATE2Oab1rc9awqOwAhKtsbb7xBUFAQGRkZ7Nq1i6VLl/Lnn39y8uRJHBwcKju8Mjt16hTz5s2jR48eBAYGVnY4JnHgwAFSUlJ488036dOnj1HnVKX7261bN9LT07GxsSnVeX/++SdLliyp9Dev9913H2PHjkVRFC5evMinn35Kr169+OOPPxgwYEClxmYKjz32GKNGjcLW1rbIYwICAkhPT8fa2lq/r7j7k56ejpVVxf1XW1h8plC3bl0WLFhQYL+rq6tJ2zG13N9/RVG4fv06YWFhDBw4kN9//53BgwfrjzPHfXrnnXfo3bs3DRs2NNi/e/duBg4cCMATTzxB06ZNiYmJISwsjK5du7J48WKmT59e7nMAZsyYQbt27Qz25f3/YNiwYbi4uPDpp5/yxhtvmOLbFnc5SSzEXW/AgAHcc889gPoH29PTk/fff59ff/2VRx55pFzXTktLq9bJSVUTGxsLgJubm9HnlOX+3rp1q8iekPKwsLDAzs7O5NetKI0aNWLMmDH6xw888AAtW7bkww8/LDKxyMjIwMbGBguLqt9BbmlpiaWlZbHH5PZ+Gaui73dp4zOWq6urwb2vLvL+/gNMnDgRHx8fvv/+e4PEwtSvWWxsLH/88QefffaZwf6EhAQeeugh7O3t2b17Nw0aNNA/N2vWLPr168fMmTNp27YtnTt3LvM5ubp27cpDDz1UZJwWFhY89NBDrFixgnnz5qHRaEzx7Yu7WNX/Sy9EBevVqxcAFy9e1O9buXIlbdu2xd7eHg8PD0aNGkVUVJTBeT169KB58+YcOnSIbt264eDgwMsvvwyob65ef/11GjVqhJ2dHX5+fjz44INEREToz9fpdHz44Yc0a9YMOzs7fHx8mDx5MgkJCQbtBAYGMnjwYHbt2kX79u2xs7Ojfv36rFixQn9MWFgYI0aMAKBnz576LvDcYSu//vorgwYNonbt2tja2tKgQQPefPNNtFptgddjyZIl1K9fH3t7e9q3b8/OnTsLjDsHyMzMZO7cuTRs2BBbW1v8/f158cUXyczMNOp1X7Nmjf41rlWrFmPGjOHq1asGr++4ceMAaNeuHRq
Npky1Afnvb+6Y/oiICAYOHIizszOPPvooYPw9URSFt956i7p16+Lg4EDPnj3577//CrRdVI3Fvn37GDhwIO7u7jg6OtKyZUsWL16sj2/JkiWA4XCkXKaOsTRatGhBrVq19K9l7ve3evVqXnnlFerUqYODgwPJyclAyfc4rwsXLtCvXz8cHR2pXbs2b7zxBoqiGBzz7rvv0rlzZzw9PbG3t6dt27bFjsn/7rvvCAkJwc7OjrZt2/LPP/8YPF9YjUV++WsYSro/hY3dv3r1Ko8//jg+Pj7Y2trSrFkzvv766wJtffzxxzRr1gwHBwfc3d255557WLVqVZGxFRZfboxOTk5cvXqV+++/HycnJ7y8vHj++ecL/Z0vq8uXL/P0008TEhKCvb09np6ejBgxotDXMzExkWeffVY/rKdu3bqMHTuWmzdvGhyn0+mYP38+devWxc7Ojt69e3P+/Pkyx+jm5oa9vX2B3on89+n1119Ho9Fw/vx5xo8fj5ubG66urkyYMIG0tLQS2/njjz/Iyckp0LP6+eefExMTwzvvvGOQIADY29vzzTffoNFoDHoPynJOadx3331cvnyZo0ePlul8IfKSHgsh8sl9s+/p6QnA/PnzefXVVxk5ciRPPPEEN27c4OOPP6Zbt24cOXLE4NPzuLg4BgwYwKhRoxgzZgw+Pj5otVoGDx7M1q1bGTVqFM888wwpKSls3ryZkydP6v+jmDx5MmFhYUyYMIEZM2Zw8eJFPvnkE44cOcLu3bsNhjacP3+ehx56iIkTJzJu3Di+/vprxo8fT9u2bWnWrBndunVjxowZfPTRR7z88ss0adIEQP9vWFgYTk5OzJo1CycnJ/7++29ee+01kpOTeeedd/TtLF26lGnTptG1a1eeffZZLl26xP3334+7uzt169bVH6fT6Rg6dCi7du3iySefpEmTJpw4cYIPPviAs2fP8ssvvxT7mud+3+3atWPBggVcv36dxYsXs3v3bv1r/L///Y+QkBCWLVumH96Q/z/ZstxfgJycHPr168e9997Lu+++q+9lMvaevPbaa7z11lsMHDiQgQMHcvjwYfr27UtWVlaJ8WzevJnBgwfj5+fHM888g6+vL+Hh4axfv55nnnmGyZMnc+3aNTZv3sy3335b4PyKiLEoCQkJJCQkFBjq8eabb2JjY8Pzzz9PZmYmNjY2Rt3jXFqtlv79+9OxY0fefvttNm7cyNy5c8nJyTF487R48WKGDh3Ko48+SlZWFqtXr2bEiBGsX7+eQYMGGcS0Y8cOfvjhB2bMmIGtrS2ffvop/fv3Z//+/TRv3rzMr0FJ9ye/69ev07FjR31BuZeXFxs2bGDixIkkJyczc+ZMAL744gtmzJjBQw89xDPPPENGRgbHjx9n3759jB49utRxarVa+vXrR4cOHXj33XfZsmUL7733Hg0aNGDKlClGnZ//TT+ob2xze/cOHDjAv//+y6hRo6hbty6XLl1i6dKl9OjRg1OnTul/r1JTU+natSvh4eE8/vjjtGnThps3b/Lbb79x5coVatWqpb/+woULsbCw4PnnnycpKYm3336bRx99lH379hn1fSclJXHz5k0URSE2NpaPP/6Y1NRUo3tfRo4cSVBQEAsWLODw4cN8+eWXeHt7s2jRomLP+/fff/H09CQgIMBg/++//46dnR0jR44s9LygoCDuvfde/v77b9LT07G3ty/TOblSUlIK3DcPDw+DHsS2bdsC6nCr0NDQYr8vIUqkCHGXWr58uQIoW7ZsUW7cuKFERUUpq1evVjw9PRV7e3vlypUryqVLlxRLS0tl/vz5BueeOHFCsbKyMtjfvXt3BVA+++wzg2O//vprBVDef//9AjHodDpFURRl586dCqB89913Bs9v3LixwP6AgAAFUP755x/9vtjYWMXW1lZ57rnn9PvWrFmjAMq2bdsKtJuWllZg3+TJkxUHBwclIyNDURRFyczMVDw9PZV27dop2dnZ+uPCwsIUQOn
evbt+37fffqtYWFgoO3fuNLjmZ599pgDK7t27C7SXKysrS/H29laaN2+upKen6/evX79eAZTXXntNvy/3nh04cKDI6+U/trj7qyiKMm7cOAVQXnrpJYPzjb0nsbGxio2NjTJo0CD9/VQURXn55ZcVQBk3bpx+37Zt2wzuSU5OjhIUFKQEBAQoCQkJBu3kvdbUqVOVwv5cmyPGogDKxIkTlRs3biixsbHKvn37lN69eyuA8t577xl8f/Xr1zf4GSvNPc69H9OnTzd4LQYNGqTY2NgoN27c0O/P/3OclZWlNG/eXOnVq1eB2AHl4MGD+n2XL19W7OzslAceeEC/L/dn5uLFi/p93bt3N/hZv3jxogIoy5cv1+8r6v7ktj137lz944kTJyp+fn7KzZs3DY4bNWqU4urqqv+ehg0bpjRr1qzQaxansPhyX9M33njD4NjQ0FClbdu2JV4z929bYV+TJ0/WH1fY35U9e/YogLJixQr9vtdee00BlHXr1hU4PvfnM/dnqUmTJkpmZqb++cWLFyuAcuLEiWJjzr2X+b9sbW2VsLCwAsfnv09z585VAOXxxx83OO6BBx5QPD09i21bURTl3nvvLfS1dXNzU1q1alXsuTNmzFAA5fjx42U+J/f1K+wr7893LhsbG2XKlCklfl9ClESGQom7Xp8+ffDy8sLf359Ro0bh5OTEzz//TJ06dVi3bh06nY6RI0dy8+ZN/Zevry/BwcFs27bN4Fq2trZMmDDBYN/atWupVatWoYV1uUMm1qxZg6urK/fdd59BO23btsXJyalAO02bNqVr1676x15eXoSEhHDhwgWjvufCPtHq2rUraWlpnD59GoCDBw8SFxfHpEmTDIYNPProo7i7uxtcb82aNTRp0oTGjRsbxJ877Ch//HkdPHiQ2NhYnn76aYNxzoMGDaJx48b88ccfRn1PRSnu/uaV/1NbY+/Jli1byMrKYvr06QZDYHI/eS7OkSNHuHjxIjNnzixQN2LMWOeKiDGvr776Ci8vL7y9venQoQO7d+9m1qxZBa4zbtw4g5+xstzjvNPD5n66n5WVxZYtW/T787aRkJBAUlISXbt25fDhwwWu16lTJ/0nswD16tVj2LBhbNq0yaTDgYqjKApr165lyJAhKIpicM/69etHUlKSPnY3NzeuXLnCgQMHTNb+U089ZfC4a9euRv/NCAwMZPPmzQW+8t77vPcjOzubuLg4GjZsiJubm8E9Wbt2La1ateKBBx4o0E7+n/sJEyYYTHaQ+3fP2LiXLFmij3XlypX07NmTJ554gnXr1hl1fmGvWVxcnH54X1Hi4uIK/J0E9e+ts7NzsefmPp/bRlnOyfXaa68VuGe+vr4Fznd3dy+0R0qI0pKhUOKut2TJEho1aoSVlRU+Pj6EhITou4nPnTuHoigEBwcXem7+mVfq1KlTYMafiIgIQkJCip1x5Ny5cyQlJeHt7V3o87lFy7nq1atX4Bh3d/cC4+qL8t9///HKK6/w999/F/iPKCkpCVDHSwMFhrlYWVkVmGXq3LlzhIeH4+XlZVT8eeW2ExISUuC5xo0bs2vXruK/mRIUd39zWVlZGQztAuPvSW78+X9GvLy8Cn1jkVfusKyyDsWpiBjzGjZsGNOmTUOj0eDs7EyzZs0KLXIPCgoyeFzae2xhYUH9+vUN9jVq1AjAYLz++vXreeuttzh69KhBLU9hSVlhv8ONGjUiLS2NGzduFPpmy9Ru3LhBYmIiy5YtY9myZYUek3vPZs+ezZYtW2jfvj0NGzakb9++jB49mi5dupSpbTs7uwK/n6X5m+Ho6FjiTGzp6eksWLCA5cuXc/XqVYOamNy/K6D+3A8fPtyodvP/rcv9eTU27vbt2xsUbz/yyCOEhoYybdo0Bg8eXOIMbcW17+LiUuy5Sr6aIFATgJSUlGLPy30+N1koyzm5WrRoYdQMeoqiSOG2MAlJLMRdL/9/PHnpdDo0Gg0bNmwodLaY/At55f3ErjR
0Oh3e3t589913hT6f/w1BUTPXFPYfWX6JiYl0794dFxcX3njjDRo0aICdnR2HDx9m9uzZ6HS6MsXfokUL3n///UKf9/f3L/U1TaW4+5vL1ta2QLJR2ntSGSo6xrp16xr1JqWsvwelsXPnToYOHUq3bt349NNP8fPzw9ramuXLl5dY4FxZcn+3xowZo5+IIL+WLVsCaj3UmTNnWL9+PRs3bmTt2rV8+umnvPbaa8ybN6/UbZc025UpTJ8+neXLlzNz5kw6deqEq6urft2EsvxdgfL9rSuMhYUFPXv2ZPHixZw7d45mzZqZpX1PT89Ck58mTZpw5MgRMjMzi5zW+Pjx41hbW+uT4bKcU1qJiYkGtS1ClJUkFkIUo0GDBiiKQlBQkP4T07JcY9++fWRnZxc5t3yDBg3YsmULXbp0MdmbsqI+fdq+fTtxcXGsW7eObt266ffnnQUL0Bcdnj9/np49e+r35+TkcOnSJf0boNz4jx07Ru/evUv9qVduO2fOnNEPncp15syZAsWPFcXYe5Ib37lz5ww+Zb9x40aJn6rmFp+fPHmy2DfsRb2mFRGjKZT2Hut0Oi5cuGDwO3f27Fngzhz8a9euxc7Ojk2bNhm82Vq+fHmhMZw7d67AvrNnz+Lg4FDuBMzYn3kvLy+cnZ3RarVGJWiOjo48/PDDPPzww2RlZfHggw8yf/585syZUyWnLf7pp58YN24c7733nn5fRkYGiYmJBsc1aNCAkydPVnB0d+Tk5ABqEbm5NG7cmLVr1xbYP3jwYPbs2cOaNWsKLSC/dOkSO3fupE+fPvrf6bKcUxpXr14lKytLP7mHEOUhNRZCFOPBBx/E0tKSefPmFfiESlEU4uLiSrzG8OHDuXnzJp988kmB53KvOXLkSLRaLW+++WaBY3Jycgr8x2yM3CEq+c/N/QQu7/eTlZXFp59+anDcPffcg6enJ1988YX+P2JQp+zM/2Z05MiRXL16lS+++KJAHOnp6dy6davIOO+55x68vb357LPPDIazbNiwgfDw8AKz+1QUY+9Jnz59sLa25uOPPzZ4TT/88MMS22jTpg1BQUF8+OGHBe5T3msVdS8rIkZTKMs9zvv7oigKn3zyCdbW1vTu3RtQf441Go1BfcSlS5eKnIFsz549BuP8o6Ki+PXXX+nbt2+5P80v6v7kZ2lpyfDhw1m7dm2hb6xv3Lih387/t8XGxoamTZuiKArZ2dnlitdcLC0tC/yd/PjjjwvUsAwfPpxjx47x888/F7hGWXsijJWdnc1ff/2FjY2NWd9Id+rUiYSEhAK1IJMnT8bb25sXXnihwHMZGRlMmDABRVF47bXXynVOaRw6dAigwBoYQpSF9FgIUYwGDRrw1ltvMWfOHP1Uq87Ozly8eJGff/6ZJ598kueff77Ya4wdO5YVK1Ywa9Ys9u/fT9euXbl16xZbtmzh6aefZtiwYXTv3p3JkyezYMECjh49St++fbG2tubcuXOsWbOGxYsXF7vIUWFat26NpaUlixYtIikpCVtbW3r16kXnzp1xd3dn3LhxzJgxA41Gw7ffflvgP3QbGxtef/11pk+fTq9evRg5ciSXLl0iLCyMBg0aGHxK+9hjj/Hjjz/y1FNPsW3bNrp06YJWq+X06dP8+OOPbNq0qcjhSNbW1ixatIgJEybQvXt3HnnkEf1UpIGBgTz77LOl+r5Nxdh7krsewIIFCxg8eDADBw7kyJEjbNiwocShBRYWFixdupQhQ4bQunVrJkyYgJ+fH6dPn+a///5j06ZNwJ3pIGfMmEG/fv2wtLRk1KhRFRKjKZT2HtvZ2bFx40bGjRtHhw4d2LBhA3/88Qcvv/yyvndh0KBBvP/++/Tv35/Ro0cTGxvLkiVLaNiwIcePHy8QQ/PmzenXr5/BdLNAmYYV5VfU/SnMwoUL2bZtGx06dGDSpEk0bdqU+Ph4Dh8+zJYtW4iPjwegb9+++Pr60qVLF3x8fAgPD+eTTz5h0KBBJRbymkN
SUhIrV64s9LncT9EHDx7Mt99+i6urK02bNmXPnj1s2bLFYGpngBdeeIGffvqJESNG8Pjjj9O2bVvi4+P57bff+Oyzz2jVqpXJ4t6wYYN+QorY2FhWrVrFuXPneOmll0qskSiPQYMGYWVlxZYtW3jyySf1+z09Pfnpp58YNGgQbdq0KbCK9vnz51m8eLHBm/yynFMamzdvpl69ejLVrDCNipuASoiqpTRTl65du1a59957FUdHR8XR0VFp3LixMnXqVOXMmTP6Y7p3717k9JBpaWnK//73PyUoKEixtrZWfH19lYceekiJiIgwOG7ZsmVK27ZtFXt7e8XZ2Vlp0aKF8uKLLyrXrl3THxMQEKAMGjSoQBv5p8VUFEX54osvlPr16yuWlpYG05zu3r1b6dixo2Jvb6/Url1befHFF5VNmzYVOj3tRx99pAQEBCi2trZK+/btld27dytt27ZV+vfvb3BcVlaWsmjRIqVZs2aKra2t4u7urrRt21aZN2+ekpSUVNJLrPzwww9KaGioYmtrq3h4eCiPPvqofkrYXGWZbrakY8eNG6c4OjoW+bwx90Sr1Srz5s1T/Pz8FHt7e6VHjx7KyZMnlYCAgGKnm821a9cu5b777lOcnZ0VR0dHpWXLlsrHH3+sfz4nJ0eZPn264uXlpWg0mgJTm5oyxqIAytSpU4s9Jvf7W7NmTaHPG3OPc+9HRESE0rdvX8XBwUHx8fFR5s6dq2i1WoNjv/rqKyU4OFixtbVVGjdurCxfvlw/TWhhsa9cuVJ/fGhoaIH7UNbpZou7P+SbxlRRFOX69evK1KlTFX9/f/3fg969eyvLli3TH/P5558r3bp1Uzw9PRVbW1ulQYMGygsvvFDi71JR080W9jNe2GtVmOKmm817fkJCgjJhwgSlVq1aipOTk9KvXz/l9OnThf6MxcXFKdOmTVPq1Kmj2NjYKHXr1lXGjRunn4a3qJ+lwr6/whQ23aydnZ3SunVrZenSpQbTLitK0dPN5p3eOO91C5uyNb+hQ4cqvXv3LvS5ixcvKpMmTVLq1aunWFtbK7Vq1VKGDh1aYMrusp5T0u9iLq1Wq/j5+SmvvPJKid+PEMbQKIqZ+x2FEDWKTqfDy8uLBx98sNChT0IIIdQJBnr06MHp06fLXFRtbr/88gujR48mIiICPz+/yg5H1ABSYyGEKFJGRkaBIVIrVqwgPj6eHj16VE5QQghRDXTt2pW+ffvy9ttvV3YoRVq0aBHTpk2TpEKYjPRYCCGKtH37dp599llGjBiBp6cnhw8f5quvvqJJkyYcOnSoxDnghRBCCHH3kOJtIUSRAgMD8ff356OPPiI+Ph4PDw/Gjh3LwoULJakQQgghhAHpsRBCCCGEEEKUm9RYCCGEEEIIIcqtUhOLBQsW0K5dO5ydnfH29ub+++/nzJkzBsf06NEDjUZj8PXUU09VUsRCCCGEEEKIwlTqUKj+/fszatQo2rVrR05ODi+//DInT57k1KlT+pVMe/ToQaNGjXjjjTf05zk4OBi9sI1Op+PatWs4OzsbLOglhBBCCCGEKJ6iKKSkpFC7dm0sLIrvk6jU4u2NGzcaPA4LC8Pb25tDhw7RrVs3/X4HBwd8fX3L1Ma1a9fw9/cvV5xCCCGEEELczaKioqhbt26xx1SpWaGSkpIA8PDwMNj/3XffsXLlSnx9fRkyZAivvvoqDg4ORl3T2dkZUF8MY3s5hBBCCCGEEJCcnIy/v7/+PXVxqkxiodPpmDlzJl26dKF58+b6/aNHjyYgIIDatWtz/PhxZs+ezZkzZ1i3bl2h18nMzCQzM1P/OCUlBQAXFxdJLIQQQgghhCgDY0oKqkxiMXXqVE6ePMmuXbsM9j/55JP67RYtWuDn50fv3r2JiIigQYMGBa6zYMEC5s2bZ/Z4hRBCCCGEEHdUielmp02bxvr169m2bVuJY7c6dOgAwPnz5wt9fs6cOSQlJem/oqKiTB6vEEIIIYQQwlCl9lgoisL
06dP5+eef2b59O0FBQSWec/ToUQD8/PwKfd7W1hZbW1tThimEEEIIIYQoQaUmFlOnTmXVqlX8+uuvODs7ExMTA4Crqyv29vZERESwatUqBg4ciKenJ8ePH+fZZ5+lW7dutGzZsjJDF0IIIYQwCa1WS3Z2dmWHIe5S1tbWWFpamuRalbqORVFFIMuXL2f8+PFERUUxZswYTp48ya1bt/D39+eBBx7glVdeMboQOzk5GVdXV5KSkqR4WwghhBBVhqIoxMTEkJiYWNmhiLucm5sbvr6+hb43L8176UofClUcf39/duzYUUHRCCGEEEJUnNykwtvbGwcHB1nIV1Q4RVFIS0sjNjYWKLrUwFhVZlYoIYQQQoi7hVar1ScVnp6elR2OuIvZ29sDEBsbi7e3d7mGRVWJWaGEEEIIIe4muTUVxi74K4Q55f4clrfWRxILIYQQQohKIsOfRFVgqp9DSSyEEEIIIYQQ5SaJhRBCCCGEqJYuXbqERqPRr3NWXa5dGmFhYbi5uZXqHI1Gwy+//GKWeIojicVdQqvTciDmAH9e+JMDMQfQ6rSVHZIQQgghykmrU9gTEcevR6+yJyIOrc68qwjcuHGDKVOmUK9ePWxtbfH19aVfv37s3r1bf0xlvamtDD169ECj0bBw4cICzw0aNAiNRsPrr79e8YFVEpkV6i6w5fIWFu5fyPW06/p9Pg4+vNT+JfoE9KnEyIQQQghRVhtPRjPv91NEJ2Xo9/m52jF3SFP6Ny/ftKFFGT58OFlZWXzzzTfUr1+f69evs3XrVuLi4szSXkXIysrCxsamzOf7+/sTFhbGSy+9pN939epVtm7dWu7pW6sb6bGo4bZc3sKs7bMMkgqA2LRYZm2fxZbLWyopMiGEEEKU1caT0UxZedggqQCIScpgysrDbDwZbfI2ExMT2blzJ4sWLaJnz54EBATQvn175syZw9ChQwEIDAwE4IEHHkCj0egfR0REMGzYMHx8fHBycqJdu3Zs2WL4HiQwMJD/+7//4/HHH8fZ2Zl69eqxbNkyg2P2799PaGgodnZ23HPPPRw5csTgea1Wy8SJEwkKCsLe3p6QkBAWL15scMz48eO5//77mT9/PrVr1yYkJMSoaxdl8ODB3Lx506DX5ptvvqFv3754e3sbHJuQkMDYsWNxd3fHwcGBAQMGcO7cOYNjwsLCqFevHg4ODjzwwAOFJm2//vorbdq0wc7Ojvr16zNv3jxycnKMitecJLGowbQ6LQv3L0ShYLdo7r5F+xfJsCghhBCiGtHqFOb9fqqQ/93R75v3+ymTD4tycnLCycmJX375hczMzEKPOXDgAADLly8nOjpa/zg1NZWBAweydetWjhw5Qv/+/RkyZAiRkZEG57/33nv6N/VPP/00U6ZM4cyZM/prDB48mKZNm3Lo0CFef/11nn/+eYPzdToddevWZc2aNZw6dYrXXnuNl19+mR9//NHguK1bt3LmzBk2b97M+vXrjbp2UWxsbHj00UdZvny5fl9YWBiPP/54gWPHjx/PwYMH+e2339izZw+KojBw4ED9NK/79u1j4sSJTJs2jaNHj9KzZ0/eeustg2vs3LmTsWPH8swzz3Dq1Ck+//xzwsLCmD9/vlHxmpMMharBDsceLtBTkZeCQkxaDIdjD9POt10FRiaEEEKI/IZ8vIsbKYW/Yc8rM0dLQlrR6w0oQHRSBve8tRlbq5IXO/NytuX36feWeJyVlRVhYWFMmjSJzz77jDZt2tC9e3dGjRpFy5Yt1Wt5eQHg5uaGr6+v/txWrVrRqlUr/eM333yTn3/+md9++41p06bp9w8cOJCnn34agNmzZ/PBBx+wbds2QkJCWLVqFTqdjq+++go7OzuaNWvGlStXmDJliv58a2tr5s2bp38cFBTEnj17+PHHHxk5cqR+v6OjI19++aV+CNSyZctKvHZxHn/8cbp27crixYs5dOgQSUlJDB482KC+4ty5c/z222/s3r2bzp07A/Ddd9/
h7+/PL7/8wogRI1i8eDH9+/fnxRdfBKBRo0b8+++/bNy4UX+defPm8dJLLzFu3DgA6tevz5tvvsmLL77I3LlzjYrXXCSxqMFupN0w6XFCCCGEMJ8bKZnEJGeUfKCR1OSjfAue5Td8+HAGDRrEzp072bt3Lxs2bODtt9/myy+/ZPz48UWel5qayuuvv84ff/xBdHQ0OTk5pKenF+ixyE1QQC0C9/X1JTY2FoDw8HBatmyJnZ2d/phOnToVaGvJkiV8/fXXREZGkp6eTlZWFq1btzY4pkWLFgZ1FcZeuyitWrUiODiYn376iW3btvHYY49hZWX4Njs8PBwrKys6dOig3+fp6UlISAjh4eH6Yx544AGD8zp16mSQWBw7dozdu3cb9FBotVoyMjJIS0ur1EUXJbGowbwcvEx6nBBCCCHMx8vZ1qjjSuqxyOXuYG10j0Vp2NnZcd9993Hffffx6quv8sQTTzB37txiE4vnn3+ezZs38+6779KwYUPs7e156KGHyMrKMjjO2tra4LFGo0Gn0xkd2+rVq3n++ed577336NSpE87Ozrzzzjvs27fP4DhHR0ejr2msxx9/nCVLlnDq1Cn2799v8uvnSk1NZd68eTz44IMFnsubGFUGSSxqsDbebfBx8CE2LbbQOgsAXwdf2ni3qeDIhBBCCJGfMcORQK2xuHfR38QkZRT6v7sG8HW1Y9fsXlhamH9l76ZNmxpML2ttbY1Wa1i/uXv3bsaPH6//ND41NZVLly6Vqp0mTZrw7bffkpGRoX8DvXfv3gLtdO7cWT+cCtTCcVNcuySjR4/m+eefp1WrVjRt2rTQNnJycti3b59+KFRcXBxnzpzRH9+kSZMCSVD+ONq0acOZM2do2LBhqeKrCFK8XYNZWljyUvuXikwqAGa3n42lRcmfZgghhBCiarC00DB3iPpGNH/akPt47pCmJk8q4uLi6NWrFytXruT48eNcvHiRNWvW8PbbbzNs2DD9cYGBgWzdupWYmBgSEhIACA4OZt26dRw9epRjx44xevToUvVEgPrGXaPRMGnSJE6dOsWff/7Ju+++a3BMcHAwBw8eZNOmTZw9e5ZXX31VX0Be3muXxN3dnejoaLZu3Vro88HBwQwbNoxJkyaxa9cujh07xpgxY6hTp47+9ZsxYwYbN27k3Xff5dy5c3zyyScGw6AAXnvtNVasWMG8efP477//CA8PZ/Xq1bzyyiulitccJLGo4foE9GF049GFPudg5cC9dYz7dEQIIYQQVUf/5n4sHdMGX1fDoS++rnYsHdPGLOtYODk50aFDBz744AO6detG8+bNefXVV5k0aRKffPKJ/rj33nuPzZs34+/vT2hoKADvv/8+7u7udO7cmSFDhtCvXz/atCndiAknJyd+//13Tpw4QWhoKP/73/9YtGiRwTGTJ0/mwQcf5OGHH6ZDhw7ExcUZ9F6U59rGcHNzK3aY1fLly2nbti2DBw+mU6dOKIrCn3/+qR8C1rFjR7744gsWL15Mq1at+OuvvwokDP369WP9+vX89ddftGvXjo4dO/LBBx8QEBBQ6nhNTaMoinmXaKxkycnJuLq6kpSUhIuLS2WHUyle2vkSf1z4A4DJLSdzNPYo+2LUbrZ5nefxYHDBMXpCCCGEMJ+MjAwuXrxIUFBQucbFa3UK+y/GE5uSgbezHe2DPCpk+JOoWYr7eSzNe2npsajhFEVh7zV1bJ6DlQOTW07mmTbP6J9fGb6SGp5bCiGEEDWWpYWGTg08Gda6Dp0aeEpSISqVJBY13NmEs8RlqCs2tvNth7WlNS28WtDSS53O7VzCOQ7ElDz2UAghhBBCiOJIYlHD7bm2R7/dqfad+ZjHNBmj314ZvrJCYxJCCCGEEDWPJBY13J7oPImF353Eok9AH7ztvQHYHrWdKylXKjo0IYQQQghRg0hiUYNlajM5dP0QAD4OPgS5Bumfs7aw5uHGDwOgoPD96e8rJUYhhBBCCFEzSGJRgx2JPUK
mNhNQh0FpNIYFXQ81eggbC3U5+5/P/UxadlqFxyiEEEIIIWoGSSxqsH+v/avfzjsMKpeHnQeD6g8CICU7hV8jfq2w2IQQQgghRM0iiUUNljvNLEAHvw6FHvNok0f126vCV6FTSrcKphBCCCGEECCJRY0VnxFPeHw4AE08muBp71nocSEeIbTzbQfApeRL7L66u8JiFEIIIYQQNYckFjXUvuh9+u2OtTsWe2zeXovvTn9ntpiEEEIIIYylKApPPvkkHh4eaDQajh49WtkhFTB+/Hjuv//+yg6jypDEooYqqb4irx51e1DHqQ4Au6/u5kLSBbPGJoQQQggT0Wnh4k448ZP6r05r9iZjYmKYPn069evXx9bWFn9/f4YMGcLWrVtN2s7GjRsJCwtj/fr1REdH07x5c5NevyJs374djUaj//Ly8mLgwIGcOHGiskMzC0ksaiBFUfQL49la2tLGp02xx1taWPJI40f0j1eFrzJrfEIIIYQwgVO/wYfN4ZvBsHai+u+HzdX9ZnLp0iXatm3L33//zTvvvMOJEyfYuHEjPXv2ZOrUqSZtKyIiAj8/Pzp37oyvry9WVlalvoaiKOTk5Jg0rrI4c+YM0dHRbNq0iczMTAYNGkRWVlZlh2VykljUQBeTL3I97ToAbX3aYmtpW+I5DwQ/gL2VPQC/RfxGclayWWMUQgghRDmc+g1+HAvJ1wz3J0er+82UXDz99NNoNBr279/P8OHDadSoEc2aNWPWrFns3Xtn0pjIyEiGDRuGk5MTLi4ujBw5kuvXr+uff/3112ndujXffvstgYGBuLq6MmrUKFJSUgB1iNH06dOJjIxEo9EQGBgIQGZmJjNmzMDb2xs7OzvuvfdeDhw4oL9ubg/Bhg0baNu2Lba2tuzatYsePXowffp0Zs6cibu7Oz4+PnzxxRfcunWLCRMm4OzsTMOGDdmwYYP+WlqtlokTJxIUFIS9vT0hISEsXry4TK+bt7c3vr6+tGnThpkzZxIVFcXp06f1z7///vu0aNECR0dH/P39efrpp0lNTQXU5MjLy4uffvpJf3zr1q3x8/PTP961axe2trakpVXu0gGSWNRAub0VUPIwqFwuNi4MbTAUgPScdH4+97NZYhNCCCFEOem0sHE2oBTy5O19G18y+bCo+Ph4Nm7cyNSpU3F0dCzwvJubmxqeTsewYcOIj49nx44dbN68mQsXLvDwww8bHB8REcEvv/zC+vXrWb9+PTt27GDhwoUALF68mDfeeIO6desSHR2tTx5efPFF1q5dyzfffMPhw4dp2LAh/fr1Iz4+3uDaL730EgsXLiQ8PJyWLVsC8M0331CrVi3279/P9OnTmTJlCiNGjKBz584cPnyYvn378thjj+nfnOt0OurWrcuaNWs4deoUr732Gi+//DI//vhjmV/DpKQkVq9eDYCNjY1+v4WFBR999BH//fcf33zzDX///TcvvvgiABqNhm7durF9+3YAEhISCA8PJz09XZ+c7Nixg3bt2uHg4FDm2Eyh9H1KosozSCxqG5dYAIxuMpofzvwAwPenv2dMkzFYWliaPD4hhBBCFOLz7pAaW/JxOZmQHlfMAQokX4V3gsGq5FELOHnD5B0lHnb+/HkURaFx48bFHrd161ZOnDjBxYsX8ff3B2DFihU0a9aMAwcO0K6dOhulTqcjLCwMZ2dnAB577DG2bt3K/PnzcXV1xdnZGUtLS3x9fQG4desWS5cuJSwsjAEDBgDwxRdfsHnzZr766iteeOEFfQxvvPEG9913n0FcrVq14pVXXgFgzpw5LFy4kFq1ajFp0iQAXnvtNZYuXcrx48fp2LEj1tbWzJs3T39+UFAQe/bs4ccff2TkyJElvl551a1bV/89AAwdOtTgdZw5c6Z+OzAwkLfeeounnnqKTz/9FIAePXrw+eefA/DPP/8QGhqKr68v27dvp3Hjxmzfvp3u3buXKiZzkB6LGiZbl82BGDWr97DzINg92Ohz67vWp0v
tLgBcTb3K9ivbzRGiEEIIIQqTGgsp10r+KjapyCM9zrjrGZPMoA7JMUZ4eDj+/v76pAKgadOmuLm5ER4ert8XGBioTyoA/Pz8iI0tOpaIiAiys7Pp0qWLfp+1tTXt27c3uC7APffcU+D83J4LAEtLSzw9PWnRooV+n4+PD4BBDEuWLKFt27Z4eXnh5OTEsmXLiIyMLPb7L8zOnTs5dOgQYWFhNGrUiM8++8zg+S1bttC7d2/q1KmDs7Mzjz32GHFxcfrek+7du3Pq1Clu3LjBjh076NGjBz169GD79u1kZ2fz77//0qNHj1LHZWqSWNQwx28cJy1H/SHsVLsTFprS3WKDqWfDZepZIYQQosI4eYNz7ZK/ilibqgB7T+Ou5+Rt1OWCg4PRaDQGtQHlYW1tbfBYo9Gg05lmod7ChmoV1l7efRqNBkAfw+rVq3n++eeZOHEif/31F0ePHmXChAllKroOCgoiJCSEcePG8cQTTxgMC7t06RKDBw+mZcuWrF27lkOHDrFkyRIAfVstWrTAw8ODHTt2GCQWO3bs4MCBA2RnZ9O5c+dSx2VqMhSqhinNNLOF6VKnC4EugVxKvsSBmAOciT9DiEeIKUMUQgghRGGMGI4EqLUTHzZXC7ULrbPQgEttmHkCTDik2cPDg379+rFkyRJmzJhR4M17YmIibm5uNGnShKioKKKiovS9FqdOnSIxMZGmTZuWuf0GDRpgY2PD7t27CQgIACA7O5sDBw4YDCUyld27d9O5c2eefvpp/b6IiIhyX3fq1KksWLCAn3/+mQceeIBDhw6h0+l47733sLBQPxDOX8eh0Wjo2rUrv/76K//99x/33nsvDg4OZGZm8vnnn3PPPfcUmkxVNOmxqGH2XrszI0NHv+IXxiuMhcaC0U1G6x9Lr4UQQghRxVhYQv9Ftx9o8j15+3H/hSZNKnItWbIErVZL+/btWbt2LefOnSM8PJyPPvqITp3UDzT79OlDixYtePTRRzl8+DD79+9n7NixdO/evdAhSsZydHRkypQpvPDCC2zcuJFTp04xadIk0tLSmDhxoqm+Rb3g4GAOHjzIpk2bOHv2LK+++qrBDFRl5eDgwKRJk5g7dy6KotCwYUOys7P5+OOPuXDhAt9++22BoVKg1ll8//33tG7dGicnJywsLOjWrRvfffddlaivAEksapSkzCROxp0EoIFrA3wcfcp0nWENhuFsrY55/OPCH8RnxJdwhhBCCCEqVNOhMHIFuPgZ7nepre5vOtQszdavX5/Dhw/Ts2dPnnvuOZo3b859993H1q1bWbp0KaB+uv7rr7/i7u5Ot27d6NOnD/Xr1+eHH34od/sLFy5k+PDhPPbYY7Rp04bz58+zadMm3N3dy33t/CZPnsyDDz7Iww8/TIcOHYiLizPovSiPadOmER4ezpo1a2jVqhXvv/8+ixYtonnz5nz33XcsWLCgwDndu3dHq9Ua1FL06NGjwL7KpFGMrcSpppKTk3F1dSUpKQkXF5fKDsestlzewrPbnwVgTJMxzG4/u8zXeufAO6w4tQKAGaEzmNRykkliFEIIIQRkZGRw8eJFgoKCsLOzK/uFdFq4/C+kXgcnHwjobJaeClGzFffzWJr30tJjUYMY1FeUYprZwoxqPArN7e7U1WdWk63LLtf1hBBCCGEGFpYQ1BVaPKT+K0mFqESSWNQguetXWFlYcY9P2ccwAvg7+9PDvwcAsWmxbLm8pbzhCSGEEEKIGkwSixoiKiWKK6lXAAj1DsXBuvwrL45pMka/vTJ8ZbmvJ4QQQgghai5JLGoIg9W2yzDNbGHa+bajkXsjQF0f48SNEya5rhBCCCGEqHkksaghDBKLctZX5NJoNAYL5kmvhRBCCCGEKIokFjWAVqdlX8w+AFxsXGji0cRk1x4YNBA3WzcA/rr0F7FpscWfIIQQQggh7kqSWNQA/8X9R0pWCqAuimdpwhkh7KzseKjRQwDkKDn8eObHEs4QQgghhBB3I0k
sagBzDIPK6+GQh7HUqMnKmrNryNRmmrwNIYQQQghRvUliUQOYcv2Kwvg6+nJfwH0AxGfEs+HiBpO3IYQQQgghqjdJLKq5W9m3OH7jOAD1nOtRx6mOWdrJW8T9Xfh31PAF24UQQghRyRRF4cknn8TDwwONRsPRo0crO6QCxo8fz/3331/ZYVQZklhUcwdjDpKj5ADm6a3I1cqrFc09mwNwOv40h64fMltbQgghhDCOVqflQMwB/rzwJwdiDqDVac3eZkxMDNOnT6d+/frY2tri7+/PkCFD2Lp1q0nb2bhxI2FhYaxfv57o6GiaN29u0utXhO3bt6PRaGjWrBlareG9cXNzIywsrHICMxOryg5AlI+5h0Hl0mg0PNr0UebsnAPAqtOruMe3fKt7CyGEEKLstlzewsL9C7medl2/z8fBh5fav0SfgD5mafPSpUt06dIFNzc33nnnHVq0aEF2djabNm1i6tSpnD592mRtRURE4OfnR+fOnct8DUVR0Gq1WFlV7lveCxcusGLFCiZMmGCya2ZlZWFjY2Oy65mC9FhUc3ui1cJtS40l7X3bm7WtfgH9qGVfC4CtkVu5lnrNrO0JIYQQonBbLm9h1vZZBkkFQGxaLLO2z2LL5S1maffpp59Go9Gwf/9+hg8fTqNGjWjWrBmzZs1i7969+uMiIyMZNmwYTk5OuLi4MHLkSK5fvxPr66+/TuvWrfn2228JDAzE1dWVUaNGkZKiznI5fvx4pk+fTmRkJBqNhsDAQAAyMzOZMWMG3t7e2NnZce+993LgwAH9dXN7CDZs2EDbtm2xtbVl165d9OjRg+nTpzNz5kzc3d3x8fHhiy++4NatW0yYMAFnZ2caNmzIhg136ki1Wi0TJ04kKCgIe3t7QkJCWLx4cZlet+nTpzN37lwyM4ueAMfY1+zLL78kKCgIOzs7QP3w9/PPP2fw4ME4ODjQpEkT9uzZw/nz5+nRoweOjo507tyZiIiIMsVeGpJYVGMxt2K4mHQRgOa1muNs42zW9qwtrRkZMhIAnaJj9enVZm1PCCGEEAVpdVoW7l+IQsF6x9x9i/YvMvmwqPj4eDZu3MjUqVNxdHQs8LybmxsAOp2OYcOGER8fz44dO9i8eTMXLlzg4YcfNjg+IiKCX375hfXr17N+/Xp27NjBwoULAVi8eDFvvPEGdevWJTo6Wp88vPjii6xdu5ZvvvmGw4cP07BhQ/r160d8fLzBtV966SUWLlxIeHg4LVu2BOCbb76hVq1a7N+/n+nTpzNlyhRGjBhB586dOXz4MH379uWxxx4jLS1N/33UrVuXNWvWcOrUKV577TVefvllfvyx9FPvz5w5k5ycHD7++ONCnzf2NTt//jxr165l3bp1BjUnb775JmPHjuXo0aM0btyY0aNHM3nyZObMmcPBgwdRFIVp06aVOu7SkqFQ1VjeaWY71y57N2FpjGg0gi+Of0G2Lpufzv3EU62ewsHaoULaFkIIIWqyh9c/zM30myUel6XNIjEzscjnFRRi0mLo8WMPbCxLHipTy74WPwz+ocTjzp8/j6IoNG7cuNjjtm7dyokTJ7h48SL+/v4ArFixgmbNmnHgwAHatWsHqG+mw8LCcHZWPxh97LHH2Lp1K/Pnz8fV1RVnZ2csLS3x9fUF4NatWyxdupSwsDAGDBgAwBdffMHmzZv56quveOGFF/QxvPHGG9x3330GcbVq1YpXXnkFgDlz5rBw4UJq1arFpEmTAHjttddYunQpx48fp2PHjlhbWzNv3jz9+UFBQezZs4cff/yRkSNHlvh65eXg4MDcuXN5+eWXmTRpEq6urmV6zbKyslixYgVeXl4G50+YMEEf0+zZs+nUqROvvvoq/fr1A+CZZ54x6TCsokiPRTVm7vUrClPLvhYDgtRf5pSsFNZfWF8h7QohhBA13c30m8SmxZb4VVxSkVdiZqJR1zMmmQGMnhEyPDwcf39//RtkgKZNm+Lm5kZ4eLh+X2BgoD6pAPD
z8yM2NrbI60ZERJCdnU2XLl30+6ytrWnfvr3BdQHuuadgHWhuzwWApaUlnp6etGjRQr/Px8cHwCCGJUuW0LZtW7y8vHBycmLZsmVERkYW+/0XZeLEiXh6erJo0aICzxn7mgUEBBRIKvJ/b7nfR/7vLSMjg+Tk5DLFbizpsaimdIqOvdHqWEZHa0ea16q4mRIebfIov0X8BqhTz45oNAKNRlNh7QshhBA1UW4dY0lK6rHI5WbrZnSPhTGCg4PRaDQmK9C2trY2eKzRaNDpdCa5dmFDtQprL+++3PcyuTGsXr2a559/nvfee49OnTrh7OzMO++8w759+8oUk5WVFfPnz2f8+PFlHpZU2PcFFPp9FPe9mYskFtXUmfgzJGQmANDOtx3WFtYlnGE6TT2b0sa7DYdjD3Mh6QJ7ovdU2FAsIYQQoqYyZjgSqDUW/db2IzYtttA6Cw0afBx82Dh8I5YWliaLz8PDg379+rFkyRJmzJhR4E1uYmIibm5uNGnShKioKKKiovSfwJ86dYrExESaNm1a5vYbNGiAjY0Nu3fvJiAgAIDs7GwOHDjAzJkzy3zdouzevZvOnTvz9NNP6/eVtwB6xIgRvPPOOwZDrACzvWYVTYZCVVO5s0FBxdVX5JV/wTwhhBBCVAxLC0teav8SoCYReeU+nt1+tkmTilxLlixBq9XSvn171q5dy7lz5wgPD+ejjz6iUyd1WHafPn1o0aIFjz76KIcPH2b//v2MHTuW7t27FzpEyViOjo5MmTKFF154gY0bN3Lq1CkmTZpEWloaEydONNW3qBccHMzBgwfZtGkTZ8+e5dVXXzWYgaqsFi5cyNdff82tW7f0+8z1mlU0SSyqKYP1K/wqpr4ir171euHn6AfAP1f+4XLy5QqPQQghhLhb9Qnow/s93sfbwdtgv4+DD+/3eN9s61jUr1+fw4cP07NnT5577jmaN2/Offfdx9atW1m6dCmgDrv59ddfcXd3p1u3bvTp04f69evzww/G9cgUZ+HChQwfPpzHHnuMNm3acP78eTZt2oS7u3u5r53f5MmTefDBB3n44Yfp0KEDcXFxBr0XZdWrVy969epFTk6Ofp85X7OKpFGMrcSpppKTk3F1dSUpKQkXF5fKDsckMnIy6PJ9F7J0Wfg5+rFp+KZKqXH4+uTXfHDoAwBGNx7NnA5zKjwGIYQQojrKyMjg4sWLBusRlIVWp+Vw7GFupN3Ay8GLNt5tzNJTIWq24n4eS/NeWnosqqHD1w+TpcsC1GFQlVU4PTx4OHaW6g/fL+d/ISUrpVLiEEIIIe5WlhaWtPNtx8D6A2nn206SClGpJLGohvLWV3Ss3bHS4nC1dWVIgyEApOWk8ev5XystFiGEEEIIUbkksaiGcusrNGjo6Ft5iQWoQ6ByrTq9yuSrfAohhBBCiOpBEotq5mb6Tc4mnAWgiWcT3OzcKjWehu4N6einJjdRKVHsvLqzUuMRQgghhBCVQxKLaiZ3UTyonGlmCzOmyRj99srwlZUYiRBCCCGEqCySWFQze67dqa+ojGlmC9O1blfqOdcDYF/0Ps4lnKvkiIQQQojqwdwrIQthDFP9HMrK29WIoij6xMLeyp7W3q0rN6DbLDQWjG4ymoX7FwLqgnmvd369coMSQgghqjAbGxssLCy4du0aXl5e2NjYVNosj+LupSgKWVlZ3LhxAwsLC2xsbMp1PUksqpGIxAhupN8AoI1PG2wsy3fzTWlYg2F8fORjbmXf4o8LfzCzzcxKr/8QQgghqioLCwuCgoKIjo7m2rVrlR2OuMs5ODhQr149LCzKN5ipUhOLBQsWsG7dOk6fPo29vT2dO3dm0aJFhISE6I/JyMjgueeeY/Xq1WRmZtKvXz8+/fRTfHx8KjHyypF3mtnOflWjviKXk40T9ze8n+/CvyNDm8Hac2uZ2GJiZYclhBBCVFk2NjbUq1ePnJwctFqZVVFUDktLS6ysrEzSY1apicWOHTuYOnUq7dq1Iyc
nh5dffpm+ffty6tQpHB0dAXj22Wf5448/WLNmDa6urkybNo0HH3yQ3bt3V2bolSJ3mlmATrWrRn1FXqMbj2ZV+CoUFFafWc24ZuOwspBOMSGEEKIoGo0Ga2trrK2tKzsUIcpNoyiKUtlB5Lpx4wbe3t7s2LGDbt26kZSUhJeXF6tWreKhhx4C4PTp0zRp0oQ9e/bQsWPJaziUZhnyqixLm8W9q+8lPScdL3svto7YWiXHYk7bOo0dV3YA8G73d+kX2K+SIxJCCCGEEGVVmvfSVWpWqKSkJAA8PDwAOHToENnZ2fTp00d/TOPGjalXrx579uwp9BqZmZkkJycbfNUEx24cIz0nHYCOfh1LnVRodQp7IuL49ehV9kTEodWZJ598tMmj+u3vwr8zSxtCCCGEEKLqqTLjVHQ6HTNnzqRLly40b94cgJiYGGxsbHBzczM41sfHh5iYmEKvs2DBAubNm2fucCucwTSzpRwGtfFkNPN+P0V0UoZ+n5+rHXOHNKV/cz+TxQhq0tPAtQERSREciT3Cf3H/0cyzmUnbEEIIIYQQVU+V6bGYOnUqJ0+eZPXq1eW6zpw5c0hKStJ/RUVFmSjCylXW+oqNJ6OZsvKwQVIBEJOUwZSVh9l4MtpkMYI6VnR0k9H6x6vCV5n0+kIIIYQQomqqEonFtGnTWL9+Pdu2baNu3br6/b6+vmRlZZGYmGhw/PXr1/H19S30Wra2tri4uBh8VXeJGYmcijsFQLB7MLXsaxl1nlanMO/3UxQ26Cl337zfT5l8WNSQBkNwsVFf9w0XN3Az/aZJry+EEEIIIaqeSk0sFEVh2rRp/Pzzz/z9998EBQUZPN+2bVusra3ZunWrft+ZM2eIjIykU6eqNyuSueyL2YdyOxUozWrb+y/GF+ipyEsBopMy2H8xvrwhGrC3smd4o+EAZOuyWXNmjUmvL4QQQgghqp5KTSymTp3KypUrWbVqFc7OzsTExBATE0N6ulqk7OrqysSJE5k1axbbtm3j0KFDTJgwgU6dOhk1I1RNkbe+onNt49eviE0pOqkoy3Gl8UjII1hqLAH44cwPZGmzTN6GEEIIIYSoOio1sVi6dClJSUn06NEDPz8//dcPP/ygP+aDDz5g8ODBDB8+nG7duuHr68u6desqMeqKpSiKPrGwtrCmjU8bo8/1drYz6XGl4efkR696vQCIy4hj06VNJm9DCCGEEEJUHZU+FKqwr/Hjx+uPsbOzY8mSJcTHx3Pr1i3WrVtXZH1FTRSZEsm1W9cAaOPdBnsre6PPbR/kga9r0UmDBnV2qPZBHuUNs1BjmozRb68MX0kVWjJFCCGEEEKYWJUo3hZFyzsMqmPt0g3/srTQ8GBonWKPmTukKZYW5lloL9Q7lCYeTQA4FXeKYzeOmaUdIYQQQghR+SSxqOLyTjNbmvoKUGeF2hJ+vdDnLC00LB3TxuTrWOSl0WgMFsxbGb7SbG0JIYQQQojKJYlFFZajy+FAzAEA3G3daezRuFTnrz18hbPXUwFo7e/K95M6UM9DHUql1Sl0amDctLXlMSBoAB526lCrLZe3EHOr8IUNhRBCCCFE9SaJRRV28uZJUrPVxKCDXwcsNMbfroxsLR9sPqt/PGdAEzo1qEXPEG/9vmNRiSaLtSg2ljaMDBkJgFbRsvp0+RZAFEIIIYQQVZMkFlVYWaeZBQj795J+DYvejb3pUN8TgNB67vpjjlZAYgHwcMjDWFlYAfDTuZ9Iz0mvkHaFEEIIIUTFkcSiCstbX9GptvEL4yWmZfHptvMAWGjgxf53hlC19nfTb1dUYlHLvhb9A/sDkJSZxJ8X/qyQdoUQQgghRMWRxKKKSslK4cTNEwAEugTi62j8FLufbo8gOSMHgOFt6hLi66x/LsDTAXcHawCORCZU2BSw+Yu4ZepZIYQQQoiaRRKLKupAzAG0ihYoXW/F1cR0wv69BICtlQXP3tfI4HmNRqPvtUhIyyYyPs0k8Zakea3mtPJqBcD
5xPPsj9lfIe0KIYQQQoiKIYlFFVXW+or3/zpLVo4OgPFdAqntVnBBvdb+d+osjkQmlj3IUsq7YN7HRz7mzwt/qgmUTlthMQghhBBCCPOwquwAROH2RKuJhZXGina+7Yw6Jzw6mXVHrgDgam/N090bFnpcaD03/fbRqETuL2ERPVPpHdAbFxsXkrOSOXbjmH7BPB8HH15q/xJ9AvpUSBxCCCGEEML0pMeiCrqWeo3LyZcBaOnVEkdrR6POe3vjaXJLF6b2bIDr7VqK/FrlKeA+EplQrlhLY0fUDpKzkgvsj02LZdb2WWy5vKXCYhFCCCGEEKYliUUVlHcYVMfaHY07JyKObWduAFDb1Y6xnQKLPNbV3poGXmqycio6mYxs8w9F0uq0LNy/sNDnFNRsaNH+RTIsSgghhBCimpLEogrKHQYFxtVXKIrCwg3h+sez+oZgZ21Z7Dm5dRbZWoVT0QV7EUztcOxhrqddL/J5BYWYtBgOxx42eyxCCCGEEML0JLGoYrQ6LXuj9wLgbO1MM89mJZ7z54kYjl1JAqCxrzMPGFEzkbfOoiIKuG+k3TDpcUIIIYQQomqRxKKKOR1/mqRMNUlo79dev2J1UbK1Ot7ZdFr/eHb/xlhaaEpsp6IXyvNy8DLpcUIIIYQQomqRxKKKyTsMqpNfyetXrN4fyaU4dS2KjvU96BFi3Bvzxr7O2Fmrt78iCrjbeLfBx8EHDYUnPRo0+Dr40sa7jdljEUIIIYQQpieJRRXz77V/9dsl1VekZuaweOs5/eOXBjRBoym5twLAytKClnXcALiSkM7N1MzSB1sKlhaWvNT+JYAik4vZ7WdjaVF8bYgQQgghhKiaJLGoQtKy0zgSewSAOk518HfxL/b4L3de4GZqFgCDWvgZDG8yRuu861lUQJ1Fn4A+vN/jfbwdvA32e9h68H6P92UdCyGEEEKIakwSiyrk0PVD5OhyAOhUu/hhUDdSMln2zwUArCw0PN8vpNTtheZdzyKqYtaz6BPQh03DNzG+2Xj9voktJkpSIYQQQghRzUliUYWUpr7i47/PkZalrvnwSPt6BNUybhG9vFrnW4G7olhaWNIvsJ/+8fnE8xXWthBCCCGEMA9JLKqQ3IXxLDQWdPDrUORxF2/eYtW+SAAcbCyZ0Tu4TO35udrj42ILwLGoJLQ6pUzXKYsGbg30tRZnE85WWLtCCCGEEMI8JLGoImLTYvWf3DfzbIarrWuRx7771xlybicBk7rWx8vZtsztht5eKC81M4eIG6llvk5p2VvZE+ASAKg9FrLithBCCCFE9SaJRRWRuygeQEe/jkUedywqkT+ORwNQy8mGSd3ql6vdii7gzivYXe1pydRmcjnlcoW2LYQQQgghTEsSiyoidxgUFD3NrKIoLNgQrn88o3cwTrbFL6BXksoo4M7VyL2RfluGQwkhhBBCVG+SWFQBiqLoEwt7K3taebUq9LjtZ2+w90I8AIGeDjzSvp7xjei0cHEnnPhJ/ff20KMWdV31K3UfqeAeixD3OzNZnY2XxEIIIYQQojor38fdwiTOJpwlLiMOgHa+7bC2tC5wjFansGjDaf3j5/uFYG1pZF546jfYOBuSr93Z51Ib+i/CoelQGvk4Ex6dzNnrKdzKzMGxnL0gxmrkcafH4lzCuWKOFEIIIYQQVZ30WFQBeesrippm9pcjVzkdkwJAq7quDGrhZ9zFT/0GP441TCoAkqPV/ad+I/R2nYVOgeNXkkodf1nVdqyNo7U6Ta4MhRJCCCGEqN4ksagCSqqvyMjW8v7mO2+8Zw9ojEajKfnCOq3aU0Fh08je3rfxJULrOuv3VmSdhUaj0ddZXLt1jeSs5AprWwghhBBCmJYkFpUsU5vJwesHAfB28CbINajAMd/uuczVxHQAeoR40blBLeMufvnfgj0VBhRIvkpnqzP6PRU9M1TeAm4ZDiWEEEIIUX1JYlHJjsQeIVObCajDoPL3RCSlZfPJNnV9C40GZvdvbPz
FU68bdZifZTLOdmpdxZGoRBSl4hbKk5mhhBBCCCFqBkksKlneYVCdahesr1i6I4Kk9GwAHgitQxM/F+Mv7uRj1GEWzr60vj3t7I2UTK4lZRjfRjlJYiGEEEIIUTNIYlHJ8iYW+RfGu5aYzvLdFwGwsbRg1n2NKJWAzursT0XSgEsdCOisTyygYodD5S6SB5JYCCGEEEJUZ5JYVKL4jHjC49UF7xp7NMbT3tPg+Q+3nCUzRwfAuM4B1HV3KF0DFpbQf2ExByjq8xaWBonFkciKK+B2tHakrlNdQK2x0Cm6CmtbCCGEEEKYjiQWlWhf9D79dv5pZs9eT+GnQ1cAcLaz4ukeDcvWiJ1r0c9Z2UH9HgCGPRZRiWVrq4xyh0Ol56RzJeVKhbYthBBCCCFMQxKLSmQwDKq24TCotzeeRne7hvrpHg1xd7QpWyN7P7uz3X02DP8Kgvupj3My4OgqADydbKnnofaInLiaRLa24noO8i6UJ8OhhBBCCCGqJ0ksKomiKPx77V8AbC1taePdRv/c/ovxbAmPBcDXxY4JXQLL1kj8BTi7Ud12qQPdXoAWD0GfuXeO2f856NQkInehvMwcHaejU8rWZhmEuIfotyWxEEIIIYSoniSxqCQXky9yPU2dDraNdxvsrOwANeFYsCFcf9ys+xphZ21Ztkb2LUO/EF67J8DSWt32aQZB3dTt+Atw7i8g/3CoiquzkJmhhBBCCCGqP0ksKklR08xu+i+GI7dnZQr2duLBNnXK1kBmChxZqW5b2UPb8YbPd5hyZ3ufOlzKsIA7sWztlkFd57rYW9kDklgIIYQQQlRXklhUkr3X9uq3cxOLbK2OtzfeWQV7dv/GWFmW8RYdXQVZt4cztRwJDh6GzzfqB+6B6vaFbRB7mqa1XbC53V5FFnBbaCwIdlOnnY1KieJW9q0Ka1sIIYQQQpiGJBaVIFuXzf6Y/QB42HnohwL9eDCKCzfVN9XtAz3o3cS7bA3odLDv8zuPOzxV8BgLS2g/+c7jfZ9ha2VJ09rqAnwXbt4iMS2rbO2XQd71LM4lnKuwdoUQQgghhGlIYlEJjt84TlpOGqAuimehsSAtK4cPt9x5Qz17QGM0Gk3ZGji/GeIj1O2g7uDTtPDjQh8FGyd1+9hqSIvXF3BDxfZahHhIAbcQQgghRHUmiUUlKKy+4qudF7mRkglA/2a+tA1wL3sDe5fe2S6styKXnSu0Hq1u56TD4RWVtp6FFHALIYQQQlRvklhUgj3ReRILv07EpWby+T8XALC00PBC/5CiTi1Z7Gm1ZgLUGopG/Yo/Pu9wqANf0qaus/5hRRZwy1AoIYQQQojqTRKLCpaUmcTJmycBaODaAB9HHz7++zypmTkAPNzOnwZeTmVvYH+e2or2k9VaiuLUagjBfW8HF0Xd63/jeXsxvmNXElEUpeyxlIKLjQt+jn6A2mNRUe0KIYQQQgjTkMSigh2IOYBOURek61S7E5FxaXy37zIA9taWzOwdXNzpxUtPUGslQK2dCH3UuPPyDJfS7PtcPxwqMS2bS3FpZY+nlHKHQ6Vmp3Lt1rUKa1cIIYQQQpSfJBYVLH99xbt/nSFbq346/0TXILxd7Mp+8cMrIPt2ItD6UbWGwhgNekGt28OvIv/lPo8Y/VNHIitpobx4qbMQQgghhKhOJLGoYP9e+xcAKwsr7HOC+e2Y+sm8h6MNT3arX/YLa3Ng/xd3Hrd/0vhzNRrocKfWomfiOv12hRZwe0gBtxBCCCFEdSWJRQWKSoniSuoVAFp7tWbxlkj9c9N7NcTZzrrsFz/zByRFqdvBfdXaidJoNUrfw+F9eT1emiSgYgu4ZWYoIYQQQojqSxKLCpR3GJSfTUt2nb8JgL+HPaM71CvfxUtaEK8kNo7QZiwAGm0WU112AhAenUxGtrZ8sRmpnnM9bC1tAUkshBBCCCGqG0ksKtDe6L367X2naum3n+8bgq1VCbM3FSf6OFzerW7XClFrJsqi/ZOgUX8
kHtRuxJoccnQKJ68mlT22UrCysKKBWwMAIlMiSc9Jr5B2hRBCCCFE+UliUUG0Oq0+sbC3dCLiihsAzeu4MKRl7fJdfN9nd7Y7TFZrJsrCrR40HgSAS048Ay3UeCtjoTydoiMiMaLC2hVCCCGEEOUjiUUF+S/uP1KyUgDIvtWQ3Jf+pf5NsLAoYyIAkHoDTqxRt+1c1VqJ8ugwRb85wWojoHCkAhOLEPc7iwPKcCghhBBCiOpDEosKkre+IjUhCICuwbW4N7hWUacY59By0Gap223GqrUS5RHQGXxbANDa4gJtNOc4KgXcQgghhBCiBJJYVJA90XcSi5xb6iJ4s/s3Lt9Fc7LgwJfqtsaidFPMFkWjMSj+nmC1kauJ6cQmZ5T/2kYIdr+zQKAkFkIIIYQQ1YckFhXgVvYtjsUeA0CX5YmS7cGw1rVpXsfIBeyKcupXSL2ubjcepNZImELzh8BB7UkZYLEfX+IqbDiUu5073vbegJpYKIpSIe0KIYQQQojykcSiAhyMOUiOkgOovRXWlhqe7xtSwllG2Lf0znae2ohys7aDeyYAYKXRMcZqS4UWcAd7qL0WSZlJXE+7XmHtCiGEEEKIspPEwsyycnL46uiP+sfaW/UZ0zEAfw+H8l046gBcPaRu+7ZQayNM6Z6JKBZWAIy23Mp/lyvuDb7UWQghhBBCVD+SWJjROzvXcM+KnhyJ/0e/z9ZnPRrHE+W/eP7eirJOMVsUFz80zR4AwEOTSsDVP9DqKmZYkswMJYQQQghR/UhiYSbv7FzDNxFvoLNINNivsUrmx8j5vLNzTdkvnnxNra8AtRai+fCyX6s4eYZXjeZPzl1PNk87+UiPhRBCCCFE9SOJhRlk5eTw7bmPgIIdCbmPvz37EVk5OWVr4MCXoLt97j0T1JoIc6jblusu6tSzTSyiuHp0i3naySfQNRCr28OwziWcq5A2hRBCCCFE+UhiYQarjm1HsUwscnSSRgOKVSKrjm0v/cWz0+HgcnXbwgrumVjmOI2R2voJ/bbPqeVmbSuXtYU1DVwbAHAx6SKZ2swKaVcIIYQQQpSdJBZmEJkcY9LjDJz4CdLj1e1mD4CLX+mvUQq1O40iRnEHoGnKLki4ZNb2cuUOh9IqWi4kXqiQNoUQQgghRNlJYmEG9Vx8TXqcnqLAvs/uPDblFLNFsLe34y+HwQBYoJD172clnGEaUmchhBBCCFG9SGJhBqNb9UCjdaOotd0UBTQ5boxu1aN0F760C66fVLfr3AN125YrTmNdaTCSTMUaAIujKyEz1extNvKQxEIIIYQQojqRxMIMbKyseCx4BkCB5CL38WONZmBjZVW6C+ftreho/t6KXCFB9flF2wUAq+wUOPa92duUHgshhBBCiOqllO9sITMzk3379nH58mXS0tLw8vIiNDSUoKAgc8RXbb3QdQQA3577CMUyUb/fQuvGY41m6J83WsIlOPOnuu3sB02HmSZQI7Su58Y0bT8ettqu7tj3uVo0bmG+vLSWfS087DyIz4iXxEIIIYQQohow+p3h7t27GTlyJG5ubvTq1YuZM2fy5ptvMmbMGBo2bEhwcDDvvPMOKSkpRjf+zz//MGTIEGrXro1Go+GXX34xeH78+PFoNBqDr/79+xt9/cr2QtcRHBy7jeeaf8CIerN5rvkHHBy3rfRJBcD+L0DRqdvtJoKltWmDLUaQpyNXbRuwV9dE3RF3DiL+Nnu7ub0W8Rnx3Ey/afb2hBBCCCFE2RmVWAwdOpSHH36YwMBA/vrrL1JSUoiLi+PKlSukpaVx7tw5XnnlFbZu3UqjRo3YvHmzUY3funWLVq1asWTJkiKP6d+/P9HR0fqv7783/zAcU7KxsmJ82z681nMM49v2Kf3wJ1BrGg5/q25b2kLbCaYNsgQWFhpa13NneU6epC7vyt9mYjAcKl56LYQQQgghqjKj3uUOGjSItWvXYm1d+Kfk9evXp379+owbN45Tp04RHR1tVOMDBgxgwIA
BxR5ja2uLr28pZ0+qaY59D5lJ6nbLEeBYq8JDaO3vxidn2xKl88Lf4gac3wI3zoJXo5JPLqMQjxD99tmEs3Su09lsbQkhhBBCiPIxqsdi8uTJRSYV+TVt2pTevXuXK6i8tm/fjre3NyEhIUyZMoW4uLhij8/MzCQ5Odngq1rT6dSahlwdnqqUMELruaHDgm+0fe/s3P950SeYgBRwCyGEEEJUH+Wqvj158iRLlizho48+4tChQ6aKSa9///6sWLGCrVu3smjRInbs2MGAAQPQarVFnrNgwQJcXV31X/7+/iaPq0JF/K3WNAAE3Au+LSoljNZ13QD4UduDDGzVnUe/h/REs7VZ37U+lhpLQBILIYQQQoiqrsyJxZIlS+jduzc7duxg27Zt9OrVi/nz55syNkaNGsXQoUNp0aIF999/P+vXr+fAgQNs3769yHPmzJlDUlKS/isqKsqkMVU4gylmK6e3AsDd0YZATweScWStrpu6M/sWHFlptjZtLG0IclVnG4tIiiBbm222toQQQgghRPkYnVjkf4P+ySef8N9///Hjjz/y888/s3HjRj788ENTx2egfv361KpVi/Pnzxd5jK2tLS4uLgZf1dbNc3D+diG8Wz0IGVip4YTWcwfg6+x8w6F0RfcglVewezAAObocLiZfNFs7QgghhBCifIxOLPr06cPixYtRbq/w5unpycaNG8nMzCQlJYUtW7bg5eVltkABrly5QlxcHH5+fmZtp8rIW1vR/kmwsKy8WFALuAEilDpc9bxdSJ0YCWc2mK1NqbMQQgghhKgejE4sDhw4wJkzZ+jQoQNHjx5l2bJlfPDBB9jb2+Pm5sYPP/zAN998U6rGU1NTOXr0KEePHgXg4sWLHD16lMjISFJTU3nhhRfYu3cvly5dYuvWrQwbNoyGDRvSr1+/UrVTLaUnwtFV6ra1A4Q+VqnhgFrAnWu9fZ4F+vIO1zKxEHfDmaGEEEIIIUTVZPSiCi4uLnz66af8+++/jB8/nl69erFz5060Wi1arRY3N7dSN37w4EF69uypfzxr1iwAxo0bx9KlSzl+/DjffPMNiYmJ1K5dm759+/Lmm29ia2tb6raqnSMr1RoGgFaPgL1bpYYD0NjXBRsrC7JydKyOD2ayZ0OIOw+XdkLMSfBtbvI2pcdCCCGEEKJ6KHXxdufOnTl48CDu7u6Ehobyzz//lCmpAOjRoweKohT4CgsLw97enk2bNhEbG0tWVhaXLl1i2bJl+Pj4lKmtakWnNZzKtZKmmM3PxsqCFnVcAbgYn8Gt1k/cedJMvRbeDt642qptnos/Z5Y2hBBCCCFE+RmdWOTk5PDpp58yffp0wsLCePnll/n999957733GDFiBNevXzdnnHeXsxvV2gWABr3NughdaeXWWQAccusHtreL40+sgVvFrzFSFhqNRt9rEZseS0JGgsnbEEIIIYQQ5Wd0YjFx4kQ++eQTHB0dWb58Oc8++yyNGjXi77//pn///nTq1ImlS5eaM9a7x948r2PHKZUXRyHyJhYHY3Lu1H7kZMCh5WZpU4ZDCSGEEEJUfUYnFr/++itr165l4cKFbN68mT/++EP/3MSJE9m7dy87d+40S5B3lZiTas0CgGdDtceiCslbwH0kMgHaTwI06o4DX4EZ1pqQAm4hhBBCiKrP6MTCx8eHv/76i6ysLP7++288PT0Nnvf29mbVqlUmD/Cuk7dWocNTYFGuxdFNro6bPbWc1OL5Y1GJ6NwC76yvkXINTv1q8jalx0IIIYQQouoz+l3rJ598wvz587G3t+epp54y+2J4d6VbcWqtAqi1C61GVW48hdBoNPpei+SMHC7cvGW4IrgZirjru9XHQqP+qEpiIYQQQghRNRmdWNx3331cv36dmJgYrly5QufOnc0Z193pcJhaqwBq7YKtc6WGU5S8dRZHoxIhsCt4N1N3XDkAVw6ZtD17K3vqOdcD4HzCeXJ0OSa9vhBCCCGEKL9SjbPRaDRmX13
7rqXNhv1f3n6guV27UDWFGiQWCaDRQIfJdw4wQ69F7nCoLF0WkcmRJr++EEIIIYQoH6MSi/79+7N3794Sj0tJSWHRokUsWbKk3IHddcJ/U2sUQK1Z8Aiq3HiK0dLfDc3teu0jkYm3d44Eew91+7+fISXGpG1KnYUQQgghRNVm1MrbI0aMYPjw4bi6ujJkyBDuueceateujZ2dHQkJCZw6dYpdu3bx559/MmjQIN555x1zx13z7M3zKX/HqrEgXlGcbK1o5O3MmespnI5JIT1Li72NPbQdD7veB122OkNUr/+ZrM0QD8OZofoH9TfZtYUQQgghRPkZlVhMnDiRMWPGsGbNGn744QeWLVtGUlISoA6Patq0Kf369ePAgQM0adLErAHXSFcPwZX96rZ3U7VmoYoLrefGmespaHUKJ64m0T7IA9o9AbsXg6KFg19D1+fA2s4k7UmPhRBCCCFE1WZUYgFga2vLmDFjGDNmDABJSUmkp6fj6emJtbW12QK8K+zNN8Vs7jijKqy1vxurD0QBap1F+yAPcK0DTYfBf+sg7ab6b+vRJmnPz9EPJ2snUrNTJbEQQgghhKiCyrxIgqurK76+vpJUlFdKjFqTAGqNQsuRlRuPkULrueu3j0Yl3nki70rhe5eCopikPY1Go++1iL4VTVJmkkmuK4QQQgghTKNqrb52Nzr4tVqTAGqNgrV9pYZjrIbeTjjaWAJ5CrgB6raD2qHqdsxxiNxjsjaD3YP12+cSzpnsukIIIYQQovwksahMOZlqYgGgsVRrFKoJSwsNLeu6ARCdlEFM0u31NzQa6JCv18JEpM5CCCGEEKLqksSiMp1cC7duqNtNh6k1CtVI7grccHs9i1zNHgAnH3X79HpINM26E/lnhhJCCCGEEFWHJBaVRVEMP83vULWnmC1M3hW4j+Sts7CygXsmqtuKDvZ/YZL2gt1kKJQQQgghRFVVpsQiMTGRL7/8kjlz5hAfHw/A4cOHuXr1qkmDq9Ei96g1CKDWJPi3r9x4yqB1nh4LgzoLgHsmgKWNun34G8i6Ve72HKwd8Hf2B+Bc4jm0Om25rymEEEIIIUyj1InF8ePHadSoEYsWLeLdd98lMTERgHXr1jFnzhxTx1dz7cs7xeyUajHFbH7eznbUcVOLzU9cSSJHq7vzpJM3NH9I3c5IguM/mKTN3DqL9Jx0rqReMck1hRBCCCFE+ZU6sZg1axbjx4/n3Llz2NndWfxs4MCB/PPPPyYNrsZKjILw9eq2k49ak1BN5fZapGdrOXs91fDJDpPvbO/73CRTz0oBtxBCCCFE1VTqxOLAgQNMnjy5wP46deoQExNjkqBqvANfqKtTg1qLYGVTufGUQ6hBnUWC4ZO1W0O9Tur2jdNwYVu52wtxlwJuIYQQQoiqqNSJha2tLcnJyQX2nz17Fi8vL5MEVaNl3YJD36jbljZqLUI1ZjAzVP46CzAsSs+7wngZGfRYxEtiIYQQQghRVZQ6sRg6dChvvPEG2dnqom4ajYbIyEhmz57N8OHDTR5gjXP8B8hIVLebD1drEaqxZrVdsbZU60MMZobK1XgwuKoF15zbBHER5WqvjnMd7K3Uug7psRBCCCGEqDpKnVi89957pKam4u3tTXp6Ot27d6dhw4Y4Ozszf/58c8RYcyiKWmuQqxpOMZufnbUlTfxcAIi4kUpyRrbhAZZWhgv/7V9WrvYsNBb6FbivpF4hNSu1hDOEEEIIIURFKHVi4erqyubNm/n999/56KOPmDZtGn/++Sc7duzA0dHRHDHWHBe2q7UGoNYe1G5dmdGYTO56FooCx6OSCh7QZizc7mXgyHeQUXAoXWnkHQ51PvF8ua4lhBBCCCFMo8wL5N177708/fTTvPjii/Tp08eUMdVcBlPMVv/eilyhButZJBQ8wMEDWj2sbmelwNHvytWezAwlhBBCCFH1WJX2hI8++qjQ/RqNBjs7Oxo2bEi3bt2wtLQsd3A1SlwEnN2kbrvUVWs
PaojW/u767aOF1VmAmkgdClO3930O7Z8Ei7L9jMjMUEIIIYQQVU+pE4sPPviAGzdukJaWhru7+oYyISEBBwcHnJyciI2NpX79+mzbtg1/f3+TB1xt7V8G3F7Hof0Tau1BDRHo6YCbgzWJadkciUpEURQ0+Rf8824C9Xuow8ESLsK5vyBkQJnay62xAEkshBBCCCGqilIPhfq///s/2rVrx7lz54iLiyMuLo6zZ8/SoUMHFi9eTGRkJL6+vjz77LPmiLf60WnhzCY4GKY+trSDNuMqNSRT02g0+jqL+FtZRMWnF35ghyl3trcvgBM/wcWd6mtUCs42ztR2rA2oiYVO0ZVwRjF0WjWGMsYihBBCCCFUpf7Y/JVXXmHt2rU0aNBAv69hw4a8++67DB8+nAsXLvD222/L1LMAp36DjbMh+dqdfRYWcGkXNB1aeXGZQai/O9vP3ADUhfLqeToUPCi4Lzh6w61YiD4Gayeq+11qQ/9FpXpNGrk34tqta9zKvsW11GvUda5b+qALuz9liEUIIYQQQpShxyI6OpqcnJwC+3NycvQrb9euXZuUlJTyR1ednfoNfhxr+KYVIDtN3X/qt8qJy0xaGxRwJxZ+0On1alKRX3J0qV+Tcg+HKur+lCEWIYQQQghRhh6Lnj17MnnyZL788ktCQ0MBOHLkCFOmTKFXr14AnDhxgqCgINNGWp3otOon4bk1FYXZ+BI0HlTmAuaqpnVdN/12oQXc+tekMLdfpzXjwS0A8tdnFCLESge3Zzc+u+FZemWW4nVUFEi8TOH3RwE0Ne7+CCGEEEKYW6kTi6+++orHHnuMtm3bYm1tDai9Fb179+arr74CwMnJiffee8+0kVYnl/8t+Em4AQWSr6rHBXWtsLDMydXBmvpejly4cYtT15LJzNFia5XnTXmJrwmgaCHhglHtNbK2gtw6C20qxN8sa+iFBVLj7o8QQgghhLmVOrHw9fVl8+bNnD59mrNn1SEoISEhhITcmQK0Z8+epouwOkq9btrjqonW/m5cuHGLLK2OU9eSCa13Zxpao79Xa0ewsinxsHqAraKQqdFwztYO7N1LPEcvJwuyb5V8XA27P0IIIYQQ5lTmOU8bN25M48aNTRlLzeHkY9rjqonQeu6sO3wVUIdDGSQWxn6vo38wqpfAEmi4fhT/xf3HZStL0madwsG6kILxwlzcCd8YsY5IDbs/QgghhBDmVKbE4sqVK/z2229ERkaSlZVl8Nz7779vksCqtYDO6uxCydEUPo5foz4f0LmiIzOr0NtTzoJawD2hS54nzfCaNHJvxH9x/6GgEJEYQQuvFsadeJfeHyGEEEIIcyp1YrF161aGDh1K/fr1OX36NM2bN+fSpUsoikKbNm3MEWP1Y2GpTln641hAg+Gb19uFyf0X1rjC4BBfZ2ytLMjM0RUs4DbDa9LIvZF++2zCWeMTi2JjQX1cA++PEEIIIYQ5lXq62Tlz5vD8889z4sQJ7OzsWLt2LVFRUXTv3p0RI0aYI8bqqelQGLkCXPwM97vUVvfXwHUSrC0taFnXFYDI+DTiUjMNDzDxaxLicaeup9RTzhYVC4CLvzojlBBCCCGEMFqpeyzCw8P5/vvv1ZOtrEhPT8fJyYk33niDYcOGMWXKlBKucBdpOlR9g3r5X7UQ2MlHHV5Tgz8Jb+3vxoFLCYBaZ9G7Sb46BRO+JsFu5VzLIm8sKTHwz9tw8ywkR8HxH6D16NJfUwghhBDiLlXqxMLR0VFfV+Hn50dERATNmjUD4OZNU075WUNYWN5VU5aqBdsXAbXOokBiASZ7Tdzs3PB28CY2LZYzCWdQFAWNEWtgFBmLs++dou6/50OzB8HartxxCiGEEELcDUo9FKpjx47s2rULgIEDB/Lcc88xf/58Hn/8cTp27GjyAEX10jpPAXehC+WZWG6dRUpWCtfTyjk9bFBXCO6rbidfgf3LyhmdEEIIIcTdo9SJxfvvv0+HDh0AmDdvHr179+aHH34
gMDBQv0CeuHv5udrh42ILwLGoRHS6YlYfN4H8Bdzl1nsu+mLyne9BekL5rymEEEIIcRcodWJRv359WrZsCajDoj777DOOHz/O2rVrCQgIMHmAonrRaDT6XouUzBwibqSatb0Q93IUcBfGtzm0ekTdzkiEXR+U/5pCCCGEEHeBMiUWcXFxBfYnJiZSv359kwQlqrfW/ncWxjti5uFQBj0W8SZILAB6vgyWaq8Lez+DpCumua4QQgghRA1W6sTi0qVLaLXaAvszMzO5evWqSYIS1VtoPTf99pHIRLO2FeAagLWFNWCiHgsAN3/o8KS6rc2EbQtMc10hhBBCiBrM6FmhfvvtN/32pk2bcHV11T/WarVs3bqVwMBAkwYnqqcWdVyx0IBOMX8Bt7WFNQ3cGnA6/jSXki+Rqc3ENre3oTzunQWHV0BGEhxbBZ2mgk/T8l9XCCGEEKKGMjqxuP/++wF1DP24ceMMnrO2tiYwMJD33nvPpMGJ6snR1ooQXxfCo5M5E5NMWlYODjalntnYaI3cG3E6/jRaRUtEYgRNPU2QADh4qMnFlrmg6GDrPBj9Q/mvK4QQQghRQxk9FEqn06HT6ahXrx6xsbH6xzqdjszMTM6cOcPgwYPNGauoRnILuHUKHL+SZNa2TD4zVK4Ok8Glzu0Lb4RLu013bSGEEEKIGqbUNRYXL16kVq1a5ohF1CChFbiehdkSC2t7tZA71+bXQDHv9LlCCCGEENVVmcanbN26la1bt+p7LvL6+uuvTRKYqN4MC7jNuxaE2RILUKee/fcTuBEOVw9C+G/QdJhp2xBCCCGEqAFK3WMxb948+vbty9atW7l58yYJCQkGX0IANPBywtlWzVvN3WPhae+Jp50noE45q5iyV8HCEvq8fufx1jdAm2266wshhBBC1BCl7rH47LPPCAsL47HHHjNHPKKGsLDQ0MrfjV3nb3I9OZPopHT8XO3N1l4j90bsid5DQmYCN9Nv4uXgZcKL94OALnB5N8SdV2eLajfRdNcXQgghhKgBSt1jkZWVRefOnc0Ri6hhWuepszD3ehZmHQ6l0UCfeXceb18ImeZdUVwIIYQQoropdWLxxBNPsGrVKnPEImqY1hVZwO1hxsQCwL8dNBmqbt+Khb2fmr4NIYQQQohqrNRDoTIyMli2bBlbtmyhZcuWWFtbGzz//vvvmyw4Ub21rsAC7hD3EP22WRILgN6vwek/QNHC7sVwz+PgKDOkCSGEEEJAGRKL48eP07p1awBOnjxp8JxGozFJUKJmqOVki7+HPVHx6Zy4mkS2Voe1Zak7yYwS5BqElcaKHCXHfIlFrWBoOw4Ofg1ZqbDjbRj4tnnaEkIIIYSoZkqdWGzbts0ccYgaKtTfnaj4dDKydZyJSaF5HVeztGNjaUOgayDnE89zIekC2dpsrC2tSz6xtLrPhmOrITtNTTA6PgUe9U3fjhBCCCFENVPmj4/Pnz/Ppk2bSE9PBzDtFJ+ixjAo4K6ghfJydDlcSLpgnkacfaHTNHVblw1/v2WedoQQQgghqplSJxZxcXH07t2bRo0aMXDgQKKjowGYOHEizz33nMkDFNVb3oXyjlbnmaHy6jwdHNR1Mzi5Fq4dMV9bQgghhBDVRKkTi2effRZra2siIyNxcHDQ73/44YfZuHGjSYMT1V/T2i7Y3K6rOBJl5gJujzsF3OcSzpmvITsXdUhUrs1zQXrshBBCCHGXK3Vi8ddff7Fo0SLq1q1rsD84OJjLly+bLDBRM9haWdKktgsAF27cIinNfKtWV1iPBUDbCeAeqG5f3AERW8t1Oa1OYU9EHL8evcqeiDi0uspLVKpSLEIIIYSoPkpdvH3r1i2Dnopc8fHx2NramiQoUbOE+rtx7HZ9xdEriXRvZMJVsfPwsvfCzdaNxMxEziScMUsbelY20OtVWHt7Be7Nr0P9XmBR+rKljSejmff7KaKTMvT7/FztmDukKf2b+5ko4OoXixBCCCGql1K/C+ratSs
rVqzQP9ZoNOh0Ot5++2169uxp0uBEzVBRdRYajUbfa3Ez/SZx6XFmawuAZg+CX2t1+/oJOLGm1JfYeDKaKSsPG7yRB4hJymDKysNsPBltgkCrXyxCCCGEqH5K3WPx9ttv07t3bw4ePEhWVhYvvvgi//33H/Hx8ezevdscMYpqLtTfXb991Mx1Fo3cG7E/Zj8A5xLP4Wnvab7GLCzgvnmwYpj6+O+3oNn9YGVcz51WpzDv91MUNtAod9/T3x3G29nW7GvEKIpCbEpmkbFogHm/n+K+pr5YWsh6NUIIIYQoqNSJRfPmzTl79iyffPIJzs7OpKam8uCDDzJ16lT8/GSohCjI38MeD0cb4m9lcTQqEUVRzPZG2aDOIv4sHf06mqUdvfo9oEFvtcYiKRIOfAmdphp16v6L8QV6B/LTKRCTnGmCQMtHAaKTMth/MZ5ODcyYrAkhhBCi2ip1YgHg6urK//73P1PHImoojUZDa383/j4dS0JaNpfj0gis5WiWthp5VGABd64+r0PE34AC/7wDoWPAruSFAGNTik8qcrnaW2NvbVm+GEuQnq0lKb3kwnpjYxZCCCHE3afUicXy5ctxcnJixIgRBvvXrFlDWloa48aNM/pa//zzD++88w6HDh0iOjqan3/+mfvvv1//vKIozJ07ly+++ILExES6dOnC0qVLCQ4OLm3YopKF3k4sQJ121lyJRQPXBlhoLNApuopLLPxaQsuRcPwHSE+AXR9Cn7klnpadozPq8p+NaWv2XoI9EXE88sXeEo/zdrYzaxxCCCGEqL5KXby9YMECatWqVWC/t7c3//d//1eqa926dYtWrVqxZMmSQp9/++23+eijj/jss8/Yt28fjo6O9OvXj4wM+dS0umldQQXcdlZ2BLgEABCRGEGOLsdsbRno+T+wtFG39y6F5GvFHn40KpE3/zhV7DEa1BmZ2gd5mCjIorUP8sDP1Y7iBqhZaMDdwdrssQghhBCieip1YhEZGUlQUFCB/QEBAURGRpbqWgMGDOCtt97igQceKPCcoih8+OGHvPLKKwwbNoyWLVuyYsUKrl27xi+//FLasEUla+XvRm5ZxdHbU8+aS26dRZYui8vJFbS2insAtJukbuekw/YFRR6669xNRn+xl6T0opOe3Df4c4c0rZBiaUsLDXOHNDVoOz+dAqO+2Gv2+yeEEEKI6qnUiYW3tzfHjx8vsP/YsWN4eppuuMbFixeJiYmhT58++n2urq506NCBPXv2mKwdUTFc7Kxp4OUEwKnoZDKytWZrq0IXysur63Ngqy4GyJGVcKPgWhobTkTzeNgB0rLU779jfQ8+GNkKP1fDIUa+rnYsHdOmQteO6N/cj6Vj2uCbLxYfF1vqeahr1ySmZTP6i73sOnezwuISQgghRPVQ6hqLRx55hBkzZuDs7Ey3bt0A2LFjB8888wyjRo0yWWAxMTEA+Pj4GOz38fHRP1eYzMxMMjPvzKKTnJxssphE+bT2d+N8bCrZWoX/riXTNsC95JPKIMQ9RL99NuEsA4IGmKWdAhw94d6ZsPUNUHSwZR48skr/9Pf7I/nfzyfIXcj6vqY+fPxIKHbWlgxtXYf9F+OJTcnA21kd/lQZ07r2b+7HfU19C8SSlpXDkysOsedCHGlZWh4PO8DiUa0Z0EJmghNCCCGEqtQ9Fm+++SYdOnSgd+/e2NvbY29vT9++fenVq1epayzMYcGCBbi6uuq//P39KzskcVvehfKORJpvPYtK67EA6DAFnG+/2T7zB0SqBdFLt0cwZ92dpOKhtnVZ+mgb7G7P9mRpoaFTA0+Gta5DpwaelbpWRGGxONtZs3xCO/o2VRP9LK2OqasO8/3+0g1/FEIIIUTNVarEQlEUYmJiCAsL48yZM3z33XesW7eOiIgIvv76a2xsbEwWmK+vLwDXr1832H/9+nX9c4WZM2cOSUlJ+q+oqCiTxSTKp7W/m37bnOP0fR19cbZ2BuBMfMHhSGZl4wA95ugfKpt
fY8Efp1i08bR+3xP3BvH28JZYWZY6r69UdtaWfPpoG0a0rQuoNRdz1p1g6faISo5MCCGEEFVBqROLhg0bcuXKFYKDgxkxYgSDBw8mICDA5IEFBQXh6+vL1q1b9fuSk5PZt28fnTp1KvI8W1tbXFxcDL5E1RDi46xfj8GciYVGoyHYXZ2S+HradZIyk8zWVqFaPwq11F4TTdQ+Lu7+Uf/UC/1C+N+gJlhU09WrrSwtePuhlkzqemcCh0UbT7Pgz3AUpbB1u4UQQghxtyhVYmFhYUFwcDBxcXEmaTw1NZWjR49y9OhRQC3YPnr0KJGRkWg0GmbOnMlbb73Fb7/9xokTJxg7diy1a9c2WOtCVB9Wlha0qKsuHHclIZ0bKeZbUbpSh0NZWpHV4zX9wxetfsBKo+X/HmjB1J4NzbbqeEXRaDS8PLAJL/a/U8vy+T8XmL32ODla49bmEEIIIUTNU+qxGAsXLuSFF17g5MmT5W784MGDhIaGEhoaCsCsWbMIDQ3ltdfUN2Uvvvgi06dP58knn6Rdu3akpqayceNG7Oxkka7qKm+dxef/RLAnIg6tzvSfdFfKCty3pWbmMP7fWhzUqTE0tLjGuo4XGd2hXoXGUWY6LVzcCSd+Uv/VFZzBS6PR8HSPhvzfAy300wj/ePAKU1cdNt2MX0bEIYQQQoiqQ6OUcvyCu7s7aWlp5OTkYGNjg729vcHz8fHxJg2wvJKTk3F1dSUpKUmGRVUBC/48xef/XDTY5+dqx9whTU06terxG8d59M9HARgePJzXO79usmsXJy41kwlhBzh+JYk2mrOss73drpMvzDii1mBUZad+g42zDRf4c6kN/RdB06GFnvLniWieWX2EbK36p6RzA0+Wjb0HJ9tSTzpXrjiEEEIIYXqleS9d6v/5P/zww7LGJe5yG09GF0gqAGKSMpiy8rBJ121o6NYQDRoUlArrsbiWmM6Yr/Zx4cYtACLsmpHo3xe3yL8gNQb2fgrdnq+QWMrk1G/w41gg32cNydHq/pErCn1TP7CFH852Vkz+9hBpWVr+jYhj9Bd7WT6+HZ5OthUWhxBCCCEqV6l7LKob6bGoGrQ6hXsX/U10Ukahz2tQF4XbNbuXyaZaHbRuEJEpkdhZ2rF39F4sLSxNct3CnI9NZexX+7h2+/vzcbHl24kdaGRxDT7tqK5rYesCM46q611UNTotfNjcsIfAgEbtMZh5Aop4HY9EJjAh7ACJadkA1Pdy5NuJHajjZl/o8eaKQwghhBCmY9YeC4CIiAiWL19OREQEixcvxtvbmw0bNlCvXj2aNWtWpqBFzbb/YnyRSQWon01HJ2Ww/2I8nRqY5o13I/dGRKZEkqHNIColikDXQJNcN7/jVxIZv/wA8beyAAj0dODbiR3w93AAQiD0MTj8DWQmw853of8Cs8RRLpf/LebNPIACyVfh7QZgVfi00qHAQTuFRF2Wul5HMlh8CDkONlgZmyzmZEFGcWuc3I7j8r8Q1NW4awohhBCiQpS6eHvHjh20aNGCffv2sW7dOlJTUwE4duwYc+fONXmAomaITSk6qSjLccaoiJmh/o24ySPL9uqTiqZ+Lqx5qvPtpOK2HnPA6van9vu/gIRLZomlXG6cLvkYUN/0p14v8ssqLZZaJOKtUb9qkYhVWmyx5xh8FZtU5JF6veRjhBBCCFGhSt1j8dJLL/HWW28xa9YsnJ2d9ft79erFJ598YtLgRM3h7WzcTF67zt2kW7AX7o7lX2wx/8xQfQP7lvuaeW36L4bpq46QdXuK1faBHnw5/h5c7KwND3Txg05Pw873QJcNf8+H4V+YNJYyu3EG/v0Ijn5v3PGO3mBV8r3UKgo3UzPJvv3aaNDg6WSDnVUJw5dyMuBWbMlxOPkYE60QQgghKlCpE4sTJ06watWqAvu9vb25efOmSYISNU/7IA/8XO2IScrIX5JrYM2hK6w/Hs2o9v5MvDeIuu5ln0XJnD0WPx6M4qW1x8mdKbdPE28
+Gd0GO+si3jh3eQYOLof0eDjxI3SeBn6tTBpTqUTug90fwpk/jTyhdLUNloB9RjbTvznI/ovqTHE2ORZ89Ego/Zv7Fn2ivsYimgLF23njCOhsZNxCCCGEqCilHgrl5uZGdHR0gf1HjhyhTp06JglK1DyWFhrmDmkKqIXahcndn56tZfnuS3R/ZzszVx8hPDq5TG3WcaqDg5WamJgysVj2TwQv/nQnqXgwtA5Lx7QtOqkAsHOFbi/cebzldZPFYzSdDk7/CV/1g6/7GiYVdq7QZCjqXch/h24/7r+wVAXTLnbWrHi8PX2aeAOQpdXx9HeH+PFAVNEnWViqU8rmbTe/UsYhhBBCiIpR6sRi1KhRzJ49m5iYGDQaDTqdjt27d/P8888zduxYc8Qoaoj+zf1YOqYNvq6GQ2n8XO34bEwb/nmxJ+M7B2Jnrf5YanUKvxy9xoDFOxn39X7+jbhJaSYxs9BYEOweDMDV1KukZKWUK35FUVi44TT/9+edeoTHuwTx7ohWWFsa8avUbiK43V4kL+JviNhWrniMlpMJR1aqs1OtfgSi9t55zqUO9J0Pz/4HD3+rTuXqkm/KX5faZZ7i1c7akqVj2vJgqPqhg06BF9ceZ9k/EUWf1HRo4XEAdH1eppoVQgghqqhSTzeblZXF1KlTCQsLQ6vVYmVlhVarZfTo0YSFhWFpWbU+SZTpZqserU5h/8V4YlMy8Ha2o32Qh8EUs/G3slix5xLf/HuJhNtTl+ZqWdeVyd0a0L+5r1HT0r6x5w3WnF0DwIoBKwj1Di1zzP/7+QSr83za/tx9jZjWqyEaTSmmxz3+I6ybpG77tYJJ28Gi1Pm9cTKS4FAY7F0KKfl6Gb2aqMOzmg8vOMuTTqvOupR6Xa1lCOhc7h4CnU7hrT/C+Xr3nXVMnuregNn9Q4p+/XLjOLMB9i5R97UYWXXqU4QQQoi7QGneS5d5HYuoqChOnDhBamoqoaGhBAcHlylYc5PEovpKz9Ky5lAUy/65wJWEdIPnAjwdmNS1Pg+1rVvsEKTVp1czf998AF7p8AoPN3641HFk5miZufooG07GAKDRwBvDmvNYx4BSXwudDpZ1g5gT6uPhX0GLh0p/neKk3F6M7+BydYrbvAK6qAlFw/vMl9AUQVEUPvn7PO9tvjMsbVQ7f+Y/0KL4JDEnE95tBBmJYO0Az58DWyfzByyEEEII8yQWOp2Od955h99++42srCx69+7N3LlzsbcvxeJXlUASi+ovR6vjz5MxfL4jgv+uGb5R9nS0YXznQB7rFICbQ8GZpI7EHmHsBnWI3shGI3m106ulavtWZg6Tvz3ErvPqxARWFhref7g1Q1vVLuN3A5zfCisfVLfdAmDawSLXhiiVG2fVGZ6O/wDarDxPaKDJYOj8DPi3K3875fTt3su89utJcv/yDGjuy4ejWmNb3IxR65+Fg1+r2w98Dq1GmT9QIYQQQpTqvbTRH1nOnz+fl19+GScnJ+rUqcPixYuZOnVquYMVoiRWlhYMbVWb9dPv5duJ7bm3YS39c3G3snhv81k6L/ybN34/xdVEw56Nhm4N9dulLeBOuJXF6C/36ZMKe2tLvhx3T/mSCoAGvSCou7qdePnOG+ayitoPqx+FJe3hyLd3kgpLG2gzDqYdgIdXVomkAuCxjgEsHhWqXzRvw8kYHg87QGpmTtEntczT03T8BzNHKIQQQoiyMLrHIjg4mOeff57JkycDsGXLFgYNGkR6ejoWFTykojSkx6JmOnk1ic//ucAfx6/pZ2cCtUdhaKvaPNm9Po191fvdf21/rqZexcHKgT2j92ChKfnnNTopnce+2s/5WHUBSBc7K5ZPaEfbAA/TfAPXjsCyHuq2gyfMOAp2pfj51Ong3F/qlLGRewyfs3WFdo9Dh6fAuZipXSvZ9jOxTFl5mPRsLQCt6rqyfEJ7PApbw0RRYHErNRHTWMCs8Cr9vQkhhBA1hVl6LCIjIxk4cKD+cZ8+fdBoNFy7dq3
skQpRRs3ruPLxI6Fsf74nYzsF6GeSytEprDtylf4f7mT88v3svRBHsJta/5OWk8bV1KslXvvCjVQeWrpHn1R4O9vy41OdTJdUANQOVQunAdLi1CFMxsjJgiPfwdJO8P3DhkmFc23o+xY8exL6vF7l33j3CPFm5RPtcbFTl9M5diWJkZ/vITopveDBGs2dXgtFByd+qsBIhRBCCGEMo3ssLC0tiYmJwcvLS7/P2dmZ48ePExQUZLYAy0t6LO4OcamZfLPnMiv2XCIx30xS/g3+IdFGXbPhw54f0rteb/1z+WeocrCx5PGwA8TdUocTBXg68O3jHajnWfaF+ooUfxE+aaeuxm1lDw9+DtrswmdiykiGw9/Ank8hJV8y79UYOs+AFiNMU6tRwU7HJDP2q/3EpmQCUMfNnhUT2xPo6Wg4e5hLPJZL7lFP8m0JT+2sxKiFEEKIu4NZirctLCwYMGAAtra2+n2///47vXr1wtHRUb9v3bp1ZQzbPCSxuLukZeXw44Eovth5UV9vYeV8HPu66mrx99Z6lA/6voCdtSUbT0Yz7/dTRCdl6M/XcGe958a+zqyY2B5vZzvM5s8XYf/nBfe71FYXivPvAPuWwoGvITPJ8Jh6naDLTAjuW+EzPJlaZFwaj329j8txaQA42VphZ23BzdQ7Reh+rnZscHgdt4Tj6o6n94J3k8oIVwghhLhrmCWxmDBhglGNL1++3KjjKookFnenHK2OP05E89mOC5yOi8CpwXsAZCc3xynpcbo0rMVvR69R1A9/Ay9H1j3dBVd7a/MGeuQ7+PXpop+3sAJdvqLmxoPVHop6HcwbWwWLTclg7Ff7OR1T+EKGGmCc5SZet/5G3XHvs+qQLyGEEEKYTYWsY1FdSGJxd1MUhR1nrzNjzyAUTRa6LE9uRbxQ4nm+LnbsfqmXUYvwlZlOCx82h2Qj6pQsbdQag84zwKuR+WKqZPG3sujwf1vI1hb+Z6kWSey1m4oVOnCpCzNPVPveGiGEEKIqM0vxthDVkUajoUeIL81qqW/GLWzi0WgySzwvJjmD/RfjzRvc5X+NSyqaD4dnjsOwT2p0UgFwJialyKQC4Cau7NC2Uh8kX4HLuysoMiGEEEKURBILcVdo5JH7hlxhYm/jFnWMTcko+aDySL1u3HEhA8HFz7yxVBHGvOa/aLvceSBrWgghhBBVhiQW4q7QyP3OJ/029rFGnWPWom1QZ38y5XE1gDGv+WZdW3KsndQHp36F7EKmpxVCCCFEhZPEQtwV8iYW2ZZX8XO1o6jqCQ3qDETtg0y4bkVhAjqrsz8VF4lLHfW4u0T7II8S7427qysWTYeqOzKT4ezGigpPCCGEEMWQxELcFfImFucSzzJ3SFOg4Fv63MdzhzQ1b+E2qOtU9F+Ur+V8kfRfaLieRQ1naaEp8t6AOhXw3CFNsWj18J2dx3+skNiEEEIIUTxJLMRdwdXWFR8HdUjR2fiz9Gvmy9IxbfB1NRx64+tqx9IxbejfvIJqGpoOhZErCtZQuNRW9+d+Mn8X6d/cr9B7A9Cpvqd6bwK7gvPt1+zcX3ArroKjFEIIIUR+VpUdgBAVpZF7I66nXSclO4WYWzH0b+7HfU19DVd3DvIwf09Ffk2HQuNB6ixRqdcLX3n7LpP33kQlpDH315OkZ+s4FJlAXGomnk626krj/36krvPx3zpoP6mywxZCCCHuatJjIe4aeYdDnU04C6hDbzo18GRY6zp0auBZ8UlFLgtLCOoKLR5S/72Lk4pcufdm5D3+PNohAICsHB3f749UD2gpw6GEEEKIqkQSC3HXKCyxENXDuM6B5OZ83+69TLZWB77NwbuZuvPKfoiLqLwAhRBCCCGJhbh7hHiE6Lclsahe/D0c6NNErZG5npzJnyei1SfyFnGfWFMJkQkhhBAilyQW4q4R4BKAtYU1IIlFdTShS5B+e/nuS+pG84fQzx91/AdQil61WwghhBDmJYmFuGtYWVjR0K0hAJeSL5G
RY+aVtYVJdazvQWNfZwCORiVyJDIBXOuoNSkA8RfgysFKjFAIIYS4u0liIe4qwe7BAOgUHRFJMia/MFqdlgMxB/jzwp8ciDmAVqet7JAA0Gg0TOgSqH8c9u8ldaPlqDsHHf+hQmMSQgghxB2SWIi7ikEBd7wMh8pvy+Ut9Fvbj8c3Pc7snbN5fNPj9Fvbjy2Xt1R2aAAMa10Hdwd1ONsfx6O5npwBTYaA1e01L06uBW12JUYohBBC3L0ksRB3FSngLtqWy1uYtX0W19OuG+yPTYtl1vZZVSK5sLO2ZHSHegDk6BRW7r0Mdi7qOiAA6fFwvvLjFEIIIe5GkliIu0reHotzCecqMZKqRavTsnD/QhQKFj/n7lu0f1GVGBb1WMdArG7PPbtqXyQZ2dp8a1rIcCghhBCiMkhiIe4qHnYe1LKvBcCZhDMoMosQAIdjDxfoqchLQSEmLYbDsYcrMKrC+braMaCFHwBxt7L47dg1aNALHNT7yuk/ISOpEiMUQggh7k6SWIi7Tm6vRWJmIjfSb1RyNFXDjTTjXgdjjzO3vEXcy3dfQrGwgubD1R3aTDj1W+UEJoQQQtzFJLEQdx1ZgbsgLwcvo447euMoadlpZo6mZKH+brSq6wpAeHQy+y/Gy3AoIYQQopJJYiHuOpJYGFIUhb3Re4069vvT39NvbT8+PfopCRkJZo6saOrUs/kWzKvTBjwaqDsu7YKkK5UTnBBCCHGXksRC3HUksbhDp+iYv28+y44vM/qcxMxElh5bSt+f+jJ/73yupFTOG/iBLfzwdrbl/9u77/Aoqu6B49/NbnojkEoIgdA7BCGEjiJB6RaKdFRQsMGrgq/6w/Iq2BBBBESpUkSaUgQBpXcCSAk19BQIkEZI253fH0OWLOllNyE5n+fhkZm5M3N2M8Q5M/eeC/DXqSiu3rkHTTLmtFDg+G8lEpcQQghRXkliIcqdANcAdBodUL4TizR9GhN2TODXMw+6DfWu0RsvBy+Tdt4O3nzb8VtW9FhB94DuaDVaAJL1ySw7s4xuq7vx7vZ3OXXrlEXjt9FZMaiVPwAGBRbtuwyNnn/Q4NivIIPzhRBCCIvRKGW8LE58fDyurq7ExcXh4uJS0uGIUuKZP57h3J1z6DQ69g/cj43WpqRDsqiktCTGbR/H7uu7AdBqtHza5lN61OiB3qAn9EYoN5Nu4uHgQaBnIForrXHfiMQIFp1axMpzK7mXfs/kuK18WjG84XCCfYLRaDRm/xwxiSm0nvQ3qXoDLnY69v33CRwWPQ1X96sNRu0En8Zmj0MIIYQoqwpyLy1vLES5lNEdKl1J52LcxRKOxrLiUuIYtXmUMamw1dryXafv6FGjBwBaKy0tvFvwdMDTtPBuYZJUAFR2qsz4luPZ/NxmXmv6GhXtKhq37Yvcx6jNo+i3rh9/XvyTdEO6WT+Lu5MtPZtWBiA+OZ1VoddlELcQQghRQiSxEOVSeR1ncTPpJsM2DuPozaMAOFk7MavzLDr4dSjwsVxtXRnVZBSbnt3EB0EfUMWpinFb2O0w3t3xLt1Xd2fp6aVZ3mwUp2Gtqxn/Pn/PJZT6vcHKWl1xfAWUgkn9hBBCiPJAEgtRLpXHxOJq/FUG/zmY87HnAXWywHld5/GY92NFOq6dzo5+dfuxrs86vu7wNfUr1Tduu554nc/3f06XFV2YeXSmWSpJNfR1pWU19a3J+RuJ7LxugFpd1I2JUXBxe7GfUwghhBBZSWIhyqU6bnWMfy8PicWZ22cYsnEI1xOvA1DZsTILn1pI3Yp1i+0cWistIdVCWNZtGT93+Zk2ldsYt8WmxPLDsR/osqILn+//3BhHcTGdMO8iNMncHWp5sZ5LCCGEENmTxEKUS+727rjZugHqTXdZduTGEYZvHE7MvRgAalaoycKnFuLv4m+W82k0Glr6tGTWk7NY0WMF3QK6mVSSWnp6Kd1WdePdHe8SdiusWM75ZH0vfCvYA/DPmZtcdGsDtuoEepz6A1L
vFst5hBBCCJEzSSxEuaTRaIzdoW4l3+LXM79yMOog+jLWH3/HtR2M/GskCWkJADR2b8z8rvPxcvTKY8/iUadiHSa3m8yGZzYwsN5A7HXqzb9e0fPnxT/pu64vozaPYl/kPh4uUKc36DkYdZAN4Rvy/NnotFYMCX6QKM0/EAUNeqkLaXfh9Ibi/3BCCCGEMCHlZkW5NWbLGHZc32GyzsvBiwktJ9DZv3MJRVV81oev54NdH5CuqJWZgn2CmdppKg7WDiUWU2xyLMvOLGNJ2BLupJiOt6hXsR4jGo6gs39ntl3dxuQDk4lOijZuz+tnE5eURqtJW7mXpsfRRsuBQfY4Lumpbqz5JAxaYa6PJYQQQpRZBbmXlsRClEtbLm9h7LaxWdZrUOdemNJxyiOdXCw9vZRJ+yehoP7z7uLfhUntJpWa+Trupd/j9/O/s+DkAq4lms7cXcmuEreSb2XZJz8/m/+uPs6S/VcA+LBbXV481AviroJGC/85DU6exfxJhBBCiLJN5rEQIhd6g57JByZnuy3jRvyLA188kt2iFEVh5rGZfL7/c+Nneb7283zZ/stSk1QA2Ovs6V+3P2v7rOWrDl9Rr2I947bskgrI389meKbSswv2XsHQqO/9nfVwYmXxBC+EEEKIbEliIcqd0BuhJl1sHqagEJUUReiNUAtGVXQGxcDkA5P54egPxnUvN3qZD1t9mGWSu9JCZ6Wja7Wu/Nr9V+Z0mUODSg1ybZ/Xz6aWlzPtarkDcOV2Evscn3iwUSbLE0IIIcxKEgtR7txMulms7UqDNEMa/931X5acXmJc9/Zjb/NG4BtoNJoSjCx/NBoNrXxaMaT+kHy1z+1nk7n07PcntODTRF2IOAI3y35pYSGEEKKkSGIhyh0PB498tTsfe550Q7qZoym6e+n3eOuft1gfvh4ArUbLp20+ZWiDoSUcWcHl92eTW7uOtT2p7u4IwJ4Lt4iu3vvBRnlrIYQQQpiNJBai3An0DMTLwcs4GDgnc47PoduqbiwOW0xSWpKFoiuY+NR4Xtn8CjuuqdWtbKxsmNJxCr1r9i7ZwAopPz8bbwdvAj0Dc9xuZaVhaKbSsz/daQaa+7/qji8Hg6HY4hVCCCHEA5JYiHJHa6VlQssJAHkmFxF3I5h8YDJdVnZhxtEZ3E6+bYkQ8yXmXgwjNo4wjjdwtHZk1pOzeLzq4yUcWeHl52czvuX4PMeMPNu8Ck62OgAWHk8mzb+DuiH2ClzdV3wBCyGEEMJIEgtRLnX278yUjlPwdDAtP+rt4M2UDlOYGzKXtr5tjevjUuKYdWwWXVZ04X/7/sfVhKuWDtnEtYRrDPlzCGfuqLOGu9m68XPIz7TwblGicRWHnH42AFZYUaNCjTyP4WxnzfOPVQEgJd3ANvtMyZZ0hxJCCCHMQuaxEOWa3qAn9EYoN5Nu4uHgQaBnoMnT8DO3z7Dg5AL+vPincaI5ACuNFV38uzC84XDqV6pv0ZjP3TnHqM2juHlPHcDs4+jD7CdnU921ukXjMLfMP5u9EXtZc2ENAJ2rdubbTt/muf/lW3fp+PU2FAWqu8Dfysto0u6CnSu8fQ50tmb+BEIIIcSjTybIy0QSC1EcIhMjWXhqISvPreRe+j2Tba18WjG84XCCfYLNXoHp6I2jjNk6hvjUeAACXAOY/eRsvB29zXrekpaUlkS31d2IuRcDwC9P/0ITjyZ57vfSgoNsCbsBwK5aS6lyda26oe8iqN/TbPEKIYQQZYVMkCdEMfNx8mF8y/Fsfm4zrzd7nYp2FY3b9kXuY9TmUfRb1099s2GmSlK7r+9m5OaRxqSiYaWGzO86v8wnFQAO1g682uRV4/KUQ1PIzzOR4W0evMWZlxD0YIN0hxJCCCGKnSQWQhSAq60rIxuPZNOzm/iw1Yf4OfsZt4XdDuPdHe/SfXV3loQtyfJmoyg2XtzIa3+/ZjxmkE8QP4X8hJudW7Gdo7TrU6sP1VyqAeokhxmVsHLTukYl6ng
5AzA/yp80+/tlas9ugqTSMxBfCCGEKAsksRCiEOx0dvSt05e1vdfyTYdvTGaMvp54nUkHJtFlRRdmHp3JneQ7RTrX8jPLeXfHu8Y3IU/6P8kPT/yAo7VjkY77qLG2subNwDeNy1NDp6I36HPdR6PRMOz+hHl6tOy276huMKTBqTXmCVQIIYQopySxEKIItFZaulTrwtJuS/m5y8+08W1j3BabEssPx36gy4oufL7/c64nXi/QsRVF4cd/f+TTfZ+ioHb7ebbWs3zV/itstDbF+jkeFU9UfYLGHo0BdQLDPy78kec+vZv6UsHBGoBvo5s92PDvcrPEKIQQQpRXklgIUQw0Gg0tfVoyq/MsVvRYQbeAbmg1anWpZH0yS08vpduqbry7413CboVl2V9v0HMw6iAbwjdwMOogafo0vjr0FdOPTDe2GdFwBBODJ+Y5h0NZptFoGNd8nHF5xtEZJKcn57qPvY2W/i2qAnBM788t+/vjLq7shTuXzBWqEEIIUe5IVSghzCQiMYJFpxZlW0mqdeXWDG84nCDvILZe2crkA5OJToo2brfT2pGsf3DDPK75OIY3HG6x2Eu717e+zrZr2wAY23wsIxqOyLV9ROw92n35D3qDwtsO63jNsETd0OkD6PCOmaMVQgghHl1SbjYTSSxESYtNjmXZmWUsCVvCnRTT8Ra+Tr65dpHSoOHj1h/Tp1Yfc4f5SDl/5zzPrn0Wg2LA2caZP5/5E1db11z3GbM4lPXHI6lMDHvs3lBXVqoFrx0EM5cJFkIIIR5VUm5WiFKkgl0FXmnyCpue28QHQR9QxamKcVte4y5cbV3pWUPmW3hYTbea9KrRC4CE1ATm/Dsnz32G3x/EHYE7x3WN1JW3zkFEqLnCFEIIIcqVUp1YfPTRR2g0GpM/devWLemwhCgUe509/er2Y22ftXzV4Sv8Xfzz3Cc2JZbQG3Ljm53RTUdjq1Vnz15yegkRiRG5tm/u70YjX/WtxqJ7rR5skEHcQgghRLEo1YkFQIMGDYiMjDT+2bVrV0mHJESR6Kx0dK3W1WTCt9zcTLpp5ogeTd6O3gysNxCANEMaM47OyLW9RqNhWOtqAGzUtyRNo1aK4vgK0KeZM1QhhBCiXCj1iYVOp8Pb29v4x93dvaRDEqJYeDp45qudh4OHmSN5dI1oOAIXG7W/59oLazlz+0yu7bs38cHdyZZ4HNmsD1RXJsXAhX/MHaoQQghR5pX6xOLcuXNUrlyZgIAABg4cyJUrV0o6JCGKRaBnIF4OXmjIfuCwBg3eDt4EegZaOLJHR8ZM6AAKClNDp+ba3lanZWCQWnp2VXrbBxv+/dVcIQohhBDlRqlOLIKCgpg/fz4bN25k5syZXLx4kXbt2pGQkJDjPikpKcTHx5v8EaI00lppmdByAkCW5CJjeXzL8eV63or86F+3Pz6OPgDsur6LA5EHcm0/sFVVrLUathuaEIuTuvL0ekjJ+feKEEIIIfJWqhOLp556iueff57GjRsTEhLChg0biI2NZfnynAdbTpo0CVdXV+MfPz8/C0YsRMF09u/MlI5TsnSL8nLwYkrHKXT271xCkT06bLW2vNbsNePyt4e/Jbcq2p7OdvRoXJk0dKxNvz+IO/0ehK01d6hCCCFEmfbIzWPRokULOnfuzKRJk7LdnpKSQkpKinE5Pj4ePz8/mcdClGp6g57QG6HcTLqJh4MHgZ6B8qaiAPQGPc+ve55zd84B8HWHrwmpFpJj++PX4ujx/S4CNWdZZfuRujKgIwz53fzBCiGEEI+QMjuPRWJiIhcuXMDHxyfHNra2tri4uJj8EaK001ppaeHdgqcDnqaFdwtJKgpIa6VlbOBY4/K00GmkGXKu9NSoiivN/d0IVWpx2XD/bVH4dojPvWStEEIIIXJWqhOLt99+m+3bt3Pp0iX27NlDnz590Gq1DBgwoKRDE0KUMm1929LCuwUAVxKusPLsylzbqxPmaVhjyBjErailZ4UQQghRKKU
6sbh27RoDBgygTp069O3bl0qVKrFv3z48PKT8phDClEajYVzzccblmcdmcjftbo7tQxp44+Nqx2p9mwcrZbI8IYQQotBKdWKxbNkyIiIiSElJ4dq1ayxbtowaNWqUdFhCiFKqoXtDuvh3AeB28m0WnFyQY1trrRWDg/25pPhwxFBTXRl9HKJPWiJUIYQQoswp1YmFEEIU1BuBb6DT6ACYf3I+Mfdicmw7oEVV7KytHnprIXNaCCGEEIUhiYUQokzxd/Hn2drPAnAv/R6zjs3Ksa2bow19mvmyTh9MmnJ/wPy/v4HBYIlQhTAbvUHPwaiDbAjfwMGog+gN+pIOSQhRDkhiIYQoc15p8goOOgcAVp5dyeX4yzm2Hdq6GrdxYbuhsboiIQIu7bREmEKYxZbLWwhZGcKITSMYv3M8IzaNIGRlCFsubynp0IQQZZwkFkKIMsfd3p1hDYYBkK6kMy10Wo5t63q70LpGJdbo2z5YKYO4xSNqy+UtjNs2juikaJP1N5JuMG7bOEkuhBBmJYmFEKJMGtJgCBXtKgLw1+W/OH7zeI5th7epzmZDcxIUe3XFqd8h7Z4lwhSi2OgNeiYfmIxC1nlvM9Z9ceAL6RYlhDAbSSyEEGWSo7UjrzZ51bj8bei3KErWGy6Ax+t64lWxAn/qW6orUhPgzAZLhClEsQm9EZrlTUVmCgpRSVGE3gi1YFRCiPJEEgshRJn1bO1nqepcFYCDUQfZdX1Xtu20VhqGtq7GaoN0hxKPrptJN4u1nRBCFJQkFkKIMsvaypo3At8wLn8b+m2O3UCef6wKx3UNiVDU7lPK+S1wN+dStUKUNh4O+Zs8Nr/thBCioCSxEEKUaV38u9CwUkMAzt05x/qL67Nt52JnzbPNq/LH/TktNIZ0OLHKYnEKUVS5VT8D0KDB28GbQM9AC0UkhChvJLEQQpRpGo2GcY+NMy5/f+R7UvQp2bYd2roaqzJVh1JksjzxiJh3Yh4f7/041zYKCuNbjkdrpbVQVEKI8kYSCyFEmdfCuwVtfdWEIfJuJEvDlmbbLsDDCd/agZwy+AOguX4IYs5bLE4hCkpRFKYcnsKUw1OM69r7tsfLwStL25oVavJE1ScsGZ4QopyRxEIIUS68FfgWGjQAzDk+h7iUuGzbDW9TndX3u0MBcFwGcYvSSW/Q8/Hej5l3Yp5x3evNXuf7J75n07ObmBsyl8/bfI6HvTqm4nzseXZH7C6pcIUQ5YAkFkKIcqFOxTr0qNEDgPjUeH4+8XO27drVcudftycxKGoSkhq6FHIoUytESUnVp/L29rdZeW4loI6f+LDVh4xsPBKNRoPWSksL7xb0qNmDd1u8a9zv28PfYlAMJRW2EKKMk8RCCFFuvNb0NWysbABYfGoxUXejsrTRaDT0aNuc3YYGANgkXIGrBywapxC5uZt2l9FbR7PlijqLts5Kx5ftv6Rvnb7Ztu9SrQv1K9UH4Oyds6wPz76AgRBCFJUkFkKIcsPHyYcBdQcAkGpIZcbRGdm2eybQl03a9sble4eXWCQ+IfJyJ/kOL216if2R+wGw19nz/ePf07V61xz3sdJYMa75gwIG049Mz7GAgRBCFIUkFkKIcuXlxi/jbO0MwB8X/uDcnXNZ2jjY6KjQ/FnuKerbDU6uhvRUS4YpRBZRd6MYtnEYJ26dAMDFxoUfn/yRNr5t8tgTgnyCjO0i70ay7PQys8YqhCifJLEQQpQrrrauvNjoRQAMioHvQr/Ltl3/tvXZbHgMAPv0ONLO/GWxGIV42MW4iwz5cwjhceEAeNh7ML/rfJp6Ns33McYGjjUpYBCfGm+OUIUQ5ZgkFkKIcmdgvYF4OngCsP3adg5FHcrSpoqbA1d8uxuXIzd+zaF1P3Jy93r06ekWi7XUMujh4k44vkL9bw4zmpcneoPC3gu3+P3odfZeuIXeUDyD/k/dOsWwjcOIvBsJQFXnqix8aiG13GrlvFM2P586FevQPUC9puNS4ph7fG6
xxCeEEBk0ilK2y53Ex8fj6upKXFwcLi4uJR2OEKKUWH1uNf+35/8AaOzRmF+e+gWNRmPSZv/5aOotaoKL5p7J+mgqERE8kWYhQy0Wb6ly6g/YOB7iIx6sc6kMXb+A+j1LLq4StPFEJB+vPUVkXLJxnY+rHRN71KdrQ59CH/dg1EFe//t17qbdBaCOWx1mPTkLd3v3nHfK5edzvWpzeqzuQZohDVutLev6rMPb0bvQ8Qkhyr6C3EvLGwshRLnUs0ZPalaoCcC/N/9l65WtWdpYn9+IM/eyrPdQbtFkzxsc2bTA7HGWOqf+gOVDTG9aAeIj1fWn/iiZuErQxhORvPpLqElSARAVl8yrv4Sy8URkoY7795W/eWXzK8akItAzkLld5+adVOTy8/G9cthYwCBFn8LMYzMLFZsQQmRHEgshRLmktdLyVuBbxuXvQr8jzZBmXNanp+O77+Ns97W6/2LDZ+/H5atblEGvPgknuxfd99dtnFCuukXpDQofrz2V2zfCx2tPFbhb1Jrzaxi7bSypBrVoQPsq7Zn15CxcbHJ5WpjPn8/LDUcYCxisOb+GC7EXChSbEELkRBILIUS51b5KewI9AwG4FH+J1edWG7ed3r8JL27xUO8oIysNeHOL0/s3WSLU0uHynqxPwk0oEH9dbVdOHLh4O8ubiswUIDIumQMXb+f7mAtOLuDD3R8aJ7LrHtCdqZ2mYq+zz33HfP58KkSdYkSjEYBawGBq6NR8xyaEELmRxEIIUW5pNBrGPfagvv/MYzNJSksC4N6d6/k6Rn7blQl3LuavXWK0eeMoRSLjsnaVy86NhJyTjwyKovBd6Hd8fehr47qB9QbyWdvPsLayzsdJTuUrFhKjTQoYbLu6jdDo0PztK4QQuZDEQghRrjXxaELnqp0BiLkXw8JTCwGwd/PN1/7ak6u5Hh5mtvhKhYQo2DwR/hyfv/YHf4brh80bUwlLTEnnp53hfLY+fzfzns52uW7XG/R8su8Tfjr+k3HdmKZjGN9iPFaaPP5XHX0KVr+idkPLDycv7HX2jGk6xrhqyuEplPFaLkIIC5CqUEKIcu9i3EX6/N4HvaLHQefAn8/+iavOhZj/1cZDuWUcU5ETvaLhqEtHXJ/4DzWbtrNM0JYQcw52fwf//gr6QkwQWK0dtHkTanYmxz5lj5ibCSks2HOJhXsvEZ+cv/E1Pq527Br/ONocLqRUfSrv7XyPvy6rc6Vo0PBe0HvGQdbZUhS169Pu7+BcAbrjufjCW8fBSku6IZ3n/niOC3HqGIupHafyhP8T+T+WEKJckKpQQghRANVdq/NMrWcASEpPYvax2Wh1OiKCJwLw8Lhbg6Le1yUrOgC0GoXmCf9Qc013TkzqyPEdq1EMBot+hmJ19QAsGwjft4Ajix4kFVobqN7xfqOHb5LvL9u5Plh1aScsfg5mtoFjy0CfxqPqYsxd/rv6OG2++Jvv/zlvklQ09lX/R5tT6tSraeUck4qktCRe2/qaManQaXRMbjc556TCoFcrP/3UGeY/bZpU2FWA+r3vR5JDNJ0ngpVWPZeVjjcD3zRumho6lXRDOSpGIIQodvLGQgghgJtJN+m2uhv30u+hs9LxR68/8HPx48imBVTe+zFe3DK2jaISkcETCXisK6fWfkudS4upiOksxhe0AcQ2G02TkKHorG0s/XEKzmCAc3/B7qlwZa/pNltXaDECgl4BZ+8c5knwha6ToXaI+oZj9zS4dc70OC5VIHgMBA4BWyezf6TicOxqLLO2X2DjySgy/9/SWquhd1NfRrYPoJaXc7bzWGSo4GDNxjfb4+1q2h0qNjmWMVvH8G/MvwDYae2Y0nEK7apk89YrLRn+XQZ7psOt86bbXP3U77XZYPV7ze7nk6HJC9DnQYlZRVEYtnEYoTfUMRb/F/x/PF/7+Xx+O0KI8qAg99KSWAghxH3Tj0znx39/BOCpak/xZYcvAbX07On9m7h35zr2br7UDQpBq9MZ90tOSuT
Yupn4hv1EFSXK5JgRGk+u1hlBo+6jcXBypdRJT4Xjv8GeaXDztOk2Zx9oNRqaDwO7h35/GvRqV5zEaHDyAv/Wxifh6nYDnNmgJirXDprua1cBWr4MLUeBk4cZPlTRKIrCtrM3mb39AvvCTas5OdnqGBhUleFtqmdJFPQGhQMXb3MjIRlPZ1vm777EplPqQPbggEr88lKQ8c1F9N1oRm0eZeyG5GzjzIwnZtDMs5lpMPdi4dBc2D8r66B4r4ZqV7MGfUD70ODuzD8fxQBrx0Jaorrt2Z+h0XPGpkdvHGXwn4MB8LD3YF2fdThYOxT0axNClFGSWGQiiYUQIr8SUxN5etXT3Em5A8Cy7stoUKlBvvfXp6dzbPMinA/PoFa66dP6Ozhz2q8/dXv+BzePws/EXGyS4yF0Aez9ARIeerLtURdavwGNngddEd+2KApc2acmGGc3mm7T2UHTFyD4NahUo2jnKQZpegPr/o1g9vZwTkclmGzzcLZlRJvqvBBUFVf7fFRoAmKTUuk6dSdR8epbjHe71mF0x5pcjr/MyL9GEnFX/d7d7d2Z1XkWdSrWebBz3HXY9wMcng+piaYHrtYO2r4FNZ7I/9iVY7/C6pHq321d4JWd4FbNuPmtf94yThL5erPXGdl4ZP6OK4Qo8ySxyEQSCyFEQSwOW8zkA5MBCPIJYs6Tc9AUcOCxYjBwcu96DDu/o3Gy6dP6e4oN/3r0wK/bu1SuXrfY4s63hGjYPxMOzoWUONNtVYOhzVtQqwtYmWEI3o0wtSvPv8sh02SEaKygXk/16btvYPGfNw93U9JZdvAqP+8MJ+KhrkwBHo6Mah9A72a+2Oq0ORwhZ3sv3OKFn/ahKKCz0vD1IHe+Pf4Ot5PVNyFVnKrw45M/4ufip+5w47T69ijH7+gN8G1euA+68mU4vlz9e5WWMPxP0Kpv3sLjwnnm92fQK3ocrR3585k/cbNzK9x5hBBliiQWmUhiIYQoiDR9Gj3W9OB6ojo/xezOs2nt27rQxws/sZ/bf31F07it6DQPBnSrlaQ64dr5bWo2aVPkuPMUc169YT22NGuFp7rd1TcUVYPMHwcU/9P4QopJzKjwdJm4e6YDy5tVrcArHWrwZD0vrPIqC5aHrzed4ft/zqO1D8eh6kKwUpOXWm61mN15Nh727jm/1dHaQrOBxfNWJzkeZreDO5fU5fbvwOMfGDd/vPdjVpxdAcCgeoMY3zKf5YWFEGWaJBaZSGIhhCioDeEbGL9TvamqW7Euv3b/Ne+5BPIQefkMl9d/TePo33HQpJhsO24bCG3eomHbHmiK+03BtUPqDWvYOtR5oO/T2kDjfmpC4VG7eM+ZXxnjB/bNhLs3TLflNn6giC7fusucneH8dugaKemm1bueqOvJqA41aFHNrcBvqnKSpjfQ7afZRNj8iMZKrbrUzLMZ0ztOw/XSLrVk7LUDpjsZx6GMBCfPYokDUK+Hn7uAogc0MGwdVGsLwI2kG3Rb1Y1kfTI6Kx1re6+linOV4ju3EOKRJIlFJpJYCCEKyqAY6L+uP2G31YnvJrWbRPeA7sVy7NiYKMLWfkudy0uyVJI6r61BXOCrNOlSxEpSBgOc36zesF7ebbrN1gUeu1/hyaUUjPWABxWPdk+D2xdMt2VUPAocAjaORTrNv9dimb09nD9PRJqUENZZaejV1JdRHQKo7eVcpHNk548Lf/Dh7v/DoOgB0CfWYm7lNrS6trRkKmft+Br+/vT++XzhlV3gUBGAaaHTmHN8DgBPV3+aL9p/YZ4YhBCPDEksMpHEQghRGHsj9jJyszqAtbJjZSYGTyQ2JRYPBw8CPQPRWhW8v31maiWpH6gS9hO+imm1nwiNF1frjqBx9zHYO5re6KamprBm+2xuxF/B06UqvTuMwsbGVt2YngonVqoJxc2HZgN38obgjApPRa9OpTfoCb0Rys2km8X2nWDQq5Wkdk2F64dMt9m7QYuMJ/imlaRy+04URWHHuRhmb7/
Angu3TPZztNHyQlBVRrStjo+rfdFiB/TpqYQeX8TN+Ct4uFQlsNFglpz9lS8PfmlsUy3endkx4VTWxJru7FlffUPT8Nlif0OThUEPC3up84wA1OsBfReBRkNiaiJPrXqK2BQ1vuXdl1OvUj3zxmMheVV3s2gsJhXE7GhZvWKOc50IUdIkschEEgshRGGN2jyKPRF7sqz3cvBiQssJdPbvXORz6NPTOfrXIlwOf08tven8BHdw4XTVAdTrOY4K7t78+Pv7LI1ZQ4zuQXcp93QDA9268ZJ3VXXcQvx10xO411a7OzXuCzrbIscLsOXyFiYfmEx00oOEqDi/k1xnldbZQbNB6lP9igE5fif9KvbGp/poZm0PJyzS9M2Qu5Mtw9tUY1Ar/3xXeMrLll2TmHx2MdHaBzeHjgaFu5luFvsnJvPezRsmM9Ma/Nti1fYty89OHh8BM1vDPbUCGt2nwmPDAfjl1C98cVB9UxHsE8yPXX60XFxmkt18NNFUIiJ4Is1Chlo0luzmPPFxtWNij/p0bVhK3iIKkYkkFplIYiGEKKwFJxfw9aGvs6zX3J/VeErHKcVzI839SlJ71mHY9R2Nk02f1icptixxq8O0CnfUURKZbkA193+FT7kRQ+ekew928gtSKzzV7lqsFZ62XN7CuG3jUDD9X4c5vhMAok+qlaSO/waZZ4XWWLHGsy7/Z5+Q43dSO6IVh+L7GNdXd3dkZPsA+jTzxc66iG9XMtmyaxLjzi/OEkdmr96J49XYODSAAQ0b9S34Mb07bTqG8E5ICVQHA3Xcza8D1b/r7GHUdvCoQ6o+lZ5rej4oYPDkbFpXLnwBg5J2ZNMCmux5A4DMLwUyusMdaz3NYsnFxhORvPpLKA/feGWENXNQoCQXotSRxCITSSyEEIWhN+gJWRli8lQ+Mw0aPB08Wd9nfdG7AD0k/MR+7mz5hiZx/6DTGNAD3fwqE63VZnvjqlEUPPV61l+NQFv7KWj9ulkqPOkNerqt7lYi3wlx19VJ4kIXQmpint8JioK7XuHauY9oUMWdke2q80Q9L7TF/FZAn55Kt6WtibYix6TCWa9n55XraLW20PQFwqoPpceSSNINChoNLH4piNY13Is1rnxbN1YdQA/g1Qhe2gLWdqwPX8+EnRMAqFexHsu6LytyAYOSoE9PJ+Z/tfFQbpFdTyODAjc0lXB777TZu0XpDQodvtpmnNfkYRrA29WOXeMfl25RolSRxCITSSyEEIVxMOogIzaNKOkwRBkx164eLbp9b6zwNHPbBb7YqM507u1ix59vtsPNsYiTERZGahLM6fRg1vWgV+GpyVkKGExuN5luAd0sH18Rndy9ngabX8izXf/UD9hnqG+BiPK29OVWBNeoVNJhCGFUkHvpR+/xgxBCWMDNpJslHYIoQ2561zMpGzuqfQCt7988RsUnM37lv5TIcz4bB3j2Z3W+DFAnTzz7F1YaK95q/pax2fQj00l9eP6TR0Dczav5audJrHkDKYAbCdm/0RDiUVAy5RCEEKKU83DwyLsRUKtCLZxtir9EaWY371znalr23Y8yc8edKh5+WJlpEHBCagLnYs/l2c5c34mCQvy9dCLj7qFPjSHFLjbPfWqkg6vWfG8CEgxpnNPmnRB4uFQ1Wbay0vBtv6Z0nbqDO0lp/HUqmsX7rzColb+5Qs2Zd0Po8in8+a66vOZVeHUPrSu3JtgnmL2Re7meeJ3lZ5YzqP4gy8dXCBGx9/h510XOHk6kdT565dm6+dDC2bwzjcffS+NMdGKe7Tyd7cwahxDmJF2hhBAiGxljLG4k3cgyUBnU8QReDl5sfHZj8Y8neEhqagohiwK5pdWg5DDGwl2vcOnc57g5OjCsdTUGB/tTwaF4b6hL6jtJ1xtYfzyS2dvDOXW/wpMV6fjX+m+e38nGwaEPyvGagT49lZCFgdywIsc4vAywcUgoWl3Wn8eWU9G8tFAdrG+rs2Lt623NMpdGnhQFlvZ/MPN3jcd
h4EpO3TlNv3X9AKhgW4ENz2wweyJdFGeiEpi94wJ/HI0g3aDQxupfFllPznZ8RYZEHLB//wpaa/OW+dUbFNp+8TdRccnZ/OtRuTvZsv+/T8gYC1GqSFcoIYQoIq2Vlgkt1cGrGkz/J5+xPL7leLMnFQA2NrYMcO+tnvuhZ0EZyxWjgzGg49bdVL7ZfJbWk//m47UnuXYnqdjisPR3kpSazoI9l+j49TbeXHbUmFQAVK3kQjurEPXcOXwn/d17mzWpANDqbJhQe2CucYyvPTDbpAKgc30vhgSrbylS0g28sfQIyWl6M0acA40Ges1Q5zsBuPA37JtB/Ur1ebr60wDEpsQy78Q8y8eWB0VR2B9+ixHzDxIydQerQq+TblAIsTrAPOuvjUlFTo9RnUhCu/l9dWJJM9JaaZjYQx3HkVPakJiSxuHLd8wahxDmJG8shBAiF9nN2eDt4M34luOLt6xqPmQ3Z4NHuoH+7r1p/di7/LgjnHX/RpjMKq210tCzSWVGtg+gnk/x/A4093dy+24qC/ZcYuHeS9xJSjPZ1qSKK690qEGXBt5orTS5ficje31W5FjyK7t5LLz1CuNrD6Rz2/dy3Tc5TU+v73dzJjoBgGGtq/FRzwZmjTdHF/6BRb3Vv1tZw0tbuObsTo81PUg3pGOntWPDMxvy3VXQnPQGhc2nopi1PZyjV2NNtg2z28H/8SNWqMlCbIWGpMZG4MltY5sEHHAmU+LdqC/0/sHsExRmN4+FtVZDml79h2urs2LmoEAer+tl1jiEyC+pCpWJJBZCiKIyyyzThZTrzNvA1dtJ/LQznF8PXSU5zfQJbIfaHozqEEBwQCU0RRyHYY7vJLfYO9bxYFT7GrQKqJgl9ry+E0vJbubtnN5UPOxMVAI9v99FSrr6uX8e+hhP1CuhG8u/PoQ909S/V6oJI7fzxbEZ/BL2CwDP1X6OicETSyY21ERs9ZHrzNkRTnjMXZNtvhXsmVplGy3Of/dgZZMXoOd09ApZZ94+vgz+eB2U+9dbrRB4fr46qN2MHp55u6GvC2OWHGHHWbVohNZKwzfPN6F3M1+zxiFEfkhikYkkFkKI8uj23VQW7r3Egj3ZP/Uf1aEGIfef+pe0E9fjmL0jnPUWeNtSmi3ae4kPfz8JQEVHGza+2Q5PlxIYyJueCnO7QMQRdbnZIO6E/I+nVj3F3bS7aDVaVvVaRYBrgEXDiruXxuL9l5m3+xI3E1JMttX1duaV9gH0uDkL7d7pDza0GgNd/pf7JJFh62DFCNDfP2bVYBiwDOwrFP+HyEVquoFxy4+y7t9I47qPetRnWJvqFo1DiIdJYpGJJBZCiPLsXqqe3w5f5ccd4Vy7c89km38lB15uF8BzzasU60zU+aEoCrvP32L2jgvsPBdjss3BRkv/FlUZ0bYaVdzM++S4NFEUhZcXHmZLmNrFrG1NdxaOaIlVSSR/ty7ArHaQdv+NwHPz+NEQw/Qj6k37E1WfYGqnqRYJJTLuHnN3XWTJ/ivcTTUdfxIcUIlXOtagfY0KaNaNhSOLHmx8/ENo958cJy40cXEHLH0BUtXuaHg1gkErwdmyb430BoX/+/0Ei/dfMa5784lavNW5VpHfMgpRWJJYZCKJhRBCqJWVNpyIYvb2C5yMiDfZ5u5kw7DW1RjUqvgrSRUkjkqONgxvY5k4Sqvbd1N56rsdRMerT8/fe6ouozrUKJlgjiyG30erf7d1JenlLXT/+xVu3lO76yx6ahFNPZua7fRnoxOYvT2c34+qg7EzWGngqYY+jGwfQBO/CpCWDKtegrC191tooPsUeKyAE1xGHIFfnoWkW+qyW3UYsgbcqhXDp8k/RVGYsvks0/8+b1w3NNifiT0alEySKco9SSwykcRCCCEeUBSFXedjmL09nF3ns39T8GK76vhWsC/W82a8OZmzM5yrt0vPm5PSaM/5GAb+vB9FAZ2VhlWjW9O4SgXLB6IosPJFOLFSXfZrxW9tRvDJ/v8BEOg
ZyPyu84v1SbqiKBy8dIfZ2y+w9fQNk202Oiueb16Fl9sFUM3dUV2ZkgDLXlDfOIA64PyZH6HhM4ULIOYcLOoDcfcn1nPyhsGrwMvyg+l/3nWRT9edMi73bFKZb/o2wVorBT2FZUlikYkkFkIIkb2cxjboMsY2dAigrnfRfm/euZvKwr2XWbD3Erfvms7c3MhXrfDUtWHpGOtRmnyx8TQzt10AoLq7I+teb4ujbQnMaZscB7PaQqzaNSe9w3j6xO7lUvwlAKY/Pp2Ofh2LfBqDQWFzWDSzt18g9EqsyTZXe2uGBPszJLgaHs6ZBuXfvQWLn30wFsTaAfotgppFrEwWd01NLmLOqst2rjBwBfi1LNpxC2Hl4Wu8u/Jf9Pf/gXas48HMgc2xt5EEXFiOJBaZSGIhhBC5u3IriZ92hbM8m2pMnep4MKpDDYKqm1ZjeriqTcvqFU2Sg6u3k/h510V+PXiVew/Ny9C+tgevFFN1qrIqTW/guZl7OHYtDoDnmlfh6+eblEwwVw/A3K6g6EFjxdZun/HWyZkA1HCtwYqeK9BZ5Zz05HatpKTrWR16nR93hhN+07TCU2VXO15sF0D/Fn5Zk6osN/8V7t/8tyiez3z3Fix+DiJC1eXiSlruK0hVtc2nohmzJJTU+xXDHvN34+dhLXC1L3pZ3NJU8S6v3ymi5EhikYkkFkIIkT+3ElNYsPcyC/deIvbhSlJ+FXi1QwBP1vdm86moLHX4fVztmNijPn4VHe7PpxFpfMoKaoWnHo19GNm+BvUry+/i/Lh86y5Pf7fTOGB52oBm9GxSuWSC2f4l/KPOC6K4+jG4VkOO3VIrWH3S+hP61OqT7W7Zzdng42rHOyF1iI5PYe7ui9lWeBrVIYDujStn3+3n5lk1qYi/pi47+8Dg1eBZrxg+aCYpCbBsIFzcri5bWcMzs6Hhs0U6bHbzwHg5eDGh5YQc54HZF36LlxccIiElHVC/o4UjWhapalhh4jCXnK6TiT3q07Whj0VjEVlJYpGJJBZCCFEwSanpLD94lTk7L3I91nQ8hKezLTceuhHMjb21ln4t/HipXfVyVeGpuKwKvca45ccAcLbVseHNdvhVLIHv0aCHBT3g8m4AQut0Zmiq+rbA08GTdX3WYa8zHZez8UQkr/4SSn5vMloFVGRUhxp0rO2R85us66Hqm4SMAdYVA2DwGnDzL8SHyof0FHWcSeaB4d2+hhYvFepwWy5vYdy2cSgPfSsZM9dP6Tglx5v6E9fjGDr3ALfudymsWtGBX14Momqlgl8PRYmjuOV0nWRcATMHBUpyUcIkschEEgshhCicdL2B9ccjmbU9nLDI+Lx3yKSio1ppanArf9wcy2eFp+Ly1rIjrDkaAUBg1QosHxWMriQG8MZdg5mt1XEXwOuNH2dbglq56M3AN3mp0YObbb1Boe0Xf5s8gc7JUw29GdWhBk39KuTe8OIOWDoAUhPVZe9GMGgVOHkW6uPkm0EPa980LWXb6QNo/3b+StnepzfoCVkZYvKG4GFeDl780fuPHLsjXbyZyIj5h4iIUxN+dydbfh76GHULMM+L3qCnx5oe3Ei6ke12DRq8HLzY+OxGs3eLyus60QDernbsGv+4dIsqQZJYZCKJhRBCFI2iKOw8F8MXG09nKRGbnRFtqvFOSF0ZYFpMEpLTeHraTmM1rTcer8m4LnVKJphTv8PyIQBcsHfmGe9KGDDgbO3Mhmc2UMGuAgB7L9xiwJx9eR7u275N6BNYJe/zhq2DFcNBf78AQNXW8MIydWC1JSgKbJkIuzPN6N1qNHT5LPfJ9zI5GHWQEZsKWAK3BM0NmUsL72Ias5KD/F4nS19uRXCNSmaNReSsIPfSUrNMCCFErjQaDe1rezCyff5mWm7iV0GSimLkbGfNd/2bGZ/Yfv/PefaH3yqZYOr3gsChANS4l0DvdHVQdUJaAnOOzwHUCk9bw3J+Kp9ZvuZlOPILLB/8IKmo3VUtAWuppALUNxN
PfgKdP36wbt8P6jwf+rSc98vkUPQhMwVnHjm90ShOpyLj8tXuRkLeb75E6VACteuEEEI8ijyd8zdQNL/tRP4FVnVj3JO1+WrTGQwKjP31KH++2R5Xh6JXBiqwrpPgyl6IOcvoiIusr+pHCgaWnl6Ka2onft2XyIWHKjzlJM9rZc90+OuDB8uN+0GvGaAtgc8N0PYtcKiodo1SDHBsKdyLhefngXXWuV8MioFd13cx98RcDkcfztcp6rjVwcU27x4WeoPC2agEYu+piY1GAzU9nHDPXJI3G/Ep8Zy5cybP4886NgtrK2ueqPpEsXeJ+vdaLLO3h7PheGS+2q85cp2Gvq7U8HAq1jhE8ZOuUEIIIfIloz90VFxytgNypT+0eekNCgN/2se+8NuAOjbhh4GBJVOyN/Jf+OkJ0Kcy1c2Vnyuobw/SYpuRHNkvz93zvFYUBbZ+DLu+fbAu6FUI+TzfXY/M6tQf6qDujLco/m1gwFLjW5Q0fRp/XvqTeSfmcT72fC4HeqAwYxtS0vWM+/UY6+/foGs08EnPBgwOrpbjPhljPW4k3cgyeDs7VZ2rMrTBUHrW6ImdrvAPDRRFYce5GGZvv8CeCwV/46bRQJf6XozqUIPAqm6FjkMUnHSFEkIIUey0Vhom9qgPPKjYkiFjeWKP+pJUmInWSsO3/ZpS4f5bij9PRPHrwaslE4xPYxLaqW8SRsTF46JX51jQuR7FyjaCoOoVGdOpBhoKca1kDJbOnFR0el99U1IakgqA+j3VeTNs7j9Bv7wb5nfn7p3LLDi5gKdWPcX7u943SSqqu1anf53+wIPqSxkylse3HF+gtwO2Oi3TBjRjQMuqgJqPffj7Sb7bco6cnhtrrbRMaDkh1zh8nXyN664kXOHTfZ8SsjKEH//9kbiU/HVfypCuN/D70es8PW0XQ+ceMEkq3J1s6dW0crbXSQY7nZXxs206Gc0zP+yh7+y9/H06GoOhTD8bfyTJGwshhBAFIjXnS9amk1GMWqR2q7G31rL29bbU9LRcF5HzNxKZsyOc1UeuMdtqMp20x1jg4szXldSnyI0rBrG4x09AIa6V9BRY9bI6SBwADTz9FbR82dwfq3CuH4ZfniMmJZYlrs4sc3Ul4aE75KYeTRnRcAQd/DpgpbHKdv4IbwdvxrccX+gSr4qi8NWmM/xwf7Z2gGGtq/F/3evnOI4ltzieqPoEeyP3MvfEXPZH7jfZz15nz3O1n2NI/SF4O3rnGFNSajq/HrzKT9mUra7u7sjI9gH0aeaLnbU21+ukTU13luy/wtzdF4mONy11XdvLiVHta9CjSWVsdKUk6SyDpCpUJpJYCCFE8ZNZckvW+6uPs3j/FQDq+7iwekxrbHXmHTB/+PIdZm2/wOZTD25EKxHHRtsJuGri6FGlMhHW6tDNn7r8RJBPEFCAayUlEX4dBOH/qMtWOugzGxo9Z9bPVRSX4y8z/9B3/HHlL1If6pLW0a8jIxqOoJlnsyz7mWvG6zk7wvlsQ5hxuU8zX758rnH2Ew3mM46Tt04y/8R8/rr8FwbFYFyv0+h4OuBphjUYRi23Wsb1txJTWLDnEgv3Xc51os2Hr4G8rpOUdD2/H41g9vYLWcbw+Lja8WLb6vRvWRWnh2dpF0UmiUUmklgIIYQoa+6l6un5/S7O3VDndBjRpjr/d7+bWnEyGBT+Pn2D2TsucPDSHZNtznY6BrfyZ6TvRSqs7M9aJwf+6+EOQINKDVjSbQlWmnw+RU66rU58d/3+AGedPfT7BWpZdgbo/Dp+8zjzTs5jy+UtJuMUdIpC98S7DE/WENB/OVR5zOKxLT90lQkr/yWjl9ATdT2ZMTAQO+uiJS5X46+y4NQC1pxfQ4re9M1BO992dKv6AntPuvLb4WukpBtMtneq48ErHWrQsnrFIo8JMhgUtp6+weztFzh02fSadLHTMTjYn2Gtq+ORxyB2kX+SWGQiiYUQQoiyKCwynl4zdpN6/yZu3vAWdKpTPJP
Fpaar/eJ/3BFuTF4yeLuoT4cHBGV6OrzpfQx7v6dvZW/O2KoTIn7V4Su6Vuua98niI2BRH7h5Wl22c4UXfoOqQcXyWYqLoijGCk8Pl451tHakb/XuDPx3E16R/6orrR2h/y9Q43GLx7rpZBSvLz1ivDZaVqvInKGP4Wpf9Gpat5Nvs/T0UpaeXpplvIX+nh+ptzqQnlAfnZWWnk0rM7J9AHW9zXP/dejSbWZtD2fLQ+WNbXRWPBtYhZfbVSdAKkkVmSQWmUhiIYQQoqyav/siH609BYC7kw1/vtm+SE9qE5LTWHrgCnN3XSIq3nTugFqeTozqUIOe2fVnT0+BnzqzO+4sr3iryY2fsx+/9/od69xKw8acV5OKOLVbF05e6mza3g0L/RmKW5ohjY0XNzL3xNwsFZ7c7d0ZVG8Qz9d5HhcbF0iOh2UvwKWdagMra3j2J2jQ2+Jx77kQw8iFh0lMSQegno8LC0e0LJYn+YqisOXMVb7es4hrho1YWceabHey8mFEo2EMafQstlrzvzk4fyOBH3eEs/rIddL0D25rNRoIqe/NKx3zMbO7yJEkFplIYiGEEKKsUhSFFxcc4u/T6mRm7Wt7MH9Yi/xNPJfJjfhk5u6+xOJ9l0m4fyOaoWW1iozqEECnOp65HzfmHMrs9rxcyYn99mpZ0v8G/ZcBdQdk3z7yGCx6BpJi1GW3ajB4DVSsXqDYzSUpLYmV51ay8NRCou5GmWyr5lKN4Q2H0z2gOzZaG9Md05LVUrSn191foYEeU6H5MEuEbeL4tTiGzjvA7btqWdxqlRxY9GIQfhUdCnW8dL2B9ccjmb09nFOR8ffX6tG5HMfBYweKTYRJ+0p2lRhUfxDP134eV1vzT2gYFZfMvN0XWbz/ijGhyhBUvSKvdKhBxzoeJVOi+REmiUUmklgIIYQoy24lptD1u53cTFD7vX/QrR4vtcvfLOkXbqoVnlaFXidV/6BffMacASPb16C5fwHmDAhdyMmN/6G/r1otqKKNKxue24SjtaNpu0u7YWl/SLl/c+rVEAatBOecqwxZSsy9GJaELeHXM78Snxpvsq2xR2NGNBxBJ79OuY8f0aerJXOP/vJg3RMToe1Y9cu1oAs3Exn8034i7ldc8nKxZeGIIOp4O+f7GEmp6Sw/eJWfdl3k2h3TCk/VKjnwcvsAnmnmS+jN/cw7MY/9UaaVpBx0DjxX+zkG1x+cayWp4hKfnKZWktp1kRsJpuNB6ng5M6pDAD2aVM5xULswJYlFJpJYCCGEKOt2nrvJ4J8PAGCt1bDildYkpepzrLATeuUOs7df4K9T0WS+C7DRWvFMoC8vtw8o3CzHigK/DeOdmzvZ6KQmE6MavkSQlRM346/g4VKVQFtPtKtegvT7Xa38WsELv4J9hcJ+/HzJqwLSlfgrLDipDk5ONaSa7NuhSgdjhad8P+1WFHXW8L3fP1jX+nV48lN11u7LeyAxWu3+5d8ainl268wiYu8x+Of9xmpKrvbWzBvegiaVnTm9fxP37lzH3s2XukEhaHUPqirdvpuqVnjae4k7D1d4quLKKx1q0KVB1gpPJ2NOMvfEXLZc2ZJtJanhDYZT062myT769FRCjy96cJ00GoxW99DboAJKSdez5sh1Zu8IJ/yhSlKVXe14sV0A/Vv44fhQJanU1BTWbJ/NjfgreLpUpXeHUdjYWH4weGmJo8wlFjNmzOCrr74iKiqKJk2aMH36dFq2bJmvfSWxEEIIUR5M2hDG7B3hgDqZnj7T5GE+rnZ82K0+ttZWzN4ezoFLt032dbbVMbCVPyPaVMPTpfCzKwNw7w5Xf2xHT1dI12jUG+xMN+Ne6elMuHWHzkn3oFYXeH4B2BSua05+ZTdng5eDFxNaTsDb0Vu9CX64wpOVjm7VuzGswbAsN8H5piiweyps+ejBumrt4dZ5SMjUbcilMnT9Qp14z0xu301l+LwDHLumDrjurjvEB7oFePNgwrpoKhERPBH3Fs8zZ2c
4yw9dJTnNtMJTxzoejGpfg1YBeVd4uhJ/hYWnFmZbSapDlQ4MbzicQM9Atu6ezOSzi4nWZrpO9AoTag+kc9v3ivrRMRgUtoRFM2v7BUKvxJpsc7W3ZnArf4a2roaHsy0//v4+S2PWEJNpHJF7uoEB7r0Z2euzIseSX6UlDihjicWvv/7KkCFDmDVrFkFBQUydOpXffvuNM2fO4OmZd/ULSSyEEEKUB6npBjpP2c6V20n53sfLxVat8NSyKs52Ra8YZHR5L6+uH8wuR/ssmzT3bzumGCrReegWyG1wdzHYcnkL47aNM0kacuOgc+D52s8zqP6g4uu2c3g+rBurvqnI1v0b6r4LzZpcJKakM2rRIZzC/2Sm9VQAMr9syMhFR6e9xUbDgwe4WisNPZuoFZ7q+RT8XurWvVssOb2EZaeXZele5m9bicvJ98fZZEpUjNdJzeJJLjIcvHSb2dsvsCXshsl6G50VT/v+yd/2f6tXSjaxvObWyyI39T/+/j7f3/m9xOPIUKYSi6CgIFq0aMH336uvEg0GA35+frz++utMmDAhz/0lsRBCCFEe6A0KrT7fys3ElDzb1vR0YmT7AHo39TXLjMX69FSeXNiMm1aa7McUKApeBlg9YFeRu7vkGodBT+8/enMj6UaebTMGGvet01et8FTcTqyCFcNzaaABZx8Yvc+s3aKSUlKI/6Y5ntwhu7H4BgWiqciTKV9iZ2PNs4FVGBLsj2+FrEligc+dlsSq8LUsPLOMyKSovHfIuE6e3Yj24UHyRXQh5i6L9l5i48ko0vUKVuhxrzmZW9qcr1l3vcIvPdZjY22+azY1LZVBa7sRk0McmvtxbBwcarFuUWUmsUhNTcXBwYEVK1bQu3dv4/qhQ4cSGxvL77//nmWflJQUUlIe/FKNj4/Hz89PEgshhBBl2t4LtxgwZ1+e7d7uUpvRHWsWuHJUQRw88jMj/p1qtuMXt6H1h/J64OvmLY16cScs6G6+4z9C0oCNjg7McHPlurV531iVVR9Wfpm+T75hkXMVJLEo1cPhY2Ji0Ov1eHl5maz38vIiKir7THfSpEm4uroa//j5+VkiVCGEEKJE3UhIzrsR4FfRwaxJBcDN+CtmPX5xq1+pvvnnW0iMzrtNOWEN9LibxOt34vJsK7J3o5T+G9Pl3eTR8t577zFu3DjjcsYbCyGEEKIs83TO36Dr/LYrCg+XqvlqV8/RD1cXX7PFEZcSR9jtsDzbeTh4mC0GIyevvNsA+DQB+wKU+C2gxDsxON05kXc7t4Y4ubmbLQ4Az5QbQGKe7erprXA1c+J3Jz2FM7qcxsA8EJACDpjvLUsSaYTn46N65vPfmKWV6sTC3d0drVZLdLRplh8dHY23d/YDqmxtbbG1tXwpLiGEEKIktaxeER9XO6LikrMdpqwBvF3V0rPmFthoMF5HvuWGFSg59BP3MsDS3mvMPsYiZGUIN5JuZDt4W4MGLwcvAj0DzRaDkX9rtfpTfCTk9BNyqQwv/2PWMRb26elE/682HsqtHMdY3NBUwmPMdtCZ9zYxMD0Vr4WBeV8nQw6a9ToBtbRryKJAbmk1Ocbirlf4bZh5xzbkN47eHUaZLYaiKNVdoWxsbGjevDlbt241rjMYDGzdupXg4OASjEwIIYQoXbRWGib2qA8YawwZZSxP7FE/y5wDZolFZ8OE2gPVcz80lDNjeXztgWa/WdRaaZnQUi30onnoW8lYHt9yvMl8FmZjpVVLyt4/u6n7y10nmzWpANDqdEQETwQeVIHKkLEcGTzRZD4L88VSOq4TABsbWwa49841lv7uvc0+YLq0xFFYpTqxABg3bhxz5sxhwYIFhIWF8eqrr3L37l2GD8+tsoIQQghR/nRt6MPMQYF4u5p2d/J2tWPmoEC6NvSxWCyd277HlJoD8Xyod4mXofhLiOYah39npnScgqeDaYl6LwcvpnScQmf/zhaJA1BLyfZdCC4P/RxcKpu91GxmzUKGcqz1NG5qKpm
sv6GpxLHW02gWMtQicUDpuU4ARvb6jNfcelFJb3pD765XLFritbTEURiluipUhu+//944QV7Tpk2ZNm0aQUFB+dpXys0KIYQob/QGhQMXb+c487ZFYzHDjMqFiiOPmbctyqC36MzbOdGnp+c687ZlYykd1wmUnhmvS0scZabcbHGQxEIIIYQQQojCKTPlZoUQQgghhBCPBkkshBBCCCGEEEUmiYUQQgghhBCiyCSxEEIIIYQQQhSZJBZCCCGEEEKIIpPEQgghhBBCCFFkklgIIYQQQgghikwSCyGEEEIIIUSRSWIhhBBCCCGEKDJJLIQQQgghhBBFJomFEEIIIYQQosh0JR2AuSmKAkB8fHwJRyKEEEIIIcSjJeMeOuOeOjdlPrFISEgAwM/Pr4QjEUIIIYQQ4tGUkJCAq6trrm00Sn7Sj0eYwWAgIiICZ2dnNBpNgfaNj4/Hz8+Pq1ev4uLiYqYIRVkg14rID7lORH7JtSLyQ64TkR9FvU4URSEhIYHKlStjZZX7KIoy/8bCysqKKlWqFOkYLi4u8g9W5ItcKyI/5DoR+SXXisgPuU5EfhTlOsnrTUUGGbwthBBCCCGEKDJJLIQQQgghhBBFJolFLmxtbZk4cSK2trYlHYoo5eRaEfkh14nIL7lWRH7IdSLyw5LXSZkfvC2EEEIIIYQwP3ljIYQQQgghhCgySSyEEEIIIYQQRSaJhRBCCCGEEKLIyn1iMWPGDKpVq4adnR1BQUEcOHAg1/a//fYbdevWxc7OjkaNGrFhwwYLRSpKWkGulTlz5tCuXTvc3Nxwc3Ojc+fOeV5bomwo6O+UDMuWLUOj0dC7d2/zBihKjYJeK7GxsYwZMwYfHx9sbW2pXbu2/D+oHCjodTJ16lTq1KmDvb09fn5+jB07luTkZAtFK0rCjh076NGjB5UrV0aj0bBmzZo899m2bRuBgYHY2tpSs2ZN5s+fXzzBKOXYsmXLFBsbG2Xu3LnKyZMnlZdfflmpUKGCEh0dnW373bt3K1qtVvnyyy+VU6dOKR988IFibW2tHD9+3MKRC0sr6LXywgsvKDNmzFCOHDmihIWFKcOGDVNcXV2Va9euWThyYUkFvU4yXLx4UfH19VXatWun9OrVyzLBihJV0GslJSVFeeyxx5Snn35a2bVrl3Lx4kVl27ZtytGjRy0cubCkgl4nixcvVmxtbZXFixcrFy9eVDZt2qT4+PgoY8eOtXDkwpI2bNigvP/++8qqVasUQFm9enWu7cPDwxUHBwdl3LhxyqlTp5Tp06crWq1W2bhxY5FjKdeJRcuWLZUxY8YYl/V6vVK5cmVl0qRJ2bbv27ev0q1bN5N1QUFByqhRo8wapyh5Bb1WHpaenq44OzsrCxYsMFeIohQozHWSnp6utG7dWvnpp5+UoUOHSmJRThT0Wpk5c6YSEBCgpKamWipEUQoU9DoZM2aM8vjjj5usGzdunNKmTRuzxilKj/wkFu+++67SoEEDk3X9+vVTQkJCinz+ctsVKjU1lcOHD9O5c2fjOisrKzp37szevXuz3Wfv3r0m7QFCQkJybC/KhsJcKw9LSkoiLS2NihUrmitMUcIKe5188skneHp68uKLL1oiTFEKFOZa+eOPPwgODmbMmDF4eXnRsGFDPv/8c/R6vaXCFhZWmOukdevWHD582NhdKjw8nA0bNvD0009bJGbxaDDn/ayuyEd4RMXExKDX6/Hy8jJZ7+XlxenTp7PdJyoqKtv2UVFRZotTlLzCXCsPGz9+PJUrV87yD1mUHYW5Tnbt2sXPP//M0aNHLRChKC0Kc62Eh4fz999/M3DgQDZs2MD58+cZPXo0aWlpTJw40RJhCwsrzHXywgsvEBMTQ9u2bVEUhfT0dF555RX++9//WiJk8YjI6X42Pj6ee/fuYW9vX+hjl9s3FkJYyuTJk1m2bBmrV6/Gzs6upMMRpURCQgKDBw9mzpw5uLu7l3Q4opQzGAx4enry448/0rx5c/r168f
777/PrFmzSjo0UYps27aNzz//nB9++IHQ0FBWrVrF+vXr+fTTT0s6NFFOlNs3Fu7u7mi1WqKjo03WR0dH4+3tne0+3t7eBWovyobCXCsZvv76ayZPnsyWLVto3LixOcMUJayg18mFCxe4dOkSPXr0MK4zGAwA6HQ6zpw5Q40aNcwbtCgRhfmd4uPjg7W1NVqt1riuXr16REVFkZqaio2NjVljFpZXmOvkww8/ZPDgwbz00ksANGrUiLt37zJy5Ejef/99rKzkebLI+X7WxcWlSG8roBy/sbCxsaF58+Zs3brVuM5gMLB161aCg4Oz3Sc4ONikPcDmzZtzbC/KhsJcKwBffvkln376KRs3buSxxx6zRKiiBBX0Oqlbty7Hjx/n6NGjxj89e/akU6dOHD16FD8/P0uGLyyoML9T2rRpw/nz543JJ8DZs2fx8fGRpKKMKsx1kpSUlCV5yEhG1XG9Qpj5frbIw78fYcuWLVNsbW2V+fPnK6dOnVJGjhypVKhQQYmKilIURVEGDx6sTJgwwdh+9+7dik6nU77++mslLCxMmThxopSbLScKeq1MnjxZsbGxUVasWKFERkYa/yQkJJTURxAWUNDr5GFSFar8KOi1cuXKFcXZ2Vl57bXXlDNnzijr1q1TPD09lf/9738l9RGEBRT0Opk4caLi7OysLF26VAkPD1f++usvpUaNGkrfvn1L6iMIC0hISFCOHDmiHDlyRAGUKVOmKEeOHFEuX76sKIqiTJgwQRk8eLCxfUa52XfeeUcJCwtTZsyYIeVmi8v06dOVqlWrKjY2NkrLli2Vffv2Gbd16NBBGTp0qEn75cuXK7Vr11ZsbGyUBg0aKOvXr7dwxKKkFORa8ff3V4AsfyZOnGj5wIVFFfR3SmaSWJQvBb1W9uzZowQFBSm2trZKQECA8tlnnynp6ekWjlpYWkGuk7S0NOWjjz5SatSoodjZ2Sl+fn7K6NGjlTt37lg+cGEx//zzT7b3HBnXxtChQ5UOHTpk2adp06aKjY2NEhAQoMybN69YYtEoirwbE0IIIYQQQhRNuR1jIYQQQgghhCg+klgIIYQQQgghikwSCyGEEEIIIUSRSWIhhBBCCCGEKDJJLIQQQgghhBBFJomFEEIIIYQQosgksRBCCCGEEEIUmSQWQgghhBBCiCKTxEIIIcqgYcOG0bt3b+Nyx44deeuttywex7Zt29BoNMTGxlr83NWqVWPq1KlFOsb8+fOpUKFCrm0++ugjmjZtalwuLd+9EEJYmiQWQghhIcOGDUOj0aDRaLCxsaFmzZp88sknpKenm/3cq1at4tNPP81XW0snA9WqVTN+L46OjgQGBvLbb79Z5NzF4e2332br1q05bn/4uy+OhEcIIUojSSyEEMKCunbtSmRkJOfOneM///kPH330EV999VW2bVNTU4vtvBUrVsTZ2bnYjlfcPvnkEyIjIzly5AgtWrSgX79+7NmzJ9u2xfm9FAcnJycqVaqU4/bS/t0LIURxkcRCCCEsyNbWFm9vb/z9/Xn11Vfp3Lkzf/zxB/CgC81nn31G5cqVqVOnDgBXr16lb9++VKhQgYoVK9KrVy8uXbpkPKZer2fcuHFUqFCBSpUq8e6776Ioisl5H+6Ok5KSwvjx4/Hz88PW1paaNWvy888/c+nSJTp16gSAm5sbGo2GYcOGAWAwGJg0aRLVq1fH3t6eJk2asGLFCpPzbNiwgdq1a2Nvb0+nTp1M4syNs7Mz3t7e1K5dmxkzZmBvb8/atWsB9Qn/p59+ypAhQ3BxcWHkyJEArFy5kgYNGmBra0u1atX45ptvshw3ISGBAQMG4OjoiK+vLzNmzDDZPmXKFBo1aoSjoyN+fn6MHj2axMTELMdZs2YNtWrVws7OjpCQEK5evWrc9nBXqIdl/u47duzI5cuXGTt2rPEtzd27d3FxccnyXa5ZswZHR0cSEhLy9R0KIURJk8RCCCFKkL29vckT+K1bt3LmzBk2b97MunX
rSEtLIyQkBGdnZ3bu3Mnu3btxcnKia9euxv2++eYb5s+fz9y5c9m1axe3b99m9erVuZ53yJAhLF26lGnTphEWFsbs2bNxcnLCz8+PlStXAnDmzBkiIyP57rvvAJg0aRILFy5k1qxZnDx5krFjxzJo0CC2b98OqAnQM888Q48ePTh69CgvvfQSEyZMKPB3otPpsLa2Nvlevv76a5o0acKRI0f48MMPOXz4MH379qV///4cP36cjz76iA8//JD58+ebHOurr74y7jdhwgTefPNNNm/ebNxuZWXFtGnTOHnyJAsWLODvv//m3XffNTlGUlISn332GQsXLmT37t3ExsbSv3//An8uULtFValSxfiGJjIyEkdHR/r378+8efNM2s6bN4/nnntO3nYIIR4dihBCCIsYOnSo0qtXL0VRFMVgMCibN29WbG1tlbffftu43cvLS0lJSTHus2jRIqVOnTqKwWAwrktJSVHs7e2VTZs2KYqiKD4+PsqXX35p3J6WlqZUqVLFeC5FUZQOHToob775pqIoinLmzBkFUDZv3pxtnP/8848CKHfu3DGuS05OVhwcHJQ9e/aYtH3xxReVAQMGKIqiKO+9955Sv359k+3jx4/PcqyH+fv7K99++63xs33++ecKoKxbt864vXfv3ib7vPDCC8qTTz5psu6dd94xOb+/v7/StWtXkzb9+vVTnnrqqRxj+e2335RKlSoZl+fNm6cAyr59+4zrwsLCFEDZv3+/oiiKMnHiRKVJkybG7Zl/zopi+t0//Hkz7N+/X9FqtUpERISiKIoSHR2t6HQ6Zdu2bTnGKoQQpY28sRBCCAtat24dTk5O2NnZ8dRTT9GvXz8++ugj4/ZGjRphY2NjXD527Bjnz5/H2dkZJycnnJycqFixIsnJyVy4cIG4uDgiIyMJCgoy7qPT6XjsscdyjOHo0aNotVo6dOiQ77jPnz9PUlISTz75pDEOJycnFi5cyIULFwAICwsziQMgODg4X8cfP348Tk5OODg48MUXXzB58mS6detm3P7w5wkLC6NNmzYm69q0acO5c+fQ6/U5nj84OJiwsDDj8pYtW3jiiSfw9fXF2dmZwYMHc+vWLZKSkoxtdDodLVq0MC7XrVuXChUqmBynqFq2bEmDBg1YsGABAL/88gv+/v60b9++2M4hhBDmpivpAIQQojzp1KkTM2fOxMbGhsqVK6PTmf4adnR0NFlOTEykefPmLF68OMuxPDw8ChWDvb19gffJGHewfv16fH19TbbZ2toWKo7M3nnnHYYNG4aTkxNeXl5oNBqT7Q9/L8Xh0qVLdO/enVdffZXPPvuMihUrsmvXLl588UVSU1NxcHAo9nPm5qWXXmLGjBlMmDCBefPmMXz48CzfgxBClGbyxkIIISzI0dGRmjVrUrVq1SxJRXYCAwM5d+4cnp6e1KxZ0+SPq6srrq6u+Pj4sH//fuM+6enpHD58OMdjNmrUCIPBYBwb8bCMNyaZn/zXr18fW1tbrly5kiUOPz8/AOrVq8eBAwdMjrVv3748PyOAu7s7NWvWxNvbO1830/Xq1WP37t0m63bv3k3t2rXRarU5nn/fvn3Uq1cPgMOHD2MwGPjmm29o1aoVtWvXJiIiIsu50tPTOXTokHH5zJkzxMbGGo9TUDY2NibfbYZBgwZx+fJlpk2bxqlTpxg6dGihji+EECVFEgshhCjFBg4ciLu7O7169WLnzp1cvHiRbdu28cYbb3Dt2jUA3nzzTSZPnsyaNWs4ffo0o0ePznUOimrVqjF06FBGjBjBmjVrjMdcvnw5AP7+/mg0GtatW8fNmzdJTEzE2dmZt99+m7Fjx7JgwQIuXLhAaGgo06dPN3bfeeWVVzh37hzvvPMOZ86cYcmSJVkGUxeX//znP2zdupVPP/2Us2fPsmDBAr7//nvefvttk3a7d+/myy+/5OzZs8yYMYPffvuNN998E4CaNWuSlpbG9OnTCQ8PZ9GiRcyaNSvLuaytrXn
99dfZv38/hw8fZtiwYbRq1YqWLVsWKvZq1aqxY8cOrl+/TkxMjHG9m5sbzzzzDO+88w5dunShSpUqhTq+EEKUFEkshBCiFHNwcGDHjh1UrVqVZ555hnr16vHiiy+SnJyMi4sLoN5kDx48mKFDhxIcHIyzszN9+vTJ9bgzZ87kueeeY/To0dStW5eXX36Zu3fvAuDr68vHH3/MhAkT8PLy4rXXXgPg008/5cMPP2TSpEnUq1ePrl27sn79eqpXrw5A1apVWblyJWvWrKFJkybMmjWLzz//3CzfS2BgIMuXL2fZsmU0bNiQ//u//+OTTz4xlsbN8J///IdDhw7RrFkz/ve//zFlyhRCQkIAaNKkCVOmTOGLL76gYcOGLF68mEmTJmU5l4ODA+PHj+eFF16gTZs2ODk58euvvxY69k8++YRLly5Ro0aNLN3ZMrphjRgxotDHF0KIkqJRlIeKnQshhBCiRCxatIixY8cSERFhMohfCCEeBTJ4WwghhChhSUlJREZGMnnyZEaNGiVJhRDikSRdoYQQQogS9uWXX1K3bl28vb157733SjocIYQoFOkKJYQQQgghhCgyeWMhhBBCCCGEKDJJLIQQQgghhBBFJomFEEIIIYQQosgksRBCCCGEEEIUmSQWQgghhBBCiCKTxEIIIYQQQghRZJJYCCGEEEIIIYpMEgshhBBCCCFEkUliIYQQQgghhCiy/wdvEVf86WVc1wAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "\n", + "probs_standard = df_oof_compare[\"StandardModel\"].values\n", + "probs_raw = df_oof_compare[\"ConformalRaw\"].values\n", + "probs_norm = df_oof_compare[\"ConformalNorm\"].values\n", + "\n", + "plt.figure(figsize=(8, 5))\n", + "bins = np.linspace(0, 1, 21)\n", + "\n", + "\n", + "def plot_percentage_line(probs, bins, label, color):\n", + " counts, bin_edges = np.histogram(probs, bins=bins)\n", + " percent = 100 * counts / len(probs)\n", + " bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2\n", + " plt.plot(bin_centers, percent, marker=\"o\", label=label, color=color, linewidth=2)\n", + "\n", + "\n", + "plot_percentage_line(probs_standard, bins, \"Standard Model\", \"tab:blue\")\n", + "plot_percentage_line(probs_raw, bins, \"Conformal Raw\", \"tab:orange\")\n", + "plot_percentage_line(probs_norm, bins, \"Conformal Norm\", \"tab:green\")\n", + "\n", + "plt.xlabel(\"Predicted Probability\")\n", + "plt.ylabel(\"Percentage (%)\")\n", + "plt.title(\"Percentage of Predicted Probabilities in Each Bin (OOF)\")\n", + "plt.legend()\n", + "plt.tight_layout()\n", + "plt.show()" ] }, { @@ -631,7 +767,7 @@ } ], "source": [ - "### 3.3 Visualizing Uncertainty and Prediction Sets\n", + "# 3.3 Visualizing Uncertainty and Prediction Sets\n", "\n", "plt.figure(figsize=(8, 4))\n", "plt.hist(p1_cp, bins=20, alpha=0.7, label=\"CrossConformalCV Probabilities\")\n", @@ -643,7 +779,6 @@ "plt.show()\n", "\n", "\n", - "\n", "# Get conformal prediction sets (list of sets per sample)\n", "conf_pred_sets = cc_clf.predict_conformal_set(smiles_test, confidence=0.9)\n", "\n", @@ -666,11 +801,13 @@ "})\n", "display(df_cp_class.head())\n", "\n", + "\n", "def coverage_and_set_size(y_true, conf_sets):\n", " covered = [y in s for y, s in zip(y_true, conf_sets)]\n", " avg_size = np.mean([len(s) for s in conf_sets])\n", " return np.mean(covered), avg_size\n", "\n", + "\n", "coverage, 
avg_set_size = coverage_and_set_size(y_test_cp, conf_pred_sets)\n", "error = 1 - coverage\n", "empty = np.mean([len(s) == 0 for s in conf_pred_sets])\n", @@ -683,8 +820,7 @@ "print(\"Brier:\", brier_score_loss(y_test_cp, p1_norm))\n", "print(\"AUROC:\", roc_auc_score(y_test_cp, p1_norm))\n", "print(\"F1:\", f1_score(y_test_cp, (p1_norm >= 0.5).astype(int)))\n", - "print(\"MCC:\", matthews_corrcoef(y_test_cp, (p1_norm >= 0.5).astype(int)))\n", - "\n" + "print(\"MCC:\", matthews_corrcoef(y_test_cp, (p1_norm >= 0.5).astype(int)))\n" ] }, { @@ -878,8 +1014,7 @@ ], "source": [ "\n", - "## 4. Regression: Conformal Prediction and Interval Evaluation\n", - "\n", + "# 4. Regression: Conformal Prediction and Interval Evaluation\n", "\n", "\n", "# --- Prepare regression data (filter NaNs as before) ---\n", diff --git a/pyproject.toml b/pyproject.toml index 053d92da..289f2f77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -204,9 +204,6 @@ exclude = ["tests", "docs"] [tool.setuptools.package-data] "molpipeline" = ["py.typed"] -[tool.uv.sources] -molpipeline = { workspace = true } - [dependency-groups] dev = [ "bandit>=1.8.3", @@ -216,7 +213,6 @@ dev = [ "flake8>=7.2.0", "interrogate>=1.7.0", "isort>=6.0.1", - "molpipeline[chemprop]", "mypy>=1.15.0", "pre-commit>=4.2.0", "pydocstyle>=6.3.0", @@ -225,4 +221,4 @@ dev = [ "rdkit<2025.3.3", # only temporarily, see https://github.com/kuelumbus/rdkit-pypi/issues/132 "rdkit-stubs>=0.8", "ruff>=0.11.4", -] +] \ No newline at end of file diff --git a/tests/test_experimental/test_uncertainty/__init__.py b/tests/test_experimental/test_uncertainty/__init__.py index b5f6abf0..269df2fa 100644 --- a/tests/test_experimental/test_uncertainty/__init__.py +++ b/tests/test_experimental/test_uncertainty/__init__.py @@ -1,4 +1 @@ -"""Unit tests for conformal prediction wrappers in molpipeline.experimental.uncertainty.conformal. 
-""" - -"Uncertainty test module" \ No newline at end of file +"""Unit tests for conformal prediction wrappers in molpipeline.experimental.uncertainty.conformal.""" diff --git a/tests/test_experimental/test_uncertainty/test_conformal.py b/tests/test_experimental/test_uncertainty/test_conformal.py index 87cbc420..dbc560fd 100644 --- a/tests/test_experimental/test_uncertainty/test_conformal.py +++ b/tests/test_experimental/test_uncertainty/test_conformal.py @@ -35,7 +35,7 @@ def test_unified_conformal_classifier(self) -> None: def test_unified_conformal_regressor(self) -> None: """Test UnifiedConformalCV with a regressor.""" - x, y = make_regression(n_samples=100, n_features=10, random_state=42) + x, y, _ = make_regression(n_samples=100, n_features=10, random_state=42) x_train, x_calib, y_train, y_calib = train_test_split( x, y, test_size=0.2, random_state=42, ) @@ -62,7 +62,7 @@ def test_cross_conformal_classifier(self) -> None: def test_cross_conformal_regressor(self) -> None: """Test CrossConformalCV with a regressor.""" - x, y = make_regression(n_samples=100, n_features=10, random_state=42) + x, y, _ = make_regression(n_samples=100, n_features=10, random_state=42) reg = RandomForestRegressor(random_state=42) ccp = CrossConformalCV(reg, estimator_type="regressor", n_folds=3) ccp.fit(x, y) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 9fd85a94..d308f412 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -1,3 +1,5 @@ +# pylint: disable=too-many-locals, import-outside-toplevel, invalid-name + """Test functionality of the pipeline class.""" from __future__ import annotations @@ -381,10 +383,13 @@ def test_conformal_pipeline_classifier(self) -> None: This test does not take any parameters and does not return a value. 
""" - from molpipeline.experimental.uncertainty.conformal import UnifiedConformalCV, CrossConformalCV + from molpipeline.experimental.uncertainty.conformal import ( + CrossConformalCV, + UnifiedConformalCV, + ) # Use the global test data - smiles = TEST_SMILES + smiles = np.array(TEST_SMILES) y = np.array(CONTAINS_OX) # Build a pipeline: SMILES -> Mol -> MorganFP -> RF From 6a1f1f001bbb767dc2d962cfe0841f1dfd689673 Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Mon, 7 Jul 2025 12:13:08 +0200 Subject: [PATCH 05/20] ruffed and ready --- .../experimental/uncertainty/conformal.py | 30 +- .../advanced_04_conformal_prediction.ipynb | 556 +++++++----------- pyproject.toml | 2 +- .../test_uncertainty/test_conformal.py | 11 +- tests/test_pipeline.py | 11 +- 5 files changed, 245 insertions(+), 365 deletions(-) diff --git a/molpipeline/experimental/uncertainty/conformal.py b/molpipeline/experimental/uncertainty/conformal.py index 9f948ede..13b3fa65 100644 --- a/molpipeline/experimental/uncertainty/conformal.py +++ b/molpipeline/experimental/uncertainty/conformal.py @@ -17,8 +17,7 @@ def bin_targets(y: NDArray[Any], n_bins: int = 10) -> NDArray[np.int_]: - """ - Bin continuous targets for stratified splitting in regression. + """Bin continuous targets for stratified splitting in regression. Parameters ---------- @@ -31,6 +30,7 @@ def bin_targets(y: NDArray[Any], n_bins: int = 10) -> NDArray[np.int_]: ------- np.ndarray Binned targets. + """ y = np.asarray(y) bins = np.linspace(np.min(y), np.max(y), n_bins + 1) @@ -40,9 +40,9 @@ def bin_targets(y: NDArray[Any], n_bins: int = 10) -> NDArray[np.int_]: class UnifiedConformalCV(BaseEstimator): - """One wrapper to rule them all: conformal prediction for both classifiers and regressors. + """Conformal prediction wrapper for both classifiers and regressors. - Uses crepes under the hood, so you know it's sweet. + Uses crepes under the hood. 
Parameters ---------- @@ -146,7 +146,10 @@ def fit(self, x: NDArray[Any], y: NDArray[Any]) -> "UnifiedConformalCV": return self def calibrate( - self, x_calib: NDArray[Any], y_calib: NDArray[Any], **calib_params: Any, + self, + x_calib: NDArray[Any], + y_calib: NDArray[Any], + **calib_params: Any, ) -> None: """Calibrate the conformal predictor. @@ -225,7 +228,9 @@ def predict_proba(self, x: NDArray[Any]) -> NDArray[Any]: return conformal.predict_proba(x) def predict_conformal_set( - self, x: NDArray[Any], confidence: float | None = None, + self, + x: NDArray[Any], + confidence: float | None = None, ) -> Any: """Predict conformal sets. @@ -309,7 +314,7 @@ def predict_int(self, x: NDArray[Any], confidence: float | None = None) -> Any: class CrossConformalCV(BaseEstimator): - """Cross-conformal prediction for both classifiers and regressors using WrapClassifier/WrapRegressor. + """Cross-conformal prediction using WrapClassifier/WrapRegressor. Handles Mondrian (class_cond) logic as described. @@ -416,7 +421,9 @@ def fit( self.models_ = [] if self.estimator_type == "classifier": splitter = StratifiedKFold( - n_splits=self.n_folds, shuffle=True, random_state=42, + n_splits=self.n_folds, + shuffle=True, + random_state=42, ) y_split = y elif self.estimator_type == "regressor": @@ -448,7 +455,8 @@ def fit( calib_idx_val = calib_idx def _bin_func( - _: Any, calib_idx_val: Any = calib_idx_val, + _: Any, + calib_idx_val: Any = calib_idx_val, ) -> Any: return y[calib_idx_val] @@ -516,7 +524,9 @@ def predict_proba(self, x: NDArray[Any]) -> NDArray[Any]: return proba def predict_conformal_set( - self, x: NDArray[Any], confidence: float | None = None, + self, + x: NDArray[Any], + confidence: float | None = None, ) -> list[list[Any]]: """Predict conformal sets using the cross-conformal predictor. 
diff --git a/notebooks/advanced_04_conformal_prediction.ipynb b/notebooks/advanced_04_conformal_prediction.ipynb index 2afd872d..bba7b9d3 100644 --- a/notebooks/advanced_04_conformal_prediction.ipynb +++ b/notebooks/advanced_04_conformal_prediction.ipynb @@ -13,12 +13,23 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "id": "ab2b079b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'matplotlib'", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# 1. Import Required Libraries and Define Utility Functions\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mmatplotlib\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpyplot\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mplt\u001b[39;00m\n\u001b[32m 3\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnp\u001b[39;00m\n\u001b[32m 4\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpd\u001b[39;00m\n", + "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'matplotlib'" + ] + } + ], "source": [ - "\n", "# 1. 
Import Required Libraries and Define Utility Functions\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", @@ -44,31 +55,78 @@ "from molpipeline.pipeline import Pipeline\n", "from molpipeline.post_prediction import PostPredictionWrapper\n", "\n", + "THRESHOLD = 0.5\n", + "\n", + "\n", + "def compute_ece(y_true: np.ndarray, probs: np.ndarray, n_bins: int = 10) -> float:\n", + " \"\"\"Compute Expected Calibration Error (ECE).\n", + "\n", + " Parameters\n", + " ----------\n", + " y_true : np.ndarray\n", + " True binary labels.\n", + " probs : np.ndarray\n", + " Predicted probabilities.\n", + " n_bins : int, optional\n", + " Number of bins (default: 10).\n", "\n", - "def compute_ece(y_true, probs, n_bins=10):\n", + " Returns\n", + " -------\n", + " float\n", + " Expected calibration error.\n", + "\n", + " \"\"\"\n", " bins = np.linspace(0, 1, n_bins + 1)\n", " binids = np.digitize(probs, bins) - 1\n", " ece = 0.0\n", " for i in range(n_bins):\n", " mask = binids == i\n", " if np.any(mask):\n", - " acc = np.mean(y_true[mask] == (probs[mask] >= 0.5))\n", + " acc = np.mean(y_true[mask] == (probs[mask] >= THRESHOLD))\n", " conf = np.mean(probs[mask])\n", " ece += np.abs(acc - conf) * np.sum(mask) / len(y_true)\n", " return ece\n", "\n", "\n", - "def compute_uncertainty_error_corr(y_true, probs):\n", + "def uncertain_error_corr(y_true: np.ndarray, probs: np.ndarray) -> float:\n", + " \"\"\"Compute correlation between uncertainty and error.\n", + "\n", + " Parameters\n", + " ----------\n", + " y_true : np.ndarray\n", + " True binary labels.\n", + " probs : np.ndarray\n", + " Predicted probabilities.\n", + "\n", + " Returns\n", + " -------\n", + " float\n", + " Correlation coefficient between entropy and error.\n", + "\n", + " \"\"\"\n", " eps = 1e-12\n", " entropy = -probs * np.log(probs + eps) - (1 - probs) * np.log(1 - probs + eps)\n", - " error = np.abs(y_true - (probs >= 0.5))\n", + " error = np.abs(y_true - (probs >= THRESHOLD))\n", " return 
np.corrcoef(entropy, error)[0, 1]\n", "\n", "\n", - "def compute_sharpness(probs):\n", + "def compute_sharpness(probs: np.ndarray) -> float:\n", + " \"\"\"Compute sharpness (mean entropy) of predicted probabilities.\n", + "\n", + " Parameters\n", + " ----------\n", + " probs : np.ndarray\n", + " Predicted probabilities.\n", + "\n", + " Returns\n", + " -------\n", + " float\n", + " Mean entropy of predicted probabilities.\n", + "\n", + " \"\"\"\n", " eps = 1e-12\n", " entropy = -probs * np.log(probs + eps) - (1 - probs) * np.log(1 - probs + eps)\n", - " return np.mean(entropy)\n" + " return np.mean(entropy)" ] }, { @@ -78,21 +136,23 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Shape of X=(138, 256), y_class=(138,), y_reg=(138,)\n" + "ename": "NameError", + "evalue": "name 'pd' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mNameError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# 2. Data Loading, Cleaning, and Featurization\u001b[39;00m\n\u001b[32m 2\u001b[39m \u001b[38;5;66;03m# Load real data\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m df = \u001b[43mpd\u001b[49m.read_csv(\u001b[33m\"\u001b[39m\u001b[33mexample_data/renin_harren.csv\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 4\u001b[39m smiles = df[\u001b[33m\"\u001b[39m\u001b[33mpubchem_smiles\u001b[39m\u001b[33m\"\u001b[39m].to_numpy()\n\u001b[32m 5\u001b[39m y_reg = df[\u001b[33m\"\u001b[39m\u001b[33mpIC50\u001b[39m\u001b[33m\"\u001b[39m].to_numpy()\n", + "\u001b[31mNameError\u001b[39m: name 'pd' is not defined" ] } ], "source": [ - "\n", - "\n", "# 2. 
Data Loading, Cleaning, and Featurization\n", "# Load real data\n", "df = pd.read_csv(\"example_data/renin_harren.csv\")\n", - "smiles = df[\"pubchem_smiles\"].values\n", - "y_reg = df[\"pIC50\"].values\n", + "smiles = df[\"pubchem_smiles\"].to_numpy()\n", + "y_reg = df[\"pIC50\"].to_numpy()\n", "\n", "# Binarize for classification: top 20% as 'active'\n", "threshold = np.nanquantile(y_reg, 0.8)\n", @@ -101,300 +161,71 @@ "# Featurization pipeline (NaN-safe)\n", "error_filter = ErrorFilter(filter_everything=True)\n", "error_replacer = FilterReinserter.from_error_filter(error_filter, fill_value=np.nan)\n", - "featurizer = Pipeline([\n", - " (\"smi2mol\", SmilesToMol()),\n", - " (\"error_filter\", error_filter),\n", - " (\"morgan\", MolToMorganFP(radius=2, n_bits=256, return_as=\"dense\")),\n", - " (\"error_replacer\", PostPredictionWrapper(error_replacer)),\n", - "], n_jobs=1)\n", + "featurizer = Pipeline(\n", + " [\n", + " (\"smi2mol\", SmilesToMol()),\n", + " (\"error_filter\", error_filter),\n", + " (\"morgan\", MolToMorganFP(radius=2, n_bits=256, return_as=\"dense\")),\n", + " (\"error_replacer\", PostPredictionWrapper(error_replacer)),\n", + " ],\n", + " n_jobs=1,\n", + ")\n", "X_feat = featurizer.transform(smiles)\n", "\n", "print(f\"Shape of X={X_feat.shape}, y_class={y_class.shape}, y_reg={y_reg.shape}\")\n", "\n", - "\n", - "# ## 3. 
Classification: Splitting, Model Benchmarking, and Conformal Prediction\n", - "# # Train/test split for classification\n", - "# X_train, X_test, y_train, y_test = train_test_split(\n", - "# X_feat, y_class, test_size=0.3, random_state=42, stratify=y_class\n", - "# )\n", - "# skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)\n", - "\n", - "# # Split for conformal pipeline (use SMILES)\n", - "# smiles_train, smiles_test, y_train_cp, y_test_cp = train_test_split(\n", - "# smiles, y_class, test_size=0.3, random_state=42, stratify=y_class\n", - "# )\n", - "\n", "# Generate indices for a single split\n", "indices = np.arange(len(y_class))\n", "train_idx, test_idx = train_test_split(\n", - " indices, test_size=0.3, random_state=42, stratify=y_class\n", + " indices,\n", + " test_size=0.3,\n", + " random_state=42,\n", + " stratify=y_class,\n", ")\n", "\n", "# Use these indices for all splits\n", "X_train, X_test = X_feat[train_idx], X_feat[test_idx]\n", "y_train, y_test = y_class[train_idx], y_class[test_idx]\n", - "smiles_train, smiles_test = smiles[train_idx], smiles[test_idx]\n" + "smiles_train, smiles_test = smiles[train_idx], smiles[test_idx]" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "e4b28946", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fold 1\n", - "Fold 2\n", - "Fold 3\n", - "Fold 4\n", - "Fold 5\n" - ] - }, - { - "data": { - "application/vnd.microsoft.datawrangler.viewer.v0+json": { - "columns": [ - { - "name": "index", - "rawType": "object", - "type": "string" - }, - { - "name": "ensemble_xgb (OOF)", - "rawType": "float64", - "type": "float" - }, - { - "name": "CrossConformalCV (OOF, norm)", - "rawType": "float64", - "type": "float" - }, - { - "name": "CrossConformalCV (OOF, raw)", - "rawType": "float64", - "type": "float" - } - ], - "ref": "19a2d863-8031-48e1-86bd-f6970dfaed4f", - "rows": [ - [ - "NLL", - "0.5082971245539286", - "0.4948980269013652", 
- "0.484719057953362" - ], - [ - "ECE", - "0.6230208333333332", - "0.6428021891094953", - "0.6036250000000001" - ], - [ - "Brier", - "0.17212395833333335", - "0.16448031114059614", - "0.16170891666666665" - ], - [ - "Uncertainty Error Correlation", - "0.4139304487779154", - "0.45798897710100606", - "0.40508529649564395" - ], - [ - "Sharpness", - "0.4164086587962599", - "0.4037236852211876", - "0.44035714473370763" - ], - [ - "Balanced Accuracy", - "0.49419002050580996", - "0.5006835269993164", - "0.507177033492823" - ], - [ - "AUROC", - "0.7053998632946001", - "0.7006151742993848", - "0.7084757347915241" - ], - [ - "AUPRC", - "0.306106679300729", - "0.3094994417942496", - "0.3133013787846372" - ], - [ - "F1 Score", - "0.13333333333333333", - "0.13793103448275862", - "0.14285714285714285" - ], - [ - "MCC", - "-0.014535198024344553", - "0.0017830298218644615", - "0.019620779205386296" - ] - ], - "shape": { - "columns": 3, - "rows": 10 - } - }, - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Modelensemble_xgb (OOF)CrossConformalCV (OOF, norm)CrossConformalCV (OOF, raw)
NLL0.5082970.4948980.484719
ECE0.6230210.6428020.603625
Brier0.1721240.1644800.161709
Uncertainty Error Correlation0.4139300.4579890.405085
Sharpness0.4164090.4037240.440357
Balanced Accuracy0.4941900.5006840.507177
AUROC0.7054000.7006150.708476
AUPRC0.3061070.3094990.313301
F1 Score0.1333330.1379310.142857
MCC-0.0145350.0017830.019621
\n", - "
" - ], - "text/plain": [ - "Model ensemble_xgb (OOF) \\\n", - "NLL 0.508297 \n", - "ECE 0.623021 \n", - "Brier 0.172124 \n", - "Uncertainty Error Correlation 0.413930 \n", - "Sharpness 0.416409 \n", - "Balanced Accuracy 0.494190 \n", - "AUROC 0.705400 \n", - "AUPRC 0.306107 \n", - "F1 Score 0.133333 \n", - "MCC -0.014535 \n", - "\n", - "Model CrossConformalCV (OOF, norm) \\\n", - "NLL 0.494898 \n", - "ECE 0.642802 \n", - "Brier 0.164480 \n", - "Uncertainty Error Correlation 0.457989 \n", - "Sharpness 0.403724 \n", - "Balanced Accuracy 0.500684 \n", - "AUROC 0.700615 \n", - "AUPRC 0.309499 \n", - "F1 Score 0.137931 \n", - "MCC 0.001783 \n", - "\n", - "Model CrossConformalCV (OOF, raw) \n", - "NLL 0.484719 \n", - "ECE 0.603625 \n", - "Brier 0.161709 \n", - "Uncertainty Error Correlation 0.405085 \n", - "Sharpness 0.440357 \n", - "Balanced Accuracy 0.507177 \n", - "AUROC 0.708476 \n", - "AUPRC 0.313301 \n", - "F1 Score 0.142857 \n", - "MCC 0.019621 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40f1540a", + "metadata": {}, + "outputs": [], "source": [ "# 3.1 Cross-Validation Benchmarking: Standard Models and Conformal Prediction\n", "\n", - "\n", "# Use StratifiedKFold on the training set\n", "skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)\n", "\n", "model_dict = {\n", - " # \"ensemble_xgb\": XGBClassifier(eval_metric='logloss', random_state=42),\n", - " \"ensemble_rf\": RandomForestClassifier(n_estimators=100, random_state=42)\n", + " \"ensemble_rf\": RandomForestClassifier(n_estimators=100, random_state=42),\n", "}\n", "metrics_list = [\n", - " \"NLL\", \"ECE\", \"Brier\", \"Uncertainty Error Correlation\", \"Sharpness\",\n", - " \"Balanced Accuracy\", \"AUROC\", \"AUPRC\", \"F1 Score\", \"MCC\"\n", + " \"NLL\",\n", + " \"ECE\",\n", + " \"Brier\",\n", + " \"Uncertainty Error Correlation\",\n", + " \"Sharpness\",\n", + " 
\"Balanced Accuracy\",\n", + " \"AUROC\",\n", + " \"AUPRC\",\n", + " \"F1 Score\",\n", + " \"MCC\",\n", "]\n", "results = []\n", "results_cp = []\n", - "# ...existing code...\n", "\n", "# Arrays to collect out-of-fold predictions\n", "oof_preds = np.zeros_like(y_train, dtype=float)\n", @@ -408,27 +239,34 @@ " smiles_tr, smiles_val = smiles_train[train_idx], smiles_train[val_idx]\n", "\n", " # --- Standard Model ---\n", - " for model_name, model in model_dict.items():\n", + " for model in model_dict.values():\n", " model.fit(X_tr, y_tr)\n", " prob = model.predict_proba(X_val)[:, 1]\n", " oof_preds[val_idx] = prob\n", "\n", " # --- Conformal Prediction (CrossConformalCV) ---\n", " rf = RandomForestClassifier(n_estimators=100, random_state=42)\n", - " rf_pipeline = Pipeline([\n", - " (\"featurizer\", featurizer),\n", - " (\"rf\", rf)\n", - " ], n_jobs=1)\n", + " rf_pipeline = Pipeline(\n", + " [\n", + " (\"featurizer\", featurizer),\n", + " (\"rf\", rf),\n", + " ],\n", + " n_jobs=1,\n", + " )\n", " cc_clf = CrossConformalCV(\n", " estimator=rf_pipeline,\n", " n_folds=5,\n", " confidence_level=0.9,\n", - " estimator_type=\"classifier\"\n", + " estimator_type=\"classifier\",\n", " )\n", " cc_clf.fit(smiles_tr, y_tr)\n", " # Average ensemble probabilities for the validation fold\n", - " probs_cp_ensemble = np.mean([m.predict_p(smiles_val) for m in cc_clf.models_], axis=0)\n", - " probs_cp_ensemble_raw = np.mean([m.predict_proba(smiles_val) for m in cc_clf.models_], axis=0)\n", + " probs_cp_ensemble = np.mean(\n", + " [m.predict_p(smiles_val) for m in cc_clf.models_], axis=0\n", + " )\n", + " probs_cp_ensemble_raw = np.mean(\n", + " [m.predict_proba(smiles_val) for m in cc_clf.models_], axis=0\n", + " )\n", " p0 = probs_cp_ensemble[:, 0]\n", " p1 = probs_cp_ensemble[:, 1]\n", " p1_norm = p1 / (p0 + p1 + 1e-12)\n", @@ -436,61 +274,62 @@ " oof_preds_cp_raw[val_idx] = probs_cp_ensemble_raw[:, 1]\n", "\n", "# Create a DataFrame to compare raw and normalized conformal 
probabilities\n", - "df_oof_compare = pd.DataFrame({\n", - " \"y_true\": y_train,\n", - " \"StandardModel\": oof_preds,\n", - " \"ConformalRaw\": oof_preds_cp_raw,\n", - " \"ConformalNorm\": oof_preds_cp_norm\n", - "})\n", - "# display(df_oof_compare.head())\n", + "df_oof_compare = pd.DataFrame(\n", + " {\n", + " \"y_true\": y_train,\n", + " \"StandardModel\": oof_preds,\n", + " \"ConformalRaw\": oof_preds_cp_raw,\n", + " \"ConformalNorm\": oof_preds_cp_norm,\n", + " }\n", + ")\n", "\n", "# Compute metrics for out-of-fold predictions (standard model)\n", - "mean_pred = (oof_preds >= 0.5).astype(int)\n", + "mean_pred = (oof_preds >= THRESHOLD).astype(int)\n", "metrics = {\n", " \"Model\": \"ensemble_xgb (OOF)\",\n", " \"NLL\": log_loss(y_train, oof_preds),\n", " \"ECE\": compute_ece(y_train, oof_preds),\n", " \"Brier\": brier_score_loss(y_train, oof_preds),\n", - " \"Uncertainty Error Correlation\": compute_uncertainty_error_corr(y_train, oof_preds),\n", + " \"Uncertainty Error Correlation\": uncertain_error_corr(y_train, oof_preds),\n", " \"Sharpness\": compute_sharpness(oof_preds),\n", " \"Balanced Accuracy\": balanced_accuracy_score(y_train, mean_pred),\n", " \"AUROC\": roc_auc_score(y_train, oof_preds),\n", " \"AUPRC\": average_precision_score(y_train, oof_preds),\n", " \"F1 Score\": f1_score(y_train, mean_pred),\n", - " \"MCC\": matthews_corrcoef(y_train, mean_pred)\n", + " \"MCC\": matthews_corrcoef(y_train, mean_pred),\n", "}\n", "results.append(metrics)\n", "\n", "# Compute metrics for out-of-fold predictions (conformal, both raw and norm)\n", - "mean_pred_cp_norm = (oof_preds_cp_norm >= 0.5).astype(int)\n", + "mean_pred_cp_norm = (oof_preds_cp_norm >= THRESHOLD).astype(int)\n", "metrics_cp_norm = {\n", " \"Model\": \"CrossConformalCV (OOF, norm)\",\n", " \"NLL\": log_loss(y_train, oof_preds_cp_norm),\n", " \"ECE\": compute_ece(y_train, oof_preds_cp_norm),\n", " \"Brier\": brier_score_loss(y_train, oof_preds_cp_norm),\n", - " \"Uncertainty Error 
Correlation\": compute_uncertainty_error_corr(y_train, oof_preds_cp_norm),\n", + " \"Uncertainty Error Correlation\": uncertain_error_corr(y_train, oof_preds_cp_norm),\n", " \"Sharpness\": compute_sharpness(oof_preds_cp_norm),\n", " \"Balanced Accuracy\": balanced_accuracy_score(y_train, mean_pred_cp_norm),\n", " \"AUROC\": roc_auc_score(y_train, oof_preds_cp_norm),\n", " \"AUPRC\": average_precision_score(y_train, oof_preds_cp_norm),\n", " \"F1 Score\": f1_score(y_train, mean_pred_cp_norm),\n", - " \"MCC\": matthews_corrcoef(y_train, mean_pred_cp_norm)\n", + " \"MCC\": matthews_corrcoef(y_train, mean_pred_cp_norm),\n", "}\n", "results_cp.append(metrics_cp_norm)\n", "\n", - "mean_pred_cp_raw = (oof_preds_cp_raw >= 0.5).astype(int)\n", + "mean_pred_cp_raw = (oof_preds_cp_raw >= THRESHOLD).astype(int)\n", "metrics_cp_raw = {\n", " \"Model\": \"CrossConformalCV (OOF, raw)\",\n", " \"NLL\": log_loss(y_train, oof_preds_cp_raw),\n", " \"ECE\": compute_ece(y_train, oof_preds_cp_raw),\n", " \"Brier\": brier_score_loss(y_train, oof_preds_cp_raw),\n", - " \"Uncertainty Error Correlation\": compute_uncertainty_error_corr(y_train, oof_preds_cp_raw),\n", + " \"Uncertainty Error Correlation\": uncertain_error_corr(y_train, oof_preds_cp_raw),\n", " \"Sharpness\": compute_sharpness(oof_preds_cp_raw),\n", " \"Balanced Accuracy\": balanced_accuracy_score(y_train, mean_pred_cp_raw),\n", " \"AUROC\": roc_auc_score(y_train, oof_preds_cp_raw),\n", " \"AUPRC\": average_precision_score(y_train, oof_preds_cp_raw),\n", " \"F1 Score\": f1_score(y_train, mean_pred_cp_raw),\n", - " \"MCC\": matthews_corrcoef(y_train, mean_pred_cp_raw)\n", + " \"MCC\": matthews_corrcoef(y_train, mean_pred_cp_raw),\n", "}\n", "results_cp.append(metrics_cp_raw)\n", "\n", @@ -500,7 +339,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "ad5d684e", "metadata": {}, "outputs": [ @@ -518,15 +357,18 @@ "source": [ "import numpy as np\n", "\n", - "probs_standard = 
df_oof_compare[\"StandardModel\"].values\n", - "probs_raw = df_oof_compare[\"ConformalRaw\"].values\n", - "probs_norm = df_oof_compare[\"ConformalNorm\"].values\n", + "probs_standard = df_oof_compare[\"StandardModel\"].to_numpy()\n", + "probs_raw = df_oof_compare[\"ConformalRaw\"].to_numpy()\n", + "probs_norm = df_oof_compare[\"ConformalNorm\"].to_numpy()\n", "\n", "plt.figure(figsize=(8, 5))\n", "bins = np.linspace(0, 1, 21)\n", "\n", "\n", - "def plot_percentage_line(probs, bins, label, color):\n", + "def plot_percentage_line(\n", + " probs: np.ndarray, bins: np.ndarray, label: str, color: str\n", + ") -> None:\n", + " \"\"\"Plot percentage of predictions in each probability bin.\"\"\"\n", " counts, bin_edges = np.histogram(probs, bins=bins)\n", " percent = 100 * counts / len(probs)\n", " bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2\n", @@ -547,7 +389,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "2bcaf7d7", "metadata": {}, "outputs": [ @@ -770,7 +612,6 @@ "# 3.3 Visualizing Uncertainty and Prediction Sets\n", "\n", "plt.figure(figsize=(8, 4))\n", - "plt.hist(p1_cp, bins=20, alpha=0.7, label=\"CrossConformalCV Probabilities\")\n", "plt.hist(p1, bins=20, alpha=0.7, label=\"Best Ensemble Model Probabilities\")\n", "plt.xlabel(\"Predicted Probability (Active)\")\n", "plt.ylabel(\"Count\")\n", @@ -791,24 +632,34 @@ "p1 = p_vals[:, 1]\n", "p1_norm = p1 / (p0 + p1 + 1e-12)\n", "\n", - "df_cp_class = pd.DataFrame({\n", - " \"SMILES\": smiles_test,\n", - " \"p0\": p0,\n", - " \"p1\": p1,\n", - " \"p1_norm\": p1_norm,\n", - " \"conformal_set\": conf_pred_sets,\n", - " \"true_label\": y_test_cp\n", - "})\n", + "df_cp_class = pd.DataFrame(\n", + " {\n", + " \"SMILES\": smiles_test,\n", + " \"p0\": p0,\n", + " \"p1\": p1,\n", + " \"p1_norm\": p1_norm,\n", + " \"conformal_set\": conf_pred_sets,\n", + " \"true_label\": y_test,\n", + " }\n", + ")\n", "display(df_cp_class.head())\n", "\n", "\n", - "def coverage_and_set_size(y_true, 
conf_sets):\n", - " covered = [y in s for y, s in zip(y_true, conf_sets)]\n", + "def coverage_and_set_size(y_true: np.ndarray, conf_sets: list) -> tuple[float, float]:\n", + " \"\"\"Compute coverage and average set size for conformal sets.\n", + "\n", + " Returns\n", + " -------\n", + " float, float\n", + " Coverage (fraction of true labels in sets) and average set size.\n", + "\n", + " \"\"\"\n", + " covered = [y in s for y, s in zip(y_true, conf_sets, strict=True)]\n", " avg_size = np.mean([len(s) for s in conf_sets])\n", " return np.mean(covered), avg_size\n", "\n", "\n", - "coverage, avg_set_size = coverage_and_set_size(y_test_cp, conf_pred_sets)\n", + "coverage, avg_set_size = coverage_and_set_size(y_test, conf_pred_sets)\n", "error = 1 - coverage\n", "empty = np.mean([len(s) == 0 for s in conf_pred_sets])\n", "\n", @@ -816,16 +667,16 @@ "print(f\"Conformal set average size: {avg_set_size:.3f}\")\n", "print(f\"Conformal set error: {error:.3f}\")\n", "print(f\"Fraction of empty sets: {empty:.3f}\")\n", - "print(\"NLL:\", log_loss(y_test_cp, p1_norm))\n", - "print(\"Brier:\", brier_score_loss(y_test_cp, p1_norm))\n", - "print(\"AUROC:\", roc_auc_score(y_test_cp, p1_norm))\n", - "print(\"F1:\", f1_score(y_test_cp, (p1_norm >= 0.5).astype(int)))\n", - "print(\"MCC:\", matthews_corrcoef(y_test_cp, (p1_norm >= 0.5).astype(int)))\n" + "print(\"NLL:\", log_loss(y_test, p1_norm))\n", + "print(\"Brier:\", brier_score_loss(y_test, p1_norm))\n", + "print(\"AUROC:\", roc_auc_score(y_test, p1_norm))\n", + "print(\"F1:\", f1_score(y_test, (p1_norm >= THRESHOLD).astype(int)))\n", + "print(\"MCC:\", matthews_corrcoef(y_test, (p1_norm >= THRESHOLD).astype(int)))" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "6cd8a8da", "metadata": {}, "outputs": [ @@ -1013,32 +864,43 @@ } ], "source": [ - "\n", "# 4. 
Regression: Conformal Prediction and Interval Evaluation\n", "\n", - "\n", "# --- Prepare regression data (filter NaNs as before) ---\n", "mask_reg = ~np.isnan(X_feat).any(axis=1) & ~np.isnan(y_reg)\n", "X_feat_reg = X_feat[mask_reg]\n", "y_reg_clean = y_reg[mask_reg]\n", "smiles_reg = np.array(smiles)[mask_reg]\n", "\n", - "# Split for regression\n", - "X_train_reg, X_test_reg, y_train_reg, y_test_reg, smiles_train_reg, smiles_test_reg = train_test_split(\n", - " X_feat_reg, y_reg_clean, smiles_reg, test_size=0.3, random_state=42\n", + "(\n", + " X_train_reg,\n", + " X_test_reg,\n", + " y_train_reg,\n", + " y_test_reg,\n", + " smiles_train_reg,\n", + " smiles_test_reg,\n", + ") = train_test_split(\n", + " X_feat_reg,\n", + " y_reg_clean,\n", + " smiles_reg,\n", + " test_size=0.3,\n", + " random_state=42,\n", ")\n", "\n", "# --- Wrap regressor with CrossConformalCV ---\n", "rf_reg = RandomForestRegressor(n_estimators=100, random_state=42)\n", - "rf_reg_pipeline = Pipeline([\n", - " (\"rf\", rf_reg)\n", - "], n_jobs=1)\n", + "rf_reg_pipeline = Pipeline(\n", + " [\n", + " (\"rf\", rf_reg),\n", + " ],\n", + " n_jobs=1,\n", + ")\n", "\n", "cc_reg = CrossConformalCV(\n", " estimator=rf_reg_pipeline,\n", " n_folds=5,\n", " confidence_level=0.95,\n", - " estimator_type=\"regressor\"\n", + " estimator_type=\"regressor\",\n", ")\n", "cc_reg.fit(X_train_reg, y_train_reg)\n", "\n", @@ -1049,13 +911,15 @@ "upper = intervals_mean[:, 1]\n", "point_pred = np.mean([m.predict(X_test_reg) for m in cc_reg.models_], axis=0)\n", "\n", - "df_cp_reg = pd.DataFrame({\n", - " \"pubchem_smiles\": smiles_test_reg,\n", - " \"pIC50\": y_test_reg,\n", - " \"pred_lower\": lower,\n", - " \"pred_upper\": upper,\n", - " \"point_pred\": point_pred\n", - "})\n", + "df_cp_reg = pd.DataFrame(\n", + " {\n", + " \"pubchem_smiles\": smiles_test_reg,\n", + " \"pIC50\": y_test_reg,\n", + " \"pred_lower\": lower,\n", + " \"pred_upper\": upper,\n", + " \"point_pred\": point_pred,\n", + " }\n", + ")\n", 
"display(df_cp_reg.head())\n", "\n", "# --- Regression: Evaluate coverage and interval width ---\n", @@ -1065,7 +929,7 @@ "\n", "print(f\"Interval coverage: {coverage_reg:.3f}\")\n", "print(f\"Average interval width: {avg_width:.3f}\")\n", - "print(f\"MAE (point prediction): {mae:.3f}\")\n" + "print(f\"MAE (point prediction): {mae:.3f}\")" ] } ], diff --git a/pyproject.toml b/pyproject.toml index 289f2f77..c24678f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -221,4 +221,4 @@ dev = [ "rdkit<2025.3.3", # only temporarily, see https://github.com/kuelumbus/rdkit-pypi/issues/132 "rdkit-stubs>=0.8", "ruff>=0.11.4", -] \ No newline at end of file +] diff --git a/tests/test_experimental/test_uncertainty/test_conformal.py b/tests/test_experimental/test_uncertainty/test_conformal.py index dbc560fd..70f49e10 100644 --- a/tests/test_experimental/test_uncertainty/test_conformal.py +++ b/tests/test_experimental/test_uncertainty/test_conformal.py @@ -1,6 +1,7 @@ """Unit tests for conformal prediction wrappers in molpipeline.experimental.uncertainty.conformal. 
""" + import unittest from sklearn.datasets import make_classification, make_regression @@ -20,7 +21,10 @@ def test_unified_conformal_classifier(self) -> None: """Test UnifiedConformalCV with a classifier.""" x, y = make_classification(n_samples=100, n_features=10, random_state=42) x_train, x_calib, y_train, y_calib = train_test_split( - x, y, test_size=0.2, random_state=42, + x, + y, + test_size=0.2, + random_state=42, ) clf = RandomForestClassifier(random_state=42) cp = UnifiedConformalCV(clf, estimator_type="classifier") @@ -37,7 +41,10 @@ def test_unified_conformal_regressor(self) -> None: """Test UnifiedConformalCV with a regressor.""" x, y, _ = make_regression(n_samples=100, n_features=10, random_state=42) x_train, x_calib, y_train, y_calib = train_test_split( - x, y, test_size=0.2, random_state=42, + x, + y, + test_size=0.2, + random_state=42, ) reg = RandomForestRegressor(random_state=42) cp = UnifiedConformalCV(reg, estimator_type="regressor") diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index d308f412..fea6451f 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -396,15 +396,14 @@ def test_conformal_pipeline_classifier(self) -> None: smi2mol = SmilesToMol() mol2morgan = MolToMorganFP(radius=2, n_bits=128) rf = RandomForestClassifier(n_estimators=10, random_state=42) - pipeline = Pipeline([ - ("smi2mol", smi2mol), - ("morgan", mol2morgan), - ("rf", rf) - ]) + pipeline = Pipeline([("smi2mol", smi2mol), ("morgan", mol2morgan), ("rf", rf)]) # Split data from sklearn.model_selection import train_test_split - X_train, X_calib, y_train, y_calib = train_test_split(smiles, y, test_size=0.3, random_state=42) + + X_train, X_calib, y_train, y_calib = train_test_split( + smiles, y, test_size=0.3, random_state=42 + ) # UnifiedConformalCV cp = UnifiedConformalCV(pipeline, estimator_type="classifier") From c9cab14f5f96dbc2a26c6862f5a490b920ef2ea7 Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Mon, 7 Jul 2025 12:23:32 +0200 Subject: 
[PATCH 06/20] tests ruffed --- .../test_uncertainty/test_conformal.py | 4 +--- tests/test_pipeline.py | 23 ++++++++++--------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/tests/test_experimental/test_uncertainty/test_conformal.py b/tests/test_experimental/test_uncertainty/test_conformal.py index 70f49e10..a538a8a6 100644 --- a/tests/test_experimental/test_uncertainty/test_conformal.py +++ b/tests/test_experimental/test_uncertainty/test_conformal.py @@ -1,6 +1,4 @@ -"""Unit tests for conformal prediction wrappers in -molpipeline.experimental.uncertainty.conformal. -""" +"""Unit tests for conformal prediction wrappers.""" import unittest diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index fea6451f..2ed31aa7 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -16,11 +16,15 @@ from sklearn.base import BaseEstimator from sklearn.calibration import CalibratedClassifierCV from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor -from sklearn.model_selection import GridSearchCV +from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.tree import DecisionTreeClassifier from molpipeline import ErrorFilter, FilterReinserter, Pipeline, PostPredictionWrapper from molpipeline.any2mol import AutoToMol, SmilesToMol +from molpipeline.experimental.uncertainty.conformal import ( + CrossConformalCV, + UnifiedConformalCV, +) from molpipeline.mol2any import MolToMorganFP, MolToRDKitPhysChem, MolToSmiles from molpipeline.mol2mol import ( ChargeParentExtractor, @@ -383,10 +387,7 @@ def test_conformal_pipeline_classifier(self) -> None: This test does not take any parameters and does not return a value. 
""" - from molpipeline.experimental.uncertainty.conformal import ( - CrossConformalCV, - UnifiedConformalCV, - ) + # Use the global test data smiles = np.array(TEST_SMILES) @@ -396,14 +397,14 @@ def test_conformal_pipeline_classifier(self) -> None: smi2mol = SmilesToMol() mol2morgan = MolToMorganFP(radius=2, n_bits=128) rf = RandomForestClassifier(n_estimators=10, random_state=42) - pipeline = Pipeline([("smi2mol", smi2mol), ("morgan", mol2morgan), ("rf", rf)]) + pipeline = Pipeline([ + ("smi2mol", smi2mol), + ("morgan", mol2morgan), + ("rf", rf) + ]) # Split data - from sklearn.model_selection import train_test_split - - X_train, X_calib, y_train, y_calib = train_test_split( - smiles, y, test_size=0.3, random_state=42 - ) + X_train, X_calib, y_train, y_calib = train_test_split(smiles, y, test_size=0.3, random_state=42) # UnifiedConformalCV cp = UnifiedConformalCV(pipeline, estimator_type="classifier") From 319d2828ebbedf9fe2c2afd8a533bea4a41e3af4 Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Mon, 7 Jul 2025 12:26:32 +0200 Subject: [PATCH 07/20] tests ruffed and formatted --- tests/test_pipeline.py | 61 +++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 2ed31aa7..fa214da1 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -60,7 +60,7 @@ def test_fit_transform_single_core(self) -> None: [ ("smi2mol", smi2mol), ("morgan", mol2morgan), - ] + ], ) # Run pipeline @@ -79,11 +79,11 @@ def test_sklearn_pipeline(self) -> None: ("smi2mol", smi2mol), ("morgan", mol2morgan), ("decision_tree", d_tree), - ] + ], ) s_pipeline.fit(TEST_SMILES, CONTAINS_OX) predicted_value_array = s_pipeline.predict(TEST_SMILES) - for pred_val, true_val in zip(predicted_value_array, CONTAINS_OX): + for pred_val, true_val in zip(predicted_value_array, CONTAINS_OX, strict=False): self.assertEqual(pred_val, true_val) def test_sklearn_pipeline_parallel(self) -> None: @@ 
-102,7 +102,7 @@ def test_sklearn_pipeline_parallel(self) -> None: s_pipeline.fit(TEST_SMILES, CONTAINS_OX) out = s_pipeline.predict(TEST_SMILES) self.assertEqual(len(out), len(CONTAINS_OX)) - for pred_val, true_val in zip(out, CONTAINS_OX): + for pred_val, true_val in zip(out, CONTAINS_OX, strict=False): self.assertEqual(pred_val, true_val) def test_salt_removal(self) -> None: @@ -125,11 +125,13 @@ def test_salt_removal(self) -> None: ("empty_mol_filter", empty_mol_filter), ("remove_charge", remove_charge), ("mol2smi", mol2smi), - ] + ], ) generated_smiles = salt_remover_pipeline.transform(smiles_with_salt_list) for generated_smiles, smiles_without_salt in zip( - generated_smiles, smiles_without_salt_list + generated_smiles, + smiles_without_salt_list, + strict=False, ): self.assertEqual(generated_smiles, smiles_without_salt) @@ -152,7 +154,7 @@ def test_json_generation(self) -> None: ("metal_disconnector", metal_disconnector), ("salt_remover", salt_remover), ("physchem", physchem), - ] + ], ) # Convert pipeline to json @@ -162,7 +164,9 @@ def test_json_generation(self) -> None: self.assertTrue(isinstance(loaded_pipeline, Pipeline)) # Compare pipeline elements for loaded_element, original_element in zip( - loaded_pipeline.steps, pipeline_element_list + loaded_pipeline.steps, + pipeline_element_list, + strict=False, ): if loaded_element[1] == "passthrough": self.assertEqual(loaded_element[1], original_element) @@ -182,7 +186,7 @@ def test_fit_transform_record_remove_nones(self) -> None: mol2morgan = MolToMorganFP(radius=FP_RADIUS, n_bits=FP_SIZE) empty_mol_filter = EmptyMoleculeFilter() remove_none = ErrorFilter.from_element_list( - [smi2mol, salt_remover, mol2morgan, empty_mol_filter] + [smi2mol, salt_remover, mol2morgan, empty_mol_filter], ) # Create pipeline pipeline = Pipeline( @@ -203,7 +207,9 @@ def test_fit_transform_record_remove_nones(self) -> None: def test_caching(self) -> None: """Test if the caching gives the same results and is faster on the second 
run.""" molecule_net_logd_df = pd.read_csv( - TEST_DATA_DIR / "molecule_net_logd.tsv.gz", sep="\t", nrows=20 + TEST_DATA_DIR / "molecule_net_logd.tsv.gz", + sep="\t", + nrows=20, ) prediction_list = [] for cache_activated in [False, True]: @@ -269,7 +275,7 @@ def test_gridsearchcv(self) -> None: "physchem__descriptor_list": [ ["HeavyAtomMolWt"], ["HeavyAtomMolWt", "HeavyAtomCount"], - ] + ], }, }, ] @@ -319,7 +325,9 @@ def test_gridsearch_cache(self) -> None: } # First without caching data_df = pd.read_csv( - TEST_DATA_DIR / "molecule_net_logd.tsv.gz", sep="\t", nrows=20 + TEST_DATA_DIR / "molecule_net_logd.tsv.gz", + sep="\t", + nrows=20, ) best_param_dict = {} prediction_dict = {} @@ -345,7 +353,7 @@ def test_gridsearch_cache(self) -> None: grid_search_cv.fit(data_df["smiles"].tolist(), data_df["exp"].tolist()) best_param_dict[cache_activated] = grid_search_cv.best_params_ prediction_dict[cache_activated] = grid_search_cv.predict( - data_df["smiles"].tolist() + data_df["smiles"].tolist(), ) mem.clear(warn=False) self.assertEqual(best_param_dict[True], best_param_dict[False]) @@ -366,13 +374,16 @@ def test_calibrated_classifier(self) -> None: ( "error_replacer", PostPredictionWrapper( - FilterReinserter.from_error_filter(error_filter, np.nan) + FilterReinserter.from_error_filter(error_filter, np.nan), ), ), - ] + ], ) calibrated_pipeline = CalibratedClassifierCV( - s_pipeline, cv=2, ensemble=True, method="isotonic" + s_pipeline, + cv=2, + ensemble=True, + method="isotonic", ) calibrated_pipeline.fit(TEST_SMILES, CONTAINS_OX) predicted_value_array = calibrated_pipeline.predict(TEST_SMILES) @@ -387,8 +398,6 @@ def test_conformal_pipeline_classifier(self) -> None: This test does not take any parameters and does not return a value. 
""" - - # Use the global test data smiles = np.array(TEST_SMILES) y = np.array(CONTAINS_OX) @@ -397,14 +406,18 @@ def test_conformal_pipeline_classifier(self) -> None: smi2mol = SmilesToMol() mol2morgan = MolToMorganFP(radius=2, n_bits=128) rf = RandomForestClassifier(n_estimators=10, random_state=42) - pipeline = Pipeline([ - ("smi2mol", smi2mol), - ("morgan", mol2morgan), - ("rf", rf) - ]) + pipeline = Pipeline( + [ + ("smi2mol", smi2mol), + ("morgan", mol2morgan), + ("rf", rf), + ], + ) # Split data - X_train, X_calib, y_train, y_calib = train_test_split(smiles, y, test_size=0.3, random_state=42) + X_train, X_calib, y_train, y_calib = train_test_split( + smiles, y, test_size=0.3, random_state=42, + ) # UnifiedConformalCV cp = UnifiedConformalCV(pipeline, estimator_type="classifier") From 7394ba6195930d144b458720041742f7b787944b Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Mon, 7 Jul 2025 12:27:39 +0200 Subject: [PATCH 08/20] tests rereformatted --- tests/test_pipeline.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index fa214da1..c8e5bb53 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -416,7 +416,10 @@ def test_conformal_pipeline_classifier(self) -> None: # Split data X_train, X_calib, y_train, y_calib = train_test_split( - smiles, y, test_size=0.3, random_state=42, + smiles, + y, + test_size=0.3, + random_state=42, ) # UnifiedConformalCV From abb1067d0ba559cd8fac4681e795eacb611d922c Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Mon, 7 Jul 2025 12:58:49 +0200 Subject: [PATCH 09/20] fix test --- tests/test_experimental/test_uncertainty/test_conformal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_experimental/test_uncertainty/test_conformal.py b/tests/test_experimental/test_uncertainty/test_conformal.py index a538a8a6..20e69a33 100644 --- a/tests/test_experimental/test_uncertainty/test_conformal.py +++ 
b/tests/test_experimental/test_uncertainty/test_conformal.py @@ -37,7 +37,7 @@ def test_unified_conformal_classifier(self) -> None: def test_unified_conformal_regressor(self) -> None: """Test UnifiedConformalCV with a regressor.""" - x, y, _ = make_regression(n_samples=100, n_features=10, random_state=42) + x, y, _ = make_regression(n_samples=100, n_features=10, random_state=42,coef=True) x_train, x_calib, y_train, y_calib = train_test_split( x, y, @@ -67,7 +67,7 @@ def test_cross_conformal_classifier(self) -> None: def test_cross_conformal_regressor(self) -> None: """Test CrossConformalCV with a regressor.""" - x, y, _ = make_regression(n_samples=100, n_features=10, random_state=42) + x, y, _ = make_regression(n_samples=100, n_features=10, random_state=42, coef=True) reg = RandomForestRegressor(random_state=42) ccp = CrossConformalCV(reg, estimator_type="regressor", n_folds=3) ccp.fit(x, y) From ebc6d4965b118681f012c85931a60aa0e6c9e342 Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Mon, 7 Jul 2025 13:03:06 +0200 Subject: [PATCH 10/20] reformatted after fix --- .../test_experimental/test_uncertainty/test_conformal.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_experimental/test_uncertainty/test_conformal.py b/tests/test_experimental/test_uncertainty/test_conformal.py index 20e69a33..4b6dd5b4 100644 --- a/tests/test_experimental/test_uncertainty/test_conformal.py +++ b/tests/test_experimental/test_uncertainty/test_conformal.py @@ -37,7 +37,9 @@ def test_unified_conformal_classifier(self) -> None: def test_unified_conformal_regressor(self) -> None: """Test UnifiedConformalCV with a regressor.""" - x, y, _ = make_regression(n_samples=100, n_features=10, random_state=42,coef=True) + x, y, _ = make_regression( + n_samples=100, n_features=10, random_state=42, coef=True, + ) x_train, x_calib, y_train, y_calib = train_test_split( x, y, @@ -67,7 +69,9 @@ def test_cross_conformal_classifier(self) -> None: def 
test_cross_conformal_regressor(self) -> None: """Test CrossConformalCV with a regressor.""" - x, y, _ = make_regression(n_samples=100, n_features=10, random_state=42, coef=True) + x, y, _ = make_regression( + n_samples=100, n_features=10, random_state=42, coef=True, + ) reg = RandomForestRegressor(random_state=42) ccp = CrossConformalCV(reg, estimator_type="regressor", n_folds=3) ccp.fit(x, y) From a71fa5bc240fc28526da1582a893d56fd52d52ef Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Mon, 7 Jul 2025 13:04:16 +0200 Subject: [PATCH 11/20] reformatted after fix --- .../test_uncertainty/test_conformal.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_experimental/test_uncertainty/test_conformal.py b/tests/test_experimental/test_uncertainty/test_conformal.py index 4b6dd5b4..2d7734c2 100644 --- a/tests/test_experimental/test_uncertainty/test_conformal.py +++ b/tests/test_experimental/test_uncertainty/test_conformal.py @@ -38,7 +38,10 @@ def test_unified_conformal_classifier(self) -> None: def test_unified_conformal_regressor(self) -> None: """Test UnifiedConformalCV with a regressor.""" x, y, _ = make_regression( - n_samples=100, n_features=10, random_state=42, coef=True, + n_samples=100, + n_features=10, + random_state=42, + coef=True, ) x_train, x_calib, y_train, y_calib = train_test_split( x, @@ -70,7 +73,10 @@ def test_cross_conformal_classifier(self) -> None: def test_cross_conformal_regressor(self) -> None: """Test CrossConformalCV with a regressor.""" x, y, _ = make_regression( - n_samples=100, n_features=10, random_state=42, coef=True, + n_samples=100, + n_features=10, + random_state=42, + coef=True, ) reg = RandomForestRegressor(random_state=42) ccp = CrossConformalCV(reg, estimator_type="regressor", n_folds=3) From 9629327465c76e31aca30f98bef9f6aac60879c2 Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Fri, 11 Jul 2025 13:21:41 +0200 Subject: [PATCH 12/20] addressed pr comments, types, docus, mondrian, fit and 
calib flags, tests, random_states, hide priv functions --- .../experimental/uncertainty/__init__.py | 2 +- .../experimental/uncertainty/conformal.py | 753 ++++++++---------- .../advanced_04_conformal_prediction.ipynb | 132 ++- .../test_uncertainty/test_conformal.py | 177 ++-- 4 files changed, 513 insertions(+), 551 deletions(-) diff --git a/molpipeline/experimental/uncertainty/__init__.py b/molpipeline/experimental/uncertainty/__init__.py index 1dbfef58..ffbbd9c0 100644 --- a/molpipeline/experimental/uncertainty/__init__.py +++ b/molpipeline/experimental/uncertainty/__init__.py @@ -1,4 +1,4 @@ -"""Experimental uncertainty wrappers for conformal prediction in MolPipeline. +"""Wrappers for conformal prediction in MolPipeline. Provides CrossConformalCV and UnifiedConformalCV for robust uncertainty quantification. """ diff --git a/molpipeline/experimental/uncertainty/conformal.py b/molpipeline/experimental/uncertainty/conformal.py index 13b3fa65..7e65ce1b 100644 --- a/molpipeline/experimental/uncertainty/conformal.py +++ b/molpipeline/experimental/uncertainty/conformal.py @@ -1,36 +1,37 @@ -"""Conformal prediction wrappers for classification and regression using crepes. - -Provides unified and cross-conformal prediction with Mondrian and nonconformity options. """ +Conformal prediction wrappers for classification and regression models. -# pylint: disable=too-many-instance-attributes, attribute-defined-outside-init - -from typing import Any, cast +This module provides unified implementations of conformal prediction for +uncertainty quantification with both classification and regression models. 
+""" -import numpy as np from crepes import WrapClassifier, WrapRegressor -from crepes.extras import MondrianCategorizer -from numpy.typing import NDArray -from scipy.stats import mode +from sklearn.base import is_classifier, is_regressor +from sklearn.model_selection import StratifiedKFold, KFold +from crepes.extras import hinge, margin, MondrianCategorizer, DifficultyEstimator +import numpy as np +import numpy.typing as npt +from typing import Any, Callable, Optional, Literal, List, Union from sklearn.base import BaseEstimator, clone -from sklearn.model_selection import KFold, StratifiedKFold +from sklearn.utils import check_random_state +from scipy.stats import mode -def bin_targets(y: NDArray[Any], n_bins: int = 10) -> NDArray[np.int_]: - """Bin continuous targets for stratified splitting in regression. +def _bin_targets(y: npt.NDArray[Any], n_bins: int = 10) -> npt.NDArray[np.int_]: + """ + Bin continuous targets for stratified splitting in regression. Parameters ---------- - y : np.ndarray + y : npt.NDArray[Any] Target values. n_bins : int, optional Number of bins (default: 10). Returns ------- - np.ndarray + npt.NDArray[np.int_] Binned targets. - """ y = np.asarray(y) bins = np.linspace(np.min(y), np.max(y), n_bins + 1) @@ -40,69 +41,44 @@ def bin_targets(y: NDArray[Any], n_bins: int = 10) -> NDArray[np.int_]: class UnifiedConformalCV(BaseEstimator): - """Conformal prediction wrapper for both classifiers and regressors. - - Uses crepes under the hood. - - Parameters - ---------- - estimator : sklearn-like estimator - Your favorite model (or pipeline). - mondrian : bool/callable/MondrianCategorizer, optional - If True, use class-conditional (Mondrian) calibration. If callable or - MondrianCategorizer, use as custom group function/categorizer. - confidence_level : float, optional - How confident should we be? (default: 0.9) - estimator_type : {'classifier', 'regressor'}, optional - What kind of model are we wrapping? 
- nonconformity : callable, optional - Nonconformity function for classification (e.g., hinge, margin, or custom). - difficulty_estimator : callable or DifficultyEstimator, optional - For regression: difficulty estimator for normalized conformal prediction. - binning : int or callable, optional - For regression: number of bins or binning function for Mondrian calibration. - n_jobs : int, optional - Parallelize all the things. - kwargs : dict - Extra toppings for crepes. - - """ - def __init__( self, - estimator: Any, - mondrian: Any = False, + estimator: BaseEstimator, + mondrian: bool | Callable | MondrianCategorizer = False, confidence_level: float = 0.9, - estimator_type: str = "classifier", - nonconformity: Any | None = None, - difficulty_estimator: Any | None = None, - binning: Any | None = None, + estimator_type: Literal["auto", "classifier", "regressor"] = "auto", + nonconformity: Optional[Callable] = None, + difficulty_estimator: Optional[Callable] = None, + binning: Optional[int | Callable] = None, n_jobs: int = 1, - **kwargs: Any, - ) -> None: - """Initialize UnifiedConformalCV. + random_state: Optional[int] = None, + **kwargs: Any + ): + """ + Unified conformal prediction wrapper for both classifiers and regressors. Parameters ---------- - estimator : Any - The base estimator or pipeline to wrap. - mondrian : Any, optional - Mondrian calibration/grouping (default: False). + estimator : BaseEstimator + The underlying model or pipeline to wrap. + mondrian : bool, callable, or MondrianCategorizer, optional + If True, use class-conditional (Mondrian) calibration. If callable or MondrianCategorizer, use as custom group function/categorizer. confidence_level : float, optional Confidence level for prediction sets/intervals (default: 0.9). - estimator_type : str, optional - Type of estimator: 'classifier' or 'regressor' (default: 'classifier'). - nonconformity : Any, optional - Nonconformity function for classification. 
- difficulty_estimator : Any, optional - Difficulty estimator for normalized conformal prediction (regression). - binning : Any, optional - Number of bins or binning function for Mondrian calibration (regression). + estimator_type : Literal["auto", "classifier", "regressor"], optional + Type of estimator. If "auto", will infer using sklearn's is_classifier/is_regressor. + nonconformity : callable, optional + Nonconformity function for classification (e.g., hinge, margin, or custom). + difficulty_estimator : callable, optional + For regression: difficulty estimator for normalized conformal prediction. + binning : int or callable, optional + For regression: number of bins or binning function for Mondrian calibration. n_jobs : int, optional - Number of parallel jobs (default: 1). - **kwargs : Any + Number of parallel jobs to use. + random_state : int or None, optional + Random state for reproducibility. + **kwargs : dict Additional keyword arguments for crepes. - """ self.estimator = estimator self.mondrian = mondrian @@ -113,273 +89,203 @@ def __init__( self.binning = binning self.n_jobs = n_jobs self.kwargs = kwargs + self.random_state = check_random_state(random_state) if random_state is not None else None + self.fitted_ = False + self.calibrated_ = False + self._conformal = None + + # Determine estimator_type if auto + if estimator_type == "auto": + if is_classifier(estimator): + self._resolved_estimator_type = "classifier" + elif is_regressor(estimator): + self._resolved_estimator_type = "regressor" + else: + raise ValueError( + "Could not automatically determine estimator_type. " + "Please specify 'classifier' or 'regressor'." + ) + else: + self._resolved_estimator_type = estimator_type - def fit(self, x: NDArray[Any], y: NDArray[Any]) -> "UnifiedConformalCV": - """Fit the conformal predictor. - - Parameters - ---------- - x : np.ndarray - Training features. - y : np.ndarray - Training targets. - - Returns - ------- - UnifiedConformalCV - Self. 
+ def _get_mondrian_param_classification(self, mondrian, y_calib): + if isinstance(mondrian, MondrianCategorizer) or callable(mondrian): + return mondrian + elif mondrian is True: + return y_calib + else: + return None - Raises - ------ - ValueError - If estimator_type is not 'classifier' or 'regressor'. + def _get_mondrian_param_regression(self, mondrian, y_calib): + if isinstance(mondrian, MondrianCategorizer) or callable(mondrian): + return mondrian + elif mondrian is True: + return y_calib + else: + return None + + def get_params(self, deep=True): + return { + "estimator": self.estimator, + "mondrian": self.mondrian, + "confidence_level": self.confidence_level, + "estimator_type": self.estimator_type, + "nonconformity": self.nonconformity, + "difficulty_estimator": self.difficulty_estimator, + "binning": self.binning, + "n_jobs": self.n_jobs, + "random_state": self.random_state, + **self.kwargs, + } + + def set_params(self, **params): + for key, value in params.items(): + setattr(self, key, value) + return self - """ - if self.estimator_type == "classifier": + def fit(self, X: npt.NDArray[Any], y: npt.NDArray[Any], **fit_params: Any) -> "UnifiedConformalCV": + if self._resolved_estimator_type == "classifier": self._conformal = WrapClassifier(clone(self.estimator)) - elif self.estimator_type == "regressor": + elif self._resolved_estimator_type == "regressor": self._conformal = WrapRegressor(clone(self.estimator)) else: raise ValueError("estimator_type must be 'classifier' or 'regressor'") - self._conformal.fit(x, y) + self._conformal.fit(X, y, **fit_params) self.fitted_ = True + self.models_ = [self._conformal] + return self def calibrate( self, - x_calib: NDArray[Any], - y_calib: NDArray[Any], + X_calib: npt.NDArray[Any], + y_calib: npt.NDArray[Any], **calib_params: Any, ) -> None: - """Calibrate the conformal predictor. - - Parameters - ---------- - x_calib : np.ndarray - Calibration features. - y_calib : np.ndarray - Calibration targets. 
- calib_params : dict - Additional calibration parameters. - - Raises - ------ - ValueError - If estimator_type is not 'classifier' or 'regressor'. - - """ - if self.estimator_type == "classifier": - mondrian = self.mondrian - if isinstance(mondrian, MondrianCategorizer) or callable(mondrian): - self._conformal.calibrate(x_calib, y_calib, mc=mondrian, **calib_params) - elif mondrian is True: - # Use class labels as Mondrian categories - self._conformal.calibrate(x_calib, y_calib, mc=y_calib, **calib_params) - else: - self._conformal.calibrate(x_calib, y_calib, **calib_params) - elif self.estimator_type == "regressor": - mondrian = self.mondrian - if isinstance(mondrian, MondrianCategorizer) or callable(mondrian): - mc = mondrian - else: - mc = None - self._conformal.calibrate(x_calib, y_calib, mc=mc, **calib_params) + if self._resolved_estimator_type == "classifier": + nc = self.nonconformity if self.nonconformity is not None else hinge + mc = self._get_mondrian_param_classification(self.mondrian, y_calib) + self._conformal.calibrate(X_calib, y_calib, nc=nc, mc=mc, **calib_params) + self.calibrated_ = True + + elif self._resolved_estimator_type == "regressor": + de = self.difficulty_estimator + mc = self._get_mondrian_param_regression(self.mondrian, y_calib) + self._conformal.calibrate(X_calib, y_calib, de=de, mc=mc, **calib_params) + self.calibrated_ = True else: raise ValueError("estimator_type must be 'classifier' or 'regressor'") - def predict(self, x: NDArray[Any]) -> NDArray[Any]: - """Predict using the conformal predictor. - - Parameters - ---------- - x : np.ndarray - Features to predict. - - Returns - ------- - np.ndarray - Predictions. - - """ - return self._conformal.predict(x) - - def predict_proba(self, x: NDArray[Any]) -> NDArray[Any]: - """Predict probabilities using the conformal predictor. - - Parameters - ---------- - x : np.ndarray - Features to predict. - - Returns - ------- - np.ndarray - Predicted probabilities. 
+ def predict(self, X: npt.NDArray[Any]) -> npt.NDArray[Any]: + return self._conformal.predict(X) - Raises - ------ - NotImplementedError - If called for a regressor. - - """ - if self.estimator_type != "classifier": + def predict_proba(self, X: npt.NDArray[Any]) -> npt.NDArray[Any]: + if self._resolved_estimator_type != "classifier": raise NotImplementedError("predict_proba is for classifiers only.") - conformal = cast("WrapClassifier", self._conformal) - return conformal.predict_proba(x) + return self._conformal.predict_proba(X) def predict_conformal_set( self, - x: NDArray[Any], + X: npt.NDArray[Any], confidence: float | None = None, - ) -> Any: - """Predict conformal sets. - - Parameters - ---------- - x : np.ndarray - Features to predict. - confidence : float, optional - Confidence level. - - Returns - ------- - Any - Conformal prediction sets. - - Raises - ------ - NotImplementedError - If called for a regressor. - + ) -> list[list[Any]]: """ - if self.estimator_type != "classifier": - raise NotImplementedError( - "predict_conformal_set is only for classification.", - ) - conf = confidence if confidence is not None else self.confidence_level - conformal = cast("WrapClassifier", self._conformal) - return conformal.predict_set(x, confidence=conf) - - def predict_p(self, x: NDArray[Any], **kwargs: Any) -> Any: - """Predict p-values. + Predict conformal sets for classification. Parameters ---------- - x : np.ndarray - Features to predict. - kwargs : dict - Additional parameters. + X : npt.NDArray[Any] + Input features. + confidence : float or None, optional + Confidence level for prediction set (default: self.confidence_level). Returns ------- - Any - p-values. - - Raises - ------ - NotImplementedError - If called for a regressor. - + list[list[Any]] + List of conformal sets (per sample), each a list of class labels. 
""" - if self.estimator_type != "classifier": + if self._resolved_estimator_type != "classifier": + raise NotImplementedError("predict_conformal_set is only for classification.") + if not self.fitted_: + raise RuntimeError("You must fit the model before calling predict_conformal_set.") + + # Default confidence to self.confidence_level if not provided + confidence = confidence if confidence is not None else self.confidence_level + + pred_set_bin = self._conformal.predict_set(X, confidence=confidence) + classes = self._conformal.learner.classes_ + return [list(np.array(classes)[row.astype(bool)]) for row in pred_set_bin] + + def predict_p(self, X: npt.NDArray[Any], **kwargs: Any) -> npt.NDArray[Any]: + if self._resolved_estimator_type != "classifier": raise NotImplementedError("predict_p is only for classification.") - return self._conformal.predict_p(x, **kwargs) - - def predict_int(self, x: NDArray[Any], confidence: float | None = None) -> Any: - """Predict intervals. + return self._conformal.predict_p(X, **kwargs) + def predict_int(self, X: npt.NDArray[Any], confidence: float | None = None) -> npt.NDArray[Any]: + """ + Predict confidence intervals for regression. + Parameters ---------- - x : np.ndarray - Features to predict. - confidence : float, optional - Confidence level. - + X : npt.NDArray[Any] + Input features. + confidence : float or None, optional + Confidence level for intervals (default: self.confidence_level). + Returns ------- - Any - Prediction intervals. - - Raises - ------ - NotImplementedError - If called for a classifier. - + npt.NDArray[Any] + Array of prediction intervals, shape (n_samples, 2). 
""" - if self.estimator_type != "regressor": - raise NotImplementedError("predict_interval is only for regression.") + if self._resolved_estimator_type != "regressor": + raise NotImplementedError("predict_int is only for regression.") conf = confidence if confidence is not None else self.confidence_level - conformal = cast("WrapRegressor", self._conformal) - return conformal.predict_int(x, confidence=conf) + return self._conformal.predict_int(X, confidence=conf) + class CrossConformalCV(BaseEstimator): - """Cross-conformal prediction using WrapClassifier/WrapRegressor. - - Handles Mondrian (class_cond) logic as described. - - Parameters - ---------- - estimator : sklearn-like estimator - Your favorite model (or pipeline). - n_folds : int, optional - Number of cross-validation folds. - confidence_level : float, optional - Confidence level for prediction sets/intervals. - mondrian : bool/callable/MondrianCategorizer, optional - Mondrian calibration/grouping. - nonconformity : callable, optional - Nonconformity function for classification (e.g., hinge, margin, or custom). - difficulty_estimator : callable or DifficultyEstimator, optional - For regression: difficulty estimator for normalized conformal prediction. - binning : int or callable, optional - For regression: number of bins or binning function for Mondrian calibration. - estimator_type : {'classifier', 'regressor'}, optional - What kind of model are we wrapping? - n_bins : int, optional - Number of bins for stratified splitting in regression. - n_jobs : int, optional - Parallelize all the things. - kwargs : dict - Extra toppings for crepes. 
- - """ - def __init__( self, - estimator: Any, + estimator: BaseEstimator, n_folds: int = 5, confidence_level: float = 0.9, - mondrian: Any = False, - nonconformity: Any | None = None, - binning: Any | None = None, - estimator_type: str = "classifier", + mondrian: bool | Callable | MondrianCategorizer = False, + nonconformity: Optional[Callable] = None, + binning: Optional[int | Callable] = None, + estimator_type: Literal["auto", "classifier", "regressor"] = "auto", n_bins: int = 10, - **kwargs: Any, - ) -> None: - """Initialize CrossConformalCV. + difficulty_estimator: Optional[Callable] = None, + random_state: Optional[int] = None, + **kwargs: Any + ): + """ + Cross-conformal prediction for both classifiers and regressors using WrapClassifier/WrapRegressor. Parameters ---------- - estimator : Any - The base estimator or pipeline to wrap. + estimator : BaseEstimator + The underlying model or pipeline to wrap. n_folds : int, optional Number of cross-validation folds (default: 5). confidence_level : float, optional Confidence level for prediction sets/intervals (default: 0.9). - mondrian : Any, optional - Mondrian calibration/grouping (default: False). - nonconformity : Any, optional - Nonconformity function for classification. - binning : Any, optional - Number of bins or binning function for Mondrian calibration (regression). - estimator_type : str, optional - Type of estimator: 'classifier' or 'regressor' (default: 'classifier'). + mondrian : bool, callable, or MondrianCategorizer, optional + Mondrian calibration/grouping. + nonconformity : callable, optional + Nonconformity function for classification (e.g., hinge, margin, or custom). + binning : int or callable, optional + For regression: number of bins or binning function for Mondrian calibration. + estimator_type : Literal["auto", "classifier", "regressor"], optional + Type of estimator. If "auto", will infer using sklearn's is_classifier/is_regressor. 
n_bins : int, optional Number of bins for stratified splitting in regression (default: 10). - **kwargs : Any + difficulty_estimator : callable, optional + For regression: difficulty estimator for normalized conformal prediction. + random_state : int or None, optional + Random state for reproducibility. + **kwargs : dict Additional keyword arguments for crepes. - """ self.estimator = estimator self.n_folds = n_folds @@ -389,176 +295,193 @@ def __init__( self.binning = binning self.estimator_type = estimator_type self.n_bins = n_bins + self.difficulty_estimator = difficulty_estimator self.kwargs = kwargs + self.random_state = check_random_state(random_state) if random_state is not None else None + self.fitted_ = False + self.calibrated_ = False + + # Determine estimator_type if auto + if estimator_type == "auto": + if is_classifier(estimator): + self._resolved_estimator_type = "classifier" + elif is_regressor(estimator): + self._resolved_estimator_type = "regressor" + else: + raise ValueError( + "Could not automatically determine estimator_type. " + "Please specify 'classifier' or 'regressor'." + ) + else: + self._resolved_estimator_type = estimator_type + + def get_params(self, deep=True): + return { + "estimator": self.estimator, + "n_folds": self.n_folds, + "confidence_level": self.confidence_level, + "mondrian": self.mondrian, + "nonconformity": self.nonconformity, + "binning": self.binning, + "estimator_type": self.estimator_type, + "n_bins": self.n_bins, + "difficulty_estimator": self.difficulty_estimator, + "random_state": self.random_state, + **self.kwargs, + } + + def set_params(self, **params): + for key, value in params.items(): + setattr(self, key, value) + return self - def fit( - self, - x: NDArray[Any], - y: NDArray[Any], - ) -> "CrossConformalCV": - """Fit the cross-conformal predictor. - - Parameters - ---------- - x : np.ndarray - Training features. - y : np.ndarray - Training targets. - - Returns - ------- - CrossConformalCV - Self. 
- - Raises - ------ - ValueError - If estimator_type is not 'classifier' or 'regressor'. - - """ - x = np.array(x) - y = np.array(y) + def fit(self, X: npt.NDArray[Any], y: npt.NDArray[Any], **fit_params: Any) -> "CrossConformalCV": + X = np.asarray(X) + y = np.asarray(y) self.models_ = [] - if self.estimator_type == "classifier": - splitter = StratifiedKFold( - n_splits=self.n_folds, - shuffle=True, - random_state=42, - ) + self.mondrian_categorizers_ = [] # Store categorizers for each fold + self.calib_bins_ = [] # Store calibration bins for each fold + + if self._resolved_estimator_type == "classifier": + splitter = StratifiedKFold(n_splits=self.n_folds, shuffle=True, random_state=self.random_state) y_split = y - elif self.estimator_type == "regressor": - splitter = KFold(n_splits=self.n_folds, shuffle=True, random_state=42) - y_split = bin_targets(y, n_bins=self.n_bins) + elif self._resolved_estimator_type == "regressor": + splitter = KFold(n_splits=self.n_folds, shuffle=True, random_state=self.random_state) + y_split = _bin_targets(y, n_bins=self.n_bins) else: raise ValueError("estimator_type must be 'classifier' or 'regressor'") - for train_idx, calib_idx in splitter.split(x, y_split): - if self.estimator_type == "classifier": + + for train_idx, calib_idx in splitter.split(X, y_split): + if self._resolved_estimator_type == "classifier": model = WrapClassifier(clone(self.estimator)) - model.fit(x[train_idx], y[train_idx]) - mondrian = self.mondrian - if isinstance(mondrian, MondrianCategorizer) or callable(mondrian): - model.calibrate(x[calib_idx], y[calib_idx], mc=mondrian) - elif mondrian is True: - model.calibrate(x[calib_idx], y[calib_idx], mc=y[calib_idx]) + model.fit(X[train_idx], y[train_idx]) + if self.mondrian: + model.calibrate(X[calib_idx], y[calib_idx], nc=self.nonconformity or hinge, class_cond=True) else: - model.calibrate(x[calib_idx], y[calib_idx]) + model.calibrate(X[calib_idx], y[calib_idx], nc=self.nonconformity or hinge, class_cond=False) + 
self.mondrian_categorizers_.append(None) + self.calib_bins_.append(None) else: model = WrapRegressor(clone(self.estimator)) - model.fit(x[train_idx], y[train_idx]) - mondrian = self.mondrian - if isinstance(mondrian, MondrianCategorizer) or callable(mondrian): - mc = mondrian + model.fit(X[train_idx], y[train_idx]) + de = None + if self.difficulty_estimator is not None: + de = DifficultyEstimator() + de.fit(X[calib_idx], y=y[calib_idx]) + if self.mondrian: + if self.binning is not None: + mc = MondrianCategorizer() + mc.fit(X[calib_idx], f=lambda X: y[calib_idx], no_bins=self.binning) + else: + mc = MondrianCategorizer() + mc.fit(X[calib_idx], f=lambda X: y[calib_idx]) + model.calibrate(X[calib_idx], y[calib_idx], de=de, mc=mc) + self.mondrian_categorizers_.append(mc) + self.calib_bins_.append(None) else: - mc = None - if self.binning is not None: - mc_obj = MondrianCategorizer() - calib_idx_val = calib_idx - - def _bin_func( - _: Any, - calib_idx_val: Any = calib_idx_val, - ) -> Any: - return y[calib_idx_val] - - mc_obj.fit(x[calib_idx], f=_bin_func, no_bins=self.binning) - mc = mc_obj - model.calibrate(x[calib_idx], y[calib_idx], mc=mc) + model.calibrate(X[calib_idx], y[calib_idx], de=de) + self.mondrian_categorizers_.append(None) + self.calib_bins_.append(None) self.models_.append(model) - return self - - def predict(self, x: NDArray[Any]) -> NDArray[Any]: - """Predict using the cross-conformal predictor. - - Parameters - ---------- - x : np.ndarray - Features to predict. + self.calibrated_ = True + self.fitted_ = True - Returns - ------- - np.ndarray - Predictions (majority vote). 
+ return self - """ - result = np.array([m.predict(x) for m in self.models_]) - result = np.asarray(result) - if result.shape == (): - result = np.full((len(self.models_), len(x)), result) - if result.ndim == 1 and len(x) == 1: - result = result[:, np.newaxis] + def predict(self, X: npt.NDArray[Any]) -> npt.NDArray[Any]: + result = np.array([m.predict(X) for m in self.models_]) + if self._resolved_estimator_type == "regressor": + return np.mean(result, axis=0) pred_mode = mode(result, axis=0, keepdims=False) return np.ravel(pred_mode.mode) - def predict_proba(self, x: NDArray[Any]) -> NDArray[Any]: - """Predict probabilities using the cross-conformal predictor. - - Parameters - ---------- - x : np.ndarray - Features to predict. - - Returns - ------- - np.ndarray - Predicted probabilities (averaged). - - Raises - ------ - NotImplementedError - If called for a regressor. - - """ - if self.estimator_type != "classifier": - raise NotImplementedError("predict_proba is for classifiers only.") - binary_class_dim = 2 - result = np.array([m.predict_proba(x) for m in self.models_]) - if ( - result.ndim == binary_class_dim - and result.shape[1] == binary_class_dim - and len(x) == 1 - ): - result = result[:, np.newaxis, :] + def predict_proba(self, X: npt.NDArray[Any]) -> npt.NDArray[Any]: + result = np.array([m.predict_proba(X) for m in self.models_]) proba = np.atleast_2d(np.mean(result, axis=0)) - if proba.shape[0] != len(x): - proba = np.full((len(x), proba.shape[1]), np.nan) return proba def predict_conformal_set( self, - x: NDArray[Any], + X: npt.NDArray[Any], confidence: float | None = None, - ) -> list[list[Any]]: - """Predict conformal sets using the cross-conformal predictor. + ) -> List[List[Union[int]]]: + """ + Predict conformal sets for classification by union across folds. Parameters ---------- - x : np.ndarray - Features to predict. - confidence : float, optional - Confidence level. + X : npt.NDArray[Any] + Input features. 
+ confidence : float or None, optional + Confidence level for prediction set (default: self.confidence_level). Returns ------- - list[list[Any]] - Union of conformal sets from all folds. - - Raises - ------ - NotImplementedError - If called for a regressor. - + List[List[Union[int]]] + List of conformal sets (per sample), each containing the class labels + that might be the true class with the specified confidence level. + For example, for a binary classifier with classes [0, 1], might return + [[0, 1], [1], [0, 1]] for three samples. """ - if self.estimator_type != "classifier": - raise NotImplementedError( - "predict_conformal_set is only for classification.", - ) - conf = confidence if confidence is not None else self.confidence_level - sets = [m.predict_set(x, confidence=conf) for m in self.models_] - n = len(x) - union_sets = [] + if self._resolved_estimator_type != "classifier": + raise NotImplementedError("predict_conformal_set is only for classification.") + if not self.fitted_: + raise RuntimeError("You must fit the model before calling predict_conformal_set.") + + # Default confidence to self.confidence_level if not provided + confidence = confidence if confidence is not None else self.confidence_level + + sets = [] + for m in self.models_: + pred_set_bin = m.predict_set(X, confidence=confidence) + classes = getattr(m.learner, "classes_", None) + if classes is None: + raise AttributeError("Underlying estimator does not expose 'classes_'.") + sets.append([list(np.array(classes)[row.astype(bool)]) for row in pred_set_bin]) + + n = len(X) + union_sets: list[list[Any]] = [] for i in range(n): union = set() for s in sets: union.update(s[i]) union_sets.append(list(union)) return union_sets + + def predict_int(self, X: npt.NDArray[Any], confidence: float | None = None) -> npt.NDArray[Any]: + """ + Predict confidence intervals for regression. + + Parameters + ---------- + X : npt.NDArray[Any] + Input features. 
+ confidence : float or None, optional + Confidence level for intervals (default: self.confidence_level). + + Returns + ------- + npt.NDArray[Any] + Array of prediction intervals, shape (n_samples, 2). + """ + if self._resolved_estimator_type != "regressor": + raise NotImplementedError("predict_int is only for regression.") + conf = confidence if confidence is not None else self.confidence_level + intervals = [] + for i, model in enumerate(self.models_): + interval = model.predict_int(X, confidence=conf) + intervals.append(np.array(interval)) + # Return average lower/upper bounds across folds + intervals = np.array(intervals) # shape: (n_folds, n_samples, 2) + avg_intervals = np.nanmean(intervals, axis=0) + return avg_intervals + + + def predict_p(self, X: npt.NDArray[Any]) -> npt.NDArray[Any]: + """Return averaged conformal p-values across folds (classification only).""" + if self._resolved_estimator_type != "classifier": + raise NotImplementedError("predict_p is only for classification.") + # Each model in self.models_ has predict_p + pvals = np.array([m.predict_p(X) for m in self.models_]) # shape: (n_folds, n_samples, n_classes) + avg_pvals = np.mean(pvals, axis=0) # shape: (n_samples, n_classes) + return avg_pvals \ No newline at end of file diff --git a/notebooks/advanced_04_conformal_prediction.ipynb b/notebooks/advanced_04_conformal_prediction.ipynb index bba7b9d3..ba2203a4 100644 --- a/notebooks/advanced_04_conformal_prediction.ipynb +++ b/notebooks/advanced_04_conformal_prediction.ipynb @@ -126,7 +126,7 @@ " \"\"\"\n", " eps = 1e-12\n", " entropy = -probs * np.log(probs + eps) - (1 - probs) * np.log(1 - probs + eps)\n", - " return np.mean(entropy)" + " return np.mean(entropy)\n" ] }, { @@ -161,15 +161,12 @@ "# Featurization pipeline (NaN-safe)\n", "error_filter = ErrorFilter(filter_everything=True)\n", "error_replacer = FilterReinserter.from_error_filter(error_filter, fill_value=np.nan)\n", - "featurizer = Pipeline(\n", - " [\n", - " (\"smi2mol\", 
SmilesToMol()),\n", - " (\"error_filter\", error_filter),\n", - " (\"morgan\", MolToMorganFP(radius=2, n_bits=256, return_as=\"dense\")),\n", - " (\"error_replacer\", PostPredictionWrapper(error_replacer)),\n", - " ],\n", - " n_jobs=1,\n", - ")\n", + "featurizer = Pipeline([\n", + " (\"smi2mol\", SmilesToMol()),\n", + " (\"error_filter\", error_filter),\n", + " (\"morgan\", MolToMorganFP(radius=2, n_bits=256, return_as=\"dense\")),\n", + " (\"error_replacer\", PostPredictionWrapper(error_replacer)),\n", + "], n_jobs=1)\n", "X_feat = featurizer.transform(smiles)\n", "\n", "print(f\"Shape of X={X_feat.shape}, y_class={y_class.shape}, y_reg={y_reg.shape}\")\n", @@ -177,10 +174,7 @@ "# Generate indices for a single split\n", "indices = np.arange(len(y_class))\n", "train_idx, test_idx = train_test_split(\n", - " indices,\n", - " test_size=0.3,\n", - " random_state=42,\n", - " stratify=y_class,\n", + " indices, test_size=0.3, random_state=42, stratify=y_class,\n", ")\n", "\n", "# Use these indices for all splits\n", @@ -213,16 +207,8 @@ " \"ensemble_rf\": RandomForestClassifier(n_estimators=100, random_state=42),\n", "}\n", "metrics_list = [\n", - " \"NLL\",\n", - " \"ECE\",\n", - " \"Brier\",\n", - " \"Uncertainty Error Correlation\",\n", - " \"Sharpness\",\n", - " \"Balanced Accuracy\",\n", - " \"AUROC\",\n", - " \"AUPRC\",\n", - " \"F1 Score\",\n", - " \"MCC\",\n", + " \"NLL\", \"ECE\", \"Brier\", \"Uncertainty Error Correlation\", \"Sharpness\",\n", + " \"Balanced Accuracy\", \"AUROC\", \"AUPRC\", \"F1 Score\", \"MCC\",\n", "]\n", "results = []\n", "results_cp = []\n", @@ -246,13 +232,10 @@ "\n", " # --- Conformal Prediction (CrossConformalCV) ---\n", " rf = RandomForestClassifier(n_estimators=100, random_state=42)\n", - " rf_pipeline = Pipeline(\n", - " [\n", - " (\"featurizer\", featurizer),\n", - " (\"rf\", rf),\n", - " ],\n", - " n_jobs=1,\n", - " )\n", + " rf_pipeline = Pipeline([\n", + " (\"featurizer\", featurizer),\n", + " (\"rf\", rf),\n", + " ], 
n_jobs=1)\n", " cc_clf = CrossConformalCV(\n", " estimator=rf_pipeline,\n", " n_folds=5,\n", @@ -261,12 +244,11 @@ " )\n", " cc_clf.fit(smiles_tr, y_tr)\n", " # Average ensemble probabilities for the validation fold\n", - " probs_cp_ensemble = np.mean(\n", - " [m.predict_p(smiles_val) for m in cc_clf.models_], axis=0\n", - " )\n", - " probs_cp_ensemble_raw = np.mean(\n", - " [m.predict_proba(smiles_val) for m in cc_clf.models_], axis=0\n", - " )\n", + " probs_cp_ensemble = np.mean([m.predict_p(smiles_val) for m in cc_clf.models_],\n", + " axis=0)\n", + " probs_cp_ensemble_raw = np.mean([m.predict_proba(smiles_val) for m\n", + " in cc_clf.models_],\n", + " axis=0)\n", " p0 = probs_cp_ensemble[:, 0]\n", " p1 = probs_cp_ensemble[:, 1]\n", " p1_norm = p1 / (p0 + p1 + 1e-12)\n", @@ -274,14 +256,12 @@ " oof_preds_cp_raw[val_idx] = probs_cp_ensemble_raw[:, 1]\n", "\n", "# Create a DataFrame to compare raw and normalized conformal probabilities\n", - "df_oof_compare = pd.DataFrame(\n", - " {\n", - " \"y_true\": y_train,\n", - " \"StandardModel\": oof_preds,\n", - " \"ConformalRaw\": oof_preds_cp_raw,\n", - " \"ConformalNorm\": oof_preds_cp_norm,\n", - " }\n", - ")\n", + "df_oof_compare = pd.DataFrame({\n", + " \"y_true\": y_train,\n", + " \"StandardModel\": oof_preds,\n", + " \"ConformalRaw\": oof_preds_cp_raw,\n", + " \"ConformalNorm\": oof_preds_cp_norm,\n", + "})\n", "\n", "# Compute metrics for out-of-fold predictions (standard model)\n", "mean_pred = (oof_preds >= THRESHOLD).astype(int)\n", @@ -354,6 +334,14 @@ "output_type": "display_data" } ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "629b1099", + "metadata": {}, + "outputs": [], "source": [ "import numpy as np\n", "\n", @@ -365,9 +353,8 @@ "bins = np.linspace(0, 1, 21)\n", "\n", "\n", - "def plot_percentage_line(\n", - " probs: np.ndarray, bins: np.ndarray, label: str, color: str\n", - ") -> None:\n", + "def plot_percentage_line(probs: np.ndarray, bins: np.ndarray, label: 
str,\n", + " color: str) -> None:\n", " \"\"\"Plot percentage of predictions in each probability bin.\"\"\"\n", " counts, bin_edges = np.histogram(probs, bins=bins)\n", " percent = 100 * counts / len(probs)\n", @@ -632,16 +619,14 @@ "p1 = p_vals[:, 1]\n", "p1_norm = p1 / (p0 + p1 + 1e-12)\n", "\n", - "df_cp_class = pd.DataFrame(\n", - " {\n", - " \"SMILES\": smiles_test,\n", - " \"p0\": p0,\n", - " \"p1\": p1,\n", - " \"p1_norm\": p1_norm,\n", - " \"conformal_set\": conf_pred_sets,\n", - " \"true_label\": y_test,\n", - " }\n", - ")\n", + "df_cp_class = pd.DataFrame({\n", + " \"SMILES\": smiles_test,\n", + " \"p0\": p0,\n", + " \"p1\": p1,\n", + " \"p1_norm\": p1_norm,\n", + " \"conformal_set\": conf_pred_sets,\n", + " \"true_label\": y_test,\n", + "})\n", "display(df_cp_class.head())\n", "\n", "\n", @@ -671,7 +656,7 @@ "print(\"Brier:\", brier_score_loss(y_test, p1_norm))\n", "print(\"AUROC:\", roc_auc_score(y_test, p1_norm))\n", "print(\"F1:\", f1_score(y_test, (p1_norm >= THRESHOLD).astype(int)))\n", - "print(\"MCC:\", matthews_corrcoef(y_test, (p1_norm >= THRESHOLD).astype(int)))" + "print(\"MCC:\", matthews_corrcoef(y_test, (p1_norm >= THRESHOLD).astype(int)))\n" ] }, { @@ -889,12 +874,9 @@ "\n", "# --- Wrap regressor with CrossConformalCV ---\n", "rf_reg = RandomForestRegressor(n_estimators=100, random_state=42)\n", - "rf_reg_pipeline = Pipeline(\n", - " [\n", - " (\"rf\", rf_reg),\n", - " ],\n", - " n_jobs=1,\n", - ")\n", + "rf_reg_pipeline = Pipeline([\n", + " (\"rf\", rf_reg),\n", + "], n_jobs=1)\n", "\n", "cc_reg = CrossConformalCV(\n", " estimator=rf_reg_pipeline,\n", @@ -911,15 +893,13 @@ "upper = intervals_mean[:, 1]\n", "point_pred = np.mean([m.predict(X_test_reg) for m in cc_reg.models_], axis=0)\n", "\n", - "df_cp_reg = pd.DataFrame(\n", - " {\n", - " \"pubchem_smiles\": smiles_test_reg,\n", - " \"pIC50\": y_test_reg,\n", - " \"pred_lower\": lower,\n", - " \"pred_upper\": upper,\n", - " \"point_pred\": point_pred,\n", - " }\n", - ")\n", + "df_cp_reg 
= pd.DataFrame({\n", + " \"pubchem_smiles\": smiles_test_reg,\n", + " \"pIC50\": y_test_reg,\n", + " \"pred_lower\": lower,\n", + " \"pred_upper\": upper,\n", + " \"point_pred\": point_pred,\n", + "})\n", "display(df_cp_reg.head())\n", "\n", "# --- Regression: Evaluate coverage and interval width ---\n", @@ -929,7 +909,7 @@ "\n", "print(f\"Interval coverage: {coverage_reg:.3f}\")\n", "print(f\"Average interval width: {avg_width:.3f}\")\n", - "print(f\"MAE (point prediction): {mae:.3f}\")" + "print(f\"MAE (point prediction): {mae:.3f}\")\n" ] } ], @@ -949,7 +929,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/tests/test_experimental/test_uncertainty/test_conformal.py b/tests/test_experimental/test_uncertainty/test_conformal.py index 2d7734c2..6ed0695c 100644 --- a/tests/test_experimental/test_uncertainty/test_conformal.py +++ b/tests/test_experimental/test_uncertainty/test_conformal.py @@ -1,92 +1,151 @@ -"""Unit tests for conformal prediction wrappers.""" +"""Unit tests for conformal prediction wrappers using real datasets.""" import unittest - -from sklearn.datasets import make_classification, make_regression +import pandas as pd +import numpy as np +from rdkit import Chem from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor from sklearn.model_selection import train_test_split - +from sklearn.pipeline import Pipeline from molpipeline.experimental.uncertainty.conformal import ( CrossConformalCV, UnifiedConformalCV, ) +from molpipeline.any2mol import SmilesToMol +from molpipeline.mol2any import MolToMorganFP + + +class TestConformalCVWithRealData(unittest.TestCase): + """Unit tests for UnifiedConformalCV and CrossConformalCV using real datasets.""" + + @classmethod + def setUpClass(cls) -> None: + """Set up the test by loading the datasets.""" + # Paths to the datasets + logd_path = "tests/test_data/molecule_net_logd.tsv.gz" + 
bbbp_path = "tests/test_data/molecule_net_bbbp.tsv.gz" + + # Load the datasets directly from the .gz files + cls.logd_data = pd.read_csv(logd_path, compression="gzip", sep="\t", nrows=100) + cls.bbbp_data = pd.read_csv(bbbp_path, compression="gzip", sep="\t", nrows=100) + + # Initialize the pipeline + smi2mol = SmilesToMol() + mol2morgan = MolToMorganFP(radius=2, n_bits=2048) + cls.pipeline = Pipeline( + [ + ("smi2mol", smi2mol), + ("morgan", mol2morgan), + ] + ) + + def featurize_smiles(self, smiles: pd.Series, labels: pd.Series) -> tuple[np.ndarray, np.ndarray]: + """Featurize SMILES strings into Morgan fingerprints and filter corresponding labels.""" + # Validate SMILES strings + valid_smiles = [] + valid_labels = [] + for smi, label in zip(smiles, labels): + mol = Chem.MolFromSmiles(smi) + if mol is not None: + valid_smiles.append(smi) + valid_labels.append(label) + else: + print(f"Warning: Invalid SMILES string skipped: {smi}") + + # Transform valid SMILES to fingerprints + try: + matrix = self.pipeline.fit_transform(valid_smiles) + return matrix.toarray(), np.array(valid_labels) # Convert sparse matrix to dense array + except Exception as e: + print(f"Error during featurization: {e}") + raise + def test_unified_conformal_regressor_logd(self) -> None: + """Test UnifiedConformalCV with a regressor on the logd dataset.""" + x, y = self.featurize_smiles(self.logd_data["smiles"], self.logd_data["exp"]) + + # Split into train and calibration sets + x_train, x_calib, y_train, y_calib = train_test_split( + x, y, test_size=0.2, random_state=42 + ) + + # Initialize and test the UnifiedConformalCV regressor + reg = RandomForestRegressor(n_estimators=5, random_state=42) + cp = UnifiedConformalCV(reg, estimator_type="auto") + cp.fit(x_train, y_train) + cp.calibrate(x_calib, y_calib) + + # Prediction intervals + intervals = cp.predict_int(x_calib) + + # Assertions + self.assertEqual(intervals.shape[0], len(y_calib)) + self.assertEqual(intervals.shape[1], 2) # Lower and 
upper bounds + self.assertTrue(np.all(intervals[:, 0] <= intervals[:, 1])) # Valid intervals -class TestConformalCV(unittest.TestCase): - """Unit tests for UnifiedConformalCV and CrossConformalCV wrappers.""" + def test_unified_conformal_classifier_bbbp(self) -> None: + """Test UnifiedConformalCV with a classifier on the bbbp dataset.""" + x, y = self.featurize_smiles(self.bbbp_data["smiles"], self.bbbp_data["p_np"]) - def test_unified_conformal_classifier(self) -> None: - """Test UnifiedConformalCV with a classifier.""" - x, y = make_classification(n_samples=100, n_features=10, random_state=42) + # Split into train and calibration sets x_train, x_calib, y_train, y_calib = train_test_split( - x, - y, - test_size=0.2, - random_state=42, + x, y, test_size=0.2, random_state=42 ) - clf = RandomForestClassifier(random_state=42) - cp = UnifiedConformalCV(clf, estimator_type="classifier") + + # Initialize and test the UnifiedConformalCV classifier + clf = RandomForestClassifier(n_estimators=5, random_state=42) + cp = UnifiedConformalCV(clf, estimator_type="auto") cp.fit(x_train, y_train) cp.calibrate(x_calib, y_calib) + + # Predictions preds = cp.predict(x_calib) probs = cp.predict_proba(x_calib) sets = cp.predict_conformal_set(x_calib) + + # Assertions self.assertEqual(len(preds), len(y_calib)) self.assertEqual(probs.shape[0], len(y_calib)) self.assertEqual(len(sets), len(y_calib)) + self.assertTrue(all(len(s) > 0 for s in sets)) # Ensure non-empty sets - def test_unified_conformal_regressor(self) -> None: - """Test UnifiedConformalCV with a regressor.""" - x, y, _ = make_regression( - n_samples=100, - n_features=10, - random_state=42, - coef=True, - ) - x_train, x_calib, y_train, y_calib = train_test_split( - x, - y, - test_size=0.2, - random_state=42, - ) - reg = RandomForestRegressor(random_state=42) - cp = UnifiedConformalCV(reg, estimator_type="regressor") - cp.fit(x_train, y_train) - cp.calibrate(x_calib, y_calib) - intervals = cp.predict_int(x_calib) - 
self.assertEqual(intervals.shape[0], len(y_calib)) - self.assertEqual(intervals.shape[1], 2) + def test_cross_conformal_regressor_logd(self) -> None: + """Test CrossConformalCV with a regressor on the logd dataset.""" + x, y = self.featurize_smiles(self.logd_data["smiles"], self.logd_data["exp"]) - def test_cross_conformal_classifier(self) -> None: - """Test CrossConformalCV with a classifier.""" - x, y = make_classification(n_samples=100, n_features=10, random_state=42) - clf = RandomForestClassifier(random_state=42) - ccp = CrossConformalCV(clf, estimator_type="classifier", n_folds=3) + # Initialize and test the CrossConformalCV regressor + reg = RandomForestRegressor(n_estimators=5, random_state=42) + ccp = CrossConformalCV(reg, estimator_type="auto", n_folds=3) ccp.fit(x, y) + + # Prediction intervals + intervals = ccp.predict_int(x) + + # Assertions + self.assertEqual(intervals.shape[0], len(y)) + self.assertEqual(intervals.shape[1], 2) # Lower and upper bounds + self.assertTrue(np.all(intervals[:, 0] <= intervals[:, 1])) # Valid intervals + + def test_cross_conformal_classifier_bbbp(self) -> None: + """Test CrossConformalCV with a classifier on the bbbp dataset.""" + x, y = self.featurize_smiles(self.bbbp_data["smiles"], self.bbbp_data["p_np"]) + + # Initialize and test the CrossConformalCV classifier + clf = RandomForestClassifier(n_estimators=5, random_state=42) + ccp = CrossConformalCV(clf, estimator_type="auto", n_folds=3) + ccp.fit(x, y) + + # Predictions preds = ccp.predict(x) probs = ccp.predict_proba(x) sets = ccp.predict_conformal_set(x) + + # Assertions self.assertEqual(len(preds), len(y)) self.assertEqual(probs.shape[0], len(y)) self.assertEqual(len(sets), len(y)) - - def test_cross_conformal_regressor(self) -> None: - """Test CrossConformalCV with a regressor.""" - x, y, _ = make_regression( - n_samples=100, - n_features=10, - random_state=42, - coef=True, - ) - reg = RandomForestRegressor(random_state=42) - ccp = CrossConformalCV(reg, 
estimator_type="regressor", n_folds=3) - ccp.fit(x, y) - # Each model should produce intervals for all samples - for model in ccp.models_: - intervals = model.predict_int(x) - self.assertEqual(intervals.shape[0], len(y)) - self.assertEqual(intervals.shape[1], 2) + self.assertTrue(all(len(s) > 0 for s in sets)) # Ensure non-empty sets if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file From f1cf6e51f7af00e5148db4216231bad46d7d6fba Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Mon, 14 Jul 2025 23:39:09 +0200 Subject: [PATCH 13/20] addressed comments and made mondrian and split modular and wrote extensive tests --- .../experimental/uncertainty/__init__.py | 8 +- .../experimental/uncertainty/conformal.py | 1162 ++++++++++++----- .../test_uncertainty/test_conformal.py | 532 ++++++-- tests/test_pipeline.py | 14 +- 4 files changed, 1271 insertions(+), 445 deletions(-) diff --git a/molpipeline/experimental/uncertainty/__init__.py b/molpipeline/experimental/uncertainty/__init__.py index ffbbd9c0..27e28194 100644 --- a/molpipeline/experimental/uncertainty/__init__.py +++ b/molpipeline/experimental/uncertainty/__init__.py @@ -1,11 +1,11 @@ """Wrappers for conformal prediction in MolPipeline. -Provides CrossConformalCV and UnifiedConformalCV for robust uncertainty quantification. +Provides ConformalPredictor and CrossConformalPredictor for robust uncertainty quantification. 
""" from molpipeline.experimental.uncertainty.conformal import ( - CrossConformalCV, - UnifiedConformalCV, + ConformalPredictor, + CrossConformalPredictor, ) -__all__ = ["CrossConformalCV", "UnifiedConformalCV"] +__all__ = ["ConformalPredictor", "CrossConformalPredictor"] diff --git a/molpipeline/experimental/uncertainty/conformal.py b/molpipeline/experimental/uncertainty/conformal.py index 7e65ce1b..dc0546f3 100644 --- a/molpipeline/experimental/uncertainty/conformal.py +++ b/molpipeline/experimental/uncertainty/conformal.py @@ -1,25 +1,23 @@ -""" -Conformal prediction wrappers for classification and regression models. +"""Conformal prediction wrappers for classification and regression using crepes. -This module provides unified implementations of conformal prediction for -uncertainty quantification with both classification and regression models. +Provides unified and cross-conformal prediction with Mondrian and nonconformity options. """ -from crepes import WrapClassifier, WrapRegressor -from sklearn.base import is_classifier, is_regressor -from sklearn.model_selection import StratifiedKFold, KFold -from crepes.extras import hinge, margin, MondrianCategorizer, DifficultyEstimator +from collections.abc import Callable +from typing import Any, Literal + import numpy as np import numpy.typing as npt -from typing import Any, Callable, Optional, Literal, List, Union -from sklearn.base import BaseEstimator, clone -from sklearn.utils import check_random_state +from crepes import WrapClassifier, WrapRegressor +from crepes.extras import DifficultyEstimator, MondrianCategorizer from scipy.stats import mode +from sklearn.base import BaseEstimator, clone, is_classifier, is_regressor +from sklearn.model_selection import KFold, StratifiedKFold +from sklearn.utils import check_random_state def _bin_targets(y: npt.NDArray[Any], n_bins: int = 10) -> npt.NDArray[np.int_]: - """ - Bin continuous targets for stratified splitting in regression. 
+ """Bin continuous targets for stratified splitting in regression. Parameters ---------- @@ -32,6 +30,7 @@ def _bin_targets(y: npt.NDArray[Any], n_bins: int = 10) -> npt.NDArray[np.int_]: ------- npt.NDArray[np.int_] Binned targets. + """ y = np.asarray(y) bins = np.linspace(np.min(y), np.max(y), n_bins + 1) @@ -40,252 +39,529 @@ def _bin_targets(y: npt.NDArray[Any], n_bins: int = 10) -> npt.NDArray[np.int_]: return y_binned -class UnifiedConformalCV(BaseEstimator): +def _detect_estimator_type( + estimator: BaseEstimator, +) -> Literal["classifier", "regressor"]: + """Automatically detect whether an estimator is a classifier or regressor. + + Parameters + ---------- + estimator : BaseEstimator + The estimator to check. + + Returns + ------- + Literal["classifier", "regressor"] + The detected estimator type. + + Raises + ------ + ValueError + If the estimator type cannot be determined. + + """ + if is_classifier(estimator): + return "classifier" + if is_regressor(estimator): + return "regressor" + raise ValueError( + f"Could not determine if {type(estimator).__name__} is a " + "classifier or regressor. Please specify estimator_type explicitly.", + ) + + +def _get_mondrian_param_classification( + mondrian: MondrianCategorizer | Callable[..., Any] | bool, + y_calib: npt.NDArray[Any], +) -> MondrianCategorizer | Callable[..., Any] | npt.NDArray[Any] | None: + """Get mondrian parameter for classification calibration. + + Returns + ------- + MondrianCategorizer | Callable[..., Any] | npt.NDArray[Any] | None + Mondrian parameter for classification calibration. + + """ + if isinstance(mondrian, MondrianCategorizer) or callable(mondrian): + return mondrian + if mondrian is True: + return y_calib + return None + + +def _get_mondrian_param_regression( + mondrian: MondrianCategorizer | Callable[..., Any] | bool, +) -> MondrianCategorizer | None: + """Get mondrian parameter for regression calibration. 
+ + Returns + ------- + MondrianCategorizer | None + Mondrian parameter for regression calibration. + + """ + if isinstance(mondrian, MondrianCategorizer) or callable(mondrian): + return mondrian + return None + + +class ConformalPredictor(BaseEstimator): # pylint: disable=too-many-instance-attributes + """Conformal prediction wrapper for both classifiers and regressors. + + Uses crepes under the hood. + """ + def __init__( self, estimator: BaseEstimator, - mondrian: bool | Callable | MondrianCategorizer = False, + *, + mondrian: MondrianCategorizer | Callable[..., Any] | bool = False, confidence_level: float = 0.9, - estimator_type: Literal["auto", "classifier", "regressor"] = "auto", - nonconformity: Optional[Callable] = None, - difficulty_estimator: Optional[Callable] = None, - binning: Optional[int | Callable] = None, + estimator_type: Literal["classifier", "regressor", "auto"] = "auto", + nonconformity: ( + Callable[ + [npt.NDArray[Any], npt.NDArray[Any] | None, npt.NDArray[Any] | None], + npt.NDArray[Any], + ] + | None + ) = None, + difficulty_estimator: DifficultyEstimator | None = None, + binning: int | MondrianCategorizer | None = None, n_jobs: int = 1, - random_state: Optional[int] = None, - **kwargs: Any - ): - """ - Unified conformal prediction wrapper for both classifiers and regressors. + **kwargs: Any, + ) -> None: + """Initialize ConformalPredictor. Parameters ---------- estimator : BaseEstimator - The underlying model or pipeline to wrap. - mondrian : bool, callable, or MondrianCategorizer, optional - If True, use class-conditional (Mondrian) calibration. If callable or MondrianCategorizer, use as custom group function/categorizer. + The base estimator or pipeline to wrap. + mondrian : MondrianCategorizer | Callable[..., Any] | bool, optional + Mondrian calibration/grouping (default: False). confidence_level : float, optional Confidence level for prediction sets/intervals (default: 0.9). 
- estimator_type : Literal["auto", "classifier", "regressor"], optional - Type of estimator. If "auto", will infer using sklearn's is_classifier/is_regressor. - nonconformity : callable, optional - Nonconformity function for classification (e.g., hinge, margin, or custom). - difficulty_estimator : callable, optional - For regression: difficulty estimator for normalized conformal prediction. - binning : int or callable, optional - For regression: number of bins or binning function for Mondrian calibration. + estimator_type : Literal["classifier", "regressor", "auto"], optional + Type of estimator: 'classifier', 'regressor', or 'auto' to + detect automatically (default: 'auto'). + nonconformity : Callable, optional + Nonconformity function for classification that takes (X_prob, classes, y) + and returns non-conformity scores. Examples: hinge, margin from + crepes.extras. + difficulty_estimator : DifficultyEstimator | None, optional + Difficulty estimator for normalized conformal prediction (regression). + Should be a fitted DifficultyEstimator from crepes.extras. + binning : int | MondrianCategorizer | None, optional + Number of bins or MondrianCategorizer for Mondrian calibration (regression). n_jobs : int, optional - Number of parallel jobs to use. - random_state : int or None, optional - Random state for reproducibility. - **kwargs : dict + Number of parallel jobs (default: 1). + **kwargs : Any Additional keyword arguments for crepes. 
+ """ self.estimator = estimator self.mondrian = mondrian self.confidence_level = confidence_level - self.estimator_type = estimator_type + if estimator_type == "auto": + self.estimator_type = _detect_estimator_type(estimator) + else: + self.estimator_type = estimator_type self.nonconformity = nonconformity self.difficulty_estimator = difficulty_estimator self.binning = binning self.n_jobs = n_jobs self.kwargs = kwargs - self.random_state = check_random_state(random_state) if random_state is not None else None + self._conformal: WrapClassifier | WrapRegressor | None = None self.fitted_ = False self.calibrated_ = False - self._conformal = None - - # Determine estimator_type if auto - if estimator_type == "auto": - if is_classifier(estimator): - self._resolved_estimator_type = "classifier" - elif is_regressor(estimator): - self._resolved_estimator_type = "regressor" - else: - raise ValueError( - "Could not automatically determine estimator_type. " - "Please specify 'classifier' or 'regressor'." - ) - else: - self._resolved_estimator_type = estimator_type - def _get_mondrian_param_classification(self, mondrian, y_calib): - if isinstance(mondrian, MondrianCategorizer) or callable(mondrian): - return mondrian - elif mondrian is True: - return y_calib - else: - return None + def fit(self, x: npt.NDArray[Any], y: npt.NDArray[Any]) -> "ConformalPredictor": + """Fit the conformal predictor. 
- def _get_mondrian_param_regression(self, mondrian, y_calib): - if isinstance(mondrian, MondrianCategorizer) or callable(mondrian): - return mondrian - elif mondrian is True: - return y_calib - else: - return None - - def get_params(self, deep=True): - return { - "estimator": self.estimator, - "mondrian": self.mondrian, - "confidence_level": self.confidence_level, - "estimator_type": self.estimator_type, - "nonconformity": self.nonconformity, - "difficulty_estimator": self.difficulty_estimator, - "binning": self.binning, - "n_jobs": self.n_jobs, - "random_state": self.random_state, - **self.kwargs, - } + Parameters + ---------- + x : npt.NDArray[Any] + Training features. + y : npt.NDArray[Any] + Training targets. - def set_params(self, **params): - for key, value in params.items(): - setattr(self, key, value) - return self + Returns + ------- + ConformalPredictor + Self. + + Raises + ------ + ValueError + If estimator_type is not 'classifier' or 'regressor'. - def fit(self, X: npt.NDArray[Any], y: npt.NDArray[Any], **fit_params: Any) -> "UnifiedConformalCV": - if self._resolved_estimator_type == "classifier": + """ + if self.estimator_type == "classifier": self._conformal = WrapClassifier(clone(self.estimator)) - elif self._resolved_estimator_type == "regressor": + elif self.estimator_type == "regressor": self._conformal = WrapRegressor(clone(self.estimator)) else: raise ValueError("estimator_type must be 'classifier' or 'regressor'") - self._conformal.fit(X, y, **fit_params) - self.fitted_ = True - self.models_ = [self._conformal] + self._conformal.fit(x, y) + self.fitted_ = True return self def calibrate( self, - X_calib: npt.NDArray[Any], + x_calib: npt.NDArray[Any], y_calib: npt.NDArray[Any], **calib_params: Any, ) -> None: - if self._resolved_estimator_type == "classifier": - nc = self.nonconformity if self.nonconformity is not None else hinge - mc = self._get_mondrian_param_classification(self.mondrian, y_calib) - self._conformal.calibrate(X_calib, y_calib, 
nc=nc, mc=mc, **calib_params) - self.calibrated_ = True - - elif self._resolved_estimator_type == "regressor": - de = self.difficulty_estimator - mc = self._get_mondrian_param_regression(self.mondrian, y_calib) - self._conformal.calibrate(X_calib, y_calib, de=de, mc=mc, **calib_params) - self.calibrated_ = True + """Calibrate the conformal predictor. + + Parameters + ---------- + x_calib : npt.NDArray[Any] + Calibration features. + y_calib : npt.NDArray[Any] + Calibration targets. + calib_params : dict + Additional calibration parameters. + + Raises + ------ + ValueError + If estimator_type is not 'classifier' or 'regressor'. + RuntimeError + If the estimator must be fitted before calling calibrate. + + """ + if not self.fitted_ or self._conformal is None: + raise RuntimeError("Estimator must be fitted before calling calibrate") + if self.estimator_type == "classifier": + if self.mondrian is True: + self._conformal.calibrate( + x_calib, + y_calib, + class_cond=True, + **calib_params, + ) + elif isinstance( + self.mondrian, + (MondrianCategorizer, type(lambda: None)), + ) and callable(self.mondrian): + self._conformal.calibrate( + x_calib, + y_calib, + mc=self.mondrian, + **calib_params, + ) + else: + self._conformal.calibrate(x_calib, y_calib, **calib_params) + elif self.estimator_type == "regressor": + mc = _get_mondrian_param_regression(self.mondrian) + self._conformal.calibrate(x_calib, y_calib, mc=mc, **calib_params) else: raise ValueError("estimator_type must be 'classifier' or 'regressor'") + self.calibrated_ = True + + def predict(self, x: npt.NDArray[Any]) -> npt.NDArray[Any]: + """Predict using the conformal predictor. - def predict(self, X: npt.NDArray[Any]) -> npt.NDArray[Any]: - return self._conformal.predict(X) + Parameters + ---------- + x : npt.NDArray[Any] + Features to predict. - def predict_proba(self, X: npt.NDArray[Any]) -> npt.NDArray[Any]: - if self._resolved_estimator_type != "classifier": + Returns + ------- + npt.NDArray[Any] + Predictions. 
+ + Raises + ------ + ValueError + If estimator must be fitted before calling predict. + + """ + if not self.fitted_ or self._conformal is None: + raise ValueError("Estimator must be fitted before calling predict") + return self._conformal.predict(x) + + def predict_proba(self, x: npt.NDArray[Any]) -> npt.NDArray[Any]: + """Predict probabilities using the conformal predictor. + + Parameters + ---------- + x : npt.NDArray[Any] + Features to predict. + + Returns + ------- + npt.NDArray[Any] + Predicted probabilities. + + Raises + ------ + ValueError + If estimator must be fitted before calling predict_proba. + NotImplementedError + If called for a regressor. + RuntimeError + If the internal conformal wrapper is not of the expected type. + + """ + if not self.fitted_ or self._conformal is None: + raise ValueError("Estimator must be fitted before calling predict_proba") + if self.estimator_type != "classifier": raise NotImplementedError("predict_proba is for classifiers only.") - return self._conformal.predict_proba(X) + if isinstance(self._conformal, WrapClassifier): + return self._conformal.predict_proba(x) + raise RuntimeError("Expected WrapClassifier but got different type") def predict_conformal_set( self, - X: npt.NDArray[Any], + x: npt.NDArray[Any], confidence: float | None = None, - ) -> list[list[Any]]: + ) -> list[list[int]]: + """Predict conformal sets. + + Parameters + ---------- + x : npt.NDArray[Any] + Features to predict. + confidence : float, optional + Confidence level. + + Returns + ------- + list[list[int]] + Conformal prediction sets as list of lists containing class indices. + + Raises + ------ + ValueError + If estimator must be fitted before calling predict_conformal_set. + NotImplementedError + If called for a regressor. + RuntimeError + If the internal conformal wrapper is not of the expected type. + """ - Predict conformal sets for classification. 
+ if not self.fitted_: + raise ValueError( + "Estimator must be fitted before calling predict_conformal_set", + ) + if self._conformal is None: + raise RuntimeError("Conformal wrapper is not initialized") + if self.estimator_type != "classifier": + raise NotImplementedError( + "predict_conformal_set is only for classification.", + ) + conf = confidence if confidence is not None else self.confidence_level + if isinstance(self._conformal, WrapClassifier): + prediction_sets_binary = self._conformal.predict_set(x, confidence=conf) + + prediction_sets = [] + for i in range(prediction_sets_binary.shape[0]): + class_indices = [ + j + for j in range(prediction_sets_binary.shape[1]) + if prediction_sets_binary[i, j] == 1 + ] + prediction_sets.append(class_indices) + + return prediction_sets + raise RuntimeError("Expected WrapClassifier but got different type") + + def predict_p(self, x: npt.NDArray[Any], **kwargs: Any) -> npt.NDArray[Any]: + """Predict p-values. Parameters ---------- - X : npt.NDArray[Any] - Input features. - confidence : float or None, optional - Confidence level for prediction set (default: self.confidence_level). + x : npt.NDArray[Any] + Features to predict. + kwargs : dict + Additional parameters. Returns ------- - list[list[Any]] - List of conformal sets (per sample), each a list of class labels. + npt.NDArray[Any] + p-values. + + Raises + ------ + ValueError + If estimator must be fitted before calling predict_p. + NotImplementedError + If called for a regressor. + RuntimeError + If the internal conformal wrapper is not of the expected type. 
+ """ - if self._resolved_estimator_type != "classifier": - raise NotImplementedError("predict_conformal_set is only for classification.") if not self.fitted_: - raise RuntimeError("You must fit the model before calling predict_conformal_set.") - - # Default confidence to self.confidence_level if not provided - confidence = confidence if confidence is not None else self.confidence_level - - pred_set_bin = self._conformal.predict_set(X, confidence=confidence) - classes = self._conformal.learner.classes_ - return [list(np.array(classes)[row.astype(bool)]) for row in pred_set_bin] - - def predict_p(self, X: npt.NDArray[Any], **kwargs: Any) -> npt.NDArray[Any]: - if self._resolved_estimator_type != "classifier": + raise ValueError("Estimator must be fitted before calling predict_p") + if self._conformal is None: + raise RuntimeError("Conformal wrapper is not initialized") + if self.estimator_type != "classifier": raise NotImplementedError("predict_p is only for classification.") - return self._conformal.predict_p(X, **kwargs) + if isinstance(self._conformal, WrapClassifier): + return self._conformal.predict_p(x, **kwargs) + raise RuntimeError("Expected WrapClassifier but got different type") + + def predict_int( + self, + x: npt.NDArray[Any], + confidence: float | None = None, + ) -> npt.NDArray[Any]: + """Predict intervals. - def predict_int(self, X: npt.NDArray[Any], confidence: float | None = None) -> npt.NDArray[Any]: - """ - Predict confidence intervals for regression. - Parameters ---------- - X : npt.NDArray[Any] - Input features. - confidence : float or None, optional - Confidence level for intervals (default: self.confidence_level). - + x : npt.NDArray[Any] + Features to predict. + confidence : float, optional + Confidence level. + Returns ------- npt.NDArray[Any] - Array of prediction intervals, shape (n_samples, 2). + Prediction intervals of shape (n_samples, 2) with columns [lower, upper]. 
+ + Raises + ------ + ValueError + If estimator must be fitted before calling predict_int. + NotImplementedError + If called for a classifier. + RuntimeError + If the internal conformal wrapper is not of the expected type. + """ - if self._resolved_estimator_type != "regressor": + if not self.fitted_: + raise ValueError("Estimator must be fitted before calling predict_int") + if self._conformal is None: + raise RuntimeError("Conformal wrapper is not initialized") + if self.estimator_type != "regressor": raise NotImplementedError("predict_int is only for regression.") conf = confidence if confidence is not None else self.confidence_level - return self._conformal.predict_int(X, confidence=conf) - + if isinstance(self._conformal, WrapRegressor): + return self._conformal.predict_int(x, confidence=conf) + raise RuntimeError("Expected WrapRegressor but got different type") + def get_params(self, deep: bool = True) -> dict[str, Any]: + """Get parameters for this estimator. + + Parameters + ---------- + deep : bool, optional + If True, will return the parameters for this estimator and + contained subobjects that are estimators. + + Returns + ------- + dict[str, Any] + Parameter names mapped to their values. + + """ + params = { + "estimator": self.estimator, + "mondrian": self.mondrian, + "confidence_level": self.confidence_level, + "estimator_type": self.estimator_type, + "nonconformity": self.nonconformity, + "difficulty_estimator": self.difficulty_estimator, + "binning": self.binning, + "n_jobs": self.n_jobs, + } + params.update(self.kwargs) + + if deep and hasattr(self.estimator, "get_params"): + estimator_params = self.estimator.get_params(deep=True) + params.update({f"estimator__{k}": v for k, v in estimator_params.items()}) + + return params + + def set_params(self, **params: Any) -> "ConformalPredictor": + """Set the parameters of this estimator. + + Parameters + ---------- + **params : dict + Estimator parameters. 
+ + Returns + ------- + ConformalPredictor + This estimator. + + Raises + ------ + ValueError + + """ + valid_params = self.get_params(deep=False) + estimator_params: dict[str, Any] = {} + + for key, value in params.items(): + if key in valid_params: + setattr(self, key, value) + else: + raise ValueError( + f"Invalid parameter {key} for estimator {type(self).__name__}", + ) + + if estimator_params and hasattr(self.estimator, "set_params"): + self.estimator.set_params(**estimator_params) + + return self + + +class CrossConformalPredictor(BaseEstimator): # pylint: disable=too-many-instance-attributes + """Cross-conformal prediction using WrapClassifier/WrapRegressor.""" -class CrossConformalCV(BaseEstimator): def __init__( self, estimator: BaseEstimator, + *, n_folds: int = 5, confidence_level: float = 0.9, - mondrian: bool | Callable | MondrianCategorizer = False, - nonconformity: Optional[Callable] = None, - binning: Optional[int | Callable] = None, - estimator_type: Literal["auto", "classifier", "regressor"] = "auto", + mondrian: MondrianCategorizer | Callable[..., Any] | bool = False, + nonconformity: ( + Callable[ + [npt.NDArray[Any], npt.NDArray[Any] | None, npt.NDArray[Any] | None], + npt.NDArray[Any], + ] + | None + ) = None, + binning: int | MondrianCategorizer | None = None, + estimator_type: Literal["classifier", "regressor", "auto"] = "auto", n_bins: int = 10, - difficulty_estimator: Optional[Callable] = None, - random_state: Optional[int] = None, - **kwargs: Any - ): - """ - Cross-conformal prediction for both classifiers and regressors using WrapClassifier/WrapRegressor. + random_state: int | None = None, + **kwargs: Any, + ) -> None: + """Initialize CrossConformalPredictor. Parameters ---------- estimator : BaseEstimator - The underlying model or pipeline to wrap. + The base estimator or pipeline to wrap. n_folds : int, optional Number of cross-validation folds (default: 5). 
confidence_level : float, optional Confidence level for prediction sets/intervals (default: 0.9). - mondrian : bool, callable, or MondrianCategorizer, optional - Mondrian calibration/grouping. - nonconformity : callable, optional - Nonconformity function for classification (e.g., hinge, margin, or custom). - binning : int or callable, optional - For regression: number of bins or binning function for Mondrian calibration. - estimator_type : Literal["auto", "classifier", "regressor"], optional - Type of estimator. If "auto", will infer using sklearn's is_classifier/is_regressor. + mondrian : MondrianCategorizer | Callable[..., Any] | bool, optional + Mondrian calibration/grouping (default: False). + nonconformity : Callable, optional + Nonconformity function for classification that takes (X_prob, classes, y) + and returns non-conformity scores. Examples: hinge, margin from + crepes.extras. + binning : int | MondrianCategorizer | None, optional + Number of bins or MondrianCategorizer for Mondrian calibration (regression). + estimator_type : Literal["classifier", "regressor", "auto"], optional + Auto detects it automatically (default: 'auto'). n_bins : int, optional Number of bins for stratified splitting in regression (default: 10). - difficulty_estimator : callable, optional - For regression: difficulty estimator for normalized conformal prediction. - random_state : int or None, optional + random_state : int | None, optional Random state for reproducibility. - **kwargs : dict + **kwargs : Any Additional keyword arguments for crepes. 
+ """ self.estimator = estimator self.n_folds = n_folds @@ -293,195 +569,433 @@ def __init__( self.mondrian = mondrian self.nonconformity = nonconformity self.binning = binning - self.estimator_type = estimator_type + if estimator_type == "auto": + self.estimator_type = _detect_estimator_type(estimator) + else: + self.estimator_type = estimator_type self.n_bins = n_bins - self.difficulty_estimator = difficulty_estimator + self.random_state = random_state # Store the original seed/state self.kwargs = kwargs - self.random_state = check_random_state(random_state) if random_state is not None else None + self.models_: list[WrapClassifier | WrapRegressor] = [] self.fitted_ = False - self.calibrated_ = False - - # Determine estimator_type if auto - if estimator_type == "auto": - if is_classifier(estimator): - self._resolved_estimator_type = "classifier" - elif is_regressor(estimator): - self._resolved_estimator_type = "regressor" - else: - raise ValueError( - "Could not automatically determine estimator_type. " - "Please specify 'classifier' or 'regressor'." - ) - else: - self._resolved_estimator_type = estimator_type - def get_params(self, deep=True): - return { - "estimator": self.estimator, - "n_folds": self.n_folds, - "confidence_level": self.confidence_level, - "mondrian": self.mondrian, - "nonconformity": self.nonconformity, - "binning": self.binning, - "estimator_type": self.estimator_type, - "n_bins": self.n_bins, - "difficulty_estimator": self.difficulty_estimator, - "random_state": self.random_state, - **self.kwargs, - } + def _create_splitter( + self, + y: npt.NDArray[Any], + rng: Any, + ) -> tuple[KFold | StratifiedKFold, npt.NDArray[Any]]: + """Create the appropriate splitter for cross-validation. - def set_params(self, **params): - for key, value in params.items(): - setattr(self, key, value) - return self + Parameters + ---------- + y : npt.NDArray[Any] + Target values. + rng : Any + Random state object. 
- def fit(self, X: npt.NDArray[Any], y: npt.NDArray[Any], **fit_params: Any) -> "CrossConformalCV": - X = np.asarray(X) - y = np.asarray(y) - self.models_ = [] - self.mondrian_categorizers_ = [] # Store categorizers for each fold - self.calib_bins_ = [] # Store calibration bins for each fold - - if self._resolved_estimator_type == "classifier": - splitter = StratifiedKFold(n_splits=self.n_folds, shuffle=True, random_state=self.random_state) + Returns + ------- + tuple[KFold | StratifiedKFold, npt.NDArray[Any]] + Splitter and y values for splitting. + + Raises + ------ + ValueError + If estimator_type is not 'classifier' or 'regressor'. + + """ + if self.estimator_type == "classifier": + splitter = StratifiedKFold( + n_splits=self.n_folds, + shuffle=True, + random_state=rng, + ) y_split = y - elif self._resolved_estimator_type == "regressor": - splitter = KFold(n_splits=self.n_folds, shuffle=True, random_state=self.random_state) + elif self.estimator_type == "regressor": + splitter = KFold( + n_splits=self.n_folds, + shuffle=True, + random_state=rng, + ) y_split = _bin_targets(y, n_bins=self.n_bins) else: raise ValueError("estimator_type must be 'classifier' or 'regressor'") - - for train_idx, calib_idx in splitter.split(X, y_split): - if self._resolved_estimator_type == "classifier": - model = WrapClassifier(clone(self.estimator)) - model.fit(X[train_idx], y[train_idx]) - if self.mondrian: - model.calibrate(X[calib_idx], y[calib_idx], nc=self.nonconformity or hinge, class_cond=True) - else: - model.calibrate(X[calib_idx], y[calib_idx], nc=self.nonconformity or hinge, class_cond=False) - self.mondrian_categorizers_.append(None) - self.calib_bins_.append(None) + return splitter, y_split + + def _create_mondrian_categorizer( + self, + model: WrapRegressor, + y_calib_vals: npt.NDArray[Any], + ) -> tuple[MondrianCategorizer, Callable[..., Any]]: + """Create a MondrianCategorizer for regression binning. 
+ + Parameters + ---------- + model : WrapRegressor + The fitted regression model. + y_calib_vals : npt.NDArray[Any] + Calibration target values. + + Returns + ------- + tuple[MondrianCategorizer, Callable[..., Any]] + Fitted MondrianCategorizer and binning function. + + """ + mc_obj = MondrianCategorizer() + y_min, y_max = np.min(y_calib_vals), np.max(y_calib_vals) + n_bins = self.binning + + def bin_func( + x_test: Any, + model: Any = model, + y_min: Any = y_min, + y_max: Any = y_max, + n_bins: Any = n_bins, + ) -> Any: + y_pred = model.predict(x_test) + bins = np.linspace(y_min, y_max, n_bins + 1) + binned = np.digitize(y_pred, bins) - 1 + return np.clip(binned, 0, n_bins - 1) + + return mc_obj, bin_func + + def _fit_single_model( + self, + x_array: npt.NDArray[Any], + y_array: npt.NDArray[Any], + train_idx: npt.NDArray[np.int_], + calib_idx: npt.NDArray[np.int_], + ) -> WrapClassifier | WrapRegressor: + """Fit and calibrate a single model for one fold. + + Parameters + ---------- + x_array : npt.NDArray[Any] + Feature array. + y_array : npt.NDArray[Any] + Target array. + train_idx : npt.NDArray[np.int_] + Training indices. + calib_idx : npt.NDArray[np.int_] + Calibration indices. + + Returns + ------- + WrapClassifier | WrapRegressor + Fitted and calibrated model. 
+ + """ + if self.estimator_type == "classifier": + model = WrapClassifier(clone(self.estimator)) + model.fit(x_array[train_idx], y_array[train_idx]) + + if self.mondrian is True: + model.calibrate(x_array[calib_idx], y_array[calib_idx], class_cond=True) + elif isinstance( + self.mondrian, + (MondrianCategorizer, type(lambda: None)), + ) and callable(self.mondrian): + model.calibrate( + x_array[calib_idx], + y_array[calib_idx], + mc=self.mondrian, + ) else: - model = WrapRegressor(clone(self.estimator)) - model.fit(X[train_idx], y[train_idx]) - de = None - if self.difficulty_estimator is not None: - de = DifficultyEstimator() - de.fit(X[calib_idx], y=y[calib_idx]) - if self.mondrian: - if self.binning is not None: - mc = MondrianCategorizer() - mc.fit(X[calib_idx], f=lambda X: y[calib_idx], no_bins=self.binning) - else: - mc = MondrianCategorizer() - mc.fit(X[calib_idx], f=lambda X: y[calib_idx]) - model.calibrate(X[calib_idx], y[calib_idx], de=de, mc=mc) - self.mondrian_categorizers_.append(mc) - self.calib_bins_.append(None) - else: - model.calibrate(X[calib_idx], y[calib_idx], de=de) - self.mondrian_categorizers_.append(None) - self.calib_bins_.append(None) + model.calibrate(x_array[calib_idx], y_array[calib_idx]) + else: + model = WrapRegressor(clone(self.estimator)) + model.fit(x_array[train_idx], y_array[train_idx]) + mc = _get_mondrian_param_regression(self.mondrian) + if self.binning is not None and isinstance(self.binning, int): + mc_obj, bin_func = self._create_mondrian_categorizer( + model, + y_array[calib_idx], + ) + mc_obj.fit(x_array[calib_idx], f=bin_func, no_bins=self.binning) + mc = mc_obj + elif self.binning is not None: + mc = self.binning + model.calibrate(x_array[calib_idx], y_array[calib_idx], mc=mc) + return model + + def fit( + self, + x: npt.NDArray[Any], + y: npt.NDArray[Any], + ) -> "CrossConformalPredictor": + """Fit the cross-conformal predictor. + + Parameters + ---------- + x : npt.NDArray[Any] + Training features. 
+ y : npt.NDArray[Any] + Training targets. + + Returns + ------- + CrossConformalPredictor + Self. + + """ + self.models_ = [] + rng = check_random_state(self.random_state) + splitter, y_split = self._create_splitter(y, rng) + + x_array = np.asarray(x) + y_array = np.asarray(y) + + for train_idx, calib_idx in splitter.split(x_array, y_split): + model = self._fit_single_model(x_array, y_array, train_idx, calib_idx) self.models_.append(model) - self.calibrated_ = True - self.fitted_ = True + self.fitted_ = True return self - def predict(self, X: npt.NDArray[Any]) -> npt.NDArray[Any]: - result = np.array([m.predict(X) for m in self.models_]) - if self._resolved_estimator_type == "regressor": - return np.mean(result, axis=0) + def predict(self, x: npt.NDArray[Any]) -> npt.NDArray[Any]: + """Predict using the cross-conformal predictor. + + Parameters + ---------- + x : npt.NDArray[Any] + Features to predict. + + Returns + ------- + npt.NDArray[Any] + Predictions (majority vote). + + Raises + ------ + ValueError + If estimator must be fitted before calling predict. + + """ + if not self.fitted_: + raise ValueError("Estimator must be fitted before calling predict") + result = np.array([m.predict(x) for m in self.models_]) pred_mode = mode(result, axis=0, keepdims=False) return np.ravel(pred_mode.mode) - def predict_proba(self, X: npt.NDArray[Any]) -> npt.NDArray[Any]: - result = np.array([m.predict_proba(X) for m in self.models_]) - proba = np.atleast_2d(np.mean(result, axis=0)) - return proba + def predict_proba(self, x: npt.NDArray[Any]) -> npt.NDArray[Any]: + """Predict probabilities using the cross-conformal predictor. + + Parameters + ---------- + x : npt.NDArray[Any] + Features to predict. + + Returns + ------- + npt.NDArray[Any] + Predicted probabilities (averaged). + + Raises + ------ + ValueError + If estimator must be fitted before calling predict_proba. + NotImplementedError + If called for a regressor. 
+ + """ + if not self.fitted_: + raise ValueError("Estimator must be fitted before calling predict_proba") + if self.estimator_type != "classifier": + raise NotImplementedError("predict_proba is for classifiers only.") + result = np.array([m.predict_proba(x) for m in self.models_]) + return np.atleast_2d(np.mean(result, axis=0)) def predict_conformal_set( self, - X: npt.NDArray[Any], + x: npt.NDArray[Any], confidence: float | None = None, - ) -> List[List[Union[int]]]: - """ - Predict conformal sets for classification by union across folds. + ) -> list[list[int]]: + """Predict conformal sets using the cross-conformal predictor. Parameters ---------- - X : npt.NDArray[Any] - Input features. - confidence : float or None, optional - Confidence level for prediction set (default: self.confidence_level). + x : npt.NDArray[Any] + Features to predict. + confidence : float, optional + Confidence level. Returns ------- - List[List[Union[int]]] - List of conformal sets (per sample), each containing the class labels - that might be the true class with the specified confidence level. - For example, for a binary classifier with classes [0, 1], might return - [[0, 1], [1], [0, 1]] for three samples. + list[list[int]] + Conformal prediction sets as list of lists containing class indices. + + Raises + ------ + ValueError + If estimator must be fitted before calling predict_conformal_set. + NotImplementedError + If called for a regressor. 
+ """ - if self._resolved_estimator_type != "classifier": - raise NotImplementedError("predict_conformal_set is only for classification.") if not self.fitted_: - raise RuntimeError("You must fit the model before calling predict_conformal_set.") - - # Default confidence to self.confidence_level if not provided - confidence = confidence if confidence is not None else self.confidence_level - - sets = [] - for m in self.models_: - pred_set_bin = m.predict_set(X, confidence=confidence) - classes = getattr(m.learner, "classes_", None) - if classes is None: - raise AttributeError("Underlying estimator does not expose 'classes_'.") - sets.append([list(np.array(classes)[row.astype(bool)]) for row in pred_set_bin]) - - n = len(X) - union_sets: list[list[Any]] = [] - for i in range(n): - union = set() - for s in sets: - union.update(s[i]) - union_sets.append(list(union)) - return union_sets - - def predict_int(self, X: npt.NDArray[Any], confidence: float | None = None) -> npt.NDArray[Any]: + raise ValueError( + "Estimator must be fitted before calling predict_conformal_set", + ) + if self.estimator_type != "classifier": + raise NotImplementedError( + "predict_conformal_set is only for classification.", + ) + conf = confidence if confidence is not None else self.confidence_level + + p_values_list = [m.predict_p(x) for m in self.models_] + aggregated_p_values = np.mean(p_values_list, axis=0) + + prediction_sets_binary = (aggregated_p_values >= 1 - conf).astype(int) + + prediction_sets = [] + for i in range(prediction_sets_binary.shape[0]): + class_indices = [ + j + for j in range(prediction_sets_binary.shape[1]) + if prediction_sets_binary[i, j] == 1 + ] + prediction_sets.append(class_indices) + + return prediction_sets + + def predict_p(self, x: npt.NDArray[Any], **kwargs: Any) -> npt.NDArray[Any]: + """Predict p-values using the cross-conformal predictor. + + Parameters + ---------- + x : npt.NDArray[Any] + Features to predict. + kwargs : dict + Additional parameters. 
+ + Returns + ------- + npt.NDArray[Any] + Aggregated p-values from all folds. + + Raises + ------ + ValueError + If estimator must be fitted before calling predict_p. + NotImplementedError + If called for a regressor. + """ - Predict confidence intervals for regression. - + if not self.fitted_: + raise ValueError("Estimator must be fitted before calling predict_p") + if self.estimator_type != "classifier": + raise NotImplementedError("predict_p is only for classification.") + + p_values_list = [m.predict_p(x, **kwargs) for m in self.models_] + return np.mean(p_values_list, axis=0) + + def predict_int( + self, + x: npt.NDArray[Any], + confidence: float | None = None, + ) -> npt.NDArray[Any]: + """Predict intervals using the cross-conformal predictor. + Parameters ---------- - X : npt.NDArray[Any] - Input features. - confidence : float or None, optional - Confidence level for intervals (default: self.confidence_level). - + x : npt.NDArray[Any] + Features to predict. + confidence : float, optional + Confidence level. + Returns ------- npt.NDArray[Any] - Array of prediction intervals, shape (n_samples, 2). + Prediction intervals based on aggregated predictions. + + Raises + ------ + ValueError + If estimator must be fitted before calling predict_int. + NotImplementedError + If called for a classifier. 
+ """ - if self._resolved_estimator_type != "regressor": + if not self.fitted_: + raise ValueError("Estimator must be fitted before calling predict_int") + if self.estimator_type != "regressor": raise NotImplementedError("predict_int is only for regression.") + conf = confidence if confidence is not None else self.confidence_level - intervals = [] - for i, model in enumerate(self.models_): - interval = model.predict_int(X, confidence=conf) - intervals.append(np.array(interval)) - # Return average lower/upper bounds across folds - intervals = np.array(intervals) # shape: (n_folds, n_samples, 2) - avg_intervals = np.nanmean(intervals, axis=0) - return avg_intervals - - - def predict_p(self, X: npt.NDArray[Any]) -> npt.NDArray[Any]: - """Return averaged conformal p-values across folds (classification only).""" - if self._resolved_estimator_type != "classifier": - raise NotImplementedError("predict_p is only for classification.") - # Each model in self.models_ has predict_p - pvals = np.array([m.predict_p(X) for m in self.models_]) # shape: (n_folds, n_samples, n_classes) - avg_pvals = np.mean(pvals, axis=0) # shape: (n_samples, n_classes) - return avg_pvals \ No newline at end of file + + intervals_list = [m.predict_int(x, confidence=conf) for m in self.models_] + + intervals_array = np.array(intervals_list) # shape: (n_folds, n_samples, 2) + lower_bounds = np.nanmean(intervals_array[:, :, 0], axis=0) + upper_bounds = np.nanmean(intervals_array[:, :, 1], axis=0) + + return np.column_stack([lower_bounds, upper_bounds]) + + def get_params(self, deep: bool = True) -> dict[str, Any]: + """Get parameters for this estimator. + + Parameters + ---------- + deep : bool, optional + If True, will return the parameters for this estimator and + contained subobjects that are estimators. + + Returns + ------- + dict[str, Any] + Parameter names mapped to their values. 
+ + """ + params = { + "estimator": self.estimator, + "n_folds": self.n_folds, + "confidence_level": self.confidence_level, + "mondrian": self.mondrian, + "nonconformity": self.nonconformity, + "binning": self.binning, + "estimator_type": self.estimator_type, + "n_bins": self.n_bins, + "random_state": self.random_state, + } + params.update(self.kwargs) + + if deep and hasattr(self.estimator, "get_params"): + estimator_params = self.estimator.get_params(deep=True) + params.update({f"estimator__{k}": v for k, v in estimator_params.items()}) + + return params + + def set_params(self, **params: Any) -> "CrossConformalPredictor": + """Set the parameters of this estimator. + + Parameters + ---------- + **params : dict + Estimator parameters. + + Returns + ------- + CrossConformalPredictor + This estimator. + + Raises + ------ + ValueError + + """ + valid_params = self.get_params(deep=False) + estimator_params: dict[str, Any] = {} + + for key, value in params.items(): + if key in valid_params: + setattr(self, key, value) + else: + raise ValueError( + f"Invalid parameter {key} for estimator {type(self).__name__}", + ) + + if estimator_params and hasattr(self.estimator, "set_params"): + self.estimator.set_params(**estimator_params) + + return self diff --git a/tests/test_experimental/test_uncertainty/test_conformal.py b/tests/test_experimental/test_uncertainty/test_conformal.py index 6ed0695c..d81742ef 100644 --- a/tests/test_experimental/test_uncertainty/test_conformal.py +++ b/tests/test_experimental/test_uncertainty/test_conformal.py @@ -1,151 +1,463 @@ -"""Unit tests for conformal prediction wrappers using real datasets.""" +"""Unit tests for conformal prediction wrappers.""" import unittest -import pandas as pd +from pathlib import Path +from typing import Any + import numpy as np -from rdkit import Chem +import numpy.typing as npt +import pandas as pd +from crepes.extras import MondrianCategorizer, hinge, margin from sklearn.ensemble import RandomForestClassifier, 
RandomForestRegressor from sklearn.model_selection import train_test_split -from sklearn.pipeline import Pipeline + +from molpipeline import Pipeline +from molpipeline.any2mol import SmilesToMol from molpipeline.experimental.uncertainty.conformal import ( - CrossConformalCV, - UnifiedConformalCV, + ConformalPredictor, + CrossConformalPredictor, ) -from molpipeline.any2mol import SmilesToMol from molpipeline.mol2any import MolToMorganFP +# Test data directory +TEST_DATA_DIR = Path(__file__).parent.parent.parent / "test_data" -class TestConformalCVWithRealData(unittest.TestCase): - """Unit tests for UnifiedConformalCV and CrossConformalCV using real datasets.""" +# Constants for fingerprints +FP_RADIUS = 2 +FP_SIZE = 1024 + + +class TestConformalCV(unittest.TestCase): + """Unit tests for ConformalPredictor and CrossConformalPredictor wrappers.""" + + # Class attributes for test data + x_clf: npt.NDArray[Any] + y_clf: npt.NDArray[Any] + x_reg: npt.NDArray[Any] + y_reg: npt.NDArray[Any] @classmethod def setUpClass(cls) -> None: - """Set up the test by loading the datasets.""" - # Paths to the datasets - logd_path = "tests/test_data/molecule_net_logd.tsv.gz" - bbbp_path = "tests/test_data/molecule_net_bbbp.tsv.gz" - - # Load the datasets directly from the .gz files - cls.logd_data = pd.read_csv(logd_path, compression="gzip", sep="\t", nrows=100) - cls.bbbp_data = pd.read_csv(bbbp_path, compression="gzip", sep="\t", nrows=100) - - # Initialize the pipeline - smi2mol = SmilesToMol() - mol2morgan = MolToMorganFP(radius=2, n_bits=2048) - cls.pipeline = Pipeline( - [ - ("smi2mol", smi2mol), - ("morgan", mol2morgan), - ] - ) + """Set up test data once for all tests.""" + # Load data + bbbp_df = pd.read_csv(TEST_DATA_DIR / "molecule_net_bbbp.tsv.gz", + sep="\t", compression="gzip") + logd_df = pd.read_csv(TEST_DATA_DIR / "molecule_net_logd.tsv.gz", + sep="\t", compression="gzip") + + # Set up pipeline stages separately to handle invalid molecules + smi2mol = 
SmilesToMol(n_jobs=1) + morgan = MolToMorganFP(radius=FP_RADIUS, n_bits=FP_SIZE, n_jobs=1) + + # Process classification data + bbbp_clean = bbbp_df.dropna(subset=["smiles", "p_np"]) + smiles_list = bbbp_clean["smiles"].tolist() + labels_list = bbbp_clean["p_np"].tolist() + + # Convert SMILES to molecules first, filter out invalid ones + molecules = smi2mol.fit_transform(smiles_list) + valid_clf_data = [] + + for mol, label in zip(molecules, labels_list, strict=False): + # Skip InvalidInstance objects + if mol is None or hasattr(mol, "_fields"): # InvalidInstance is a NamedTuple + continue + # Generate fingerprint for valid molecule + fp = morgan.transform([mol])[0] + if fp is not None and hasattr(fp, "toarray"): + valid_clf_data.append((fp.toarray().flatten(), label)) - def featurize_smiles(self, smiles: pd.Series, labels: pd.Series) -> tuple[np.ndarray, np.ndarray]: - """Featurize SMILES strings into Morgan fingerprints and filter corresponding labels.""" - # Validate SMILES strings - valid_smiles = [] - valid_labels = [] - for smi, label in zip(smiles, labels): - mol = Chem.MolFromSmiles(smi) - if mol is not None: - valid_smiles.append(smi) - valid_labels.append(label) - else: - print(f"Warning: Invalid SMILES string skipped: {smi}") - - # Transform valid SMILES to fingerprints - try: - matrix = self.pipeline.fit_transform(valid_smiles) - return matrix.toarray(), np.array(valid_labels) # Convert sparse matrix to dense array - except Exception as e: - print(f"Error during featurization: {e}") - raise - - def test_unified_conformal_regressor_logd(self) -> None: - """Test UnifiedConformalCV with a regressor on the logd dataset.""" - x, y = self.featurize_smiles(self.logd_data["smiles"], self.logd_data["exp"]) - - # Split into train and calibration sets + if not valid_clf_data: + raise ValueError("No valid classification data found") + + cls.x_clf, cls.y_clf = map(np.array, zip(*valid_clf_data, strict=False)) + + # Process regression data + logd_clean = 
logd_df.dropna(subset=["smiles", "exp"]) + smiles_list_reg = logd_clean["smiles"].tolist() + labels_list_reg = logd_clean["exp"].tolist() + + # Convert SMILES to molecules first, filter out invalid ones + molecules_reg = smi2mol.transform(smiles_list_reg) + valid_reg_data = [] + + for mol, label in zip(molecules_reg, labels_list_reg, strict=False): + # Skip InvalidInstance objects + if mol is None or hasattr(mol, "_fields"): # InvalidInstance is a NamedTuple + continue + # Generate fingerprint for valid molecule - ensure mol is valid + try: + fp = morgan.transform([mol])[0] + if fp is not None and hasattr(fp, "toarray"): + valid_reg_data.append((fp.toarray().flatten(), label)) + except (AttributeError, TypeError): + # Skip molecules that can't be processed + continue + + if not valid_reg_data: + raise ValueError("No valid regression data found") + + cls.x_reg, cls.y_reg = map(np.array, zip(*valid_reg_data, strict=False)) + + def test_conformal_prediction_classifier(self) -> None: + """Test ConformalPredictor with a classifier.""" x_train, x_calib, y_train, y_calib = train_test_split( - x, y, test_size=0.2, random_state=42 + self.x_clf, + self.y_clf, + test_size=0.2, + random_state=42, ) - - # Initialize and test the UnifiedConformalCV regressor - reg = RandomForestRegressor(n_estimators=5, random_state=42) - cp = UnifiedConformalCV(reg, estimator_type="auto") + clf = RandomForestClassifier(random_state=42, n_estimators=5) + cp = ConformalPredictor(clf, estimator_type="classifier") cp.fit(x_train, y_train) cp.calibrate(x_calib, y_calib) + preds = cp.predict(x_calib) + probs = cp.predict_proba(x_calib) + sets = cp.predict_conformal_set(x_calib) + p_values = cp.predict_p(x_calib) + + self.assertEqual(len(preds), len(y_calib)) + self.assertEqual(probs.shape[0], len(y_calib)) + self.assertEqual(len(sets), len(y_calib)) + self.assertEqual(len(p_values), len(y_calib)) - # Prediction intervals + def test_conformal_prediction_regressor(self) -> None: + """Test 
ConformalPredictor with a regressor.""" + x_train, x_calib, y_train, y_calib = train_test_split( + self.x_reg, + self.y_reg, + test_size=0.2, + random_state=42, + ) + reg = RandomForestRegressor(random_state=42, n_estimators=5) + cp = ConformalPredictor(reg, estimator_type="regressor") + cp.fit(x_train, y_train) + cp.calibrate(x_calib, y_calib) intervals = cp.predict_int(x_calib) - # Assertions self.assertEqual(intervals.shape[0], len(y_calib)) - self.assertEqual(intervals.shape[1], 2) # Lower and upper bounds - self.assertTrue(np.all(intervals[:, 0] <= intervals[:, 1])) # Valid intervals - - def test_unified_conformal_classifier_bbbp(self) -> None: - """Test UnifiedConformalCV with a classifier on the bbbp dataset.""" - x, y = self.featurize_smiles(self.bbbp_data["smiles"], self.bbbp_data["p_np"]) + self.assertEqual(intervals.shape[1], 2) - # Split into train and calibration sets + def test_confidence_level_effect_regression(self) -> None: + """Test that increasing confidence level increases interval width.""" x_train, x_calib, y_train, y_calib = train_test_split( - x, y, test_size=0.2, random_state=42 + self.x_reg, + self.y_reg, + test_size=0.2, + random_state=42, ) + reg = RandomForestRegressor(random_state=42, n_estimators=5) + cp = ConformalPredictor(reg, estimator_type="regressor") + cp.fit(x_train, y_train) + cp.calibrate(x_calib, y_calib) + + # Test different confidence levels + intervals_90 = cp.predict_int(x_calib, confidence=0.90) + intervals_95 = cp.predict_int(x_calib, confidence=0.95) + intervals_99 = cp.predict_int(x_calib, confidence=0.99) + + # Calculate average interval widths + width_90 = float(np.mean(intervals_90[:, 1] - intervals_90[:, 0])) + width_95 = float(np.mean(intervals_95[:, 1] - intervals_95[:, 0])) + width_99 = float(np.mean(intervals_99[:, 1] - intervals_99[:, 0])) - # Initialize and test the UnifiedConformalCV classifier - clf = RandomForestClassifier(n_estimators=5, random_state=42) - cp = UnifiedConformalCV(clf, 
estimator_type="auto") + # Higher confidence should lead to wider intervals + self.assertLess(width_90, width_95) + self.assertLess(width_95, width_99) + + def test_confidence_level_effect_classification(self) -> None: + """Test that lower confidence level increases prediction set size.""" + x_train, x_calib, y_train, y_calib = train_test_split( + self.x_clf, + self.y_clf, + test_size=0.2, + random_state=42, + ) + clf = RandomForestClassifier(random_state=42, n_estimators=5) + cp = ConformalPredictor(clf, estimator_type="classifier") cp.fit(x_train, y_train) cp.calibrate(x_calib, y_calib) - # Predictions - preds = cp.predict(x_calib) - probs = cp.predict_proba(x_calib) - sets = cp.predict_conformal_set(x_calib) + # Test different confidence levels + sets_90 = cp.predict_conformal_set(x_calib, confidence=0.90) + sets_95 = cp.predict_conformal_set(x_calib, confidence=0.95) + sets_99 = cp.predict_conformal_set(x_calib, confidence=0.99) - # Assertions - self.assertEqual(len(preds), len(y_calib)) - self.assertEqual(probs.shape[0], len(y_calib)) - self.assertEqual(len(sets), len(y_calib)) - self.assertTrue(all(len(s) > 0 for s in sets)) # Ensure non-empty sets + # Calculate average prediction set sizes + size_90 = float(np.mean([len(s) for s in sets_90])) + size_95 = float(np.mean([len(s) for s in sets_95])) + size_99 = float(np.mean([len(s) for s in sets_99])) + # Higher confidence should lead to larger prediction sets + self.assertLessEqual(size_90, size_95) + self.assertLessEqual(size_95, size_99) + + def test_cross_conformal_classifier(self) -> None: + """Test CrossConformalPredictor with a classifier.""" + clf = RandomForestClassifier(random_state=42, n_estimators=5) + ccp = CrossConformalPredictor(clf, estimator_type="classifier", n_folds=3) + ccp.fit(self.x_clf, self.y_clf) + preds = ccp.predict(self.x_clf) + probs = ccp.predict_proba(self.x_clf) + sets = ccp.predict_conformal_set(self.x_clf) + p_values = ccp.predict_p(self.x_clf) + + self.assertEqual(len(preds), 
len(self.y_clf)) + self.assertEqual(probs.shape[0], len(self.y_clf)) + self.assertEqual(len(sets), len(self.y_clf)) + self.assertEqual(len(p_values), len(self.y_clf)) + + def test_cross_conformal_regressor(self) -> None: + """Test CrossConformalPredictor with a regressor.""" + reg = RandomForestRegressor(random_state=42, n_estimators=5) + ccp = CrossConformalPredictor(reg, estimator_type="regressor", n_folds=3) + ccp.fit(self.x_reg, self.y_reg) + intervals = ccp.predict_int(self.x_reg) + + # Each model should produce intervals for all samples + for model in ccp.models_: + model_intervals = model.predict_int(self.x_reg) + self.assertEqual(model_intervals.shape[0], len(self.y_reg)) + self.assertEqual(model_intervals.shape[1], 2) + + # Aggregated intervals should have correct shape + self.assertEqual(intervals.shape[0], len(self.y_reg)) + self.assertEqual(intervals.shape[1], 2) + + def test_cross_conformal_confidence_effect_regression(self) -> None: + """Test confidence level effect in cross-conformal regression.""" + reg = RandomForestRegressor(random_state=42, n_estimators=5) + ccp = CrossConformalPredictor(reg, estimator_type="regressor", n_folds=3) + ccp.fit(self.x_reg, self.y_reg) + + # Test different confidence levels + intervals_90 = ccp.predict_int(self.x_reg, confidence=0.90) + intervals_95 = ccp.predict_int(self.x_reg, confidence=0.95) + intervals_99 = ccp.predict_int(self.x_reg, confidence=0.99) + + # Calculate average interval widths + width_90 = float(np.mean(intervals_90[:, 1] - intervals_90[:, 0])) + width_95 = float(np.mean(intervals_95[:, 1] - intervals_95[:, 0])) + width_99 = float(np.mean(intervals_99[:, 1] - intervals_99[:, 0])) + + # Higher confidence should lead to wider intervals + self.assertLess(width_90, width_95) + self.assertLess(width_95, width_99) + + def test_cross_conformal_confidence_effect_classification(self) -> None: + """Test confidence level effect in cross-conformal classification.""" + clf = 
RandomForestClassifier(random_state=42, n_estimators=5) + ccp = CrossConformalPredictor(clf, estimator_type="classifier", n_folds=3) + ccp.fit(self.x_clf, self.y_clf) + + # Test different confidence levels + sets_90 = ccp.predict_conformal_set(self.x_clf, confidence=0.90) + sets_95 = ccp.predict_conformal_set(self.x_clf, confidence=0.95) + sets_99 = ccp.predict_conformal_set(self.x_clf, confidence=0.99) + + # Calculate average prediction set sizes + size_90 = float(np.mean([len(s) for s in sets_90])) + size_95 = float(np.mean([len(s) for s in sets_95])) + size_99 = float(np.mean([len(s) for s in sets_99])) + + # Higher confidence should lead to larger prediction sets + self.assertLessEqual(size_90, size_95) + self.assertLessEqual(size_95, size_99) + + def test_auto_detection(self) -> None: + """Test automatic estimator type detection.""" + # Test classifier auto-detection + clf = RandomForestClassifier(random_state=42) + cp_clf = ConformalPredictor(clf, estimator_type="auto") + self.assertEqual(cp_clf.estimator_type, "classifier") + + # Test regressor auto-detection + reg = RandomForestRegressor(random_state=42) + cp_reg = ConformalPredictor(reg, estimator_type="auto") + self.assertEqual(cp_reg.estimator_type, "regressor") + + def test_nonconformity_functions(self) -> None: + """Test nonconformity functions for classification.""" + x_train, x_calib, y_train, y_calib = train_test_split( + self.x_clf, self.y_clf, test_size=0.2, random_state=42, + ) + + clf = RandomForestClassifier(random_state=42, n_estimators=5) + + # Test with hinge nonconformity + cp_hinge = ConformalPredictor(clf, estimator_type="classifier", + nonconformity=hinge) + cp_hinge.fit(x_train, y_train) + cp_hinge.calibrate(x_calib, y_calib) + sets_hinge = cp_hinge.predict_conformal_set(x_calib) + p_values_hinge = cp_hinge.predict_p(x_calib) + + # Test with margin nonconformity + cp_margin = ConformalPredictor(clf, estimator_type="classifier", + nonconformity=margin) + cp_margin.fit(x_train, y_train) + 
cp_margin.calibrate(x_calib, y_calib) + sets_margin = cp_margin.predict_conformal_set(x_calib) + p_values_margin = cp_margin.predict_p(x_calib) + + # Verify outputs have correct shapes + self.assertEqual(len(sets_hinge), len(y_calib)) + self.assertEqual(len(sets_margin), len(y_calib)) + self.assertEqual(len(p_values_hinge), len(y_calib)) + self.assertEqual(len(p_values_margin), len(y_calib)) + + # Different nonconformity functions should give different results + self.assertNotEqual(sets_hinge, sets_margin) + + def test_mondrian_conformal_classification(self) -> None: + """Test Mondrian conformal prediction for classification.""" + x_train, x_calib, y_train, y_calib = train_test_split( + self.x_clf, self.y_clf, test_size=0.2, random_state=42, + ) + + clf = RandomForestClassifier(random_state=42, n_estimators=5) + + # Test with custom MondrianCategorizer (skip mondrian=True for now) + mc = MondrianCategorizer() + # Simple categorizer based on first feature + mc.fit(x_calib, + f=lambda x: (x[:, 0] > np.median(x[:, 0])).astype(int), + no_bins=2) + + cp_mondrian_custom = ConformalPredictor(clf, estimator_type="classifier", + mondrian=mc) + cp_mondrian_custom.fit(x_train, y_train) + cp_mondrian_custom.calibrate(x_calib, y_calib) + sets_custom = cp_mondrian_custom.predict_conformal_set(x_calib) + p_values_custom = cp_mondrian_custom.predict_p(x_calib) + + # Test without Mondrian (baseline) + cp_baseline = ConformalPredictor(clf, estimator_type="classifier", + mondrian=False) + cp_baseline.fit(x_train, y_train) + cp_baseline.calibrate(x_calib, y_calib) + sets_baseline = cp_baseline.predict_conformal_set(x_calib) + + # Verify outputs have correct shapes + self.assertEqual(len(sets_custom), len(sets_baseline)) + self.assertEqual(len(p_values_custom), len(y_calib)) + + # Verify that prediction sets contain valid class indices + for pred_set in sets_custom: + self.assertIsInstance(pred_set, list) + for class_idx in pred_set: + self.assertIsInstance(class_idx, (int, 
np.integer)) + self.assertGreaterEqual(class_idx, 0) + + self.assertTrue(np.all(p_values_custom >= 0)) + self.assertTrue(np.all(p_values_custom <= 1)) + + def test_mondrian_conformal_regression(self) -> None: + """Test Mondrian conformal prediction for regression.""" + x_train, x_calib, y_train, y_calib = train_test_split( + self.x_reg, self.y_reg, test_size=0.2, random_state=42, + ) + + reg = RandomForestRegressor(random_state=42, n_estimators=5) + + # Test with custom MondrianCategorizer for regression + mc = MondrianCategorizer() + # Categorize based on median of first feature + mc.fit(x_calib, + f=lambda x: (x[:, 0] > np.median(x[:, 0])).astype(int), + no_bins=2) + + cp_mondrian = ConformalPredictor(reg, estimator_type="regressor", + mondrian=mc) + cp_mondrian.fit(x_train, y_train) + cp_mondrian.calibrate(x_calib, y_calib) + intervals_mondrian = cp_mondrian.predict_int(x_calib) + + # Test without Mondrian (baseline) + cp_baseline = ConformalPredictor(reg, estimator_type="regressor", + mondrian=False) + cp_baseline.fit(x_train, y_train) + cp_baseline.calibrate(x_calib, y_calib) + intervals_baseline = cp_baseline.predict_int(x_calib) + + # Verify outputs have correct shapes + self.assertEqual(intervals_mondrian.shape, (len(y_calib), 2)) + self.assertEqual(intervals_baseline.shape, (len(y_calib), 2)) + + # Mondrian should give different results than baseline + self.assertFalse(np.array_equal(intervals_mondrian, intervals_baseline)) + + def test_cross_conformal_mondrian_both_classes(self) -> None: + """Test Mondrian with CrossConformalPredictors.""" + # Test classification with custom MondrianCategorizer + clf = RandomForestClassifier(random_state=42, n_estimators=5) + + # Create a simple Mondrian categorizer for classification + mc_clf = MondrianCategorizer() + mc_clf.fit(self.x_clf, + f=lambda x: (x[:, 0] > np.median(x[:, 0])).astype(int), + no_bins=2) + + ccp_clf = CrossConformalPredictor(clf, estimator_type="classifier", + n_folds=3, mondrian=mc_clf, 
random_state=42) + ccp_clf.fit(self.x_clf, self.y_clf) + sets_mondrian = ccp_clf.predict_conformal_set(self.x_clf[:10]) + p_values_mondrian = ccp_clf.predict_p(self.x_clf[:10]) + + # Test without Mondrian for comparison + ccp_clf_baseline = CrossConformalPredictor(clf, estimator_type="classifier", + n_folds=3, mondrian=False, + random_state=42) + ccp_clf_baseline.fit(self.x_clf, self.y_clf) + sets_baseline = ccp_clf_baseline.predict_conformal_set(self.x_clf[:10]) + + # Verify shapes + self.assertEqual(len(sets_mondrian), len(sets_baseline)) + self.assertEqual(len(p_values_mondrian), 10) + + # Test regression with binning (Mondrian-style for regression) + reg = RandomForestRegressor(random_state=42, n_estimators=5) + ccp_reg = CrossConformalPredictor(reg, estimator_type="regressor", + n_folds=3, binning=3, random_state=42) + ccp_reg.fit(self.x_reg, self.y_reg) + intervals_binned = ccp_reg.predict_int(self.x_reg[:10]) - def test_cross_conformal_regressor_logd(self) -> None: - """Test CrossConformalCV with a regressor on the logd dataset.""" - x, y = self.featurize_smiles(self.logd_data["smiles"], self.logd_data["exp"]) + # Test without binning for comparison + ccp_reg_baseline = CrossConformalPredictor(reg, estimator_type="regressor", + n_folds=3, binning=None, + random_state=42) + ccp_reg_baseline.fit(self.x_reg, self.y_reg) + intervals_baseline_reg = ccp_reg_baseline.predict_int(self.x_reg[:10]) - # Initialize and test the CrossConformalCV regressor - reg = RandomForestRegressor(n_estimators=5, random_state=42) - ccp = CrossConformalCV(reg, estimator_type="auto", n_folds=3) - ccp.fit(x, y) + # Verify shapes + self.assertEqual(intervals_binned.shape, (10, 2)) + self.assertEqual(intervals_baseline_reg.shape, (10, 2)) - # Prediction intervals - intervals = ccp.predict_int(x) + def test_error_handling(self) -> None: + """Test error handling for various invalid operations.""" + clf = RandomForestClassifier(random_state=42, n_estimators=5) + cp = ConformalPredictor(clf, 
estimator_type="classifier") - # Assertions - self.assertEqual(intervals.shape[0], len(y)) - self.assertEqual(intervals.shape[1], 2) # Lower and upper bounds - self.assertTrue(np.all(intervals[:, 0] <= intervals[:, 1])) # Valid intervals + # Test prediction before fitting + with self.assertRaises(ValueError): + cp.predict(self.x_clf[:5]) - def test_cross_conformal_classifier_bbbp(self) -> None: - """Test CrossConformalCV with a classifier on the bbbp dataset.""" - x, y = self.featurize_smiles(self.bbbp_data["smiles"], self.bbbp_data["p_np"]) + # Test calibration before fitting + with self.assertRaises(RuntimeError): + cp.calibrate(self.x_clf[:10], self.y_clf[:10]) - # Initialize and test the CrossConformalCV classifier - clf = RandomForestClassifier(n_estimators=5, random_state=42) - ccp = CrossConformalCV(clf, estimator_type="auto", n_folds=3) - ccp.fit(x, y) + # Test predict_proba on regressor + reg = RandomForestRegressor(random_state=42, n_estimators=5) + cp_reg = ConformalPredictor(reg, estimator_type="regressor") + cp_reg.fit(self.x_reg[:50], self.y_reg[:50]) - # Predictions - preds = ccp.predict(x) - probs = ccp.predict_proba(x) - sets = ccp.predict_conformal_set(x) + with self.assertRaises(NotImplementedError): + cp_reg.predict_proba(self.x_reg[:5]) - # Assertions - self.assertEqual(len(preds), len(y)) - self.assertEqual(probs.shape[0], len(y)) - self.assertEqual(len(sets), len(y)) - self.assertTrue(all(len(s) > 0 for s in sets)) # Ensure non-empty sets + # Test predict_int on classifier + cp.fit(self.x_clf[:50], self.y_clf[:50]) + with self.assertRaises(NotImplementedError): + cp.predict_int(self.x_clf[:5]) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index c8e5bb53..4d3625f6 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -22,8 +22,8 @@ from molpipeline import ErrorFilter, FilterReinserter, Pipeline, PostPredictionWrapper from 
molpipeline.any2mol import AutoToMol, SmilesToMol from molpipeline.experimental.uncertainty.conformal import ( - CrossConformalCV, - UnifiedConformalCV, + ConformalPredictor, + CrossConformalPredictor ) from molpipeline.mol2any import MolToMorganFP, MolToRDKitPhysChem, MolToSmiles from molpipeline.mol2mol import ( @@ -405,7 +405,7 @@ def test_conformal_pipeline_classifier(self) -> None: # Build a pipeline: SMILES -> Mol -> MorganFP -> RF smi2mol = SmilesToMol() mol2morgan = MolToMorganFP(radius=2, n_bits=128) - rf = RandomForestClassifier(n_estimators=10, random_state=42) + rf = RandomForestClassifier(n_estimators=5, random_state=42) pipeline = Pipeline( [ ("smi2mol", smi2mol), @@ -422,8 +422,8 @@ def test_conformal_pipeline_classifier(self) -> None: random_state=42, ) - # UnifiedConformalCV - cp = UnifiedConformalCV(pipeline, estimator_type="classifier") + # ConformalPredictor + cp = ConformalPredictor(pipeline, estimator_type="classifier") cp.fit(X_train, y_train) cp.calibrate(X_calib, y_calib) preds = cp.predict(X_calib) @@ -433,8 +433,8 @@ def test_conformal_pipeline_classifier(self) -> None: self.assertEqual(probs.shape[0], len(y_calib)) self.assertEqual(len(sets), len(y_calib)) - # CrossConformalCV - ccp = CrossConformalCV(pipeline, estimator_type="classifier", n_folds=3) + # CrossConformalPredictor + ccp = CrossConformalPredictor(pipeline, estimator_type="classifier", n_folds=3) ccp.fit(smiles, y) preds_ccp = ccp.predict(smiles) probs_ccp = ccp.predict_proba(smiles) From bc239eb287d337da14a07be6c5f736e42df8cc92 Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Tue, 15 Jul 2025 01:09:56 +0200 Subject: [PATCH 14/20] linted and formatted --- .../experimental/uncertainty/conformal.py | 418 ++++++++++-------- .../test_uncertainty/test_conformal.py | 53 +-- 2 files changed, 263 insertions(+), 208 deletions(-) diff --git a/molpipeline/experimental/uncertainty/conformal.py b/molpipeline/experimental/uncertainty/conformal.py index dc0546f3..ef02299f 100644 --- 
a/molpipeline/experimental/uncertainty/conformal.py +++ b/molpipeline/experimental/uncertainty/conformal.py @@ -1,7 +1,4 @@ -"""Conformal prediction wrappers for classification and regression using crepes. - -Provides unified and cross-conformal prediction with Mondrian and nonconformity options. -""" +"""Conformal prediction wrappers for classification and regression using crepes.""" from collections.abc import Callable from typing import Any, Literal @@ -19,23 +16,15 @@ def _bin_targets(y: npt.NDArray[Any], n_bins: int = 10) -> npt.NDArray[np.int_]: """Bin continuous targets for stratified splitting in regression. - Parameters - ---------- - y : npt.NDArray[Any] - Target values. - n_bins : int, optional - Number of bins (default: 10). - Returns ------- - npt.NDArray[np.int_] Binned targets. """ y = np.asarray(y) bins = np.linspace(np.min(y), np.max(y), n_bins + 1) - y_binned = np.digitize(y, bins) - 1 # bins start at 1 - y_binned[y_binned == n_bins] = n_bins - 1 # edge case + y_binned = np.digitize(y, bins) - 1 + y_binned[y_binned == n_bins] = n_bins - 1 return y_binned @@ -44,11 +33,6 @@ def _detect_estimator_type( ) -> Literal["classifier", "regressor"]: """Automatically detect whether an estimator is a classifier or regressor. - Parameters - ---------- - estimator : BaseEstimator - The estimator to check. - Returns ------- Literal["classifier", "regressor"] @@ -57,7 +41,7 @@ def _detect_estimator_type( Raises ------ ValueError - If the estimator type cannot be determined. + If type cannot be determined. """ if is_classifier(estimator): @@ -70,41 +54,6 @@ def _detect_estimator_type( ) -def _get_mondrian_param_classification( - mondrian: MondrianCategorizer | Callable[..., Any] | bool, - y_calib: npt.NDArray[Any], -) -> MondrianCategorizer | Callable[..., Any] | npt.NDArray[Any] | None: - """Get mondrian parameter for classification calibration. 
- - Returns - ------- - MondrianCategorizer | Callable[..., Any] | npt.NDArray[Any] | None - Mondrian parameter for classification calibration. - - """ - if isinstance(mondrian, MondrianCategorizer) or callable(mondrian): - return mondrian - if mondrian is True: - return y_calib - return None - - -def _get_mondrian_param_regression( - mondrian: MondrianCategorizer | Callable[..., Any] | bool, -) -> MondrianCategorizer | None: - """Get mondrian parameter for regression calibration. - - Returns - ------- - MondrianCategorizer | None - Mondrian parameter for regression calibration. - - """ - if isinstance(mondrian, MondrianCategorizer) or callable(mondrian): - return mondrian - return None - - class ConformalPredictor(BaseEstimator): # pylint: disable=too-many-instance-attributes """Conformal prediction wrapper for both classifiers and regressors. @@ -141,15 +90,11 @@ def __init__( confidence_level : float, optional Confidence level for prediction sets/intervals (default: 0.9). estimator_type : Literal["classifier", "regressor", "auto"], optional - Type of estimator: 'classifier', 'regressor', or 'auto' to - detect automatically (default: 'auto'). + Type of estimator (default: "auto"). nonconformity : Callable, optional - Nonconformity function for classification that takes (X_prob, classes, y) - and returns non-conformity scores. Examples: hinge, margin from - crepes.extras. + Nonconformity function for classification. difficulty_estimator : DifficultyEstimator | None, optional Difficulty estimator for normalized conformal prediction (regression). - Should be a fitted DifficultyEstimator from crepes.extras. binning : int | MondrianCategorizer | None, optional Number of bins or MondrianCategorizer for Mondrian calibration (regression). n_jobs : int, optional @@ -157,14 +102,42 @@ def __init__( **kwargs : Any Additional keyword arguments for crepes. + Raises + ------ + ValueError + For invalid parameters. 
+ """ + if not 0 < confidence_level < 1: + raise ValueError( + f"confidence_level must be in (0, 1), got {confidence_level}", + ) + + if estimator_type == "auto": + estimator_type = _detect_estimator_type(estimator) + elif estimator_type not in {"classifier", "regressor"}: + raise ValueError( + f"estimator_type must be 'classifier', 'regressor', " + f"or 'auto', got {estimator_type}", + ) + + if estimator_type == "regressor" and mondrian is True: + raise ValueError( + "mondrian=True is supported for classification.", + ) + + if binning is not None and estimator_type == "classifier": + raise ValueError( + "binning parameter is only supported for regression.", + ) + + if isinstance(binning, int) and binning <= 0: + raise ValueError(f"binning must be positive integer, got {binning}") + self.estimator = estimator self.mondrian = mondrian self.confidence_level = confidence_level - if estimator_type == "auto": - self.estimator_type = _detect_estimator_type(estimator) - else: - self.estimator_type = estimator_type + self.estimator_type = estimator_type self.nonconformity = nonconformity self.difficulty_estimator = difficulty_estimator self.binning = binning @@ -192,7 +165,9 @@ def fit(self, x: npt.NDArray[Any], y: npt.NDArray[Any]) -> "ConformalPredictor": Raises ------ ValueError - If estimator_type is not 'classifier' or 'regressor'. + For invalid types and uninitialized. + RuntimeError + For initialization failures. """ if self.estimator_type == "classifier": @@ -202,6 +177,8 @@ def fit(self, x: npt.NDArray[Any], y: npt.NDArray[Any]) -> "ConformalPredictor": else: raise ValueError("estimator_type must be 'classifier' or 'regressor'") + if self._conformal is None: # Type narrowing + raise RuntimeError("Failed to initialize conformal wrapper") self._conformal.fit(x, y) self.fitted_ = True return self @@ -225,39 +202,38 @@ def calibrate( Raises ------ - ValueError - If estimator_type is not 'classifier' or 'regressor'. 
RuntimeError - If the estimator must be fitted before calling calibrate. + If not fitted before calibrating. + ValueError + For validation errors. """ if not self.fitted_ or self._conformal is None: raise RuntimeError("Estimator must be fitted before calling calibrate") + + if self.estimator_type not in {"classifier", "regressor"}: + raise ValueError("estimator_type must be 'classifier' or 'regressor'") + kwargs: dict[str, Any] = calib_params.copy() if self.estimator_type == "classifier": + if self.nonconformity is not None: + kwargs["nc"] = self.nonconformity if self.mondrian is True: - self._conformal.calibrate( - x_calib, - y_calib, - class_cond=True, - **calib_params, - ) - elif isinstance( + kwargs["class_cond"] = True + elif isinstance(self.mondrian, MondrianCategorizer) or callable( self.mondrian, - (MondrianCategorizer, type(lambda: None)), - ) and callable(self.mondrian): - self._conformal.calibrate( - x_calib, - y_calib, - mc=self.mondrian, - **calib_params, - ) - else: - self._conformal.calibrate(x_calib, y_calib, **calib_params) - elif self.estimator_type == "regressor": - mc = _get_mondrian_param_regression(self.mondrian) - self._conformal.calibrate(x_calib, y_calib, mc=mc, **calib_params) - else: - raise ValueError("estimator_type must be 'classifier' or 'regressor'") + ): + kwargs["mc"] = self.mondrian + self._conformal.calibrate(x_calib, y_calib, **kwargs) + else: # regressor + if isinstance(self.mondrian, MondrianCategorizer) or callable( + self.mondrian, + ): + kwargs["mc"] = self.mondrian + if self.difficulty_estimator is not None: + kwargs["de"] = self.difficulty_estimator + if isinstance(self.binning, MondrianCategorizer): + kwargs["mc"] = self.binning + self._conformal.calibrate(x_calib, y_calib, **kwargs) self.calibrated_ = True def predict(self, x: npt.NDArray[Any]) -> npt.NDArray[Any]: @@ -276,7 +252,7 @@ def predict(self, x: npt.NDArray[Any]) -> npt.NDArray[Any]: Raises ------ ValueError - If estimator must be fitted before calling 
predict. + If not fitted. """ if not self.fitted_ or self._conformal is None: @@ -299,11 +275,11 @@ def predict_proba(self, x: npt.NDArray[Any]) -> npt.NDArray[Any]: Raises ------ ValueError - If estimator must be fitted before calling predict_proba. - NotImplementedError - If called for a regressor. + If not fitted. RuntimeError - If the internal conformal wrapper is not of the expected type. + If wrapper type is incorrect. + NotImplementedError + If called for regressor. """ if not self.fitted_ or self._conformal is None: @@ -326,7 +302,7 @@ def predict_conformal_set( x : npt.NDArray[Any] Features to predict. confidence : float, optional - Confidence level. + Confidence level. Must be in (0, 1). Returns ------- @@ -336,34 +312,38 @@ def predict_conformal_set( Raises ------ ValueError - If estimator must be fitted before calling predict_conformal_set. - NotImplementedError - If called for a regressor. + If not fitted or invalid confidence. RuntimeError - If the internal conformal wrapper is not of the expected type. + If wrapper not initialized. + NotImplementedError + If called for regressor. 
""" if not self.fitted_: raise ValueError( - "Estimator must be fitted before calling predict_conformal_set", + "Estimator must be fitted and calibrated before calling predict", ) if self._conformal is None: raise RuntimeError("Conformal wrapper is not initialized") + if not self.calibrated_: + raise ValueError( + "Conformal predictor must be calibrated before making predictions", + ) if self.estimator_type != "classifier": raise NotImplementedError( "predict_conformal_set is only for classification.", ) + conf = confidence if confidence is not None else self.confidence_level + if not 0 < conf < 1: + raise ValueError(f"confidence must be in (0, 1), got {conf}") + if isinstance(self._conformal, WrapClassifier): prediction_sets_binary = self._conformal.predict_set(x, confidence=conf) prediction_sets = [] for i in range(prediction_sets_binary.shape[0]): - class_indices = [ - j - for j in range(prediction_sets_binary.shape[1]) - if prediction_sets_binary[i, j] == 1 - ] + class_indices = np.where(prediction_sets_binary[i, :])[0].tolist() prediction_sets.append(class_indices) return prediction_sets @@ -387,17 +367,23 @@ def predict_p(self, x: npt.NDArray[Any], **kwargs: Any) -> npt.NDArray[Any]: Raises ------ ValueError - If estimator must be fitted before calling predict_p. - NotImplementedError - If called for a regressor. + If not fitted or not calibrated. RuntimeError - If the internal conformal wrapper is not of the expected type. + If wrapper not initialized. + NotImplementedError + If called for regressor. 
""" if not self.fitted_: - raise ValueError("Estimator must be fitted before calling predict_p") + raise ValueError( + "Estimator must be fitted and calibrated before calling predict_p", + ) if self._conformal is None: raise RuntimeError("Conformal wrapper is not initialized") + if not self.calibrated_: + raise ValueError( + "Conformal predictor must be calibrated before making predictions", + ) if self.estimator_type != "classifier": raise NotImplementedError("predict_p is only for classification.") if isinstance(self._conformal, WrapClassifier): @@ -416,7 +402,7 @@ def predict_int( x : npt.NDArray[Any] Features to predict. confidence : float, optional - Confidence level. + Confidence level. Must be in (0, 1). Returns ------- @@ -426,20 +412,31 @@ def predict_int( Raises ------ ValueError - If estimator must be fitted before calling predict_int. - NotImplementedError - If called for a classifier. + If not fitted or invalid confidence. RuntimeError - If the internal conformal wrapper is not of the expected type. + If wrapper not initialized. + NotImplementedError + If called for classifier. 
""" + if self.estimator_type != "regressor": + raise NotImplementedError("predict_int is only for regression.") + if not self.fitted_: - raise ValueError("Estimator must be fitted before calling predict_int") + raise ValueError( + "Estimator must be fitted and calibrated before calling predict_int", + ) if self._conformal is None: raise RuntimeError("Conformal wrapper is not initialized") - if self.estimator_type != "regressor": - raise NotImplementedError("predict_int is only for regression.") + if not self.calibrated_: + raise ValueError( + "Conformal predictor must be calibrated before making predictions", + ) + conf = confidence if confidence is not None else self.confidence_level + if not 0 < conf < 1: + raise ValueError(f"confidence must be in (0, 1), got {conf}") + if isinstance(self._conformal, WrapRegressor): return self._conformal.predict_int(x, confidence=conf) raise RuntimeError("Expected WrapRegressor but got different type") @@ -493,17 +490,23 @@ def set_params(self, **params: Any) -> "ConformalPredictor": Raises ------ ValueError + If invalid parameter provided. """ valid_params = self.get_params(deep=False) estimator_params: dict[str, Any] = {} for key, value in params.items(): - if key in valid_params: + if key.startswith("estimator__"): + # Handle nested estimator parameters + nested_key = key[len("estimator__") :] + estimator_params[nested_key] = value + elif key in valid_params: setattr(self, key, value) else: raise ValueError( - f"Invalid parameter {key} for estimator {type(self).__name__}", + f"Invalid parameter {key} for estimator {type(self).__name__}. 
" + f"Valid parameters: {list(valid_params.keys())}", ) if estimator_params and hasattr(self.estimator, "set_params"): @@ -512,8 +515,11 @@ def set_params(self, **params: Any) -> "ConformalPredictor": return self -class CrossConformalPredictor(BaseEstimator): # pylint: disable=too-many-instance-attributes - """Cross-conformal prediction using WrapClassifier/WrapRegressor.""" +class CrossConformalPredictor(ConformalPredictor): # pylint: disable=too-many-instance-attributes + """Cross-conformal prediction using WrapClassifier/WrapRegressor. + + Inherits from ConformalPredictor and extends it with cross-validation functionality. + """ def __init__( self, @@ -547,6 +553,7 @@ def __init__( Confidence level for prediction sets/intervals (default: 0.9). mondrian : MondrianCategorizer | Callable[..., Any] | bool, optional Mondrian calibration/grouping (default: False). + - True: Use class-conditional calibration for classification nonconformity : Callable, optional Nonconformity function for classification that takes (X_prob, classes, y) and returns non-conformity scores. Examples: hinge, margin from @@ -554,7 +561,7 @@ def __init__( binning : int | MondrianCategorizer | None, optional Number of bins or MondrianCategorizer for Mondrian calibration (regression). estimator_type : Literal["classifier", "regressor", "auto"], optional - Auto detects it automatically (default: 'auto'). + Type of estimator (default: 'auto'). n_bins : int, optional Number of bins for stratified splitting in regression (default: 10). random_state : int | None, optional @@ -562,22 +569,37 @@ def __init__( **kwargs : Any Additional keyword arguments for crepes. + Raises + ------ + ValueError + If parameter validation fails. 
+ """ - self.estimator = estimator + # Additional validation for cross-conformal specific parameters + if n_folds <= 1: + raise ValueError(f"n_folds must be > 1, got {n_folds}") + + if n_bins <= 0: + raise ValueError(f"n_bins must be positive, got {n_bins}") + + # Initialize parent class + super().__init__( + estimator=estimator, + mondrian=mondrian, + confidence_level=confidence_level, + estimator_type=estimator_type, + nonconformity=nonconformity, + difficulty_estimator=None, # Not used in cross-conformal + binning=binning, + n_jobs=1, # Not used in cross-conformal + **kwargs, + ) + + # Cross-conformal specific attributes self.n_folds = n_folds - self.confidence_level = confidence_level - self.mondrian = mondrian - self.nonconformity = nonconformity - self.binning = binning - if estimator_type == "auto": - self.estimator_type = _detect_estimator_type(estimator) - else: - self.estimator_type = estimator_type self.n_bins = n_bins - self.random_state = random_state # Store the original seed/state - self.kwargs = kwargs + self.random_state = random_state self.models_: list[WrapClassifier | WrapRegressor] = [] - self.fitted_ = False def _create_splitter( self, @@ -686,37 +708,43 @@ def _fit_single_model( Fitted and calibrated model. 
""" + kwargs: dict[str, Any] = {} if self.estimator_type == "classifier": model = WrapClassifier(clone(self.estimator)) model.fit(x_array[train_idx], y_array[train_idx]) + if self.nonconformity is not None: + kwargs["nc"] = self.nonconformity if self.mondrian is True: - model.calibrate(x_array[calib_idx], y_array[calib_idx], class_cond=True) - elif isinstance( + kwargs["class_cond"] = True + elif isinstance(self.mondrian, MondrianCategorizer) or callable( self.mondrian, - (MondrianCategorizer, type(lambda: None)), - ) and callable(self.mondrian): - model.calibrate( - x_array[calib_idx], - y_array[calib_idx], - mc=self.mondrian, - ) - else: - model.calibrate(x_array[calib_idx], y_array[calib_idx]) - else: + ): + kwargs["mc"] = self.mondrian + + model.calibrate(x_array[calib_idx], y_array[calib_idx], **kwargs) + + else: # regressor model = WrapRegressor(clone(self.estimator)) model.fit(x_array[train_idx], y_array[train_idx]) - mc = _get_mondrian_param_regression(self.mondrian) + + if isinstance(self.mondrian, MondrianCategorizer) or callable( + self.mondrian, + ): + kwargs["mc"] = self.mondrian + if self.binning is not None and isinstance(self.binning, int): mc_obj, bin_func = self._create_mondrian_categorizer( model, y_array[calib_idx], ) mc_obj.fit(x_array[calib_idx], f=bin_func, no_bins=self.binning) - mc = mc_obj - elif self.binning is not None: - mc = self.binning - model.calibrate(x_array[calib_idx], y_array[calib_idx], mc=mc) + kwargs["mc"] = mc_obj + elif isinstance(self.binning, MondrianCategorizer): + kwargs["mc"] = self.binning + + model.calibrate(x_array[calib_idx], y_array[calib_idx], **kwargs) + return model def fit( @@ -751,8 +779,30 @@ def fit( self.models_.append(model) self.fitted_ = True + self.calibrated_ = True # Models are calibrated during fit return self + def calibrate( + self, + x_calib: npt.NDArray[Any], + y_calib: npt.NDArray[Any], + **calib_params: Any, + ) -> None: + """Calibrate method for cross-conformal predictor. 
+ + Note: For CrossConformalPredictor, calibration happens automatically + during the fit() method. + + Raises + ------ + NotImplementedError + Cross-conformal calibration happens during fit(). + + """ + raise NotImplementedError( + "CrossConformalPredictor performs calibration automatically during fit(). ", + ) + def predict(self, x: npt.NDArray[Any]) -> npt.NDArray[Any]: """Predict using the cross-conformal predictor. @@ -764,7 +814,7 @@ def predict(self, x: npt.NDArray[Any]) -> npt.NDArray[Any]: Returns ------- npt.NDArray[Any] - Predictions (majority vote). + Predictions (majority vote for classification, mean for regression). Raises ------ @@ -774,9 +824,13 @@ def predict(self, x: npt.NDArray[Any]) -> npt.NDArray[Any]: """ if not self.fitted_: raise ValueError("Estimator must be fitted before calling predict") + + if self.estimator_type == "classifier": + result = np.array([m.predict(x) for m in self.models_]) + pred_mode = mode(result, axis=0, keepdims=False) + return np.ravel(pred_mode.mode) result = np.array([m.predict(x) for m in self.models_]) - pred_mode = mode(result, axis=0, keepdims=False) - return np.ravel(pred_mode.mode) + return np.mean(result, axis=0) def predict_proba(self, x: npt.NDArray[Any]) -> npt.NDArray[Any]: """Predict probabilities using the cross-conformal predictor. @@ -818,7 +872,7 @@ def predict_conformal_set( x : npt.NDArray[Any] Features to predict. confidence : float, optional - Confidence level. + Confidence level. Must be in (0, 1). 
Returns ------- @@ -841,7 +895,10 @@ def predict_conformal_set( raise NotImplementedError( "predict_conformal_set is only for classification.", ) + conf = confidence if confidence is not None else self.confidence_level + if not 0 < conf < 1: + raise ValueError(f"confidence must be in (0, 1), got {conf}") p_values_list = [m.predict_p(x) for m in self.models_] aggregated_p_values = np.mean(p_values_list, axis=0) @@ -850,11 +907,7 @@ def predict_conformal_set( prediction_sets = [] for i in range(prediction_sets_binary.shape[0]): - class_indices = [ - j - for j in range(prediction_sets_binary.shape[1]) - if prediction_sets_binary[i, j] == 1 - ] + class_indices = np.where(prediction_sets_binary[i, :])[0].tolist() prediction_sets.append(class_indices) return prediction_sets @@ -902,7 +955,7 @@ def predict_int( x : npt.NDArray[Any] Features to predict. confidence : float, optional - Confidence level. + Confidence level. Must be in (0, 1). Returns ------- @@ -912,7 +965,8 @@ def predict_int( Raises ------ ValueError - If estimator must be fitted before calling predict_int. + If estimator must be fitted before calling predict_int + or if confidence is not in valid range. NotImplementedError If called for a classifier. 
@@ -923,12 +977,14 @@ def predict_int( raise NotImplementedError("predict_int is only for regression.") conf = confidence if confidence is not None else self.confidence_level + if not 0 < conf < 1: + raise ValueError(f"confidence must be in (0, 1), got {conf}") intervals_list = [m.predict_int(x, confidence=conf) for m in self.models_] intervals_array = np.array(intervals_list) # shape: (n_folds, n_samples, 2) - lower_bounds = np.nanmean(intervals_array[:, :, 0], axis=0) - upper_bounds = np.nanmean(intervals_array[:, :, 1], axis=0) + lower_bounds = np.mean(intervals_array[:, :, 0], axis=0) + upper_bounds = np.mean(intervals_array[:, :, 1], axis=0) return np.column_stack([lower_bounds, upper_bounds]) @@ -938,8 +994,7 @@ def get_params(self, deep: bool = True) -> dict[str, Any]: Parameters ---------- deep : bool, optional - If True, will return the parameters for this estimator and - contained subobjects that are estimators. + If True, will return the parameters for this estimator. Returns ------- @@ -947,22 +1002,14 @@ def get_params(self, deep: bool = True) -> dict[str, Any]: Parameter names mapped to their values. """ - params = { - "estimator": self.estimator, + params = super().get_params(deep=deep) + + cross_params = { "n_folds": self.n_folds, - "confidence_level": self.confidence_level, - "mondrian": self.mondrian, - "nonconformity": self.nonconformity, - "binning": self.binning, - "estimator_type": self.estimator_type, "n_bins": self.n_bins, "random_state": self.random_state, } - params.update(self.kwargs) - - if deep and hasattr(self.estimator, "get_params"): - estimator_params = self.estimator.get_params(deep=True) - params.update({f"estimator__{k}": v for k, v in estimator_params.items()}) + params.update(cross_params) return params @@ -982,17 +1029,22 @@ def set_params(self, **params: Any) -> "CrossConformalPredictor": Raises ------ ValueError + If invalid parameter provided. 
""" valid_params = self.get_params(deep=False) estimator_params: dict[str, Any] = {} for key, value in params.items(): - if key in valid_params: + if key.startswith("estimator__"): + nested_key = key[len("estimator__") :] + estimator_params[nested_key] = value + elif key in valid_params: setattr(self, key, value) else: raise ValueError( - f"Invalid parameter {key} for estimator {type(self).__name__}", + f"Invalid parameter {key} for estimator {type(self).__name__}. " + f"Valid parameters: {list(valid_params.keys())}", ) if estimator_params and hasattr(self.estimator, "set_params"): diff --git a/tests/test_experimental/test_uncertainty/test_conformal.py b/tests/test_experimental/test_uncertainty/test_conformal.py index d81742ef..5febcb11 100644 --- a/tests/test_experimental/test_uncertainty/test_conformal.py +++ b/tests/test_experimental/test_uncertainty/test_conformal.py @@ -11,7 +11,6 @@ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor from sklearn.model_selection import train_test_split -from molpipeline import Pipeline from molpipeline.any2mol import SmilesToMol from molpipeline.experimental.uncertainty.conformal import ( ConformalPredictor, @@ -48,46 +47,50 @@ def setUpClass(cls) -> None: # Set up pipeline stages separately to handle invalid molecules smi2mol = SmilesToMol(n_jobs=1) morgan = MolToMorganFP(radius=FP_RADIUS, n_bits=FP_SIZE, n_jobs=1) - + # Process classification data bbbp_clean = bbbp_df.dropna(subset=["smiles", "p_np"]) smiles_list = bbbp_clean["smiles"].tolist() labels_list = bbbp_clean["p_np"].tolist() - + # Convert SMILES to molecules first, filter out invalid ones molecules = smi2mol.fit_transform(smiles_list) valid_clf_data = [] - + for mol, label in zip(molecules, labels_list, strict=False): # Skip InvalidInstance objects if mol is None or hasattr(mol, "_fields"): # InvalidInstance is a NamedTuple continue # Generate fingerprint for valid molecule - fp = morgan.transform([mol])[0] - if fp is not None and 
hasattr(fp, "toarray"): - valid_clf_data.append((fp.toarray().flatten(), label)) + try: + fp = morgan.transform([mol])[0] # type: ignore[list-item] + if fp is not None and hasattr(fp, "toarray"): + valid_clf_data.append((fp.toarray().flatten(), label)) + except (AttributeError, TypeError): + # Skip molecules that can't be processed + continue if not valid_clf_data: raise ValueError("No valid classification data found") - + cls.x_clf, cls.y_clf = map(np.array, zip(*valid_clf_data, strict=False)) # Process regression data logd_clean = logd_df.dropna(subset=["smiles", "exp"]) smiles_list_reg = logd_clean["smiles"].tolist() labels_list_reg = logd_clean["exp"].tolist() - + # Convert SMILES to molecules first, filter out invalid ones molecules_reg = smi2mol.transform(smiles_list_reg) valid_reg_data = [] - + for mol, label in zip(molecules_reg, labels_list_reg, strict=False): # Skip InvalidInstance objects if mol is None or hasattr(mol, "_fields"): # InvalidInstance is a NamedTuple continue # Generate fingerprint for valid molecule - ensure mol is valid try: - fp = morgan.transform([mol])[0] + fp = morgan.transform([mol])[0] # type: ignore[list-item] if fp is not None and hasattr(fp, "toarray"): valid_reg_data.append((fp.toarray().flatten(), label)) except (AttributeError, TypeError): @@ -96,7 +99,7 @@ def setUpClass(cls) -> None: if not valid_reg_data: raise ValueError("No valid regression data found") - + cls.x_reg, cls.y_reg = map(np.array, zip(*valid_reg_data, strict=False)) def test_conformal_prediction_classifier(self) -> None: @@ -285,7 +288,7 @@ def test_nonconformity_functions(self) -> None: # Test with hinge nonconformity cp_hinge = ConformalPredictor(clf, estimator_type="classifier", - nonconformity=hinge) + nonconformity=hinge) cp_hinge.fit(x_train, y_train) cp_hinge.calibrate(x_calib, y_calib) sets_hinge = cp_hinge.predict_conformal_set(x_calib) @@ -293,7 +296,7 @@ def test_nonconformity_functions(self) -> None: # Test with margin nonconformity cp_margin = 
ConformalPredictor(clf, estimator_type="classifier", - nonconformity=margin) + nonconformity=margin) cp_margin.fit(x_train, y_train) cp_margin.calibrate(x_calib, y_calib) sets_margin = cp_margin.predict_conformal_set(x_calib) @@ -324,7 +327,7 @@ def test_mondrian_conformal_classification(self) -> None: no_bins=2) cp_mondrian_custom = ConformalPredictor(clf, estimator_type="classifier", - mondrian=mc) + mondrian=mc) cp_mondrian_custom.fit(x_train, y_train) cp_mondrian_custom.calibrate(x_calib, y_calib) sets_custom = cp_mondrian_custom.predict_conformal_set(x_calib) @@ -332,7 +335,7 @@ def test_mondrian_conformal_classification(self) -> None: # Test without Mondrian (baseline) cp_baseline = ConformalPredictor(clf, estimator_type="classifier", - mondrian=False) + mondrian=False) cp_baseline.fit(x_train, y_train) cp_baseline.calibrate(x_calib, y_calib) sets_baseline = cp_baseline.predict_conformal_set(x_calib) @@ -347,7 +350,7 @@ def test_mondrian_conformal_classification(self) -> None: for class_idx in pred_set: self.assertIsInstance(class_idx, (int, np.integer)) self.assertGreaterEqual(class_idx, 0) - + self.assertTrue(np.all(p_values_custom >= 0)) self.assertTrue(np.all(p_values_custom <= 1)) @@ -367,14 +370,14 @@ def test_mondrian_conformal_regression(self) -> None: no_bins=2) cp_mondrian = ConformalPredictor(reg, estimator_type="regressor", - mondrian=mc) + mondrian=mc) cp_mondrian.fit(x_train, y_train) cp_mondrian.calibrate(x_calib, y_calib) intervals_mondrian = cp_mondrian.predict_int(x_calib) # Test without Mondrian (baseline) cp_baseline = ConformalPredictor(reg, estimator_type="regressor", - mondrian=False) + mondrian=False) cp_baseline.fit(x_train, y_train) cp_baseline.calibrate(x_calib, y_calib) intervals_baseline = cp_baseline.predict_int(x_calib) @@ -398,15 +401,15 @@ def test_cross_conformal_mondrian_both_classes(self) -> None: no_bins=2) ccp_clf = CrossConformalPredictor(clf, estimator_type="classifier", - n_folds=3, mondrian=mc_clf, random_state=42) + 
n_folds=3, mondrian=mc_clf, random_state=42) ccp_clf.fit(self.x_clf, self.y_clf) sets_mondrian = ccp_clf.predict_conformal_set(self.x_clf[:10]) p_values_mondrian = ccp_clf.predict_p(self.x_clf[:10]) # Test without Mondrian for comparison ccp_clf_baseline = CrossConformalPredictor(clf, estimator_type="classifier", - n_folds=3, mondrian=False, - random_state=42) + n_folds=3, mondrian=False, + random_state=42) ccp_clf_baseline.fit(self.x_clf, self.y_clf) sets_baseline = ccp_clf_baseline.predict_conformal_set(self.x_clf[:10]) @@ -417,14 +420,14 @@ def test_cross_conformal_mondrian_both_classes(self) -> None: # Test regression with binning (Mondrian-style for regression) reg = RandomForestRegressor(random_state=42, n_estimators=5) ccp_reg = CrossConformalPredictor(reg, estimator_type="regressor", - n_folds=3, binning=3, random_state=42) + n_folds=3, binning=3, random_state=42) ccp_reg.fit(self.x_reg, self.y_reg) intervals_binned = ccp_reg.predict_int(self.x_reg[:10]) # Test without binning for comparison ccp_reg_baseline = CrossConformalPredictor(reg, estimator_type="regressor", - n_folds=3, binning=None, - random_state=42) + n_folds=3, binning=None, + random_state=42) ccp_reg_baseline.fit(self.x_reg, self.y_reg) intervals_baseline_reg = ccp_reg_baseline.predict_int(self.x_reg[:10]) From 832e1d6ff6e03383fcc07e3ab5c70fff062c3e1b Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Tue, 15 Jul 2025 01:31:25 +0200 Subject: [PATCH 15/20] moved 2 functions to utils --- .../experimental/uncertainty/conformal.py | 96 ++----------------- 1 file changed, 9 insertions(+), 87 deletions(-) diff --git a/molpipeline/experimental/uncertainty/conformal.py b/molpipeline/experimental/uncertainty/conformal.py index ef02299f..5df0d0e9 100644 --- a/molpipeline/experimental/uncertainty/conformal.py +++ b/molpipeline/experimental/uncertainty/conformal.py @@ -8,50 +8,14 @@ from crepes import WrapClassifier, WrapRegressor from crepes.extras import DifficultyEstimator, MondrianCategorizer from 
scipy.stats import mode -from sklearn.base import BaseEstimator, clone, is_classifier, is_regressor +from sklearn.base import BaseEstimator, clone from sklearn.model_selection import KFold, StratifiedKFold from sklearn.utils import check_random_state - -def _bin_targets(y: npt.NDArray[Any], n_bins: int = 10) -> npt.NDArray[np.int_]: - """Bin continuous targets for stratified splitting in regression. - - Returns - ------- - Binned targets. - - """ - y = np.asarray(y) - bins = np.linspace(np.min(y), np.max(y), n_bins + 1) - y_binned = np.digitize(y, bins) - 1 - y_binned[y_binned == n_bins] = n_bins - 1 - return y_binned - - -def _detect_estimator_type( - estimator: BaseEstimator, -) -> Literal["classifier", "regressor"]: - """Automatically detect whether an estimator is a classifier or regressor. - - Returns - ------- - Literal["classifier", "regressor"] - The detected estimator type. - - Raises - ------ - ValueError - If type cannot be determined. - - """ - if is_classifier(estimator): - return "classifier" - if is_regressor(estimator): - return "regressor" - raise ValueError( - f"Could not determine if {type(estimator).__name__} is a " - "classifier or regressor. Please specify estimator_type explicitly.", - ) +from molpipeline.experimental.uncertainty.utils import ( + _bin_targets, + _detect_estimator_type, +) class ConformalPredictor(BaseEstimator): # pylint: disable=too-many-instance-attributes @@ -447,8 +411,7 @@ def get_params(self, deep: bool = True) -> dict[str, Any]: Parameters ---------- deep : bool, optional - If True, will return the parameters for this estimator and - contained subobjects that are estimators. + If True, will return the parameters for this estimator. 
Returns ------- @@ -498,15 +461,13 @@ def set_params(self, **params: Any) -> "ConformalPredictor": for key, value in params.items(): if key.startswith("estimator__"): - # Handle nested estimator parameters nested_key = key[len("estimator__") :] estimator_params[nested_key] = value elif key in valid_params: setattr(self, key, value) else: raise ValueError( - f"Invalid parameter {key} for estimator {type(self).__name__}. " - f"Valid parameters: {list(valid_params.keys())}", + f"Invalid parameter {key} for estimator {type(self).__name__}. ", ) if estimator_params and hasattr(self.estimator, "set_params"): @@ -516,10 +477,7 @@ def set_params(self, **params: Any) -> "ConformalPredictor": class CrossConformalPredictor(ConformalPredictor): # pylint: disable=too-many-instance-attributes - """Cross-conformal prediction using WrapClassifier/WrapRegressor. - - Inherits from ConformalPredictor and extends it with cross-validation functionality. - """ + """Cross-conformal prediction using WrapClassifier/WrapRegressor.""" def __init__( self, @@ -569,20 +527,7 @@ def __init__( **kwargs : Any Additional keyword arguments for crepes. - Raises - ------ - ValueError - If parameter validation fails. - """ - # Additional validation for cross-conformal specific parameters - if n_folds <= 1: - raise ValueError(f"n_folds must be > 1, got {n_folds}") - - if n_bins <= 0: - raise ValueError(f"n_bins must be positive, got {n_bins}") - - # Initialize parent class super().__init__( estimator=estimator, mondrian=mondrian, @@ -595,7 +540,6 @@ def __init__( **kwargs, ) - # Cross-conformal specific attributes self.n_folds = n_folds self.n_bins = n_bins self.random_state = random_state @@ -782,27 +726,6 @@ def fit( self.calibrated_ = True # Models are calibrated during fit return self - def calibrate( - self, - x_calib: npt.NDArray[Any], - y_calib: npt.NDArray[Any], - **calib_params: Any, - ) -> None: - """Calibrate method for cross-conformal predictor. 
- - Note: For CrossConformalPredictor, calibration happens automatically - during the fit() method. - - Raises - ------ - NotImplementedError - Cross-conformal calibration happens during fit(). - - """ - raise NotImplementedError( - "CrossConformalPredictor performs calibration automatically during fit(). ", - ) - def predict(self, x: npt.NDArray[Any]) -> npt.NDArray[Any]: """Predict using the cross-conformal predictor. @@ -1043,8 +966,7 @@ def set_params(self, **params: Any) -> "CrossConformalPredictor": setattr(self, key, value) else: raise ValueError( - f"Invalid parameter {key} for estimator {type(self).__name__}. " - f"Valid parameters: {list(valid_params.keys())}", + f"Invalid parameter {key} for estimator {type(self).__name__}. ", ) if estimator_params and hasattr(self.estimator, "set_params"): From d2ac8fbf185ae89b52a559a7bdc93a54ed70ef25 Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Tue, 15 Jul 2025 01:41:10 +0200 Subject: [PATCH 16/20] recommit moved 2 functions to utils --- .gitignore | 4 ++ molpipeline/experimental/uncertainty/utils.py | 48 +++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 molpipeline/experimental/uncertainty/utils.py diff --git a/.gitignore b/.gitignore index ab5c6116..0ed059fb 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,7 @@ lib/ build/ lightning_logs/ +.mypy_cache/ +__pycache__/ +*.pyc +*instructions.md diff --git a/molpipeline/experimental/uncertainty/utils.py b/molpipeline/experimental/uncertainty/utils.py new file mode 100644 index 00000000..e41a86be --- /dev/null +++ b/molpipeline/experimental/uncertainty/utils.py @@ -0,0 +1,48 @@ +"""Conformal prediction utils""" + +from typing import Any, Literal + +import numpy as np +import numpy.typing as npt +from sklearn.base import BaseEstimator, is_classifier, is_regressor + + +def _bin_targets(y: npt.NDArray[Any], n_bins: int = 10) -> npt.NDArray[np.int_]: + """Bin continuous targets for stratified splitting in regression. 
+ + Returns + ------- + Binned targets. + + """ + y = np.asarray(y) + bins = np.linspace(np.min(y), np.max(y), n_bins + 1) + y_binned = np.digitize(y, bins) - 1 + y_binned[y_binned == n_bins] = n_bins - 1 + return y_binned + + +def _detect_estimator_type( + estimator: BaseEstimator, +) -> Literal["classifier", "regressor"]: + """Automatically detect whether an estimator is a classifier or regressor. + + Returns + ------- + Literal["classifier", "regressor"] + The detected estimator type. + + Raises + ------ + ValueError + If type cannot be determined. + + """ + if is_classifier(estimator): + return "classifier" + if is_regressor(estimator): + return "regressor" + raise ValueError( + f"Could not determine if {type(estimator).__name__} is a " + "classifier or regressor. Please specify estimator_type explicitly.", + ) From 836ba4f5b731f46e9c9f6ea1949ac1fead3838c9 Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Tue, 15 Jul 2025 02:05:27 +0200 Subject: [PATCH 17/20] linters, formatters,docsig --- .../experimental/uncertainty/conformal.py | 21 +++ molpipeline/experimental/uncertainty/utils.py | 17 +- .../advanced_04_conformal_prediction.ipynb | 122 +++++++++------ .../test_uncertainty/test_conformal.py | 147 +++++++++++++----- tests/test_pipeline.py | 42 ++--- 5 files changed, 241 insertions(+), 108 deletions(-) diff --git a/molpipeline/experimental/uncertainty/conformal.py b/molpipeline/experimental/uncertainty/conformal.py index 5df0d0e9..ee7a0186 100644 --- a/molpipeline/experimental/uncertainty/conformal.py +++ b/molpipeline/experimental/uncertainty/conformal.py @@ -619,6 +619,27 @@ def bin_func( y_max: Any = y_max, n_bins: Any = n_bins, ) -> Any: + """Binning function for Mondrian categorization. + + Parameters + ---------- + x_test : Any + Test features. + model : Any, optional + Fitted model. + y_min : Any, optional + Minimum target value. + y_max : Any, optional + Maximum target value. + n_bins : Any, optional + Number of bins. 
+ + Returns + ------- + Any + Binned predictions. + + """ y_pred = model.predict(x_test) bins = np.linspace(y_min, y_max, n_bins + 1) binned = np.digitize(y_pred, bins) - 1 diff --git a/molpipeline/experimental/uncertainty/utils.py b/molpipeline/experimental/uncertainty/utils.py index e41a86be..3d9df3b4 100644 --- a/molpipeline/experimental/uncertainty/utils.py +++ b/molpipeline/experimental/uncertainty/utils.py @@ -1,4 +1,4 @@ -"""Conformal prediction utils""" +"""Conformal prediction utils.""" from typing import Any, Literal @@ -10,9 +10,17 @@ def _bin_targets(y: npt.NDArray[Any], n_bins: int = 10) -> npt.NDArray[np.int_]: """Bin continuous targets for stratified splitting in regression. + Parameters + ---------- + y : npt.NDArray[Any] + Continuous target values to bin. + n_bins : int, default=10 + Number of bins to create. + Returns ------- - Binned targets. + npt.NDArray[np.int_] + Binned targets as integer indices. """ y = np.asarray(y) @@ -27,6 +35,11 @@ def _detect_estimator_type( ) -> Literal["classifier", "regressor"]: """Automatically detect whether an estimator is a classifier or regressor. + Parameters + ---------- + estimator : BaseEstimator + The sklearn estimator to check. 
+ Returns ------- Literal["classifier", "regressor"] diff --git a/notebooks/advanced_04_conformal_prediction.ipynb b/notebooks/advanced_04_conformal_prediction.ipynb index ba2203a4..828a8bba 100644 --- a/notebooks/advanced_04_conformal_prediction.ipynb +++ b/notebooks/advanced_04_conformal_prediction.ipynb @@ -126,7 +126,7 @@ " \"\"\"\n", " eps = 1e-12\n", " entropy = -probs * np.log(probs + eps) - (1 - probs) * np.log(1 - probs + eps)\n", - " return np.mean(entropy)\n" + " return np.mean(entropy)" ] }, { @@ -161,12 +161,15 @@ "# Featurization pipeline (NaN-safe)\n", "error_filter = ErrorFilter(filter_everything=True)\n", "error_replacer = FilterReinserter.from_error_filter(error_filter, fill_value=np.nan)\n", - "featurizer = Pipeline([\n", - " (\"smi2mol\", SmilesToMol()),\n", - " (\"error_filter\", error_filter),\n", - " (\"morgan\", MolToMorganFP(radius=2, n_bits=256, return_as=\"dense\")),\n", - " (\"error_replacer\", PostPredictionWrapper(error_replacer)),\n", - "], n_jobs=1)\n", + "featurizer = Pipeline(\n", + " [\n", + " (\"smi2mol\", SmilesToMol()),\n", + " (\"error_filter\", error_filter),\n", + " (\"morgan\", MolToMorganFP(radius=2, n_bits=256, return_as=\"dense\")),\n", + " (\"error_replacer\", PostPredictionWrapper(error_replacer)),\n", + " ],\n", + " n_jobs=1,\n", + ")\n", "X_feat = featurizer.transform(smiles)\n", "\n", "print(f\"Shape of X={X_feat.shape}, y_class={y_class.shape}, y_reg={y_reg.shape}\")\n", @@ -174,7 +177,10 @@ "# Generate indices for a single split\n", "indices = np.arange(len(y_class))\n", "train_idx, test_idx = train_test_split(\n", - " indices, test_size=0.3, random_state=42, stratify=y_class,\n", + " indices,\n", + " test_size=0.3,\n", + " random_state=42,\n", + " stratify=y_class,\n", ")\n", "\n", "# Use these indices for all splits\n", @@ -207,8 +213,16 @@ " \"ensemble_rf\": RandomForestClassifier(n_estimators=100, random_state=42),\n", "}\n", "metrics_list = [\n", - " \"NLL\", \"ECE\", \"Brier\", \"Uncertainty Error 
Correlation\", \"Sharpness\",\n", - " \"Balanced Accuracy\", \"AUROC\", \"AUPRC\", \"F1 Score\", \"MCC\",\n", + " \"NLL\",\n", + " \"ECE\",\n", + " \"Brier\",\n", + " \"Uncertainty Error Correlation\",\n", + " \"Sharpness\",\n", + " \"Balanced Accuracy\",\n", + " \"AUROC\",\n", + " \"AUPRC\",\n", + " \"F1 Score\",\n", + " \"MCC\",\n", "]\n", "results = []\n", "results_cp = []\n", @@ -232,10 +246,13 @@ "\n", " # --- Conformal Prediction (CrossConformalCV) ---\n", " rf = RandomForestClassifier(n_estimators=100, random_state=42)\n", - " rf_pipeline = Pipeline([\n", - " (\"featurizer\", featurizer),\n", - " (\"rf\", rf),\n", - " ], n_jobs=1)\n", + " rf_pipeline = Pipeline(\n", + " [\n", + " (\"featurizer\", featurizer),\n", + " (\"rf\", rf),\n", + " ],\n", + " n_jobs=1,\n", + " )\n", " cc_clf = CrossConformalCV(\n", " estimator=rf_pipeline,\n", " n_folds=5,\n", @@ -244,11 +261,12 @@ " )\n", " cc_clf.fit(smiles_tr, y_tr)\n", " # Average ensemble probabilities for the validation fold\n", - " probs_cp_ensemble = np.mean([m.predict_p(smiles_val) for m in cc_clf.models_],\n", - " axis=0)\n", - " probs_cp_ensemble_raw = np.mean([m.predict_proba(smiles_val) for m\n", - " in cc_clf.models_],\n", - " axis=0)\n", + " probs_cp_ensemble = np.mean(\n", + " [m.predict_p(smiles_val) for m in cc_clf.models_], axis=0\n", + " )\n", + " probs_cp_ensemble_raw = np.mean(\n", + " [m.predict_proba(smiles_val) for m in cc_clf.models_], axis=0\n", + " )\n", " p0 = probs_cp_ensemble[:, 0]\n", " p1 = probs_cp_ensemble[:, 1]\n", " p1_norm = p1 / (p0 + p1 + 1e-12)\n", @@ -256,12 +274,14 @@ " oof_preds_cp_raw[val_idx] = probs_cp_ensemble_raw[:, 1]\n", "\n", "# Create a DataFrame to compare raw and normalized conformal probabilities\n", - "df_oof_compare = pd.DataFrame({\n", - " \"y_true\": y_train,\n", - " \"StandardModel\": oof_preds,\n", - " \"ConformalRaw\": oof_preds_cp_raw,\n", - " \"ConformalNorm\": oof_preds_cp_norm,\n", - "})\n", + "df_oof_compare = pd.DataFrame(\n", + " {\n", + " 
\"y_true\": y_train,\n", + " \"StandardModel\": oof_preds,\n", + " \"ConformalRaw\": oof_preds_cp_raw,\n", + " \"ConformalNorm\": oof_preds_cp_norm,\n", + " }\n", + ")\n", "\n", "# Compute metrics for out-of-fold predictions (standard model)\n", "mean_pred = (oof_preds >= THRESHOLD).astype(int)\n", @@ -353,8 +373,9 @@ "bins = np.linspace(0, 1, 21)\n", "\n", "\n", - "def plot_percentage_line(probs: np.ndarray, bins: np.ndarray, label: str,\n", - " color: str) -> None:\n", + "def plot_percentage_line(\n", + " probs: np.ndarray, bins: np.ndarray, label: str, color: str\n", + ") -> None:\n", " \"\"\"Plot percentage of predictions in each probability bin.\"\"\"\n", " counts, bin_edges = np.histogram(probs, bins=bins)\n", " percent = 100 * counts / len(probs)\n", @@ -619,14 +640,16 @@ "p1 = p_vals[:, 1]\n", "p1_norm = p1 / (p0 + p1 + 1e-12)\n", "\n", - "df_cp_class = pd.DataFrame({\n", - " \"SMILES\": smiles_test,\n", - " \"p0\": p0,\n", - " \"p1\": p1,\n", - " \"p1_norm\": p1_norm,\n", - " \"conformal_set\": conf_pred_sets,\n", - " \"true_label\": y_test,\n", - "})\n", + "df_cp_class = pd.DataFrame(\n", + " {\n", + " \"SMILES\": smiles_test,\n", + " \"p0\": p0,\n", + " \"p1\": p1,\n", + " \"p1_norm\": p1_norm,\n", + " \"conformal_set\": conf_pred_sets,\n", + " \"true_label\": y_test,\n", + " }\n", + ")\n", "display(df_cp_class.head())\n", "\n", "\n", @@ -656,7 +679,7 @@ "print(\"Brier:\", brier_score_loss(y_test, p1_norm))\n", "print(\"AUROC:\", roc_auc_score(y_test, p1_norm))\n", "print(\"F1:\", f1_score(y_test, (p1_norm >= THRESHOLD).astype(int)))\n", - "print(\"MCC:\", matthews_corrcoef(y_test, (p1_norm >= THRESHOLD).astype(int)))\n" + "print(\"MCC:\", matthews_corrcoef(y_test, (p1_norm >= THRESHOLD).astype(int)))" ] }, { @@ -874,9 +897,12 @@ "\n", "# --- Wrap regressor with CrossConformalCV ---\n", "rf_reg = RandomForestRegressor(n_estimators=100, random_state=42)\n", - "rf_reg_pipeline = Pipeline([\n", - " (\"rf\", rf_reg),\n", - "], n_jobs=1)\n", + 
"rf_reg_pipeline = Pipeline(\n", + " [\n", + " (\"rf\", rf_reg),\n", + " ],\n", + " n_jobs=1,\n", + ")\n", "\n", "cc_reg = CrossConformalCV(\n", " estimator=rf_reg_pipeline,\n", @@ -893,13 +919,15 @@ "upper = intervals_mean[:, 1]\n", "point_pred = np.mean([m.predict(X_test_reg) for m in cc_reg.models_], axis=0)\n", "\n", - "df_cp_reg = pd.DataFrame({\n", - " \"pubchem_smiles\": smiles_test_reg,\n", - " \"pIC50\": y_test_reg,\n", - " \"pred_lower\": lower,\n", - " \"pred_upper\": upper,\n", - " \"point_pred\": point_pred,\n", - "})\n", + "df_cp_reg = pd.DataFrame(\n", + " {\n", + " \"pubchem_smiles\": smiles_test_reg,\n", + " \"pIC50\": y_test_reg,\n", + " \"pred_lower\": lower,\n", + " \"pred_upper\": upper,\n", + " \"point_pred\": point_pred,\n", + " }\n", + ")\n", "display(df_cp_reg.head())\n", "\n", "# --- Regression: Evaluate coverage and interval width ---\n", @@ -909,7 +937,7 @@ "\n", "print(f\"Interval coverage: {coverage_reg:.3f}\")\n", "print(f\"Average interval width: {avg_width:.3f}\")\n", - "print(f\"MAE (point prediction): {mae:.3f}\")\n" + "print(f\"MAE (point prediction): {mae:.3f}\")" ] } ], diff --git a/tests/test_experimental/test_uncertainty/test_conformal.py b/tests/test_experimental/test_uncertainty/test_conformal.py index 5febcb11..117c1906 100644 --- a/tests/test_experimental/test_uncertainty/test_conformal.py +++ b/tests/test_experimental/test_uncertainty/test_conformal.py @@ -37,12 +37,24 @@ class TestConformalCV(unittest.TestCase): @classmethod def setUpClass(cls) -> None: - """Set up test data once for all tests.""" + """Set up test data once for all tests. + + Raises + ------ + ValueError: If no valid data is found after processing. 
+ + """ # Load data - bbbp_df = pd.read_csv(TEST_DATA_DIR / "molecule_net_bbbp.tsv.gz", - sep="\t", compression="gzip") - logd_df = pd.read_csv(TEST_DATA_DIR / "molecule_net_logd.tsv.gz", - sep="\t", compression="gzip") + bbbp_df = pd.read_csv( + TEST_DATA_DIR / "molecule_net_bbbp.tsv.gz", + sep="\t", + compression="gzip", + ) + logd_df = pd.read_csv( + TEST_DATA_DIR / "molecule_net_logd.tsv.gz", + sep="\t", + compression="gzip", + ) # Set up pipeline stages separately to handle invalid molecules smi2mol = SmilesToMol(n_jobs=1) @@ -59,7 +71,10 @@ def setUpClass(cls) -> None: for mol, label in zip(molecules, labels_list, strict=False): # Skip InvalidInstance objects - if mol is None or hasattr(mol, "_fields"): # InvalidInstance is a NamedTuple + if mol is None or hasattr( + mol, + "_fields", + ): # InvalidInstance is a NamedTuple continue # Generate fingerprint for valid molecule try: @@ -86,7 +101,10 @@ def setUpClass(cls) -> None: for mol, label in zip(molecules_reg, labels_list_reg, strict=False): # Skip InvalidInstance objects - if mol is None or hasattr(mol, "_fields"): # InvalidInstance is a NamedTuple + if mol is None or hasattr( + mol, + "_fields", + ): # InvalidInstance is a NamedTuple continue # Generate fingerprint for valid molecule - ensure mol is valid try: @@ -281,22 +299,31 @@ def test_auto_detection(self) -> None: def test_nonconformity_functions(self) -> None: """Test nonconformity functions for classification.""" x_train, x_calib, y_train, y_calib = train_test_split( - self.x_clf, self.y_clf, test_size=0.2, random_state=42, + self.x_clf, + self.y_clf, + test_size=0.2, + random_state=42, ) clf = RandomForestClassifier(random_state=42, n_estimators=5) # Test with hinge nonconformity - cp_hinge = ConformalPredictor(clf, estimator_type="classifier", - nonconformity=hinge) + cp_hinge = ConformalPredictor( + clf, + estimator_type="classifier", + nonconformity=hinge, + ) cp_hinge.fit(x_train, y_train) cp_hinge.calibrate(x_calib, y_calib) sets_hinge = 
cp_hinge.predict_conformal_set(x_calib) p_values_hinge = cp_hinge.predict_p(x_calib) # Test with margin nonconformity - cp_margin = ConformalPredictor(clf, estimator_type="classifier", - nonconformity=margin) + cp_margin = ConformalPredictor( + clf, + estimator_type="classifier", + nonconformity=margin, + ) cp_margin.fit(x_train, y_train) cp_margin.calibrate(x_calib, y_calib) sets_margin = cp_margin.predict_conformal_set(x_calib) @@ -314,7 +341,10 @@ def test_nonconformity_functions(self) -> None: def test_mondrian_conformal_classification(self) -> None: """Test Mondrian conformal prediction for classification.""" x_train, x_calib, y_train, y_calib = train_test_split( - self.x_clf, self.y_clf, test_size=0.2, random_state=42, + self.x_clf, + self.y_clf, + test_size=0.2, + random_state=42, ) clf = RandomForestClassifier(random_state=42, n_estimators=5) @@ -322,20 +352,28 @@ def test_mondrian_conformal_classification(self) -> None: # Test with custom MondrianCategorizer (skip mondrian=True for now) mc = MondrianCategorizer() # Simple categorizer based on first feature - mc.fit(x_calib, - f=lambda x: (x[:, 0] > np.median(x[:, 0])).astype(int), - no_bins=2) + mc.fit( + x_calib, + f=lambda x: (x[:, 0] > np.median(x[:, 0])).astype(int), + no_bins=2, + ) - cp_mondrian_custom = ConformalPredictor(clf, estimator_type="classifier", - mondrian=mc) + cp_mondrian_custom = ConformalPredictor( + clf, + estimator_type="classifier", + mondrian=mc, + ) cp_mondrian_custom.fit(x_train, y_train) cp_mondrian_custom.calibrate(x_calib, y_calib) sets_custom = cp_mondrian_custom.predict_conformal_set(x_calib) p_values_custom = cp_mondrian_custom.predict_p(x_calib) # Test without Mondrian (baseline) - cp_baseline = ConformalPredictor(clf, estimator_type="classifier", - mondrian=False) + cp_baseline = ConformalPredictor( + clf, + estimator_type="classifier", + mondrian=False, + ) cp_baseline.fit(x_train, y_train) cp_baseline.calibrate(x_calib, y_calib) sets_baseline = 
cp_baseline.predict_conformal_set(x_calib) @@ -357,7 +395,10 @@ def test_mondrian_conformal_classification(self) -> None: def test_mondrian_conformal_regression(self) -> None: """Test Mondrian conformal prediction for regression.""" x_train, x_calib, y_train, y_calib = train_test_split( - self.x_reg, self.y_reg, test_size=0.2, random_state=42, + self.x_reg, + self.y_reg, + test_size=0.2, + random_state=42, ) reg = RandomForestRegressor(random_state=42, n_estimators=5) @@ -365,19 +406,23 @@ def test_mondrian_conformal_regression(self) -> None: # Test with custom MondrianCategorizer for regression mc = MondrianCategorizer() # Categorize based on median of first feature - mc.fit(x_calib, - f=lambda x: (x[:, 0] > np.median(x[:, 0])).astype(int), - no_bins=2) + mc.fit( + x_calib, + f=lambda x: (x[:, 0] > np.median(x[:, 0])).astype(int), + no_bins=2, + ) - cp_mondrian = ConformalPredictor(reg, estimator_type="regressor", - mondrian=mc) + cp_mondrian = ConformalPredictor(reg, estimator_type="regressor", mondrian=mc) cp_mondrian.fit(x_train, y_train) cp_mondrian.calibrate(x_calib, y_calib) intervals_mondrian = cp_mondrian.predict_int(x_calib) # Test without Mondrian (baseline) - cp_baseline = ConformalPredictor(reg, estimator_type="regressor", - mondrian=False) + cp_baseline = ConformalPredictor( + reg, + estimator_type="regressor", + mondrian=False, + ) cp_baseline.fit(x_train, y_train) cp_baseline.calibrate(x_calib, y_calib) intervals_baseline = cp_baseline.predict_int(x_calib) @@ -396,20 +441,31 @@ def test_cross_conformal_mondrian_both_classes(self) -> None: # Create a simple Mondrian categorizer for classification mc_clf = MondrianCategorizer() - mc_clf.fit(self.x_clf, - f=lambda x: (x[:, 0] > np.median(x[:, 0])).astype(int), - no_bins=2) + mc_clf.fit( + self.x_clf, + f=lambda x: (x[:, 0] > np.median(x[:, 0])).astype(int), + no_bins=2, + ) - ccp_clf = CrossConformalPredictor(clf, estimator_type="classifier", - n_folds=3, mondrian=mc_clf, random_state=42) + ccp_clf = 
CrossConformalPredictor( + clf, + estimator_type="classifier", + n_folds=3, + mondrian=mc_clf, + random_state=42, + ) ccp_clf.fit(self.x_clf, self.y_clf) sets_mondrian = ccp_clf.predict_conformal_set(self.x_clf[:10]) p_values_mondrian = ccp_clf.predict_p(self.x_clf[:10]) # Test without Mondrian for comparison - ccp_clf_baseline = CrossConformalPredictor(clf, estimator_type="classifier", - n_folds=3, mondrian=False, - random_state=42) + ccp_clf_baseline = CrossConformalPredictor( + clf, + estimator_type="classifier", + n_folds=3, + mondrian=False, + random_state=42, + ) ccp_clf_baseline.fit(self.x_clf, self.y_clf) sets_baseline = ccp_clf_baseline.predict_conformal_set(self.x_clf[:10]) @@ -419,15 +475,24 @@ def test_cross_conformal_mondrian_both_classes(self) -> None: # Test regression with binning (Mondrian-style for regression) reg = RandomForestRegressor(random_state=42, n_estimators=5) - ccp_reg = CrossConformalPredictor(reg, estimator_type="regressor", - n_folds=3, binning=3, random_state=42) + ccp_reg = CrossConformalPredictor( + reg, + estimator_type="regressor", + n_folds=3, + binning=3, + random_state=42, + ) ccp_reg.fit(self.x_reg, self.y_reg) intervals_binned = ccp_reg.predict_int(self.x_reg[:10]) # Test without binning for comparison - ccp_reg_baseline = CrossConformalPredictor(reg, estimator_type="regressor", - n_folds=3, binning=None, - random_state=42) + ccp_reg_baseline = CrossConformalPredictor( + reg, + estimator_type="regressor", + n_folds=3, + binning=None, + random_state=42, + ) ccp_reg_baseline.fit(self.x_reg, self.y_reg) intervals_baseline_reg = ccp_reg_baseline.predict_int(self.x_reg[:10]) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 4d3625f6..a5741201 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -23,7 +23,7 @@ from molpipeline.any2mol import AutoToMol, SmilesToMol from molpipeline.experimental.uncertainty.conformal import ( ConformalPredictor, - CrossConformalPredictor + CrossConformalPredictor, ) 
from molpipeline.mol2any import MolToMorganFP, MolToRDKitPhysChem, MolToSmiles from molpipeline.mol2mol import ( @@ -127,16 +127,19 @@ def test_salt_removal(self) -> None: ("mol2smi", mol2smi), ], ) - generated_smiles = salt_remover_pipeline.transform(smiles_with_salt_list) - for generated_smiles, smiles_without_salt in zip( - generated_smiles, + generated_smiles_list = salt_remover_pipeline.transform(smiles_with_salt_list) + for generated_smi, smiles_without_salt in zip( + generated_smiles_list, smiles_without_salt_list, strict=False, ): - self.assertEqual(generated_smiles, smiles_without_salt) + self.assertEqual(generated_smi, smiles_without_salt) def test_json_generation(self) -> None: - """Test that the json representation of a pipeline can be loaded back into a pipeline.""" + """Test that the json representation of a pipeline can be loaded back. + + This test verifies that a pipeline can be loaded back into a pipeline. + """ # Create pipeline smi2mol = SmilesToMol() metal_disconnector = MetalDisconnector() @@ -201,11 +204,12 @@ def test_fit_transform_record_remove_nones(self) -> None: # Run pipeline matrix = pipeline.fit_transform(TEST_SMILES + FAULTY_TEST_SMILES) - # Compare with expected output (Which is the same as the output without the faulty smiles) + # Compare with expected output + # (Which is the same as the output without the faulty smiles) self.assertTrue(are_equal(EXPECTED_OUTPUT, matrix)) def test_caching(self) -> None: - """Test if the caching gives the same results and is faster on the second run.""" + """Test if the caching gives the same results & is faster on the second run.""" molecule_net_logd_df = pd.read_csv( TEST_DATA_DIR / "molecule_net_logd.tsv.gz", sep="\t", @@ -247,7 +251,8 @@ def test_caching(self) -> None: n_transformations = pipeline.named_steps["mol2concat"].n_transformations if cache_activated: - # Fit is called twice, but the transform is only called once, since the second run is cached + # Fit is called twice, but the 
transform is only called once, + # since the second run is cached self.assertEqual(n_transformations, 1) else: self.assertEqual(n_transformations, 2) @@ -285,7 +290,8 @@ def test_gridsearchcv(self) -> None: element = test_data_dict["element"] param_grid = test_data_dict["param_grid"] - # set up a pipeline that trains a random forest classifier on morgan fingerprints + # set up a pipeline that trains + # a random forest classifier on morgan fingerprints pipeline = Pipeline( [ ("auto2mol", AutoToMol()), @@ -319,7 +325,7 @@ def test_gridsearchcv(self) -> None: self.assertIn(grid_search_cv.best_params_[k], value) def test_gridsearch_cache(self) -> None: - """Run a short GridSearchCV and check if the caching and not caching gives the same results.""" + """Run GridSearchCV and check caching vs not caching gives same results.""" h_params = { "rf__n_estimators": [1, 2], } @@ -393,7 +399,7 @@ def test_calibrated_classifier(self) -> None: self.assertEqual(predicted_value_array.shape, (len(TEST_SMILES),)) self.assertEqual(predicted_proba_array.shape, (len(TEST_SMILES), 2)) - def test_conformal_pipeline_classifier(self) -> None: + def test_conformal_pipeline_classifier(self) -> None: # noqa: PLR0914 """Test conformal prediction with a pipeline on SMILES data. This test does not take any parameters and does not return a value. 
@@ -415,7 +421,7 @@ def test_conformal_pipeline_classifier(self) -> None: ) # Split data - X_train, X_calib, y_train, y_calib = train_test_split( + x_train, x_calib, y_train, y_calib = train_test_split( smiles, y, test_size=0.3, @@ -424,11 +430,11 @@ def test_conformal_pipeline_classifier(self) -> None: # ConformalPredictor cp = ConformalPredictor(pipeline, estimator_type="classifier") - cp.fit(X_train, y_train) - cp.calibrate(X_calib, y_calib) - preds = cp.predict(X_calib) - probs = cp.predict_proba(X_calib) - sets = cp.predict_conformal_set(X_calib) + cp.fit(x_train, y_train) + cp.calibrate(x_calib, y_calib) + preds = cp.predict(x_calib) + probs = cp.predict_proba(x_calib) + sets = cp.predict_conformal_set(x_calib) self.assertEqual(len(preds), len(y_calib)) self.assertEqual(probs.shape[0], len(y_calib)) self.assertEqual(len(sets), len(y_calib)) From bd37fcf6083c73711ae45f58953ebc6f813fbe43 Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Tue, 15 Jul 2025 02:09:29 +0200 Subject: [PATCH 18/20] removed conformal test from pipeline --- tests/test_pipeline.py | 56 +----------------------------------------- 1 file changed, 1 insertion(+), 55 deletions(-) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index a5741201..9b7510d5 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -16,15 +16,11 @@ from sklearn.base import BaseEstimator from sklearn.calibration import CalibratedClassifierCV from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor -from sklearn.model_selection import GridSearchCV, train_test_split +from sklearn.model_selection import GridSearchCV from sklearn.tree import DecisionTreeClassifier from molpipeline import ErrorFilter, FilterReinserter, Pipeline, PostPredictionWrapper from molpipeline.any2mol import AutoToMol, SmilesToMol -from molpipeline.experimental.uncertainty.conformal import ( - ConformalPredictor, - CrossConformalPredictor, -) from molpipeline.mol2any import MolToMorganFP, MolToRDKitPhysChem, 
MolToSmiles from molpipeline.mol2mol import ( ChargeParentExtractor, @@ -399,56 +395,6 @@ def test_calibrated_classifier(self) -> None: self.assertEqual(predicted_value_array.shape, (len(TEST_SMILES),)) self.assertEqual(predicted_proba_array.shape, (len(TEST_SMILES), 2)) - def test_conformal_pipeline_classifier(self) -> None: # noqa: PLR0914 - """Test conformal prediction with a pipeline on SMILES data. - - This test does not take any parameters and does not return a value. - """ - # Use the global test data - smiles = np.array(TEST_SMILES) - y = np.array(CONTAINS_OX) - - # Build a pipeline: SMILES -> Mol -> MorganFP -> RF - smi2mol = SmilesToMol() - mol2morgan = MolToMorganFP(radius=2, n_bits=128) - rf = RandomForestClassifier(n_estimators=5, random_state=42) - pipeline = Pipeline( - [ - ("smi2mol", smi2mol), - ("morgan", mol2morgan), - ("rf", rf), - ], - ) - - # Split data - x_train, x_calib, y_train, y_calib = train_test_split( - smiles, - y, - test_size=0.3, - random_state=42, - ) - - # ConformalPredictor - cp = ConformalPredictor(pipeline, estimator_type="classifier") - cp.fit(x_train, y_train) - cp.calibrate(x_calib, y_calib) - preds = cp.predict(x_calib) - probs = cp.predict_proba(x_calib) - sets = cp.predict_conformal_set(x_calib) - self.assertEqual(len(preds), len(y_calib)) - self.assertEqual(probs.shape[0], len(y_calib)) - self.assertEqual(len(sets), len(y_calib)) - - # CrossConformalPredictor - ccp = CrossConformalPredictor(pipeline, estimator_type="classifier", n_folds=3) - ccp.fit(smiles, y) - preds_ccp = ccp.predict(smiles) - probs_ccp = ccp.predict_proba(smiles) - sets_ccp = ccp.predict_conformal_set(smiles) - self.assertEqual(len(preds_ccp), len(y)) - self.assertEqual(probs_ccp.shape[0], len(y)) - self.assertEqual(len(sets_ccp), len(y)) - if __name__ == "__main__": unittest.main() From a1b336d117506a058f4a4ba1eb754c4365efcabe Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Tue, 15 Jul 2025 02:22:06 +0200 Subject: [PATCH 19/20] added ignore for too 
many variables in the test_conformal --- tests/test_experimental/test_uncertainty/test_conformal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_experimental/test_uncertainty/test_conformal.py b/tests/test_experimental/test_uncertainty/test_conformal.py index 117c1906..895f06cb 100644 --- a/tests/test_experimental/test_uncertainty/test_conformal.py +++ b/tests/test_experimental/test_uncertainty/test_conformal.py @@ -36,7 +36,7 @@ class TestConformalCV(unittest.TestCase): y_reg: npt.NDArray[Any] @classmethod - def setUpClass(cls) -> None: + def setUpClass(cls) -> None: # pylint: disable=too-many-locals """Set up test data once for all tests. Raises @@ -78,7 +78,7 @@ def setUpClass(cls) -> None: continue # Generate fingerprint for valid molecule try: - fp = morgan.transform([mol])[0] # type: ignore[list-item] + fp = morgan.transform([mol]) if fp is not None and hasattr(fp, "toarray"): valid_clf_data.append((fp.toarray().flatten(), label)) except (AttributeError, TypeError): From e8c7c26050779088b5b065c9fe21815ff16db269 Mon Sep 17 00:00:00 2001 From: soulios-basf Date: Tue, 15 Jul 2025 02:24:11 +0200 Subject: [PATCH 20/20] flaked and ruffed --- tests/test_experimental/test_uncertainty/test_conformal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_experimental/test_uncertainty/test_conformal.py b/tests/test_experimental/test_uncertainty/test_conformal.py index 895f06cb..a5fceb0c 100644 --- a/tests/test_experimental/test_uncertainty/test_conformal.py +++ b/tests/test_experimental/test_uncertainty/test_conformal.py @@ -36,7 +36,7 @@ class TestConformalCV(unittest.TestCase): y_reg: npt.NDArray[Any] @classmethod - def setUpClass(cls) -> None: # pylint: disable=too-many-locals + def setUpClass(cls) -> None: # pylint: disable=too-many-locals """Set up test data once for all tests. Raises