 from sklearn.utils import resample
 from sklearn.base import ClassifierMixin, BaseEstimator
-from sklearn.preprocessing import LabelEncoder
+from sklearn.preprocessing import LabelBinarizer
 
 from math import ceil
 
 import numpy as np
 
 import torch as pyt
-
-from skorch import NeuralNetClassifier
+from torch.cuda import is_available as is_gpu_available
 
 from scipy.sparse import issparse
@@ -39,7 +38,12 @@ def __init__(self, n_in, n_out):
         self.D_3 = pyt.nn.Linear(
             in_features=self.n_out * 16, out_features=self.n_out
         )  # in_features = *8
-        self.O = pyt.nn.Softmax(dim=-1)
+
+        if self.n_out > 2:
+            self.O = pyt.nn.Softmax(dim=-1)
+
+        else:
+            self.O = pyt.nn.Sigmoid()
 
     def forward(self, x):
         o = self.IN(x)
@@ -50,10 +54,10 @@ def forward(self, x):
         o = self.D_2(o)
         o = self.A_2(o)
         o = self.Dr_2(o)
-        o = self.D_3(o)
-        o = self.O(o)
+        logit = self.D_3(o)
+        probs = self.O(logit)
 
-        return o
+        return logit, probs
 
 
 class ANNClassifier(ClassifierMixin, BaseEstimator):
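
The forward pass now returns the raw logit alongside the activated probability: BCEWithLogitsLoss (and CrossEntropyLoss) fold the activation into the loss for numerical stability, so training consumes the logit while inference consumes the probability. A minimal sketch of the equivalence, with illustrative tensor values:

import torch

logit = torch.tensor([[2.0], [-1.0]])  # raw output of a final Linear layer
target = torch.tensor([[1.0], [0.0]])  # binarized labels as float32

fused = torch.nn.BCEWithLogitsLoss()(logit, target)       # sigmoid fused into the loss
naive = torch.nn.BCELoss()(torch.sigmoid(logit), target)  # separate sigmoid, less stable
assert torch.allclose(fused, naive)  # same value on well-scaled logits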
@@ -71,7 +75,7 @@ def fit(self, X, y):
             X_not_sparse = X
 
         # Encode y
-        self.y_transformer = LabelEncoder().fit(y)
+        self.y_transformer = LabelBinarizer().fit(y)
 
         # Select features
         if X_not_sparse.shape[1] >= 4:
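
The encoder swap matters for target shapes: LabelEncoder maps labels to integer codes, whereas LabelBinarizer emits a single 0/1 column for binary problems and a one-hot matrix for multiclass ones, which is what the loss functions chosen below expect. A quick sketch with made-up labels:

from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer().fit(["cat", "dog"])
print(lb.transform(["dog", "cat"]))  # [[1] [0]] -- one column for binary

lb3 = LabelBinarizer().fit(["a", "b", "c"])
print(lb3.transform(["b"]))  # [[0 1 0]] -- one-hot row per sample for multiclass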
@@ -92,41 +96,96 @@ def fit(self, X, y):
             stratify=y,
         )
         X_trf = X_trf.astype(np.float32)
-        y_trf = self.y_transformer.transform(y_trf).astype(np.int64)
+        y_trf = self.y_transformer.transform(y_trf).astype(np.float32)
 
         # Determine if minimum class count exists
-        self.classes_, y_counts = np.unique(y_trf, return_counts=True)
+        self.classes_, y_counts = np.unique(y, return_counts=True)
 
         self.y_min = min(y_counts) * 0.8
 
         # Use neural network if more than 6 samples are present in the minority class
         if self.y_min > self.minority:
             self.n_in = X_trf.shape[1]
+
             self.n_out = self.classes_.shape[0]
+            if self.n_out == 2:
+                self.n_out = 1
+
+            # Get device
+            use_autocast = False
+            if is_gpu_available():
+                use_autocast = True
+                device_type = "cuda"  # autocast expects the device type without an index
+                self.device = pyt.device("cuda:0")
 
-            if pyt.cuda.is_available():
-                device = "cuda"
             else:
-                device = "cpu"
-
-            clf = NeuralNetClassifier(
-                LMNNet(n_in=X_trf.shape[1], n_out=self.classes_.shape[0]),
-                optimizer=pyt.optim.AdamW,
-                lr=0.001,
-                max_epochs=100,
-                batch_size=16,
-                device=device,
-                iterator_train__shuffle=True,
-                verbose=0,
-            )
+                device_type = "cpu"
+                self.device = pyt.device("cpu")
+
+            # Prepare data
+            X_trf = pyt.tensor(X_trf)
+            y_trf = pyt.tensor(y_trf)
+
+            dataset_train = pyt.utils.data.DataLoader(
+                list(zip(X_trf, y_trf)),
+                shuffle=True,
+                batch_size=16,
+            )
+
+            # Prepare model and load it onto the GPU or CPU
+            self.model = LMNNet(n_in=self.n_in, n_out=self.n_out)
+            self.model.to(self.device)
+
+            # Prepare optimizer
+            optimizer = pyt.optim.AdamW(self.model.parameters(), lr=0.01)
+
+            # Prepare loss function
+            if self.n_out > 2:
+                loss_fn = pyt.nn.CrossEntropyLoss().to(self.device)
+
+            else:
+                loss_fn = pyt.nn.BCEWithLogitsLoss().to(self.device)
+
+            scaler = pyt.amp.GradScaler(device_type, enabled=use_autocast)
+
+            # Training loop
+            for epoch in range(100):
+
+                if is_gpu_available():
+                    pyt.cuda.empty_cache()
+
+                # Training steps
+                self.model.train()
+
+                for batch_num, batch in enumerate(dataset_train):
+                    x_in, y_in = batch
+                    x_in = x_in.to(self.device)
+                    y_in = y_in.to(self.device)
 
-            clf.fit(X_trf, y_trf)
+                    with pyt.amp.autocast(
+                        device_type=device_type,
+                        dtype=pyt.bfloat16,
+                        enabled=use_autocast,
+                    ):
+
+                        x_logit, x_probs = self.model(x_in)
 
-            self.params = clf.module.state_dict()
+                        # Calculate loss (cross-entropy or BCE-with-logits)
+                        total_loss = loss_fn(x_logit, y_in)
+
+                    # Backwards pass
+                    optimizer.zero_grad()
+                    scaler.scale(total_loss).backward()
 
-            del clf
+                    # Update weights
+                    scaler.step(optimizer)
+                    scaler.update()
 
-            return self, self.decision_function(X)
+            self.params = self.model.state_dict()
+
+            del self.model
+
+            return self, self.decision_function(X)
 
         # Otherwise use an Extra Trees Classifier or Nothing
         else:
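
The hand-rolled loop replacing skorch follows the standard mixed-precision recipe: forward pass and loss under autocast, then backward pass and optimizer step routed through a GradScaler. A condensed sketch of that step order (model, loss_fn, optimizer, scaler, x, and y are placeholders, not this commit's API):

import torch

def train_step(model, loss_fn, optimizer, scaler, x, y, device_type="cuda"):
    # Forward pass and loss in reduced precision where it is safe
    with torch.amp.autocast(device_type=device_type, dtype=torch.bfloat16):
        logit, _ = model(x)
        loss = loss_fn(logit, y)

    # Backward pass and parameter update outside the autocast region
    optimizer.zero_grad()
    scaler.scale(loss).backward()  # pass-through when the scaler is disabled
    scaler.step(optimizer)
    scaler.update()
    return loss.item()

Gradient scaling mainly guards against float16 underflow; with bfloat16 the scaler is effectively a no-op, which is why constructing it disabled on CPU is safe.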
@@ -149,7 +208,8 @@ def predict_proba(self, X):
 
         predictions = []
         for start in n_batch:
-            p = clf(X_tensor[start : start + 16]).detach().cpu().numpy()
+            _, p = clf(X_tensor[start : start + 16])
+            p = p.detach().cpu().numpy()
             predictions.extend(p)
 
         predictions = np.asarray(predictions)
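
Since inference only needs the probability head, the batched loop above could also run under no_grad(), which skips autograd bookkeeping entirely rather than detaching afterwards. A sketch, assuming clf, X_tensor, and n_batch as already defined in predict_proba:

predictions = []
with pyt.no_grad():  # no graph is built, so detach() becomes unnecessary
    for start in n_batch:
        _, p = clf(X_tensor[start : start + 16])
        predictions.extend(p.cpu().numpy())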
@@ -161,11 +221,18 @@ def predict_proba(self, X):
     def decision_function(self, X):
         D = self.predict_proba(X)
 
-        return np.where(D > 0.5, 1, -1)
+        D = np.where(D > 0.5, 1, -1)
+
+        if self.n_out == 1:
+            D = D.flatten()
+
+        return D
 
     def predict(self, X):
         predictions = self.predict_proba(X)
 
-        predictions = np.argmax(predictions, axis=1)
+        # A single sigmoid column has no meaningful argmax; threshold it instead
+        if self.n_out == 1:
+            predictions = (predictions.flatten() > 0.5).astype(np.int64)
+        else:
+            predictions = np.argmax(predictions, axis=1)
 
-        return self.y_transformer.inverse_transform(predictions)
+        predictions = np.asarray([self.y_transformer.classes_[x] for x in predictions])
+
+        return predictions
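
End to end, the reworked estimator keeps the scikit-learn fit/predict/predict_proba surface, with the quirk that fit returns a (self, decision_function(X)) tuple. A hypothetical usage sketch on toy binary data (the bare ANNClassifier() constructor is an assumption; its arguments are not shown in this diff):

import numpy as np

X = np.random.rand(200, 10).astype(np.float32)  # toy features, illustrative only
y = np.random.randint(0, 2, size=200)           # two classes -> single sigmoid head

clf, train_scores = ANNClassifier().fit(X, y)   # fit returns (self, decision scores)
probs = clf.predict_proba(X)                    # one sigmoid column for binary
labels = clf.predict(X)                         # original label values restored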