🚑 📝 ✨ Update pyproject.toml to include development dependencies; enhance docstrings and unit tests for clarity and coverage

garethcmurphy · garethcmurphy · commit 617857a01129 · 2024-12-24T22:40:22.000+01:00
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -11,9 +11,15 @@ pandas = "^2.2.3"
 scikit-learn = "^1.6.0"
 numpy = "^2.2.1"
 matplotlib = "^3.10.0"
-pytest = "^8.3.4"
 
 
+[tool.poetry.group.dev.dependencies]
+ruff = "^0.8.4"
+black = "^24.10.0"
+flake8 = "^7.1.1"
+autopep8 = "^2.3.1"
+pytest = "^8.3.4"
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
diff --git a/src/battle_clustering.py b/src/battle_clustering.py
@@ -6,7 +6,9 @@
 from sklearn.preprocessing import StandardScaler
 import matplotlib.pyplot as plt
 
+
 class BattleClustering:
+    """Clustering battles using K-Means"""
     def __init__(self, data_path, n_clusters=3, random_state=42):
         self.data_path = data_path
         self.n_clusters = n_clusters
@@ -24,8 +26,15 @@ def load_data(self):
     def preprocess_data(self):
         """Preprocess data by encoding categorical columns and scaling features"""
         categorical_columns = ["Terrain", "Weather", "Key_Factors"]
-        self.data_encoded = pd.get_dummies(self.data, columns=categorical_columns, drop_first=True)
-        X = self.data_encoded.drop(columns=["Outcome", "Battle_Name"])  # Drop the target and non-feature columns
+        self.data_encoded = pd.get_dummies(
+            self.data, columns=categorical_columns, drop_first=True
+        )
+        X = self.data_encoded.drop(
+            columns=[
+                "Outcome",
+                "Battle_Name",
+            ]
+        )  # Drop the target and non-feature columns
         scaler = StandardScaler()
         self.X_scaled = scaler.fit_transform(X)
 
@@ -40,12 +49,15 @@ def save_results(self, output_path):
 
     def visualize_clusters(self, output_path):
         """Visualize the clusters (if dataset is 2D or reduced to 2D)"""
-        plt.scatter(self.X_scaled[:, 0], self.X_scaled[:, 1], c=self.clusters, cmap="viridis")
+        plt.scatter(
+            self.X_scaled[:, 0], self.X_scaled[:, 1], c=self.clusters, cmap="viridis"
+        )
         plt.title("Battle Clusters")
         plt.xlabel("Feature 1")
         plt.ylabel("Feature 2")
         plt.savefig(output_path)
 
+
 if __name__ == "__main__":
     clustering = BattleClustering(data_path="data/napoleon_battles.csv")
     clustering.load_data()
diff --git a/src/battle_predict.py b/src/battle_predict.py
@@ -24,6 +24,7 @@ def __init__(self):
         self.y = None
 
     def load_data(self):
+        """load data"""
         data = {
             "Battle Name": ["Austerlitz", "Waterloo", "Borodino"],
             "Date": ["1805-12-02", "1815-06-18", "1812-09-07"],
@@ -48,6 +49,7 @@ def load_data(self):
         self.data.to_csv("battles.csv", index=False)
 
     def preprocess_data(self):
+        """preprocess data"""
         self.data["Outcome"] = self.label_encoder.fit_transform(
             self.data["Outcome"]
         )  # Encode outcome
@@ -73,6 +75,7 @@ def preprocess_data(self):
         self.X = self.scaler.fit_transform(self.X)
 
     def train_model(self):
+        """train model"""
         X_train, X_test, y_train, y_test = train_test_split(
             self.X, self.y, test_size=0.2, random_state=42
         )
diff --git a/tests/test_cluster.py b/tests/test_cluster.py
@@ -1,9 +1,15 @@
+#!/usr/bin/env python3
+"""test_cluster.py - unit tests for the BattleClustering class"""
+import os
 import unittest
+
 import pandas as pd
-import os
+
 from battle_clustering import BattleClustering
 
+
 class TestBattleClustering(unittest.TestCase):
+    """Unit tests for the BattleClustering class"""
     @classmethod
     def setUpClass(cls):
         # Create a sample dataset for testing
@@ -32,24 +38,29 @@ def setUp(self):
         self.clustering = BattleClustering(data_path=self.sample_data_path)
 
     def test_load_data(self):
+        """Test the load_data method"""
         self.clustering.load_data()
         self.assertIsNotNone(self.clustering.data)
         self.assertEqual(len(self.clustering.data), 3)
 
     def test_preprocess_data(self):
+        """Test the preprocess_data method"""
         self.clustering.load_data()
         self.clustering.preprocess_data()
         self.assertIsNotNone(self.clustering.X_scaled)
         self.assertEqual(self.clustering.X_scaled.shape[0], 3)
 
     def test_perform_clustering(self):
+        """Test the perform_clustering method"""
         self.clustering.load_data()
         self.clustering.preprocess_data()
         self.clustering.perform_clustering()
         self.assertIsNotNone(self.clustering.clusters)
         self.assertEqual(len(self.clustering.clusters), 3)
 
     def test_save_results(self):
+        """Test the save_results method
+        """
         self.clustering.load_data()
         self.clustering.preprocess_data()
         self.clustering.perform_clustering()