Merge pull request #40 from EliahKagan/strings

bazingagin · web-flow · commit 6558e070ebc8 · 2023-08-03T17:33:50.000-04:00
Improve code style for strings
diff --git a/examples/ag_news.py b/examples/ag_news.py
@@ -70,17 +70,17 @@ def fit_model(
 
 
 def main() -> None:
-    print(f"Fetching data...")
+    print("Fetching data...")
     ((train_text, train_labels), (test_text, test_labels)) = get_data()
 
-    print(f"Fitting model...")
+    print("Fitting model...")
     model = fit_model(train_text, train_labels)
     random_indicies = np.random.choice(test_text.shape[0], 1000, replace=False)
 
     sample_test_text = test_text[random_indicies]
     sample_test_labels = test_labels[random_indicies]
 
-    print(f"Generating predictions...")
+    print("Generating predictions...")
     top_k = 1
 
     # Here we use the `sampling_percentage` to save time
diff --git a/examples/imdb.py b/examples/imdb.py
@@ -70,10 +70,10 @@ def fit_model(
 
 
 def main() -> None:
-    print(f"Fetching data...")
+    print("Fetching data...")
     ((train_text, train_labels), (test_text, test_labels)) = get_data()
 
-    print(f"Fitting model...")
+    print("Fitting model...")
     model = fit_model(train_text, train_labels)
 
     # Randomly sampling from the test set.
@@ -87,7 +87,7 @@ def main() -> None:
     sample_test_text = test_text[random_indicies]
     sample_test_labels = test_labels[random_indicies]
 
-    print(f"Generating predictions...")
+    print("Generating predictions...")
     top_k = 1
 
     # Here we use the `sampling_percentage` to save time
diff --git a/npc_gzip/exceptions.py b/npc_gzip/exceptions.py
@@ -11,7 +11,7 @@ class InvalidCompressorException(Exception):
 
     def __init__(self, compression_library: str) -> None:
         self.message = f"""
-        Compression Library ({compression_library}) 
+        Compression Library ({compression_library})
         is not currently supported.
         """
         super().__init__(self.message)
@@ -25,10 +25,10 @@ class MissingDependencyException(Exception):
 
     def __init__(self, compression_library: str) -> None:
         self.message = f"""
-        Compression Library ({compression_library}) 
-        is missing an underlying dependency. Try 
-        installing those missing dependencies and 
-        load this again. 
+        Compression Library ({compression_library})
+        is missing an underlying dependency. Try
+        installing those missing dependencies and
+        load this again.
 
         Common missing dependencies for:
 
@@ -50,7 +50,7 @@ def __init__(
         self.message = f"""
         Unable to aggregate ({stringa}) and ({stringb}).
         One or both of the two strings are too short to concatenate.
-        
+
         """
 
         if function_name is not None:
@@ -66,11 +66,11 @@ def __init__(
         compressed_value_b: Optional[float] = None,
         function_name: Optional[str] = None,
     ) -> None:
-        self.message = f"""
-        The combination of compressed values passed equal zero. 
+        self.message = """
+        The combination of compressed values passed equal zero.
         This will result in a divide by zero error.
 
-        
+
         """
 
         if function_name is not None:
@@ -91,7 +91,7 @@ def __init__(
             arg1: {type(a)}
             arg2: {type(b)}
             arg3: {type(c)}
-        
+
         """
 
         if function_name is not None:
@@ -112,7 +112,7 @@ def __init__(
             arg1: {array_a.shape}
             arg2: {array_b.shape}
             arg3: {array_c.shape}
-        
+
         """
 
         if function_name is not None:
@@ -128,11 +128,11 @@ def __init__(
         function_name: Optional[str] = None,
     ) -> None:
         self.message = f"""
-        The `distance_metric` ({distance_metric}) provided is not 
+        The `distance_metric` ({distance_metric}) provided is not
         currently supported. Please submit an Issue and/or
         Pull Request here to add support:
         https://github.com/bazingagin/npc_gzip
-        
+
         """
 
         if supported_distance_metrics is not None:
@@ -153,9 +153,9 @@ def __init__(
         function_name: Optional[str] = None,
     ) -> None:
         self.message = f"""
-        The type passed ({passed_type}) provided is not 
-        currently supported. 
-        
+        The type passed ({passed_type}) provided is not
+        currently supported.
+
         """
 
         if supported_types is not None:
@@ -174,13 +174,13 @@ def __init__(
         function_name: Optional[str] = None,
     ) -> None:
         self.message = f"""
-        If training labels are passed, the number 
-        of training data samples must equal the 
+        If training labels are passed, the number
+        of training data samples must equal the
         number of training label samples
-        
+
         training_samples: {training_samples}
         label_samples: {label_samples}
-        
+
         """
 
         if function_name is not None:
diff --git a/npc_gzip/knn_classifier.py b/npc_gzip/knn_classifier.py
@@ -27,7 +27,12 @@ class KnnClassifier:
     >>> training_labels = [random.randint(0, 1) for _ in range(len(training_data))]
     >>> assert len(training_data) == len(training_labels)
 
-    >>> model = KnnClassifier(compressor=GZipCompressor(), training_inputs=training_data, training_labels=training_labels, distance_metric="ncd")
+    >>> model = KnnClassifier(
+    ...     compressor=GZipCompressor(),
+    ...     training_inputs=training_data,
+    ...     training_labels=training_labels,
+    ...     distance_metric="ncd",
+    ... )
 
     >>> test = np.array(["hey", "you are a real pain in my ass", "go away please"])
 
diff --git a/npc_gzip/utils.py b/npc_gzip/utils.py
@@ -27,7 +27,7 @@ def generate_sentence(number_of_words: int = 10) -> str:
         str: Sentence of random numbers and letters.
     """
 
-    assert number_of_words > 0, f"`number_of_words` must be greater than zero."
+    assert number_of_words > 0, "`number_of_words` must be greater than zero."
 
     words = []
     for word in range(number_of_words):
@@ -58,7 +58,7 @@ def generate_dataset(number_of_sentences: int) -> list:
         list: List of sentences (str).
     """
 
-    assert number_of_sentences > 0, f"`number_of_sentences` must be greater than zero."
+    assert number_of_sentences > 0, "`number_of_sentences` must be greater than zero."
 
     dataset = []
     for sentence in range(number_of_sentences):
diff --git a/original_codebase/experiments.py b/original_codebase/experiments.py
@@ -25,13 +25,14 @@ def calc_dis(
         self, data: list, train_data: Optional[list] = None, fast: bool = False
     ) -> None:
         """
-        Calculates the distance between either `data` and itself or `data` and `train_data`
-        and appends the distance to `self.distance_matrix`.
+        Calculates the distance between either `data` and itself or `data` and
+        `train_data` and appends the distance to `self.distance_matrix`.
 
         Arguments:
             data (list): Data to compute distance between.
             train_data (list): [Optional] Training data to compute distance from `data`.
-            fast (bool): [Optional] Uses the _fast compression length function of `self.compressor`.
+            fast (bool): [Optional] Uses the _fast compression length function
+                                    of `self.compressor`.
 
         Returns:
             None: None
@@ -68,13 +69,14 @@ def calc_dis_with_single_compressed_given(
         self, data: list, data_len: list = None, train_data: Optional[list] = None
     ) -> None:
         """
-        Calculates the distance between either `data`, `data_len`, or `train_data`
-        and appends the distance to `self.distance_matrix`.
+        Calculates the distance between either `data`, `data_len`, or
+        `train_data` and appends the distance to `self.distance_matrix`.
 
         Arguments:
             data (list): Data to compute distance between.
             train_data (list): [Optional] Training data to compute distance from `data`.
-            fast (bool): [Optional] Uses the _fast compression length function of `self.compressor`.
+            fast (bool): [Optional] Uses the _fast compression length function
+                                    of `self.compressor`.
 
         Returns:
             None: None
@@ -186,7 +188,8 @@ def calc_acc(
             k (int?): TODO
             label (list): Predicted Labels.
             train_label (list): Correct Labels.
-            provided_distance_matrix (list): Calculated Distance Matrix to use instead of `self.distance_matrix`.
+            provided_distance_matrix (list): Calculated Distance Matrix to use
+                                             instead of `self.distance_matrix`.
             rand (bool): TODO
 
         Returns:
@@ -245,7 +248,8 @@ def combine_dis_acc(
         train_label: Optional[list] = None,
     ) -> tuple:
         """
-        Calculates the distance and the accuracy of the algorithm for data with training.
+        Calculates the distance and the accuracy of the algorithm for data with
+        training.
 
         Arguments:
             k (int?): TODO
@@ -304,7 +308,8 @@ def combine_dis_acc_single(
         label: Any,  # int, as used in this application
     ) -> tuple:
         """
-        Calculates the distance and the accuracy of the algorithm for a single datum with training.
+        Calculates the distance and the accuracy of the algorithm for a single
+        datum with training.
 
         Arguments:
             k (int?): TODO
diff --git a/original_codebase/utils.py b/original_codebase/utils.py
@@ -12,7 +12,8 @@ def NCD(c1: float, c2: float, c12: float) -> float:
     Arguments:
         c1 (float): The compressed length of the first object.
         c2 (float): The compressed length of the second object.
-        c12 (float): The compressed length of the concatenation of the first and second objects.
+        c12 (float): The compressed length of the concatenation of the first
+                     and second objects.
 
     Returns:
         float: The Normalized Compression Distance c1 and c2.
@@ -51,7 +52,8 @@ def CDM(c1: float, c2: float, c12: float) -> float:
     Arguments:
         c1 (float): The compressed length of the first object.
         c2 (float): The compressed length of the second object.
-        c12 (float): The compressed length of the concatenation of the first and second objects.
+        c12 (float): The compressed length of the concatenation of the first
+                     and second objects.
 
     Returns:
         float: The Compound Dissimilarity Measure value between c1 and c2.
@@ -72,7 +74,8 @@ def MSE(v1: np.ndarray, v2: np.ndarray) -> float:
         v2 (np.ndarray): The second array.
 
     Returns:
-        float: The Mean Squared Error value, representing the average squared difference between v1 and v2.
+        float: The Mean Squared Error value, representing the average squared
+               difference between v1 and v2.
 
     Formula:
         MSE(v1, v2) = Σ((v1 - v2) ** 2) / len(v1)
@@ -200,8 +203,8 @@ def agg_by_min_or_max(
     Arguments:
         i1 (torch.Tensor): First series of numbers.
         i2 (torch.Tensor): Second series of numbers.
-        aggregate_by_minimum (bool): True if you want to take the minimum of the two series.
-                                     False if you want to take the maximum instead.
+        aggregate_by_minimum (bool): True to take the minimum of the two series.
+                                     False to take the maximum instead.
 
     Returns:
         torch.Tensor: Average of the two series.