Skip to content

Commit 46fbd93

Browse files
committed
Update linear_regression.py
Add an error-handling wrapper to each function so the script reports failures gracefully and can still run the linear regression algorithm.
1 parent 729c1f9 commit 46fbd93

File tree

1 file changed

+87
-47
lines changed

1 file changed

+87
-47
lines changed

machine_learning/linear_regression.py

Lines changed: 87 additions & 47 deletions
Original file line numberDiff line numberDiff line change
def collect_dataset():
    """Collect the dataset of CSGO.

    The dataset contains ADR vs Rating of a Player.

    :return : dataset obtained from the link, as matrix, or ``None``
              when fetching or parsing the CSV fails
    """
    try:
        response = requests.get(
            "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
            "master/Week1/ADRvsRating.csv",
            timeout=10,
        )
        response.raise_for_status()  # Check for HTTP errors
        # Split each CSV line into fields; [1:] removes the header labels.
        rows = [line.split(",") for line in response.text.splitlines()][1:]
        # NOTE(review): np.matrix is deprecated in NumPy; kept here so the
        # return type (and downstream slicing in main) stays unchanged.
        return np.matrix(rows)
    except requests.exceptions.RequestException as e:
        # Network failure, timeout, or non-2xx status from raise_for_status.
        print(f"Error in fetching dataset: {e}")
        return None
    except ValueError as e:
        # np.matrix raises ValueError on ragged/malformed rows; kept narrow
        # (was a blanket ``except Exception``) so genuine programming errors
        # still surface instead of being silently converted to ``None``.
        print(f"Unexpected error in processing dataset: {e}")
        return None
3341

3442

3543
def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta):
    """Run one step of steep (batch) gradient descent.

    :param data_x : contains the dataset (design matrix), shape (m, n)
    :param data_y : contains the output associated with each entry, shape (m, 1)
    :param len_data : number of samples m used to scale the gradient
    :param alpha : learning rate
    :param theta : current feature (weight) vector, shape (1, n)
    :return : updated features, i.e.
              curr_features - alpha * gradient(w.r.t. feature),
              or ``None`` if the linear-algebra step fails
    """
    try:
        n = len_data
        # Row vector of residuals: (theta . X^T) - y^T
        prod = np.dot(theta, data_x.transpose())
        prod -= data_y.transpose()
        # Gradient of the squared-error cost with respect to theta.
        sum_grad = np.dot(prod, data_x)
        theta = theta - (alpha / n) * sum_grad
        return theta
    except (ValueError, TypeError) as e:
        # Shape mismatches or non-numeric inputs; kept narrow (was a blanket
        # ``except Exception``) so real bugs are not silently turned into None.
        print(f"Error during gradient descent: {e}")
        return None
5263

5364

5465
def sum_of_square_error(data_x, data_y, len_data, theta):
    """Return the sum-of-squares cost for the current feature vector.

    :param data_x : contains our dataset (design matrix), shape (m, n)
    :param data_y : contains the output (result vector), shape (m, 1)
    :param len_data : number of samples m
    :param theta : contains the feature vector, shape (1, n)
    :return : sum of square error computed from given feature's,
              or ``None`` if the computation fails
    """
    try:
        # Residuals between predictions and targets, as a row vector.
        prod = np.dot(theta, data_x.transpose())
        prod -= data_y.transpose()
        sum_elem = np.sum(np.square(prod))
        # The conventional 1/(2m) scaling of the squared-error cost.
        error = sum_elem / (2 * len_data)
        return error
    except (ValueError, TypeError) as e:
        # Shape mismatches or non-numeric inputs; kept narrow (was a blanket
        # ``except Exception``) so real bugs are not silently turned into None.
        print(f"Error in calculating sum of square error: {e}")
        return None
6782

6883

6984
def run_linear_regression(data_x, data_y):
    """Implement linear regression over the dataset.

    :param data_x : contains our dataset
    :param data_y : contains the output (result vector)
    :return : feature for line of best fit (Feature vector), or ``None``
              when any intermediate step fails
    """
    try:
        num_iterations = 100000
        learning_rate = 0.0001550

        feature_count = data_x.shape[1]
        sample_count = data_x.shape[0]

        # Start from an all-zero weight vector.
        weights = np.zeros((1, feature_count))

        for step in range(num_iterations):
            weights = run_steep_gradient_descent(
                data_x, data_y, sample_count, learning_rate, weights
            )
            if weights is None:  # If gradient descent fails, exit
                print("Gradient descent failed. Exiting.")
                return None
            cost = sum_of_square_error(data_x, data_y, sample_count, weights)
            if cost is None:  # If error calculation fails, exit
                print("Error calculation failed. Exiting.")
                return None
            if step % 1000 == 0:  # Print every 1000 iterations
                print(f"At Iteration {step + 1} - Error is {cost:.5f}")

        return weights
    except Exception as e:
        print(f"Error in linear regression: {e}")
        return None
89115

90116

91117
def mean_absolute_error(predicted_y, original_y):
    """Return the mean absolute error between predictions and targets.

    :param predicted_y : contains the output of prediction (result vector)
    :param original_y : contains values of expected outcome
    :return : mean absolute error computed from given feature's, or
              ``None`` for empty input or mismatched/non-numeric values
    """
    try:
        if len(original_y) == 0:
            # An empty comparison has no defined mean; previously this fell
            # through to 0/0 and surfaced as a confusing "division by zero".
            print("Error in calculating mean absolute error: empty input")
            return None
        total = sum(abs(y - predicted_y[i]) for i, y in enumerate(original_y))
        return total / len(original_y)
    except (IndexError, TypeError) as e:
        # predicted_y shorter than original_y, or non-numeric entries; kept
        # narrow (was a blanket ``except Exception``) so real bugs surface.
        print(f"Error in calculating mean absolute error: {e}")
        return None
99129

100130

101131
def main():
    """Driver function: fetch the dataset, fit the model, print the weights."""
    dataset = collect_dataset()
    if dataset is None:
        print("Failed to collect or process the dataset. Exiting.")
        return

    try:
        num_rows = dataset.shape[0]
        # Prepend a bias column of ones; every column but the last is a
        # feature, the last column is the regression target.
        features = np.c_[np.ones(num_rows), dataset[:, :-1]].astype(float)
        targets = dataset[:, -1].astype(float)

        weights = run_linear_regression(features, targets)
        if weights is None:
            print("Linear regression failed. Exiting.")
            return

        print("Resultant Feature vector:")
        for idx in range(weights.shape[1]):
            print(f"{weights[0, idx]:.5f}")
    except Exception as e:
        print(f"Unexpected error in main: {e}")
114154

115155

116156
if __name__ == "__main__":

0 commit comments

Comments
 (0)