From b928fb8ba8dceb80bdf2fbf0e27d29000c9f0ebe Mon Sep 17 00:00:00 2001
From: DevPatel-11
Date: Sun, 5 Oct 2025 20:10:19 +0530
Subject: [PATCH 1/4] Added vectorized approach of Gradient descent. Also
 added typehints to gradient_descent.py

---
 machine_learning/gradient_descent.py | 93 +++++++++++++++++++++++++---
 1 file changed, 85 insertions(+), 8 deletions(-)

diff --git a/machine_learning/gradient_descent.py b/machine_learning/gradient_descent.py
index 95463faf5635..06f068ef5528 100644
--- a/machine_learning/gradient_descent.py
+++ b/machine_learning/gradient_descent.py
@@ -4,6 +4,7 @@
 """
 
 import numpy as np
+from typing import Tuple, List, Union
 
 # List of input, output pairs
 train_data = (
@@ -19,7 +20,7 @@
 LEARNING_RATE = 0.009
 
 
-def _error(example_no, data_set="train"):
+def _error(example_no: int, data_set: str = "train") -> float:
     """
     :param data_set: train data or test data
     :param example_no: example number whose error has to be checked
@@ -30,7 +31,7 @@
     )
 
 
-def _hypothesis_value(data_input_tuple):
+def _hypothesis_value(data_input_tuple: Tuple[float, ...]) -> float:
     """
     Calculates hypothesis function value for a given input
     :param data_input_tuple: Input tuple of a particular example
@@ -46,7 +47,7 @@
     return hyp_val
 
 
-def output(example_no, data_set):
+def output(example_no: int, data_set: str) -> float:
     """
     :param data_set: test data or train data
     :param example_no: example whose output is to be fetched
@@ -59,7 +60,7 @@
     return None
 
 
-def calculate_hypothesis_value(example_no, data_set):
+def calculate_hypothesis_value(example_no: int, data_set: str) -> float:
     """
     Calculates hypothesis value for a given example
     :param data_set: test data or train_data
@@ -73,7 +74,7 @@
     return None
 
 
-def summation_of_cost_derivative(index, end=m):
+def summation_of_cost_derivative(index: int, end: int = m) -> float:
     """
     Calculates the sum of cost function derivative
     :param index: index wrt derivative is being calculated
@@ -91,7 +92,7 @@
     return summation_value
 
 
-def get_cost_derivative(index):
+def get_cost_derivative(index: int) -> float:
     """
     :param index: index of the parameter vector wrt to derivative is to be calculated
     :return: derivative wrt to that index
@@ -102,7 +103,7 @@
     return cost_derivative_value
 
 
-def run_gradient_descent():
+def run_gradient_descent() -> None:
     global parameter_vector
     # Tune these values to set a tolerance value for predicted output
     absolute_error_limit = 0.000002
@@ -127,13 +128,89 @@
     print(("Number of iterations:", j))
 
 
-def test_gradient_descent():
+def test_gradient_descent() -> None:
     for i in range(len(test_data)):
         print(("Actual output value:", output(i, "test")))
         print(("Hypothesis output:", calculate_hypothesis_value(i, "test")))
 
 
+def run_gradient_descent_vectorized() -> None:
+    """
+    Vectorized implementation of gradient descent for a linear hypothesis
+    using NumPy arrays for efficient matrix operations.
+ """ + global parameter_vector + + # Convert training data into NumPy arrays + X = np.array([x for x, _ in train_data]) + y = np.array([y for _, y in train_data]) + + # Add bias term (column of ones) + X = np.hstack((np.ones((X.shape[0], 1)), X)) + + # Convert parameter vector to NumPy array + theta = np.array(parameter_vector, dtype=float) + + absolute_error_limit = 0.000002 + relative_error_limit = 0 + j = 0 + + while True: + j += 1 + + # Compute predictions + predictions = X @ theta + + # Compute errors + errors = predictions - y + + # Compute gradient + gradient = (X.T @ errors) / len(y) + + # Update parameters + new_theta = theta - LEARNING_RATE * gradient + + # Check for convergence + if np.allclose( + theta, + new_theta, + atol=absolute_error_limit, + rtol=relative_error_limit, + ): + break + + theta = new_theta + + parameter_vector = theta.tolist() + print(("Number of iterations (vectorized):", j)) + + +def test_gradient_descent_vectorized() -> None: + """ + Tests the vectorized gradient descent implementation on test data + and prints predicted vs actual outputs. + """ + X_test = np.array([x for x, _ in test_data]) + y_test = np.array([y for _, y in test_data]) + + # Add bias term + X_test = np.hstack((np.ones((X_test.shape[0], 1)), X_test)) + + theta = np.array(parameter_vector, dtype=float) + predictions = X_test @ theta + + for i in range(len(test_data)): + print(("Actual output value:", y_test[i])) + print(("Hypothesis output:", predictions[i])) + + if __name__ == "__main__": + print("Running naive (loop-based) gradient descent...\n") run_gradient_descent() print("\nTesting gradient descent for a linear hypothesis function.\n") test_gradient_descent() + + print("\nRunning vectorized gradient descent using NumPy...\n") + run_gradient_descent_vectorized() + print("\nTesting vectorized gradient descent.\n") + test_gradient_descent_vectorized() From a194b3822372cf35501b1eb43835dc52f4116ae0 Mon Sep 17 00:00:00 2001 From: DevPatel-11 Date: Sun, 5 Oct 2025 20:56:51 +0530 Subject: [PATCH 2/4] style: auto-format gradient_descent.py via pre-commit --- machine_learning/gradient_descent.py | 29 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/machine_learning/gradient_descent.py b/machine_learning/gradient_descent.py index 06f068ef5528..b15f3e012580 100644 --- a/machine_learning/gradient_descent.py +++ b/machine_learning/gradient_descent.py @@ -4,7 +4,6 @@ """ import numpy as np -from typing import Tuple, List, Union # List of input, output pairs train_data = ( @@ -15,7 +14,7 @@ ((11, 12, 13), 41), ) test_data = (((515, 22, 13), 555), ((61, 35, 49), 150)) -parameter_vector = [2, 4, 1, 5] +parameter_vector = [2.0, 4.0, 1.0, 5.0] m = len(train_data) LEARNING_RATE = 0.009 @@ -31,7 +30,7 @@ def _error(example_no: int, data_set: str = "train") -> float: ) -def _hypothesis_value(data_input_tuple: Tuple[float, ...]) -> float: +def _hypothesis_value(data_input_tuple: tuple[float, ...]) -> float: """ Calculates hypothesis function value for a given input :param data_input_tuple: Input tuple of a particular example @@ -40,7 +39,7 @@ def _hypothesis_value(data_input_tuple: Tuple[float, ...]) -> float: It is not explicitly mentioned in input data.. But, ML hypothesis functions use it. So, we have to take care of it separately. Line 36 takes care of it. 
""" - hyp_val = 0 + hyp_val = 0.0 for i in range(len(parameter_vector) - 1): hyp_val += data_input_tuple[i] * parameter_vector[i + 1] hyp_val += parameter_vector[0] @@ -57,7 +56,7 @@ def output(example_no: int, data_set: str) -> float: return train_data[example_no][1] elif data_set == "test": return test_data[example_no][1] - return None + raise ValueError(f"Unknown data_set: {data_set}") def calculate_hypothesis_value(example_no: int, data_set: str) -> float: @@ -71,7 +70,7 @@ def calculate_hypothesis_value(example_no: int, data_set: str) -> float: return _hypothesis_value(train_data[example_no][0]) elif data_set == "test": return _hypothesis_value(test_data[example_no][0]) - return None + raise ValueError(f"Unknown data_set: {data_set}") def summation_of_cost_derivative(index: int, end: int = m) -> float: @@ -83,7 +82,7 @@ def summation_of_cost_derivative(index: int, end: int = m) -> float: Note: If index is -1, this means we are calculating summation wrt to biased parameter. """ - summation_value = 0 + summation_value = 0.0 for i in range(end): if index == -1: summation_value += _error(i) @@ -111,7 +110,7 @@ def run_gradient_descent() -> None: j = 0 while True: j += 1 - temp_parameter_vector = [0, 0, 0, 0] + temp_parameter_vector = [0.0, 0.0, 0.0, 0.0] for i in range(len(parameter_vector)): cost_derivative = get_cost_derivative(i - 1) temp_parameter_vector[i] = ( @@ -142,11 +141,11 @@ def run_gradient_descent_vectorized() -> None: global parameter_vector # Convert training data into NumPy arrays - X = np.array([x for x, _ in train_data]) + x_train = np.array([x for x, _ in train_data]) y = np.array([y for _, y in train_data]) # Add bias term (column of ones) - X = np.hstack((np.ones((X.shape[0], 1)), X)) + x_train = np.hstack((np.ones((x_train.shape[0], 1)), x_train)) # Convert parameter vector to NumPy array theta = np.array(parameter_vector, dtype=float) @@ -159,13 +158,13 @@ def run_gradient_descent_vectorized() -> None: j += 1 # Compute predictions - predictions = X @ theta + predictions = x_train @ theta # Compute errors errors = predictions - y # Compute gradient - gradient = (X.T @ errors) / len(y) + gradient = (x_train.T @ errors) / len(y) # Update parameters new_theta = theta - LEARNING_RATE * gradient @@ -190,14 +189,14 @@ def test_gradient_descent_vectorized() -> None: Tests the vectorized gradient descent implementation on test data and prints predicted vs actual outputs. 
""" - X_test = np.array([x for x, _ in test_data]) + x_test = np.array([x for x, _ in test_data]) y_test = np.array([y for _, y in test_data]) # Add bias term - X_test = np.hstack((np.ones((X_test.shape[0], 1)), X_test)) + x_test = np.hstack((np.ones((x_test.shape[0], 1)), x_test)) theta = np.array(parameter_vector, dtype=float) - predictions = X_test @ theta + predictions = x_test @ theta for i in range(len(test_data)): print(("Actual output value:", y_test[i])) From ed183cbef6ba4f2a30c768c3b69fb9dc5041456c Mon Sep 17 00:00:00 2001 From: DevPatel-11 Date: Sun, 5 Oct 2025 21:03:32 +0530 Subject: [PATCH 3/4] style: auto-format gradient_descent.py via ruff --- machine_learning/gradient_descent.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/machine_learning/gradient_descent.py b/machine_learning/gradient_descent.py index b15f3e012580..a2ab2f46af4f 100644 --- a/machine_learning/gradient_descent.py +++ b/machine_learning/gradient_descent.py @@ -56,7 +56,9 @@ def output(example_no: int, data_set: str) -> float: return train_data[example_no][1] elif data_set == "test": return test_data[example_no][1] - raise ValueError(f"Unknown data_set: {data_set}") + msg = "Unknown data_set: " + data_set + raise ValueError(msg) + def calculate_hypothesis_value(example_no: int, data_set: str) -> float: @@ -70,7 +72,9 @@ def calculate_hypothesis_value(example_no: int, data_set: str) -> float: return _hypothesis_value(train_data[example_no][0]) elif data_set == "test": return _hypothesis_value(test_data[example_no][0]) - raise ValueError(f"Unknown data_set: {data_set}") + msg = "Unknown data_set: " + data_set + raise ValueError(msg) + def summation_of_cost_derivative(index: int, end: int = m) -> float: From aacebedf2c039f1452d7de6b35792f11e1a5d4d5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 5 Oct 2025 15:33:56 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/gradient_descent.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/machine_learning/gradient_descent.py b/machine_learning/gradient_descent.py index a2ab2f46af4f..d9e253df7495 100644 --- a/machine_learning/gradient_descent.py +++ b/machine_learning/gradient_descent.py @@ -60,7 +60,6 @@ def output(example_no: int, data_set: str) -> float: raise ValueError(msg) - def calculate_hypothesis_value(example_no: int, data_set: str) -> float: """ Calculates hypothesis value for a given example @@ -76,7 +75,6 @@ def calculate_hypothesis_value(example_no: int, data_set: str) -> float: raise ValueError(msg) - def summation_of_cost_derivative(index: int, end: int = m) -> float: """ Calculates the sum of cost function derivative