mahesh147 · DextroLaev · Sep 30, 2020 · Sep 30, 2020 · Sep 30, 2020
diff --git a/README.md b/README.md
@@ -1,7 +1,5 @@
-# Simple-Linear-Regression
+# Simple-Linear-Regression from scratch
 
-A simple python program that implements Linear Regression on a sample dataset. The programuses sklearn.linear_model from the scikit-learn library to import the class LinearRegression. The object of the class is declared and is fitted with the X_Train and Y_Train data.
+A simple Python program that implements Linear Regression on a sample dataset. The program uses only NumPy and other basic libraries; the whole single-variable linear regression is implemented from scratch using object-oriented programming (OOP) in Python.
 
-Y_Pred stores the predicted values of X_Test
-
-A graph is plotted using the matplotlib.pyplot to visually represent the Linear Regression model.
+You can choose your own learning rate and number of epochs to train the model; after training, it plots the error-vs-epochs graph.
diff --git a/Simple_Linear_Regression.py b/Simple_Linear_Regression.py
@@ -1,47 +1,72 @@
-# Simple Linear Regression
+# Simple Linear Regression from scratch, without using scikit-learn or TensorFlow.
 # Importing the libraries
 
 import numpy as np
 import matplotlib.pyplot as plt
 import pandas as pd
+import sys
 
-# Importing the datasets
-
-datasets = pd.read_csv('Salary_Data.csv')
-
-X = datasets.iloc[:, :-1].values
-Y = datasets.iloc[:, 1].values
-
-# Splitting the dataset into the Training set and Test set
-
-from sklearn.model_selection import train_test_split
-X_Train, X_Test, Y_Train, Y_Test = train_test_split(X, Y, test_size = 1/3, random_state = 0)
-
-# Fitting Simple Linear Regression to the training set
-
-from sklearn.linear_model import LinearRegression
-regressor = LinearRegression()
-regressor.fit(X_Train, Y_Train)
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import sys
 
-# Predicting the Test set result 
+class Linear_regression:
+
+	def __init__(self,train_data,train_labels):  # Store the training set, build the design matrix, and set initial weights and hyperparameters.
+		self.train_data = train_data
+		self.train_labels = train_labels
+		self.new_train_data = np.insert(self.train_data,0,1,axis=1)		# design matrix: a column of 1s is inserted at index 0 so the first weight acts as the intercept
+		self.weights = np.zeros((2,1))		# 2x1 weights, all zero; row 0 pairs with the 1s column, row 1 with the feature — assumes a single feature column, TODO confirm
+		self.epochs = 1500  # number of gradient-descent iterations run by train(); edit here to change
+		self.alpha = 0.01 # learning rate that scales each weight update in train(); edit here to change
 
-Y_Pred = regressor.predict(X_Test)
+	def hypothesis(self):  # Return the current predictions on the training set: design matrix (with 1s column) dotted with the weights.
+		return np.dot(self.new_train_data,self.weights)	
+
+	def cost(self):  # Return the halved mean squared error of hypothesis() against train_labels.
+		cost = (1/(2*np.size(self.train_labels)))*np.sum((self.hypothesis()-self.train_labels)**2)  # sum(err**2) / (2*m), where m = np.size(train_labels)
+		return cost		
 
+	def derivative(self):  # Return the gradient of cost() with respect to the weights: X.T · (hypothesis - labels) / m.
+		return (1/np.size(self.train_labels))*np.dot(self.new_train_data.T,(self.hypothesis()-self.train_labels))
 
-# Visualising the Training set results
+	def train(self):  # Run self.epochs gradient-descent steps, plot the recorded loss, and return (weights, loss array).
+		self.loss = []		
+		for i in range(self.epochs):
+			cost = self.cost()					
+			self.weights = self.weights - (self.alpha) * self.derivative()  # gradient-descent step: move the weights against the gradient, scaled by alpha
+			self.loss.append(cost)  # the recorded value is the cost measured before this iteration's update
+
+		plt.plot(self.loss)  # loss curve; the x axis is the iteration index
+		plt.show()	
+		return self.weights,np.array(self.loss)
+
+	def predict(self,data):  # Return data dotted with the learned weights.
+		return np.dot(data,self.weights)	# NOTE(review): unlike __init__, no 1s column is inserted here; weights is (2,1), so data presumably must already carry the leading 1s column — confirm
 
-plt.scatter(X_Train, Y_Train, color = 'red')
-plt.plot(X_Train, regressor.predict(X_Train), color = 'blue')
-plt.title('Salary vs Experience  (Training Set)')
-plt.xlabel('Years of experience')
-plt.ylabel('Salary')
-plt.show()
+	def visualize(self,data):  # Scatter the training points and plot hypothesis() against the feature column on the same axes.
+		data = self.hypothesis()  # NOTE(review): the data argument is overwritten here, so whatever the caller passes is ignored
+		plt.xlabel('population of city in 10,000s')  # NOTE(review): labels mention city population/profit, but the script loads Salary_Data.csv — confirm they match the dataset
+		plt.ylabel('profit in $10,000')		
+		plt.scatter(self.train_data,self.train_labels,marker='x',color='red',label='Training data')
+		plt.plot(self.new_train_data[:,1],data,label='Linear regression')  # column 1 of the design matrix is the original feature (column 0 is the inserted 1s)
+		plt.legend(loc='lower right')
+		plt.show()						
 
-# Visualising the Test set results
+if __name__ == '__main__':  # script entry point: load the data, then train and plot
+
+ # Read the dataset from the CSV file
+
+	data = pd.read_csv('Salary_Data.csv')
+	train_data = np.array(data.iloc[:,:1])  # first column, sliced so it stays 2-D; used as the feature
+	train_labels = np.array(data.iloc[:,1:])			# every column from index 1 onward, kept 2-D; used as the labels
 
-plt.scatter(X_Test, Y_Test, color = 'red')
-plt.plot(X_Train, regressor.predict(X_Train), color = 'blue')
-plt.title('Salary vs Experience  (Training Set)')
-plt.xlabel('Years of experience')
-plt.ylabel('Salary')
-plt.show()
+ # Fit the model with gradient descent and report the results
+
+	gd = Linear_regression(train_data,train_labels)	
+	print('older cost: ',gd.cost())  # cost before training, while the weights are still all zero
+	result = gd.train()  # returns (weights, loss array) and shows the loss plot
+	print('updated theta: \n',result[0])
+	print('final cost: ',gd.cost())
+	gd.visualize(gd.hypothesis())  # NOTE(review): visualize() reassigns its argument to self.hypothesis(), so the value passed here is unused