diff --git a/README.md b/README.md index 124fb74..f6f8a86 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,5 @@ -# Simple-Linear-Regression +# Simple-Linear-Regression from scratch -A simple python program that implements Linear Regression on a sample dataset. The programuses sklearn.linear_model from the scikit-learn library to import the class LinearRegression. The object of the class is declared and is fitted with the X_Train and Y_Train data. +A simple Python program that implements Linear Regression on a sample dataset. The program uses only NumPy and other basic libraries; it implements single-variable linear regression from scratch using object-oriented programming (OOP) in Python. -Y_Pred stores the predicted values of X_Test - -A graph is plotted using the matplotlib.pyplot to visually represent the Linear Regression model. +You can select your own learning rate and number of epochs to train the model; after training, it plots the error vs. epochs graph. diff --git a/Simple_Linear_Regression.py b/Simple_Linear_Regression.py index e36f0d4..8382514 100644 --- a/Simple_Linear_Regression.py +++ b/Simple_Linear_Regression.py @@ -1,47 +1,72 @@ -# Simple Linear Regression +# Simple Linear Regression from scratch without using sklearn or Tensorflow. 
# Importing the libraries
import sys

import numpy as np
import pandas as pd


class Linear_regression:
    """Linear regression fitted with batch gradient descent.

    The model is ``h(x) = X_b @ weights`` where ``X_b`` is the training
    matrix with a leading column of ones (the intercept term). Plotting
    helpers assume a single input feature.

    Args:
        train_data: Feature values, shape ``(n_samples, n_features)``. A 1-D
            array is treated as a single feature column.
        train_labels: Target values, shape ``(n_samples, 1)`` or
            ``(n_samples,)``.
        epochs: Number of gradient-descent iterations run by ``train``.
        alpha: Learning rate (step size) used by ``train``.

    Raises:
        ValueError: If the inputs are empty or their sample counts differ.
    """

    def __init__(self, train_data, train_labels, epochs=1500, alpha=0.01):
        features = np.asarray(train_data, dtype=float)
        if features.ndim == 1:
            features = features.reshape(-1, 1)
        # Labels are forced into a column so that `hypothesis() - labels`
        # never broadcasts (n, 1) against (n,) into an (n, n) matrix.
        labels = np.asarray(train_labels, dtype=float).reshape(-1, 1)

        if features.ndim != 2 or features.shape[0] == 0:
            raise ValueError('train_data must be a non-empty 2-D array')
        if features.shape[0] != labels.shape[0]:
            raise ValueError(
                'train_data and train_labels must have the same number of rows'
            )

        self.train_data = features
        self.train_labels = labels
        # Prepend the constant-1 column that multiplies the intercept weight.
        self.new_train_data = np.insert(self.train_data, 0, 1, axis=1)
        # One weight per feature plus the intercept; (2, 1) for one feature.
        self.weights = np.zeros((self.new_train_data.shape[1], 1))
        self.epochs = epochs
        self.alpha = alpha

    def hypothesis(self):
        """Return the current predictions for the training data, shape (n, 1)."""
        return np.dot(self.new_train_data, self.weights)

    def cost(self):
        """Return the halved mean squared error on the training data."""
        residuals = self.hypothesis() - self.train_labels
        return (1 / (2 * np.size(self.train_labels))) * np.sum(residuals ** 2)

    def derivative(self):
        """Return the gradient of ``cost`` with respect to the weights."""
        residuals = self.hypothesis() - self.train_labels
        return (1 / np.size(self.train_labels)) * np.dot(
            self.new_train_data.T, residuals
        )

    def train(self, plot=True):
        """Run ``self.epochs`` gradient-descent steps.

        Args:
            plot: When true (the default), show the cost-vs-epoch curve after
                training. Pass ``False`` for headless or scripted use.

        Returns:
            A tuple ``(weights, loss)`` where ``loss`` is a 1-D array holding
            the cost measured *before* each update.
        """
        self.loss = []
        for _ in range(self.epochs):
            self.loss.append(self.cost())
            self.weights = self.weights - self.alpha * self.derivative()

        if plot:
            # Imported here so the numerical methods work without a display.
            import matplotlib.pyplot as plt

            plt.plot(self.loss)
            plt.show()
        return self.weights, np.array(self.loss)

    def predict(self, data):
        """Return predictions for ``data`` as a column (or 1-element) array.

        ``data`` may hold raw feature values (the intercept column is added
        automatically) or rows that already start with the constant 1, i.e.
        whose last dimension equals the number of weights. A 1-D input whose
        length equals the number of weights is treated as one such row; pass
        raw samples as a column to avoid that ambiguity.

        Raises:
            ValueError: If the column count matches neither layout.
        """
        features = np.atleast_1d(np.asarray(data, dtype=float))
        n_weights = self.weights.shape[0]
        if features.shape[-1] == n_weights:
            # Already carries the intercept column.
            return np.dot(features, self.weights)

        if features.ndim == 1:
            features = features.reshape(-1, n_weights - 1)
        if features.ndim != 2 or features.shape[1] != n_weights - 1:
            raise ValueError(
                'data must have %d or %d columns' % (n_weights - 1, n_weights)
            )
        return np.dot(np.insert(features, 0, 1, axis=1), self.weights)

    def visualize(self, data=None,
                  xlabel='population of city in 10,000s',
                  ylabel='profit in $10,000'):
        """Plot the training points and a fitted line (single feature only).

        Args:
            data: Line values to draw against the training feature. Defaults
                to the current ``hypothesis()``.
            xlabel: Label for the x axis.
            ylabel: Label for the y axis.
        """
        import matplotlib.pyplot as plt

        fitted = self.hypothesis() if data is None else np.asarray(data)
        feature = self.new_train_data[:, 1]
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.scatter(feature, self.train_labels, marker='x', color='red',
                    label='Training data')
        plt.plot(feature, fitted, label='Linear regression')
        plt.legend(loc='lower right')
        plt.show()


def main():
    """Fit the model on ``Salary_Data.csv`` and show the results."""
    # Reading data from csv file: first column is the feature, rest the label.
    data = pd.read_csv('Salary_Data.csv')
    train_data = np.array(data.iloc[:, :1])
    train_labels = np.array(data.iloc[:, 1:])

    # Applying linear regression
    gd = Linear_regression(train_data, train_labels)
    print('older cost: ', gd.cost())
    result = gd.train()
    print('updated theta: \n', result[0])
    print('final cost: ', gd.cost())

    gd.visualize(gd.hypothesis(), xlabel='Years of experience',
                 ylabel='Salary')


if __name__ == '__main__':
    main()