Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# Simple-Linear-Regression
# Simple-Linear-Regression from scratch

A simple Python program that implements Linear Regression on a sample dataset. The program uses sklearn.linear_model from the scikit-learn library to import the class LinearRegression. An object of the class is created and fitted with the X_Train and Y_Train data.
A simple Python program that implements Linear Regression on a sample dataset. The program uses only NumPy and other basic libraries. It implements single-variable linear regression in full, using object-oriented programming (OOP) in Python.

Y_Pred stores the predicted values of X_Test

A graph is plotted using the matplotlib.pyplot to visually represent the Linear Regression model.
You can choose your own learning rate and number of epochs to train the model; after training, it plots the error vs. epochs graph.
93 changes: 59 additions & 34 deletions Simple_Linear_Regression.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,72 @@
# Simple Linear Regression
# Simple Linear Regression from scratch without using sklearn or Tensorflow.
# Importing the libraries

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys

# Importing the datasets
# Load the salary CSV from the current working directory.

datasets = pd.read_csv('Salary_Data.csv')

# X keeps every column except the last as a 2-D feature array; Y is the
# column at index 1 as a 1-D target array.
X = datasets.iloc[:, :-1].values
Y = datasets.iloc[:, 1].values

# Splitting the dataset into the Training set and Test set
# One third of the rows are held out for testing; random_state = 0 makes the
# split reproducible.

from sklearn.model_selection import train_test_split
X_Train, X_Test, Y_Train, Y_Test = train_test_split(X, Y, test_size = 1/3, random_state = 0)

# Fitting Simple Linear Regression to the training set

from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_Train, Y_Train)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys

# Predicting the Test set result 
class Linear_regression:

def __init__(self, train_data, train_labels, epochs=1500, alpha=0.01):
    """Store the training set and initialise the model parameters.

    Args:
        train_data: Feature values, one row per sample. A flat 1-D
            sequence is treated as a single feature column.
        train_labels: Target values, one per sample.
        epochs: Number of gradient-descent iterations run by ``train``.
        alpha: Learning rate (step size) used by ``train``.
    """
    self.train_data = train_data
    self.train_labels = train_labels

    features = np.asarray(train_data)
    if features.ndim == 1:
        # np.insert(..., axis=1) needs a 2-D array; promote a flat vector
        # to a single-feature column instead of failing.
        features = features.reshape(-1, 1)

    # Prepend a column of ones so that weights[0] acts as the intercept.
    self.new_train_data = np.insert(features, 0, 1, axis=1)
    # One weight per design-matrix column rather than a hard-coded (2, 1),
    # so the model also works with more than one feature.
    self.weights = np.zeros((self.new_train_data.shape[1], 1))
    self.epochs = epochs
    self.alpha = alpha

# Predictions of the fitted regressor for the held-out X_Test rows.
Y_Pred = regressor.predict(X_Test)
def hypothesis(self):
    """Return the model output for every stored training sample.

    The result is the product of the bias-augmented training matrix and
    the current weight vector.
    """
    design_matrix = self.new_train_data
    parameters = self.weights
    return np.dot(design_matrix, parameters)

def cost(self):
    """Return the halved mean squared error on the training set.

    Computes ``sum((h - y) ** 2) / (2 * m)`` where ``h`` is
    ``self.hypothesis()``, ``y`` the training labels and ``m`` the number
    of labels.

    Raises:
        ValueError: If the labels cannot be reshaped to match the
            predictions (different number of elements).
    """
    predictions = self.hypothesis()
    # Align the labels with the predictions. A flat (m,) label vector would
    # otherwise broadcast against the (m, 1) predictions into an (m, m)
    # matrix and silently inflate the cost.
    targets = np.reshape(self.train_labels, np.shape(predictions))
    residuals = predictions - targets
    return (1 / (2 * np.size(targets))) * np.sum(residuals ** 2)

def derivative(self):
    """Return the gradient of the cost with respect to the weights.

    Computes ``X.T @ (h - y) / m`` where ``X`` is the bias-augmented
    training matrix, ``h`` is ``self.hypothesis()``, ``y`` the training
    labels and ``m`` the number of labels. The result has one row per
    weight.

    Raises:
        ValueError: If the labels cannot be reshaped to match the
            predictions (different number of elements).
    """
    predictions = self.hypothesis()
    # Align the labels with the predictions so a flat (m,) label vector
    # cannot broadcast the residual into an (m, m) matrix, which would
    # produce a gradient of the wrong shape.
    targets = np.reshape(self.train_labels, np.shape(predictions))
    residuals = predictions - targets
    return (1 / np.size(targets)) * np.dot(self.new_train_data.T, residuals)

# Visualising the Training set results
def train(self, plot=True):
    """Fit the weights with batch gradient descent.

    Runs ``self.epochs`` updates of ``weights - alpha * derivative()`` and
    records the cost measured just before each update in ``self.loss``.

    Args:
        plot: When true (the default, matching the previous behaviour),
            show the recorded cost per epoch with matplotlib once training
            finishes. Pass ``False`` to train without opening a window.

    Returns:
        tuple: ``(weights, loss)`` with the final weights and the recorded
        costs as a NumPy array.
    """
    self.loss = []
    for _ in range(self.epochs):
        # Record the cost of the current weights before taking the step.
        self.loss.append(self.cost())
        self.weights = self.weights - self.alpha * self.derivative()

    if plot:
        plt.plot(self.loss)
        plt.show()
    return self.weights, np.array(self.loss)

def predict(self, data):
    """Return the model's predictions for ``data``.

    Args:
        data: Samples to score. Either raw features with one column fewer
            than there are weights (the intercept column of ones is then
            added automatically), or a matrix that already carries the
            leading column of ones.

    Returns:
        The product of the (bias-augmented) samples and the weights.
    """
    samples = np.asarray(data)
    weight_count = np.shape(self.weights)[0]
    if samples.ndim == 2 and samples.shape[1] + 1 == weight_count:
        # Raw features in the same layout as the training data: add the
        # bias column here, otherwise the dot product fails on a shape
        # mismatch with the intercept weight.
        samples = np.insert(samples, 0, 1, axis=1)
    return np.dot(samples, self.weights)

# Training-set figure: observed training points in red, the regressor's
# predictions for the same inputs as a blue line.
plt.scatter(X_Train, Y_Train, color = 'red')
plt.plot(X_Train, regressor.predict(X_Train), color = 'blue')
plt.title('Salary vs Experience (Training Set)')
plt.xlabel('Years of experience')
plt.ylabel('Salary')
plt.show()
def visualize(self, data=None, xlabel='Years of experience', ylabel='Salary'):
    """Plot the training points together with the fitted line.

    Args:
        data: Predicted values to draw against the training inputs. When
            omitted, the current ``self.hypothesis()`` is used. (The
            argument used to be overwritten and ignored.)
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
    """
    if data is None:
        data = self.hypothesis()
    # The default labels describe Salary_Data.csv, the dataset this script
    # loads; the earlier population/profit labels belonged to other data.
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.scatter(self.train_data, self.train_labels, marker='x', color='red', label='Training data')
    # Column 1 of the augmented matrix is the feature; column 0 is the bias.
    plt.plot(self.new_train_data[:, 1], data, label='Linear regression')
    plt.legend(loc='lower right')
    plt.show()

# Visualising the Test set results
if __name__ == '__main__':

# Reading data from csv file
# The first CSV column becomes the feature matrix and the remaining
# column(s) the labels; slicing (instead of indexing) keeps both 2-D.

data = pd.read_csv('Salary_Data.csv')
train_data = np.array(data.iloc[:,:1])
train_labels = np.array(data.iloc[:,1:])

# Test-set figure: held-out points in red against the line fitted on the
# training set (drawn over the training inputs) in blue.
plt.scatter(X_Test, Y_Test, color = 'red')
plt.plot(X_Train, regressor.predict(X_Train), color = 'blue')
# This figure shows the test split, so it must not reuse the training title.
plt.title('Salary vs Experience (Test Set)')
plt.xlabel('Years of experience')
plt.ylabel('Salary')
plt.show()
# Applying linear regression
# Report the cost of the initial weights, fit with gradient descent, then
# report the learned weights and the cost after training.

gd = Linear_regression(train_data,train_labels)
print('older cost: ',gd.cost())
result = gd.train()
print('updated theta: \n',result[0])
print('final cost: ',gd.cost())
# Plot the training points together with the fitted line.
gd.visualize(gd.hypothesis())