Skip to content

Commit 0672df2

Browse files
authored
Merge pull request #39 from param087/linearRegression
added Least Squares Linear Regression algorithm
2 parents 55cdf5a + a433c0e commit 0672df2

File tree

5 files changed

+439
-23
lines changed

5 files changed

+439
-23
lines changed

Notebooks/Least Squares Linear Regression.ipynb

Lines changed: 368 additions & 0 deletions
Large diffs are not rendered by default.

Sources/swiftML/BernoulliNB.swift

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,6 @@ public class BernoulliNB {
4747
// Find unique classes in target values.
4848
(self.classes, self.indices) = Raw.unique(labels.flattened())
4949

50-
precondition(self.classes.shape[0] == 2, "Labels must have only two classes.")
51-
5250
// Initialize the classLogPrior and featureLogProb based on feature count and sample count.
5351
var separated = [[Tensor<Float>]]()
5452
self.classLogPrior = Tensor<Float>(zeros: [self.classes.shape[0]])

Sources/swiftML/KNeighborsClassifier.swift

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,9 @@ public class KNeighborsClassifier {
100100
/// - Returns: Predicted classification.
101101
internal func predictSingleSample(_ test: Tensor<Float>) -> Tensor<Int32> {
102102
var distances = Tensor<Float>(zeros: [self.data.shape[0]])
103-
var maxLabel = Tensor<Int32>(zeros: [self.neighborCount])
104-
var maxDistances: Tensor<Float>
105-
var maxIndex: Tensor<Int32>
103+
var minDistanceLabels = Tensor<Int32>(zeros: [self.neighborCount])
104+
var minDistances: Tensor<Float>
105+
var minDistanceIndex: Tensor<Int32>
106106
var classes: Tensor<Int32>
107107
var indices: Tensor<Int32>
108108

@@ -112,27 +112,27 @@ public class KNeighborsClassifier {
112112
}
113113

114114
// Find the top neighbors with minimum distance.
115-
(maxDistances, maxIndex) =
115+
(minDistances, minDistanceIndex) =
116116
Raw.topKV2(distances, k: Tensor<Int32>(Int32(data.shape[0])), sorted: true)
117-
maxDistances = Raw.reverse(maxDistances, dims: Tensor<Bool>([true]))
118-
maxDistances = maxDistances
117+
minDistances = Raw.reverse(minDistances, dims: Tensor<Bool>([true]))
118+
minDistances = minDistances
119119
.slice(lowerBounds: Tensor<Int32>([0]),
120120
sizes: Tensor<Int32>([Int32(self.neighborCount)]))
121121

122-
maxIndex = Raw.reverse(maxIndex, dims: Tensor<Bool>([true]))
123-
maxIndex = maxIndex
122+
minDistanceIndex = Raw.reverse(minDistanceIndex, dims: Tensor<Bool>([true]))
123+
minDistanceIndex = minDistanceIndex
124124
.slice(lowerBounds: Tensor<Int32>([0]),
125125
sizes: Tensor<Int32>([Int32(self.neighborCount)]))
126126

127127
for i in 0..<self.neighborCount {
128-
maxLabel[i] = self.labels[Int(maxIndex[i].scalarized())]
128+
minDistanceLabels[i] = self.labels[Int(minDistanceIndex[i].scalarized())]
129129
}
130130

131131
// Weight the neighbors based on their weighting method.
132132
let labelsAndWeightsTensor = computeWeights(
133-
distances: maxDistances, labels: Tensor<Float>(maxLabel))
133+
distances: minDistances, labels: Tensor<Float>(minDistanceLabels))
134134

135-
(classes, indices) = Raw.unique(Tensor<Int32>(maxLabel))
135+
(classes, indices) = Raw.unique(Tensor<Int32>(minDistanceLabels))
136136

137137
var kClasses = Tensor<Int32>(zeros: [classes.shape[0]])
138138
var kWeights = Tensor<Float>(zeros: [classes.shape[0]])

Sources/swiftML/KNeighborsRegressor.swift

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -97,34 +97,34 @@ public class KNeighborsRegressor {
9797
/// - Returns: Predicted target value.
9898
internal func predictSingleSample(_ test: Tensor<Float>) -> Tensor<Float> {
9999
var distances = Tensor<Float>(zeros: [self.data.shape[0]])
100-
var maxLabel = Tensor<Float>(zeros: [self.neighborCount])
101-
var maxDistances: Tensor<Float>
102-
var maxIndex: Tensor<Int32>
100+
var minDistanceLabels = Tensor<Float>(zeros: [self.neighborCount])
101+
var minDistances: Tensor<Float>
102+
var minDistanceIndex: Tensor<Int32>
103103

104104
// Calculate the Minkowski distance (order `self.p`) between `test` and every row of `self.data`.
105105
for i in 0..<self.data.shape[0] {
106106
distances[i] = minkowskiDistance(self.data[i], test, p: self.p)
107107
}
108108

109109
// Find the top neighbors with minimum distance: rank every distance with `topKV2` (largest first, per TensorFlow's TopKV2 op), then reverse and keep the first `neighborCount` entries.
110-
(maxDistances, maxIndex) =
110+
(minDistances, minDistanceIndex) =
111111
Raw.topKV2(distances, k: Tensor<Int32>(Int32(data.shape[0])), sorted: true)
112-
maxDistances = Raw.reverse(maxDistances, dims: Tensor<Bool>([true]))
113-
maxDistances = maxDistances
112+
minDistances = Raw.reverse(minDistances, dims: Tensor<Bool>([true]))
113+
minDistances = minDistances
114114
.slice(lowerBounds: Tensor<Int32>([0]),
115115
sizes: Tensor<Int32>([Int32(self.neighborCount)]))
116116

117-
maxIndex = Raw.reverse(maxIndex, dims: Tensor<Bool>([true]))
118-
maxIndex = maxIndex
117+
minDistanceIndex = Raw.reverse(minDistanceIndex, dims: Tensor<Bool>([true]))
118+
minDistanceIndex = minDistanceIndex
119119
.slice(lowerBounds: Tensor<Int32>([0]),
120120
sizes: Tensor<Int32>([Int32(self.neighborCount)]))
121121

122122
for i in 0..<self.neighborCount {
123-
maxLabel[i] = self.labels[Int(maxIndex[i].scalarized())]
123+
minDistanceLabels[i] = self.labels[Int(minDistanceIndex[i].scalarized())]
124124
}
125125

126126
// Average weight based on the neighbors' weights, as computed by `computeWeights`.
127-
let avgWeight = computeWeights(distances: maxDistances, labels: maxLabel)
127+
let avgWeight = computeWeights(distances: minDistances, labels: minDistanceLabels)
128128
return avgWeight
129129
}
130130

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import TensorFlow
2+
3+
/// Least Squares linear regression.
///
/// The weights are computed in closed form from the normal equation
/// `weights = (X^T X)^-1 X^T y`, where `X` is the training data (with a leading column of
/// ones when an intercept is fitted) and `y` is the label matrix.
///
/// Reference: ["Least Squares linear regression"](
/// https://en.wikipedia.org/wiki/Linear_regression)
public class LeastSquaresLinearRegression: LinearRegression {
    /// Whether to calculate the intercept for this model.
    public var fitIntercept: Bool

    /// The weights of the model.
    ///
    /// Holds a scalar zero placeholder until `fit(data:labels:)` has been called. After
    /// fitting, the shape is `[feature count, target count]`, with one extra leading row for
    /// the intercept when `fitIntercept` is `true`.
    public var weights: Tensor<Float>

    /// Creates a linear regression model.
    ///
    /// - Parameters:
    ///   - fitIntercept: Whether to calculate the intercept for this model. If `false`, no
    ///     intercept will be used in calculations. The default is `true`.
    public init(
        fitIntercept: Bool = true
    ) {
        self.fitIntercept = fitIntercept
        self.weights = Tensor<Float>(0)
    }

    /// Fit a linear model.
    ///
    /// - Parameters:
    ///   - data: Training data with shape `[sample count, feature count]`.
    ///   - labels: Target value with shape `[sample count, target count]`.
    ///
    /// - Precondition: `data` and `labels` are both rank-2 tensors with the same, positive
    ///   sample count and at least one column each.
    /// - Note: The solution inverts `X^T X` directly, so that matrix must be invertible.
    public func fit(data: Tensor<Float>, labels: Tensor<Float>) {
        // Check the ranks first so that the `shape[1]` subscripts below cannot go out of
        // bounds and every malformed input fails with a descriptive message.
        precondition(data.rank == 2,
            "Data must have shape [sample count, feature count].")
        precondition(labels.rank == 2,
            "Labels must have shape [sample count, target count].")
        precondition(data.shape[0] > 0, "Data must have a positive sample count.")
        precondition(data.shape[1] >= 1,
            "Data must have feature count greater than or equal to one.")
        precondition(labels.shape[0] > 0, "Labels must have a positive sample count.")
        precondition(labels.shape[1] >= 1,
            "Labels must have target feature count greater than or equal to one.")
        precondition(data.shape[0] == labels.shape[0],
            "Data and labels must have the same sample count.")

        // Build the design matrix; a leading column of ones makes the first weight row act
        // as the intercept.
        let design: Tensor<Float>
        if fitIntercept {
            let ones = Tensor<Float>(ones: [data.shape[0], 1])
            design = ones.concatenated(with: data, alongAxis: -1)
        } else {
            design = data
        }

        // weights = (X^T.X)^-1.X^T.y
        // The transpose is computed once and reused for both products.
        let designTransposed = design.transposed()
        let gramInverse = Raw.matrixInverse(matmul(designTransposed, design))
        self.weights = matmul(matmul(gramInverse, designTransposed), labels)
    }
}

0 commit comments

Comments
 (0)