
Commit 614cb30

Merge pull request #26 from SciNim/differentiation
2 parents 0170715 + d9827d5 commit 614cb30

File tree

7 files changed: +1010, -61 lines


changelog.md

Lines changed: 30 additions & 0 deletions
@@ -1,3 +1,33 @@
# v0.8.0 - 09.05.2022
## Optimization has joined the chat
Multi-variate optimization and differentiation have been introduced.

- `numericalnim/differentiate` offers `tensorGradient(f, x)`, which calculates the gradient of `f` w.r.t. `x` using finite differences, as well as `tensorJacobian` (returns the transpose of the gradient), `tensorHessian` and `mixedDerivative`. It also provides `checkGradient(f, analyticGrad, x, tol)` to verify that an analytic gradient is correct by comparing it to the finite difference approximation. (A usage sketch is given after this list.)
- `numericalnim/optimize` now has several multi-variate optimization methods:
  - `steepestDescent`
  - `newton`
  - `bfgs`
  - `lbfgs`
  - They all have function signatures like:
    ```nim
    proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = bfgsOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U]
    ```
    where `f` is the function to be minimized, `x0` is the starting guess, `options` contains settings such as the tolerance (each method has its own options type, created with for example `lbfgsOptions` or `newtonOptions`), and `analyticGradient` can be supplied to avoid finite difference approximations of the derivatives. (See the optimizer sketch after this list.)
  - Four line search methods are supported; they are selected in the `options`: `Armijo, Wolfe, WolfeStrong, NoLineSearch`.
  - `levmarq`: non-linear least-squares optimizer (see the fitting sketch after this list):
    ```nim
    proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Tensor[U], xData: Tensor[U], yData: Tensor[T], options: OptimOptions[U, LevmarqOptions[U]] = levmarqOptions[U]()): Tensor[U]
    ```
    - `f` is the function you want to fit; `params` are the parameters and `x` is the value to evaluate the function at.
    - `params0` is the initial guess for the parameters.
    - `xData` is a 1D Tensor with the x points and `yData` is a 1D Tensor with the y points.
    - `options` can be created using `levmarqOptions`.
    - Returns the final parameters.
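
Below is a minimal usage sketch of the differentiation helpers; the quadratic `f` and its analytic gradient `analyticGrad` are made-up examples, not part of the library:

```nim
import arraymancer
import numericalnim

# Made-up test function: f(x) = sum(x_i^2), whose analytic gradient is 2*x
proc f(x: Tensor[float]): float =
  for xi in x:
    result += xi * xi

proc analyticGrad(x: Tensor[float]): Tensor[float] =
  2.0 * x

let x0 = ones[float](3)
# Finite difference gradient at x0, approximately [2, 2, 2]
echo tensorGradient(f, x0)
# true if the analytic gradient matches the numeric one within the tolerance
echo checkGradient(f, analyticGrad, x0, 1e-4)
```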
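
Likewise, a sketch of calling `bfgs` on a made-up quadratic with its minimum at `[1.0, 2.0]`; passing `analyticGradient` is optional and finite differences are used when it is left out:

```nim
import arraymancer
import numericalnim

# Made-up objective: f(x) = (x0 - 1)^2 + (x1 - 2)^2, minimum at [1.0, 2.0]
proc f(x: Tensor[float]): float =
  let dx = x[0] - 1.0
  let dy = x[1] - 2.0
  result = dx*dx + dy*dy

# Optional analytic gradient of f
proc gradF(x: Tensor[float]): Tensor[float] =
  [2.0 * (x[0] - 1.0), 2.0 * (x[1] - 2.0)].toTensor

let x0 = zeros[float](2)
echo bfgs(f, x0)                            # default options, numeric gradient
echo bfgs(f, x0, analyticGradient = gradF)  # supply the analytic gradient instead
```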
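
And a sketch of fitting a straight line `y = a*x + b` with `levmarq`; the model and data points below are illustrative only:

```nim
import arraymancer
import numericalnim

# Model to fit: y = a*x + b, with params = [a, b]
proc model(params: Tensor[float], x: float): float =
  params[0] * x + params[1]

let xData = @[0.0, 1.0, 2.0, 3.0, 4.0].toTensor
let yData = @[1.0, 3.1, 4.9, 7.2, 9.0].toTensor  # roughly y = 2*x + 1
let params0 = ones[float](2)                     # initial guess for [a, b]

# Should converge to parameters close to [2.0, 1.0]
echo levmarq(model, params0, xData, yData)
```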
Note: There are basic tests to ensure these methods converge for simple problems, but they are not tested on more complex problems and should be considered experimental until more tests have been done. Please try them out, but don't rely on them for anything important for now. Also, the API isn't set in stone yet, so expect that it may change in future versions.
# v0.7.1 - 25.01.2022
Add a `nimCI` task for the Nim CI to run now that the tests have external dependencies.

numericalnim.nimble

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
# Package Information
-version = "0.7.1"
+version = "0.8.0"
author = "Hugo Granström"
description = "A collection of numerical methods written in Nim. Current features: integration, ode, optimization."
license = "MIT"

src/numericalnim.nim

Lines changed: 2 additions & 0 deletions
@@ -8,5 +8,7 @@ import numericalnim/optimize
export optimize
import numericalnim/interpolate
export interpolate
+import numericalnim/differentiate
+export differentiate
import ./numericalnim/common/commonTypes
export commonTypes

src/numericalnim/differentiate.nim

Lines changed: 199 additions & 0 deletions
@@ -0,0 +1,199 @@
import std/strformat
import arraymancer

proc diff1dForward*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the derivative of f(x) at x0 using a step size h.
  ## Uses forward difference which has accuracy O(h)
  result = (f(x0 + h) - f(x0)) / h

proc diff1dBackward*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the derivative of f(x) at x0 using a step size h.
  ## Uses backward difference which has accuracy O(h)
  result = (f(x0) - f(x0 - h)) / h

proc diff1dCentral*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the derivative of f(x) at x0 using a step size h.
  ## Uses central difference which has accuracy O(h^2)
  result = (f(x0 + h) - f(x0 - h)) / (2*h)

proc secondDiff1dForward*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the second derivative of f(x) at x0 using a step size h.
  result = (f(x0 + 2*h) - 2*f(x0 + h) + f(x0)) / (h*h)

proc secondDiff1dBackward*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the second derivative of f(x) at x0 using a step size h.
  result = (f(x0) - 2*f(x0 - h) + f(x0 - 2*h)) / (h*h)

proc secondDiff1dCentral*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the second derivative of f(x) at x0 using a step size h.
  ## Uses central difference which has accuracy O(h^2)
  result = (f(x0 + h) - 2*f(x0) + f(x0 - h)) / (h*h)

proc tensorGradient*[U; T: not Tensor](
    f: proc(x: Tensor[U]): T,
    x0: Tensor[U],
    h: U = U(1e-6),
    fastMode: bool = false
  ): Tensor[T] =
  ## Calculates the gradient of f(x) w.r.t vector x at x0 using step size h.
  ## By default it uses central difference for approximating the derivatives. This requires two function evaluations per derivative.
  ## When fastMode is true it will instead use the forward difference which only uses 1 function evaluation per derivative but is less accurate.
  assert x0.rank == 1 # must be a 1d vector
  let f0 = f(x0) # make use of this with a `fastMode` switch so we use forward difference instead of central difference?
  let xLen = x0.shape[0]
  result = newTensor[T](xLen)
  var x = x0.clone()
  for i in 0 ..< xLen:
    x[i] += h
    let fPlusH = f(x)
    if fastMode:
      x[i] -= h # restore to original
      result[i] = (fPlusH - f0) / h
    else:
      x[i] -= 2*h
      let fMinusH = f(x)
      x[i] += h # restore to original (± float error)
      result[i] = (fPlusH - fMinusH) / (2 * h)

proc tensorGradient*[U, T](
    f: proc(x: Tensor[U]): Tensor[T],
    x0: Tensor[U],
    h: U = U(1e-6),
    fastMode: bool = false
  ): Tensor[T] =
  ## Calculates the gradient of f(x) w.r.t vector x at x0 using step size h.
  ## Every column is the gradient of one component of f.
  ## By default it uses central difference for approximating the derivatives. This requires two function evaluations per derivative.
  ## When fastMode is true it will instead use the forward difference which only uses 1 function evaluation per derivative but is less accurate.
  assert x0.rank == 1 # must be a 1d vector
  let f0 = f(x0) # make use of this with a `fastMode` switch so we use forward difference instead of central difference?
  assert f0.rank == 1
  let rows = x0.shape[0]
  let cols = f0.shape[0]
  result = newTensor[T](rows, cols)
  var x = x0.clone()
  for i in 0 ..< rows:
    x[i] += h
    let fPlusH = f(x)
    if fastMode:
      x[i] -= h # restore to original
      result[i, _] = ((fPlusH - f0) / h).reshape(1, cols)
    else:
      x[i] -= 2*h
      let fMinusH = f(x)
      x[i] += h # restore to original (± float error)
      result[i, _] = ((fPlusH - fMinusH) / (2 * h)).reshape(1, cols)

proc tensorJacobian*[U, T](
    f: proc(x: Tensor[U]): Tensor[T],
    x0: Tensor[U],
    h: U = U(1e-6),
    fastMode: bool = false
  ): Tensor[T] =
  ## Calculates the jacobian of f(x) w.r.t vector x at x0 using step size h.
  ## Every row is the gradient of one component of f.
  ## By default it uses central difference for approximating the derivatives. This requires two function evaluations per derivative.
  ## When fastMode is true it will instead use the forward difference which only uses 1 function evaluation per derivative but is less accurate.
  transpose(tensorGradient(f, x0, h, fastMode))

proc mixedDerivative*[U, T](f: proc(x: Tensor[U]): T, x0: var Tensor[U], indices: (int, int), h: U = U(1e-6)): T =
  result = 0
  let i = indices[0]
  let j = indices[1]
  # f(x+h, y+h)
  x0[i] += h
  x0[j] += h
  result += f(x0)

  # f(x+h, y-h)
  x0[j] -= 2*h
  result -= f(x0)

  # f(x-h, y-h)
  x0[i] -= 2*h
  result += f(x0)

  # f(x-h, y+h)
  x0[j] += 2*h
  result -= f(x0)

  # restore x0
  x0[i] += h
  x0[j] -= h

  result *= 1 / (4 * h*h)


proc tensorHessian*[U; T: not Tensor](
    f: proc(x: Tensor[U]): T,
    x0: Tensor[U],
    h: U = U(1e-6)
  ): Tensor[T] =
  assert x0.rank == 1 # must be a 1d vector
  let f0 = f(x0)
  let xLen = x0.shape[0]
  var x = x0.clone()
  result = zeros[T](xLen, xLen)
  for i in 0 ..< xLen:
    for j in i ..< xLen:
      let mixed = mixedDerivative(f, x, (i, j), h)
      result[i, j] = mixed
      result[j, i] = mixed

proc checkGradient*[U; T: not Tensor](f: proc(x: Tensor[U]): T, fGrad: proc(x: Tensor[U]): Tensor[T], x0: Tensor[U], tol: T): bool =
  ## Checks if the provided gradient function `fGrad` gives the same values as numeric gradient.
  let numGrad = tensorGradient(f, x0)
  let grad = fGrad(x0)
  result = true
  for i, x in abs(numGrad - grad):
    if x > tol:
      echo fmt"Gradient at index {i[0]} has error: {x} (tol = {tol})"
      result = false

proc checkGradient*[U; T](f: proc(x: Tensor[U]): Tensor[T], fGrad: proc(x: Tensor[U]): Tensor[T], x0: Tensor[U], tol: T): bool =
  ## Checks if the provided gradient function `fGrad` gives the same values as numeric gradient.
  let numGrad = tensorGradient(f, x0)
  let grad = fGrad(x0)
  result = true
  for i, x in abs(numGrad - grad):
    if x > tol:
      echo fmt"Gradient at index {i[0]} has error: {x} (tol = {tol})"
      result = false


when isMainModule:
  import std/math
  import benchy
  proc f1(x: Tensor[float]): Tensor[float] =
    x.sum(0)
  let x0 = ones[float](10)
  echo tensorGradient(f1, x0, 1e-6)
  echo tensorGradient(f1, x0, 1e-6, true)
  echo tensorJacobian(f1, x0, 1e-6)

  proc f2(x: Tensor[float]): float =
    sum(x)
  echo tensorGradient(f2, x0, 1e-6)
  echo tensorGradient(f2, x0, 1e-6, true)

  let N = 1000
  timeIt "slow mode":
    for i in 0 .. N:
      keep tensorGradient(f1, x0, 1e-6, false)
  timeIt "fast mode":
    for i in 0 .. N:
      keep tensorGradient(f1, x0, 1e-6, true)
  timeIt "slow mode float":
    for i in 0 .. N:
      keep tensorGradient(f2, x0, 1e-6, false)
  timeIt "fast mode float":
    for i in 0 .. N:
      keep tensorGradient(f2, x0, 1e-6, true)
  timeIt "jacobian slow":
    for i in 0 .. N:
      keep tensorJacobian(f1, x0, 1e-6, false)
