import std/strformat
import arraymancer

proc diff1dForward*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the derivative of f(x) at x0 using a step size h.
  ## Uses the forward difference, which has accuracy O(h).
  result = (f(x0 + h) - f(x0)) / h

proc diff1dBackward*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the derivative of f(x) at x0 using a step size h.
  ## Uses the backward difference, which has accuracy O(h).
  result = (f(x0) - f(x0 - h)) / h

proc diff1dCentral*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the derivative of f(x) at x0 using a step size h.
  ## Uses the central difference, which has accuracy O(h^2).
  result = (f(x0 + h) - f(x0 - h)) / (2*h)

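# Illustrative usage sketch (added example, not from the original commit): for
# f(x) = x^3 at x = 1 the exact derivative is 3. With h = 1e-3 the O(h^2) central
# difference should land much closer to 3 than the O(h) forward difference.
when isMainModule:
  block:
    proc cube(x: float): float = x * x * x
    let dForward = diff1dForward(cube, 1.0, 1e-3)
    let dCentral = diff1dCentral(cube, 1.0, 1e-3)
    doAssert abs(dCentral - 3.0) < 1e-4
    doAssert abs(dCentral - 3.0) < abs(dForward - 3.0)
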
proc secondDiff1dForward*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the second derivative of f(x) at x0 using a step size h.
  ## Uses the forward difference, which has accuracy O(h).
  result = (f(x0 + 2*h) - 2*f(x0 + h) + f(x0)) / (h*h)

proc secondDiff1dBackward*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the second derivative of f(x) at x0 using a step size h.
  ## Uses the backward difference, which has accuracy O(h).
  result = (f(x0) - 2*f(x0 - h) + f(x0 - 2*h)) / (h*h)

proc secondDiff1dCentral*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the second derivative of f(x) at x0 using a step size h.
  ## Uses the central difference, which has accuracy O(h^2).
  result = (f(x0 + h) - 2*f(x0) + f(x0 - h)) / (h*h)

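# Illustrative usage sketch (added example, not from the original commit): for
# f(x) = x^3 the second derivative at x = 2 is 12. A slightly larger step (1e-4)
# is used so the cancellation error in the h^2 division stays small.
when isMainModule:
  block:
    proc cube2(x: float): float = x * x * x
    doAssert abs(secondDiff1dCentral(cube2, 2.0, 1e-4) - 12.0) < 1e-4
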
proc tensorGradient*[U; T: not Tensor](
    f: proc(x: Tensor[U]): T,
    x0: Tensor[U],
    h: U = U(1e-6),
    fastMode: bool = false
  ): Tensor[T] =
  ## Calculates the gradient of f(x) w.r.t. vector x at x0 using step size h.
  ## By default it uses the central difference to approximate the derivatives, which requires two function evaluations per derivative.
  ## When fastMode is true it instead uses the forward difference, which only needs one function evaluation per derivative but is less accurate.
  assert x0.rank == 1 # must be a 1d vector
  let f0 = f(x0) # only used by the forward difference when fastMode is true
  let xLen = x0.shape[0]
  result = newTensor[T](xLen)
  var x = x0.clone()
  for i in 0 ..< xLen:
    x[i] += h
    let fPlusH = f(x)
    if fastMode:
      x[i] -= h # restore to original
      result[i] = (fPlusH - f0) / h
    else:
      x[i] -= 2*h
      let fMinusH = f(x)
      x[i] += h # restore to original (± float error)
      result[i] = (fPlusH - fMinusH) / (2 * h)

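# Illustrative usage sketch (added example, not from the original commit): the
# gradient of f(x) = sum_i x_i^2 is 2*x, so at [1, 2, 3] we expect roughly [2, 4, 6].
when isMainModule:
  block:
    proc sumSquares(x: Tensor[float]): float = sum(x *. x)
    let xg = [1.0, 2.0, 3.0].toTensor
    let g = tensorGradient(sumSquares, xg)
    for i in 0 ..< 3:
      doAssert abs(g[i] - 2.0 * xg[i]) < 1e-4
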
proc tensorGradient*[U, T](
    f: proc(x: Tensor[U]): Tensor[T],
    x0: Tensor[U],
    h: U = U(1e-6),
    fastMode: bool = false
  ): Tensor[T] =
  ## Calculates the gradient of f(x) w.r.t. vector x at x0 using step size h.
  ## The result has shape (x0.shape[0], f(x0).shape[0]): every column is the gradient of one component of f.
  ## By default it uses the central difference to approximate the derivatives, which requires two function evaluations per derivative.
  ## When fastMode is true it instead uses the forward difference, which only needs one function evaluation per derivative but is less accurate.
  assert x0.rank == 1 # must be a 1d vector
  let f0 = f(x0) # needed for the output shape; also reused by the forward difference in fastMode
  assert f0.rank == 1
  let rows = x0.shape[0]
  let cols = f0.shape[0]
  result = newTensor[T](rows, cols)
  var x = x0.clone()
  for i in 0 ..< rows:
    x[i] += h
    let fPlusH = f(x)
    if fastMode:
      x[i] -= h # restore to original
      result[i, _] = ((fPlusH - f0) / h).reshape(1, cols)
    else:
      x[i] -= 2*h
      let fMinusH = f(x)
      x[i] += h # restore to original (± float error)
      result[i, _] = ((fPlusH - fMinusH) / (2 * h)).reshape(1, cols)

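# Illustrative usage sketch (added example, not from the original commit): for
# f(x) = [x0 + x1, x0 * x1] at [2, 3] the result is 2x2 and column 1 (the gradient
# of x0 * x1) should be approximately [x1, x0] = [3, 2].
when isMainModule:
  block:
    proc fPair(x: Tensor[float]): Tensor[float] = [x[0] + x[1], x[0] * x[1]].toTensor
    let xp = [2.0, 3.0].toTensor
    let gm = tensorGradient(fPair, xp)
    doAssert gm.shape[0] == 2 and gm.shape[1] == 2
    doAssert abs(gm[0, 1] - 3.0) < 1e-4 # d(x0*x1)/dx0 = x1
    doAssert abs(gm[1, 1] - 2.0) < 1e-4 # d(x0*x1)/dx1 = x0
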
proc tensorJacobian*[U, T](
    f: proc(x: Tensor[U]): Tensor[T],
    x0: Tensor[U],
    h: U = U(1e-6),
    fastMode: bool = false
  ): Tensor[T] =
  ## Calculates the Jacobian of f(x) w.r.t. vector x at x0 using step size h.
  ## The result has shape (f(x0).shape[0], x0.shape[0]): every row is the gradient of one component of f.
  ## By default it uses the central difference to approximate the derivatives, which requires two function evaluations per derivative.
  ## When fastMode is true it instead uses the forward difference, which only needs one function evaluation per derivative but is less accurate.
  transpose(tensorGradient(f, x0, h, fastMode))

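# Illustrative usage sketch (added example, not from the original commit): the
# Jacobian of f(x) = [x0 * x1, x0 + x1] at [2, 3] is approximately [[3, 2], [1, 1]],
# i.e. the transpose of the gradient layout above.
when isMainModule:
  block:
    proc fVec(x: Tensor[float]): Tensor[float] = [x[0] * x[1], x[0] + x[1]].toTensor
    let xj = [2.0, 3.0].toTensor
    let jac = tensorJacobian(fVec, xj)
    doAssert abs(jac[0, 0] - 3.0) < 1e-4 # d(x0*x1)/dx0
    doAssert abs(jac[0, 1] - 2.0) < 1e-4 # d(x0*x1)/dx1
    doAssert abs(jac[1, 0] - 1.0) < 1e-4 # d(x0+x1)/dx0
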
proc mixedDerivative*[U, T](f: proc(x: Tensor[U]): T, x0: var Tensor[U], indices: (int, int), h: U = U(1e-6)): T =
  ## Numerically calculate the mixed derivative d^2f/(dx_i dx_j) of f at x0 using a step size h.
  ## x0 is shifted in place during the evaluations and restored (up to float error) before returning.
  result = 0
  let i = indices[0]
  let j = indices[1]
  # f(x+h, y+h)
  x0[i] += h
  x0[j] += h
  result += f(x0)

  # f(x+h, y-h)
  x0[j] -= 2*h
  result -= f(x0)

  # f(x-h, y-h)
  x0[i] -= 2*h
  result += f(x0)

  # f(x-h, y+h)
  x0[j] += 2*h
  result -= f(x0)

  # restore x0
  x0[i] += h
  x0[j] -= h

  result *= 1 / (4 * h*h)

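# Illustrative usage sketch (added example, not from the original commit): for
# f(x) = x0 * x1 the mixed derivative d^2f/(dx0 dx1) is exactly 1. Note that the
# point is passed as `var` and is restored in place (up to float error).
when isMainModule:
  block:
    proc fProd(x: Tensor[float]): float = x[0] * x[1]
    var xm = [2.0, 3.0].toTensor
    doAssert abs(mixedDerivative(fProd, xm, (0, 1), 1e-4) - 1.0) < 1e-4
    doAssert abs(xm[0] - 2.0) < 1e-12 and abs(xm[1] - 3.0) < 1e-12 # restored
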
proc tensorHessian*[U; T: not Tensor](
    f: proc(x: Tensor[U]): T,
    x0: Tensor[U],
    h: U = U(1e-6)
  ): Tensor[T] =
  ## Calculates the Hessian of f(x) w.r.t. vector x at x0 using step size h.
  ## Only the upper triangle is computed, so the result is symmetric by construction.
  assert x0.rank == 1 # must be a 1d vector
  let xLen = x0.shape[0]
  var x = x0.clone()
  result = zeros[T](xLen, xLen)
  for i in 0 ..< xLen:
    for j in i ..< xLen:
      let mixed = mixedDerivative(f, x, (i, j), h)
      result[i, j] = mixed
      result[j, i] = mixed

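# Illustrative usage sketch (added example, not from the original commit): for
# f(x) = x0^2 * x1 the Hessian at [1, 2] is [[2*x1, 2*x0], [2*x0, 0]] = [[4, 2], [2, 0]].
# A step of 1e-4 keeps the cancellation error in the h^2 division small.
when isMainModule:
  block:
    proc fH(x: Tensor[float]): float = x[0] * x[0] * x[1]
    let xh = [1.0, 2.0].toTensor
    let hess = tensorHessian(fH, xh, 1e-4)
    doAssert abs(hess[0, 0] - 4.0) < 1e-4
    doAssert abs(hess[0, 1] - 2.0) < 1e-4
    doAssert abs(hess[1, 0] - 2.0) < 1e-4
    doAssert abs(hess[1, 1]) < 1e-4
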
proc checkGradient*[U; T: not Tensor](f: proc(x: Tensor[U]): T, fGrad: proc(x: Tensor[U]): Tensor[T], x0: Tensor[U], tol: T): bool =
  ## Checks if the provided gradient function `fGrad` gives the same values as the numeric gradient at x0.
  ## Echoes every component whose absolute error exceeds `tol` and returns false if any does.
  let numGrad = tensorGradient(f, x0)
  let grad = fGrad(x0)
  result = true
  for i, x in abs(numGrad - grad):
    if x > tol:
      echo fmt"Gradient at index {i[0]} has error: {x} (tol = {tol})"
      result = false

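# Illustrative usage sketch (added example, not from the original commit): the
# analytic gradient of f(x) = sum_i x_i^2 is 2*x, so checkGradient should pass
# with a loose tolerance.
when isMainModule:
  block:
    proc fSq(x: Tensor[float]): float = sum(x *. x)
    proc fSqGrad(x: Tensor[float]): Tensor[float] = x + x # elementwise 2*x
    doAssert checkGradient(fSq, fSqGrad, [1.0, 2.0, 3.0].toTensor, 1e-4)
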
proc checkGradient*[U; T](f: proc(x: Tensor[U]): Tensor[T], fGrad: proc(x: Tensor[U]): Tensor[T], x0: Tensor[U], tol: T): bool =
  ## Checks if the provided gradient function `fGrad` gives the same values as the numeric gradient at x0.
  ## Echoes every component whose absolute error exceeds `tol` and returns false if any does.
  let numGrad = tensorGradient(f, x0)
  let grad = fGrad(x0)
  result = true
  for i, x in abs(numGrad - grad):
    if x > tol:
      echo fmt"Gradient at index {i} has error: {x} (tol = {tol})"
      result = false

when isMainModule:
  import std/math
  import benchy
  proc f1(x: Tensor[float]): Tensor[float] =
    x.sum(0)
  let x0 = ones[float](10)
  echo tensorGradient(f1, x0, 1e-6)
  echo tensorGradient(f1, x0, 1e-6, true)
  echo tensorJacobian(f1, x0, 1e-6)

  proc f2(x: Tensor[float]): float =
    sum(x)
  echo tensorGradient(f2, x0, 1e-6)
  echo tensorGradient(f2, x0, 1e-6, true)

  let N = 1000
  timeIt "slow mode":
    for i in 0 .. N:
      keep tensorGradient(f1, x0, 1e-6, false)
  timeIt "fast mode":
    for i in 0 .. N:
      keep tensorGradient(f1, x0, 1e-6, true)
  timeIt "slow mode float":
    for i in 0 .. N:
      keep tensorGradient(f2, x0, 1e-6, false)
  timeIt "fast mode float":
    for i in 0 .. N:
      keep tensorGradient(f2, x0, 1e-6, true)
  timeIt "jacobian slow":
    for i in 0 .. N:
      keep tensorJacobian(f1, x0, 1e-6, false)