diff --git a/CMakeLists.txt b/CMakeLists.txt index 490f7ff1..39e11cda 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,8 @@ add_library(neural-fortran src/nf/nf_layer_constructors_submodule.f90 src/nf/nf_layer.f90 src/nf/nf_layer_submodule.f90 + src/nf/nf_locally_connected_1d.f90 + src/nf/nf_locally_connected_1d_submodule.f90 src/nf/nf_loss.f90 src/nf/nf_loss_submodule.f90 src/nf/nf_maxpool2d_layer.f90 @@ -47,6 +49,8 @@ add_library(neural-fortran src/nf/nf_parallel.f90 src/nf/nf_parallel_submodule.f90 src/nf/nf_random.f90 + src/nf/nf_reshape_generalized.f90 + src/nf/nf_reshape_generalized_submodule.f90 src/nf/nf_reshape_layer.f90 src/nf/nf_reshape_layer_submodule.f90 src/nf/io/nf_io_binary.f90 diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 28cf71a7..7632909e 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -1,5 +1,6 @@ foreach(execid cnn_mnist + cnn_mnist_1d dense_mnist get_set_network_params network_parameters diff --git a/example/cnn_mnist.f90 b/example/cnn_mnist.f90 index bec50b80..47a189ea 100644 --- a/example/cnn_mnist.f90 +++ b/example/cnn_mnist.f90 @@ -12,7 +12,7 @@ program cnn_mnist real, allocatable :: validation_images(:,:), validation_labels(:) real, allocatable :: testing_images(:,:), testing_labels(:) integer :: n - integer, parameter :: num_epochs = 10 + integer, parameter :: num_epochs = 20 call load_mnist(training_images, training_labels, & validation_images, validation_labels, & @@ -35,9 +35,9 @@ program cnn_mnist call net % train( & training_images, & label_digits(training_labels), & - batch_size=128, & + batch_size=16, & epochs=1, & - optimizer=sgd(learning_rate=3.) & + optimizer=sgd(learning_rate=0.003) & ) print '(a,i2,a,f5.2,a)', 'Epoch ', n, ' done, Accuracy: ', accuracy( & diff --git a/example/cnn_mnist_1d.f90 b/example/cnn_mnist_1d.f90 new file mode 100644 index 00000000..f51b5cd5 --- /dev/null +++ b/example/cnn_mnist_1d.f90 @@ -0,0 +1,64 @@ +program cnn_mnist + + use nf, only: network, sgd, & + input, conv2d, maxpool2d, flatten, dense, reshape, reshape_generalized, locally_connected_1d, & + load_mnist, label_digits, softmax, relu + + implicit none + + type(network) :: net + + real, allocatable :: training_images(:,:), training_labels(:) + real, allocatable :: validation_images(:,:), validation_labels(:) + real, allocatable :: testing_images(:,:), testing_labels(:) + integer :: n + integer, parameter :: num_epochs = 10 + + call load_mnist(training_images, training_labels, & + validation_images, validation_labels, & + testing_images, testing_labels) + + net = network([ & + input(784), & + reshape_generalized([28, 28]), & + locally_connected_1d(filters=8, kernel_size=3, activation=relu()), & + dense(10, activation=softmax()) & + ]) + + call net % print_info() + + epochs: do n = 1, num_epochs + + call net % train( & + training_images, & + label_digits(training_labels), & + batch_size=16, & + epochs=1, & + optimizer=sgd(learning_rate=1) & + ) + + print '(a,i2,a,f5.2,a)', 'Epoch ', n, ' done, Accuracy: ', accuracy( & + net, validation_images, label_digits(validation_labels)) * 100, ' %' + + end do epochs + + print '(a,f5.2,a)', 'Testing accuracy: ', & + accuracy(net, testing_images, label_digits(testing_labels)) * 100, '%' + + contains + + real function accuracy(net, x, y) + type(network), intent(in out) :: net + real, intent(in) :: x(:,:), y(:,:) + integer :: i, good + good = 0 + do i = 1, size(x, dim=2) + if (all(maxloc(net % predict(x(:,i))) == maxloc(y(:,i)))) then + good = good + 1 + end if + end do + accuracy = 
real(good) / size(x, dim=2) + end function accuracy + + end program cnn_mnist + \ No newline at end of file diff --git a/src/nf.f90 b/src/nf.f90 index b97d9e62..71689f14 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -3,7 +3,7 @@ module nf use nf_datasets_mnist, only: label_digits, load_mnist use nf_layer, only: layer use nf_layer_constructors, only: & - conv2d, dense, flatten, input, maxpool2d, reshape + conv2d, dense, flatten, input, maxpool2d, reshape, reshape_generalized, locally_connected_1d use nf_loss, only: mse, quadratic use nf_metrics, only: corr, maxabs use nf_network, only: network diff --git a/src/nf/nf_activation.f90 b/src/nf/nf_activation.f90 index 309b43d2..caeab138 100644 --- a/src/nf/nf_activation.f90 +++ b/src/nf/nf_activation.f90 @@ -25,12 +25,14 @@ module nf_activation contains procedure(eval_1d_i), deferred :: eval_1d procedure(eval_1d_i), deferred :: eval_1d_prime + procedure(eval_2d_i), deferred :: eval_2d + procedure(eval_2d_i), deferred :: eval_2d_prime procedure(eval_3d_i), deferred :: eval_3d procedure(eval_3d_i), deferred :: eval_3d_prime procedure :: get_name - generic :: eval => eval_1d, eval_3d - generic :: eval_prime => eval_1d_prime, eval_3d_prime + generic :: eval => eval_1d, eval_2d, eval_3d + generic :: eval_prime => eval_1d_prime, eval_2d_prime, eval_3d_prime end type activation_function @@ -43,6 +45,13 @@ pure function eval_1d_i(self, x) result(res) real :: res(size(x)) end function eval_1d_i + pure function eval_2d_i(self, x) result(res) + import :: activation_function + class(activation_function), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + end function eval_2d_i + pure function eval_3d_i(self, x) result(res) import :: activation_function class(activation_function), intent(in) :: self @@ -57,6 +66,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_elu procedure :: eval_1d_prime => eval_1d_elu_prime + procedure :: eval_2d => eval_2d_elu + procedure :: eval_2d_prime => eval_2d_elu_prime procedure :: eval_3d => eval_3d_elu procedure :: eval_3d_prime => eval_3d_elu_prime end type elu @@ -65,6 +76,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_exponential procedure :: eval_1d_prime => eval_1d_exponential + procedure :: eval_2d => eval_2d_exponential + procedure :: eval_2d_prime => eval_2d_exponential procedure :: eval_3d => eval_3d_exponential procedure :: eval_3d_prime => eval_3d_exponential end type exponential @@ -73,6 +86,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_gaussian procedure :: eval_1d_prime => eval_1d_gaussian_prime + procedure :: eval_2d => eval_2d_gaussian + procedure :: eval_2d_prime => eval_2d_gaussian_prime procedure :: eval_3d => eval_3d_gaussian procedure :: eval_3d_prime => eval_3d_gaussian_prime end type gaussian @@ -81,6 +96,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_linear procedure :: eval_1d_prime => eval_1d_linear_prime + procedure :: eval_2d => eval_2d_linear + procedure :: eval_2d_prime => eval_2d_linear_prime procedure :: eval_3d => eval_3d_linear procedure :: eval_3d_prime => eval_3d_linear_prime end type linear @@ -89,6 +106,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_relu procedure :: eval_1d_prime => eval_1d_relu_prime + procedure :: eval_2d => eval_2d_relu + procedure :: eval_2d_prime => eval_2d_relu_prime procedure :: eval_3d => eval_3d_relu procedure :: eval_3d_prime => eval_3d_relu_prime end type relu @@ -98,6 +117,8 @@ end function eval_3d_i contains procedure :: 
eval_1d => eval_1d_leaky_relu procedure :: eval_1d_prime => eval_1d_leaky_relu_prime + procedure :: eval_2d => eval_2d_leaky_relu + procedure :: eval_2d_prime => eval_2d_leaky_relu_prime procedure :: eval_3d => eval_3d_leaky_relu procedure :: eval_3d_prime => eval_3d_leaky_relu_prime end type leaky_relu @@ -106,6 +127,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_sigmoid procedure :: eval_1d_prime => eval_1d_sigmoid_prime + procedure :: eval_2d => eval_2d_sigmoid + procedure :: eval_2d_prime => eval_2d_sigmoid_prime procedure :: eval_3d => eval_3d_sigmoid procedure :: eval_3d_prime => eval_3d_sigmoid_prime end type sigmoid @@ -114,6 +137,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_softmax procedure :: eval_1d_prime => eval_1d_softmax_prime + procedure :: eval_2d => eval_2d_softmax + procedure :: eval_2d_prime => eval_2d_softmax_prime procedure :: eval_3d => eval_3d_softmax procedure :: eval_3d_prime => eval_3d_softmax_prime end type softmax @@ -122,6 +147,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_softplus procedure :: eval_1d_prime => eval_1d_softplus_prime + procedure :: eval_2d => eval_2d_softplus + procedure :: eval_2d_prime => eval_2d_softplus_prime procedure :: eval_3d => eval_3d_softplus procedure :: eval_3d_prime => eval_3d_softplus_prime end type softplus @@ -130,6 +157,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_step procedure :: eval_1d_prime => eval_1d_step_prime + procedure :: eval_2d => eval_2d_step + procedure :: eval_2d_prime => eval_2d_step_prime procedure :: eval_3d => eval_3d_step procedure :: eval_3d_prime => eval_3d_step_prime end type step @@ -138,6 +167,8 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_tanh procedure :: eval_1d_prime => eval_1d_tanh_prime + procedure :: eval_2d => eval_2d_tanh + procedure :: eval_2d_prime => eval_2d_tanh_prime procedure :: eval_3d => eval_3d_tanh procedure :: eval_3d_prime => eval_3d_tanh_prime end type tanhf @@ -147,14 +178,16 @@ end function eval_3d_i contains procedure :: eval_1d => eval_1d_celu procedure :: eval_1d_prime => eval_1d_celu_prime + procedure :: eval_2d => eval_2d_celu + procedure :: eval_2d_prime => eval_2d_celu_prime procedure :: eval_3d => eval_3d_celu procedure :: eval_3d_prime => eval_3d_celu_prime end type celu contains + ! ELU Activation Functions pure function eval_1d_elu(self, x) result(res) - ! Exponential Linear Unit (ELU) activation function. class(elu), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -166,8 +199,6 @@ pure function eval_1d_elu(self, x) result(res) end function eval_1d_elu pure function eval_1d_elu_prime(self, x) result(res) - ! First derivative of the Exponential Linear Unit (ELU) - ! activation function. 
class(elu), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -178,8 +209,29 @@ pure function eval_1d_elu_prime(self, x) result(res) end where end function eval_1d_elu_prime + pure function eval_2d_elu(self, x) result(res) + class(elu), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + where (x >= 0) + res = x + elsewhere + res = self % alpha * (exp(x) - 1) + end where + end function eval_2d_elu + + pure function eval_2d_elu_prime(self, x) result(res) + class(elu), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + where (x >= 0) + res = 1 + elsewhere + res = self % alpha * exp(x) + end where + end function eval_2d_elu_prime + pure function eval_3d_elu(self, x) result(res) - ! Exponential Linear Unit (ELU) activation function. class(elu), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -191,8 +243,6 @@ pure function eval_3d_elu(self, x) result(res) end function eval_3d_elu pure function eval_3d_elu_prime(self, x) result(res) - ! First derivative of the Exponential Linear Unit (ELU) - ! activation function. class(elu), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -203,24 +253,30 @@ pure function eval_3d_elu_prime(self, x) result(res) end where end function eval_3d_elu_prime + ! Exponential Activation Functions pure function eval_1d_exponential(self, x) result(res) - ! Exponential activation function. class(exponential), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = exp(x) end function eval_1d_exponential + pure function eval_2d_exponential(self, x) result(res) + class(exponential), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = exp(x) + end function eval_2d_exponential + pure function eval_3d_exponential(self, x) result(res) - ! Exponential activation function. class(exponential), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = exp(x) end function eval_3d_exponential + ! Gaussian Activation Functions pure function eval_1d_gaussian(self, x) result(res) - ! Gaussian activation function. class(gaussian), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -228,15 +284,27 @@ pure function eval_1d_gaussian(self, x) result(res) end function eval_1d_gaussian pure function eval_1d_gaussian_prime(self, x) result(res) - ! First derivative of the Gaussian activation function. class(gaussian), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = -2 * x * self % eval_1d(x) end function eval_1d_gaussian_prime + pure function eval_2d_gaussian(self, x) result(res) + class(gaussian), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = exp(-x**2) + end function eval_2d_gaussian + + pure function eval_2d_gaussian_prime(self, x) result(res) + class(gaussian), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = -2 * x * self % eval_2d(x) + end function eval_2d_gaussian_prime + pure function eval_3d_gaussian(self, x) result(res) - ! Gaussian activation function. class(gaussian), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -244,15 +312,14 @@ pure function eval_3d_gaussian(self, x) result(res) end function eval_3d_gaussian pure function eval_3d_gaussian_prime(self, x) result(res) - ! First derivative of the Gaussian activation function. 
class(gaussian), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = -2 * x * self % eval_3d(x) end function eval_3d_gaussian_prime + ! Linear Activation Functions pure function eval_1d_linear(self, x) result(res) - ! Linear activation function. class(linear), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -260,15 +327,27 @@ pure function eval_1d_linear(self, x) result(res) end function eval_1d_linear pure function eval_1d_linear_prime(self, x) result(res) - ! First derivative of the Linear activation function. class(linear), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = 1 end function eval_1d_linear_prime + pure function eval_2d_linear(self, x) result(res) + class(linear), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = x + end function eval_2d_linear + + pure function eval_2d_linear_prime(self, x) result(res) + class(linear), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = 1 + end function eval_2d_linear_prime + pure function eval_3d_linear(self, x) result(res) - ! Linear activation function. class(linear), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -276,15 +355,14 @@ pure function eval_3d_linear(self, x) result(res) end function eval_3d_linear pure function eval_3d_linear_prime(self, x) result(res) - ! First derivative of the Linear activation function. class(linear), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = 1 end function eval_3d_linear_prime + ! ReLU Activation Functions pure function eval_1d_relu(self, x) result(res) - !! Rectified Linear Unit (ReLU) activation function. class(relu), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -292,15 +370,27 @@ pure function eval_1d_relu(self, x) result(res) end function eval_1d_relu pure function eval_1d_relu_prime(self, x) result(res) - ! First derivative of the Rectified Linear Unit (ReLU) activation function. class(relu), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = merge(1., 0., x > 0) end function eval_1d_relu_prime + pure function eval_2d_relu(self, x) result(res) + class(relu), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = max(0., x) + end function eval_2d_relu + + pure function eval_2d_relu_prime(self, x) result(res) + class(relu), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = merge(1., 0., x > 0) + end function eval_2d_relu_prime + pure function eval_3d_relu(self, x) result(res) - !! Rectified Linear Unit (ReLU) activation function. class(relu), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -308,15 +398,14 @@ pure function eval_3d_relu(self, x) result(res) end function eval_3d_relu pure function eval_3d_relu_prime(self, x) result(res) - ! First derivative of the Rectified Linear Unit (ReLU) activation function. class(relu), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = merge(1., 0., x > 0) end function eval_3d_relu_prime + ! Leaky ReLU Activation Functions pure function eval_1d_leaky_relu(self, x) result(res) - !! Leaky Rectified Linear Unit (Leaky ReLU) activation function. 
class(leaky_relu), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -324,15 +413,27 @@ pure function eval_1d_leaky_relu(self, x) result(res) end function eval_1d_leaky_relu pure function eval_1d_leaky_relu_prime(self, x) result(res) - ! First derivative of the Leaky Rectified Linear Unit (Leaky ReLU) activation function. class(leaky_relu), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = merge(1., self%alpha, x > 0) end function eval_1d_leaky_relu_prime + pure function eval_2d_leaky_relu(self, x) result(res) + class(leaky_relu), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = max(self % alpha * x, x) + end function eval_2d_leaky_relu + + pure function eval_2d_leaky_relu_prime(self, x) result(res) + class(leaky_relu), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = merge(1., self%alpha, x > 0) + end function eval_2d_leaky_relu_prime + pure function eval_3d_leaky_relu(self, x) result(res) - !! Leaky Rectified Linear Unit (Leaky ReLU) activation function. class(leaky_relu), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -340,47 +441,57 @@ pure function eval_3d_leaky_relu(self, x) result(res) end function eval_3d_leaky_relu pure function eval_3d_leaky_relu_prime(self, x) result(res) - ! First derivative of the Leaky Rectified Linear Unit (Leaky ReLU) activation function. class(leaky_relu), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = merge(1., self%alpha, x > 0) end function eval_3d_leaky_relu_prime + ! Sigmoid Activation Functions pure function eval_1d_sigmoid(self, x) result(res) - ! Sigmoid activation function. class(sigmoid), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = 1 / (1 + exp(-x)) - endfunction eval_1d_sigmoid + end function eval_1d_sigmoid pure function eval_1d_sigmoid_prime(self, x) result(res) - ! First derivative of the sigmoid activation function. class(sigmoid), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = self % eval_1d(x) * (1 - self % eval_1d(x)) end function eval_1d_sigmoid_prime + pure function eval_2d_sigmoid(self, x) result(res) + class(sigmoid), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = 1 / (1 + exp(-x)) + end function eval_2d_sigmoid + + pure function eval_2d_sigmoid_prime(self, x) result(res) + class(sigmoid), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = self % eval_2d(x) * (1 - self % eval_2d(x)) + end function eval_2d_sigmoid_prime + pure function eval_3d_sigmoid(self, x) result(res) - ! Sigmoid activation function. class(sigmoid), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = 1 / (1 + exp(-x)) - endfunction eval_3d_sigmoid + end function eval_3d_sigmoid pure function eval_3d_sigmoid_prime(self, x) result(res) - ! First derivative of the sigmoid activation function. class(sigmoid), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = self % eval_3d(x) * (1 - self % eval_3d(x)) end function eval_3d_sigmoid_prime + ! Softmax Activation Functions pure function eval_1d_softmax(self, x) result(res) - !! 
Softmax activation function class(softmax), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -389,15 +500,28 @@ pure function eval_1d_softmax(self, x) result(res) end function eval_1d_softmax pure function eval_1d_softmax_prime(self, x) result(res) - !! Derivative of the softmax activation function. class(softmax), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = self%eval_1d(x) * (1 - self%eval_1d(x)) end function eval_1d_softmax_prime + pure function eval_2d_softmax(self, x) result(res) + class(softmax), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = exp(x - maxval(x)) + res = res / sum(res) + end function eval_2d_softmax + + pure function eval_2d_softmax_prime(self, x) result(res) + class(softmax), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = self % eval_2d(x) * (1 - self % eval_2d(x)) + end function eval_2d_softmax_prime + pure function eval_3d_softmax(self, x) result(res) - !! Softmax activation function class(softmax), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -406,15 +530,14 @@ pure function eval_3d_softmax(self, x) result(res) end function eval_3d_softmax pure function eval_3d_softmax_prime(self, x) result(res) - !! Derivative of the softmax activation function. class(softmax), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = self % eval_3d(x) * (1 - self % eval_3d(x)) end function eval_3d_softmax_prime + ! Softplus Activation Functions pure function eval_1d_softplus(self, x) result(res) - ! Softplus activation function. class(softplus), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -422,15 +545,27 @@ pure function eval_1d_softplus(self, x) result(res) end function eval_1d_softplus pure function eval_1d_softplus_prime(self, x) result(res) - class(softplus), intent(in) :: self - ! First derivative of the softplus activation function. + class(softplus), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = exp(x) / (exp(x) + 1) end function eval_1d_softplus_prime + pure function eval_2d_softplus(self, x) result(res) + class(softplus), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = log(exp(x) + 1) + end function eval_2d_softplus + + pure function eval_2d_softplus_prime(self, x) result(res) + class(softplus), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = exp(x) / (exp(x) + 1) + end function eval_2d_softplus_prime + pure function eval_3d_softplus(self, x) result(res) - ! Softplus activation function. class(softplus), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -438,15 +573,14 @@ pure function eval_3d_softplus(self, x) result(res) end function eval_3d_softplus pure function eval_3d_softplus_prime(self, x) result(res) - class(softplus), intent(in) :: self - ! First derivative of the softplus activation function. + class(softplus), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = exp(x) / (exp(x) + 1) end function eval_3d_softplus_prime + ! Step Activation Functions pure function eval_1d_step(self, x) result(res) - ! Step activation function. 
class(step), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -454,15 +588,27 @@ pure function eval_1d_step(self, x) result(res) end function eval_1d_step pure function eval_1d_step_prime(self, x) result(res) - ! First derivative of the step activation function. class(step), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = 0 end function eval_1d_step_prime + pure function eval_2d_step(self, x) result(res) + class(step), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = merge(1., 0., x > 0) + end function eval_2d_step + + pure function eval_2d_step_prime(self, x) result(res) + class(step), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = 0 + end function eval_2d_step_prime + pure function eval_3d_step(self, x) result(res) - ! Step activation function. class(step), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -470,15 +616,14 @@ pure function eval_3d_step(self, x) result(res) end function eval_3d_step pure function eval_3d_step_prime(self, x) result(res) - ! First derivative of the step activation function. class(step), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = 0 end function eval_3d_step_prime + ! Tanh Activation Functions pure function eval_1d_tanh(self, x) result(res) - ! Tangent hyperbolic activation function. class(tanhf), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -486,15 +631,27 @@ pure function eval_1d_tanh(self, x) result(res) end function eval_1d_tanh pure function eval_1d_tanh_prime(self, x) result(res) - ! First derivative of the tanh activation function. class(tanhf), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) res = 1 - tanh(x)**2 end function eval_1d_tanh_prime + pure function eval_2d_tanh(self, x) result(res) + class(tanhf), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = tanh(x) + end function eval_2d_tanh + + pure function eval_2d_tanh_prime(self, x) result(res) + class(tanhf), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + res = 1 - tanh(x)**2 + end function eval_2d_tanh_prime + pure function eval_3d_tanh(self, x) result(res) - ! Tangent hyperbolic activation function. class(tanhf), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -502,15 +659,14 @@ pure function eval_3d_tanh(self, x) result(res) end function eval_3d_tanh pure function eval_3d_tanh_prime(self, x) result(res) - ! First derivative of the tanh activation function. class(tanhf), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) res = 1 - tanh(x)**2 end function eval_3d_tanh_prime + ! CELU Activation Functions pure function eval_1d_celu(self, x) result(res) - ! Celu activation function. class(celu), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -519,10 +675,9 @@ pure function eval_1d_celu(self, x) result(res) else where res = self % alpha * (exp(x / self % alpha) - 1.0) end where - end function + end function eval_1d_celu pure function eval_1d_celu_prime(self, x) result(res) - ! Celu activation function. 
class(celu), intent(in) :: self real, intent(in) :: x(:) real :: res(size(x)) @@ -531,10 +686,31 @@ pure function eval_1d_celu_prime(self, x) result(res) else where res = exp(x / self % alpha) end where - end function + end function eval_1d_celu_prime + + pure function eval_2d_celu(self, x) result(res) + class(celu), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + where (x >= 0.0) + res = x + else where + res = self % alpha * (exp(x / self % alpha) - 1.0) + end where + end function eval_2d_celu + + pure function eval_2d_celu_prime(self, x) result(res) + class(celu), intent(in) :: self + real, intent(in) :: x(:,:) + real :: res(size(x,1),size(x,2)) + where (x >= 0.0) + res = 1.0 + else where + res = exp(x / self % alpha) + end where + end function eval_2d_celu_prime pure function eval_3d_celu(self, x) result(res) - ! Celu activation function. class(celu), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -543,10 +719,9 @@ pure function eval_3d_celu(self, x) result(res) else where res = self % alpha * (exp(x / self % alpha) - 1.0) end where - end function + end function eval_3d_celu pure function eval_3d_celu_prime(self, x) result(res) - ! Celu activation function. class(celu), intent(in) :: self real, intent(in) :: x(:,:,:) real :: res(size(x,1),size(x,2),size(x,3)) @@ -555,13 +730,10 @@ pure function eval_3d_celu_prime(self, x) result(res) else where res = exp(x / self % alpha) end where - end function + end function eval_3d_celu_prime + ! Utility Functions function get_activation_by_name(activation_name) result(res) - ! Workaround to get activation_function with some - ! hardcoded default parameters by its name. - ! Need this function since we get only activation name - ! from keras files. character(len=*), intent(in) :: activation_name class(activation_function), allocatable :: res @@ -611,16 +783,8 @@ function get_activation_by_name(activation_name) result(res) end function get_activation_by_name pure function get_name(self) result(name) - !! Return the name of the activation function. - !! - !! Normally we would place this in the definition of each type, however - !! accessing the name variable directly from the type would require type - !! guards just like we have here. This at least keeps all the type guards - !! in one place. class(activation_function), intent(in) :: self - !! The activation function instance. character(:), allocatable :: name - !! The name of the activation function. select type (self) class is (elu) name = 'elu' @@ -651,4 +815,4 @@ pure function get_name(self) result(name) end select end function get_name -end module nf_activation +end module nf_activation \ No newline at end of file diff --git a/src/nf/nf_datasets_mnist_submodule.f90 b/src/nf/nf_datasets_mnist_submodule.f90 index 842cafe1..a0bed0a8 100644 --- a/src/nf/nf_datasets_mnist_submodule.f90 +++ b/src/nf/nf_datasets_mnist_submodule.f90 @@ -50,9 +50,9 @@ module subroutine load_mnist(training_images, training_labels, & real, allocatable, intent(in out), optional :: testing_labels(:) integer, parameter :: dtype = 4, image_size = 784 - integer, parameter :: num_training_images = 50000 - integer, parameter :: num_validation_images = 10000 - integer, parameter :: num_testing_images = 10000 + integer, parameter :: num_training_images = 500 + integer, parameter :: num_validation_images = 100 + integer, parameter :: num_testing_images = 100 logical :: file_exists ! Check if MNIST data is present and download it if not. 
diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90
index 309be6e4..e063ccec 100644
--- a/src/nf/nf_layer_constructors.f90
+++ b/src/nf/nf_layer_constructors.f90
@@ -8,7 +8,7 @@ module nf_layer_constructors
   implicit none
 
   private
-  public :: conv2d, dense, flatten, input, maxpool2d, reshape
+  public :: conv2d, dense, flatten, input, locally_connected_1d, maxpool2d, reshape, reshape_generalized
 
   interface input
 
@@ -155,6 +155,33 @@ module function maxpool2d(pool_size, stride) result(res)
         !! Resulting layer instance
     end function maxpool2d
 
+    module function locally_connected_1d(filters, kernel_size, activation) result(res)
+      !! Locally connected 1-d layer constructor.
+      !!
+      !! This layer applies a sliding window along a 1-d input, like a
+      !! convolution, but the weights are not shared between window
+      !! positions: each output position has its own kernel and bias.
+      !! The input is 2-d: the number of channels and the input length.
+      !! A locally_connected_1d layer must not be the first layer in the network.
+      !!
+      !! Example:
+      !!
+      !! ```
+      !! use nf, only :: locally_connected_1d, relu, layer
+      !! type(layer) :: lc_layer
+      !! lc_layer = locally_connected_1d(filters=32, kernel_size=3)
+      !! lc_layer = locally_connected_1d(filters=32, kernel_size=3, activation=relu())
+      !! ```
+      integer, intent(in) :: filters
+        !! Number of filters in the output of the layer
+      integer, intent(in) :: kernel_size
+        !! Width of the sliding window, commonly 3 or 5
+      class(activation_function), intent(in), optional :: activation
+        !! Activation function (default ReLU)
+      type(layer) :: res
+        !! Resulting layer instance
+    end function locally_connected_1d
+
     module function reshape(output_shape) result(res)
       !! Rank-1 to rank-any reshape layer constructor.
       !! Currently implemented is only rank-3 for the output of the reshape.
@@ -166,6 +193,12 @@ module function reshape(output_shape) result(res)
         !! Resulting layer instance
     end function reshape
 
+    module function reshape_generalized(output_shape) result(res)
+      !! Rank-1 to rank-any reshape layer constructor for arbitrary output ranks.
+      integer, intent(in) :: output_shape(:)
+        !! Shape of the output
+      type(layer) :: res
+        !! Resulting layer instance
+    end function reshape_generalized
+
   end interface
 
 end module nf_layer_constructors
diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90
index 234b20b1..ec2e244f 100644
--- a/src/nf/nf_layer_constructors_submodule.f90
+++ b/src/nf/nf_layer_constructors_submodule.f90
@@ -6,8 +6,10 @@
   use nf_flatten_layer, only: flatten_layer
   use nf_input1d_layer, only: input1d_layer
   use nf_input3d_layer, only: input3d_layer
+  use nf_locally_connected_1d_layer, only: locally_connected_1d_layer
   use nf_maxpool2d_layer, only: maxpool2d_layer
   use nf_reshape_layer, only: reshape3d_layer
+  use nf_reshape_layer_generalized, only: reshape_generalized_layer
   use nf_activation, only: activation_function, relu, sigmoid
 
   implicit none
 
@@ -91,6 +93,31 @@ module function input3d(layer_shape) result(res)
     res % initialized = .true.
end function input3d + module function locally_connected_1d(filters, kernel_size, activation) result(res) + integer, intent(in) :: filters + integer, intent(in) :: kernel_size + class(activation_function), intent(in), optional :: activation + type(layer) :: res + class(activation_function), allocatable :: activation_tmp + + res % name = 'locally_connected_1d' + + if (present(activation)) then + allocate(activation_tmp, source=activation) + else + allocate(activation_tmp, source=relu()) + end if + + res % activation = activation_tmp % get_name() + + allocate( & + res % p, & + source=locally_connected_1d_layer(filters, kernel_size, activation_tmp) & + ) + + end function locally_connected_1d + + module function maxpool2d(pool_size, stride) result(res) integer, intent(in) :: pool_size integer, intent(in), optional :: stride @@ -134,4 +161,17 @@ module function reshape(output_shape) result(res) end function reshape + module function reshape_generalized(output_shape) result(res) + integer, intent(in) :: output_shape(:) !! Always treat as an array + type(layer) :: res + + res % name = 'reshape_generalized' + res % layer_shape = output_shape + + allocate(res % p, source=reshape_generalized_layer(output_shape)) + + end function reshape_generalized + + + end submodule nf_layer_constructors_submodule diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index c672581a..adbab011 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -6,8 +6,10 @@ use nf_flatten_layer, only: flatten_layer use nf_input1d_layer, only: input1d_layer use nf_input3d_layer, only: input3d_layer + use nf_locally_connected_1d_layer, only: locally_connected_1d_layer use nf_maxpool2d_layer, only: maxpool2d_layer use nf_reshape_layer, only: reshape3d_layer + use nf_reshape_layer_generalized, only: reshape_generalized_layer use nf_optimizers, only: optimizer_base_type contains @@ -292,6 +294,8 @@ elemental module function get_num_params(self) result(num_params) num_params = 0 type is (reshape3d_layer) num_params = 0 + type is (reshape_generalized_layer) + num_params = 0 class default error stop 'Unknown layer type.' end select @@ -317,6 +321,8 @@ module function get_params(self) result(params) ! No parameters to get. type is (reshape3d_layer) ! No parameters to get. + type is (reshape_generalized_layer) + ! No parameters to get. class default error stop 'Unknown layer type.' end select @@ -342,6 +348,8 @@ module function get_gradients(self) result(gradients) ! No gradients to get. type is (reshape3d_layer) ! No gradients to get. + type is (reshape_generalized_layer) + ! No gradients to get. class default error stop 'Unknown layer type.' end select @@ -398,7 +406,12 @@ module subroutine set_params(self, params) ! No parameters to set. write(stderr, '(a)') 'Warning: calling set_params() ' & // 'on a zero-parameter layer; nothing to do.' - + + type is (reshape_generalized_layer) + ! No parameters to set. + write(stderr, '(a)') 'Warning: calling set_params() ' & + // 'on a zero-parameter layer; nothing to do.' + class default error stop 'Unknown layer type.' end select diff --git a/src/nf/nf_locally_connected_1d.f90 b/src/nf/nf_locally_connected_1d.f90 new file mode 100644 index 00000000..c6e4e1a7 --- /dev/null +++ b/src/nf/nf_locally_connected_1d.f90 @@ -0,0 +1,112 @@ +module nf_locally_connected_1d_layer + !! This module provides a locally connected 1d layer type. 
+ + use nf_activation, only: activation_function + use nf_base_layer, only: base_layer + implicit none + + private + public :: locally_connected_1d_layer + + type, extends(base_layer) :: locally_connected_1d_layer + ! For a 1D layer, we assume an input shape of [channels, input_length] + integer :: channels ! number of input channels + integer :: input_length ! length of the 1D input + integer :: output_length ! computed as input_length - kernel_size + 1 + integer :: kernel_size ! size of the 1D window + integer :: filters ! number of filters (output channels) + + ! Parameters (unshared weights) + ! Kernel shape: (filters, output_length, channels, kernel_size) + real, allocatable :: kernel(:,:,:,:) + ! Biases shape: (filters, output_length) + real, allocatable :: biases(:,:) + + ! Forward-pass arrays + ! Pre-activation values: shape (filters, output_length) + real, allocatable :: z(:,:) + ! Activated output: shape (filters, output_length) + real, allocatable :: output(:,:) + + ! Gradients for backpropagation + ! Gradient for kernel, same shape as kernel + real, allocatable :: dw(:,:,:,:) + ! Gradient for biases, same shape as biases + real, allocatable :: db(:,:) + ! Gradient with respect to the input, shape (channels, input_length) + real, allocatable :: gradient(:,:) + + ! Activation function + class(activation_function), allocatable :: activation + contains + procedure :: forward + procedure :: backward + procedure :: get_gradients + procedure :: get_num_params + procedure :: get_params + procedure :: init + procedure :: set_params + end type locally_connected_1d_layer + + interface locally_connected_1d_layer + module function locally_connected_1d_layer_cons(filters, kernel_size, activation) result(res) + !! Constructor for the locally connected 1d layer. + integer, intent(in) :: filters + integer, intent(in) :: kernel_size + class(activation_function), intent(in):: activation + type(locally_connected_1d_layer) :: res + end function locally_connected_1d_layer_cons + end interface locally_connected_1d_layer + + interface + module subroutine init(self, input_shape) + !! Initialize the layer data structures. + !! input_shape: integer array of length 2, where + !! input_shape(1) = number of channels + !! input_shape(2) = input length + class(locally_connected_1d_layer), intent(inout) :: self + integer, intent(in) :: input_shape(:) + end subroutine init + + pure module subroutine forward(self, input) + !! Apply the forward pass. + !! Input shape: (channels, input_length) + class(locally_connected_1d_layer), intent(inout) :: self + real, intent(in) :: input(:,:) + end subroutine forward + + pure module subroutine backward(self, input, gradient) + !! Apply the backward pass. + !! input: shape (channels, input_length) + !! gradient: gradient w.r.t. output, shape (filters, output_length) + class(locally_connected_1d_layer), intent(inout) :: self + real, intent(in) :: input(:,:) + real, intent(in) :: gradient(:,:) + end subroutine backward + + pure module function get_num_params(self) result(num_params) + !! Get the total number of parameters (kernel + biases) + class(locally_connected_1d_layer), intent(in) :: self + integer :: num_params + end function get_num_params + + module function get_params(self) result(params) + !! Return a flattened vector of parameters (kernel then biases). + class(locally_connected_1d_layer), intent(in), target :: self + real, allocatable :: params(:) + end function get_params + + module function get_gradients(self) result(gradients) + !! 
Return a flattened vector of gradients (dw then db). + class(locally_connected_1d_layer), intent(in), target :: self + real, allocatable :: gradients(:) + end function get_gradients + + module subroutine set_params(self, params) + !! Set the parameters from a flattened vector. + class(locally_connected_1d_layer), intent(inout) :: self + real, intent(in) :: params(:) + end subroutine set_params + end interface + +end module nf_locally_connected_1d_layer diff --git a/src/nf/nf_locally_connected_1d_submodule.f90 b/src/nf/nf_locally_connected_1d_submodule.f90 new file mode 100644 index 00000000..6359e371 --- /dev/null +++ b/src/nf/nf_locally_connected_1d_submodule.f90 @@ -0,0 +1,223 @@ +submodule(nf_locally_connected_1d_layer) nf_locally_connected_1d_layer_submodule + + use nf_activation, only: activation_function + use nf_random, only: random_normal + implicit none + +contains + + !===================================================================== + ! Constructor: allocate and initialize a locally connected 1D layer. + !===================================================================== + module function locally_connected_1d_layer_cons(filters, kernel_size, activation) result(res) + integer, intent(in) :: filters + integer, intent(in) :: kernel_size + class(activation_function), intent(in):: activation + type(locally_connected_1d_layer) :: res + + res % kernel_size = kernel_size + res % filters = filters + res % activation_name = activation % get_name() + allocate(res % activation, source=activation) + end function locally_connected_1d_layer_cons + + !===================================================================== + ! Initialize the layer. + ! + ! Here we assume the input shape is an integer array of length 2: + ! input_shape(1): number of channels, + ! input_shape(2): length of the 1D input. + ! + ! The output length is computed as: + ! output_length = input_length - kernel_size + 1 + ! + ! The kernel weights are unshared so that each output position gets + ! its own set of weights. Their shape becomes: + ! (filters, output_length, channels, kernel_size) + ! + ! The biases are similarly unshared and allocated with shape: + ! (filters, output_length) + !===================================================================== + module subroutine init(self, input_shape) + class(locally_connected_1d_layer), intent(in out):: self + integer, intent(in) :: input_shape(:) + + ! Input shape: channels x input_length. + self % channels = input_shape(1) + self % input_length = input_shape(2) + self % output_length = self % input_length - self % kernel_size + 1 + + ! Allocate the output array: shape (filters, output_length) + allocate(self % output(self % filters, self % output_length)) + self % output = 0 + + ! Allocate the kernel. + ! Kernel shape: (filters, output_length, channels, kernel_size) + allocate(self % kernel(self % filters, self % output_length, self % channels, self % kernel_size)) + call random_normal(self % kernel) + self % kernel = self % kernel / self % kernel_size + + ! Allocate the biases: shape (filters, output_length) + allocate(self % biases(self % filters, self % output_length)) + self % biases = 0 + + ! Allocate the pre-activation array, z, with the same shape as output. + allocate(self % z, mold=self % output) + self % z = 0 + + ! Allocate the gradient for the input. + allocate(self % gradient(self % channels, self % input_length)) + self % gradient = 0 + + ! Allocate the gradients for the kernel and biases. 
+    allocate(self % dw, mold=self % kernel)
+    self % dw = 0
+
+    allocate(self % db, mold=self % biases)
+    self % db = 0
+
+  end subroutine init
+
+  !=====================================================================
+  ! Forward pass:
+  !   For each output position, extract the corresponding patch from
+  !   the input (of shape channels x kernel_size), compute the weighted
+  !   sum (using the unshared weights for that position), add the bias,
+  !   and then apply the activation function.
+  !
+  ! Input: real array of shape (channels, input_length)
+  ! Output: stored in self%output (shape: filters x output_length)
+  !=====================================================================
+  pure module subroutine forward(self, input)
+    class(locally_connected_1d_layer), intent(in out) :: self
+    real, intent(in) :: input(:,:)
+    integer :: pos, n
+
+    ! For each output position, the input patch is:
+    !   input(:, pos:pos+kernel_size-1)
+    ! Each z(n, pos) is written exactly once, so do concurrent is safe here.
+    do concurrent (pos = 1:self % output_length)
+      do concurrent (n = 1:self % filters)
+        self % z(n, pos) = sum( self % kernel(n, pos, :, :) * &
+                                input(:, pos:pos+self % kernel_size-1) ) + self % biases(n, pos)
+      end do
+    end do
+
+    ! Apply the activation function.
+    self % output = self % activation % eval(self % z)
+
+  end subroutine forward
+
+  !=====================================================================
+  ! Backward pass:
+  !   Given the gradient with respect to the output (dL/dy), this
+  !   routine computes the gradients with respect to the pre-activation,
+  !   the unshared kernel weights, the biases, and the input.
+  !
+  ! Here, gradient (dL/dy) has shape (filters, output_length).
+  !=====================================================================
+  pure module subroutine backward(self, input, gradient)
+    class(locally_connected_1d_layer), intent(in out) :: self
+    real, intent(in) :: input(:,:)
+    real, intent(in) :: gradient(:,:)
+    integer :: pos, n
+    real, allocatable :: gdz(:,:)  ! (filters, output_length)
+
+    ! Allocate a temporary array for the derivative of z.
+    allocate(gdz(self % filters, self % output_length))
+    ! gdz = dL/dy * activation'(z)
+    gdz = gradient * self % activation % eval_prime(self % z)
+
+    ! Update bias gradients. (Each bias is specific to an output position.)
+    self % db = self % db + gdz
+
+    ! Reset the gradients for the kernel and input.
+    self % dw = 0
+    self % gradient = 0
+
+    ! For each output position and filter, compute:
+    !   - dL/dw for the weights at that output position, and
+    !   - the contribution to dL/dx for the corresponding input patch.
+    ! Ordinary do loops are used here because neighboring iterations
+    ! accumulate into overlapping slices of self % gradient, which is
+    ! not permitted inside do concurrent.
+    do pos = 1, self % output_length
+      do n = 1, self % filters
+        ! The patch from the input corresponding to output position "pos"
+        ! is input(:, pos:pos+kernel_size-1).
+        self % dw(n, pos, :, :) = self % dw(n, pos, :, :) + &
+             input(:, pos:pos+self % kernel_size-1) * gdz(n, pos)
+
+        ! Each such output position contributes to the gradient of the input.
+        self % gradient(:, pos:pos+self % kernel_size-1) = &
+             self % gradient(:, pos:pos+self % kernel_size-1) + &
+             gdz(n, pos) * self % kernel(n, pos, :, :)
+      end do
+    end do
+
+    deallocate(gdz)
+
+  end subroutine backward
+
+  !=====================================================================
+  ! Return the total number of parameters.
+  !
+  ! For the locally connected layer this equals:
+  !
number of elements in the kernel + number of elements in the biases + !===================================================================== + pure module function get_num_params(self) result(num_params) + class(locally_connected_1d_layer), intent(in) :: self + integer :: num_params + + num_params = product(shape(self % kernel)) + product(shape(self % biases)) + end function get_num_params + + !===================================================================== + ! Return a flattened array containing all parameters. + ! + ! The parameters are taken in order: first all kernel weights, then + ! all biases. + !===================================================================== + module function get_params(self) result(params) + class(locally_connected_1d_layer), intent(in), target :: self + real, allocatable :: params(:) + real, pointer :: w_(:) => null() + + w_(1:size(self % kernel)) => self % kernel + params = [ w_, reshape(self % biases, [product(shape(self % biases))]) ] + end function get_params + + !===================================================================== + ! Return a flattened array containing all gradients. + ! + ! The gradients are taken in order: first all gradients for the kernel, + ! then the gradients for the biases. + !===================================================================== + module function get_gradients(self) result(gradients) + class(locally_connected_1d_layer), intent(in), target :: self + real, allocatable :: gradients(:) + real, pointer :: dw_(:) => null() + + dw_(1:size(self % dw)) => self % dw + gradients = [ dw_, reshape(self % db, [product(shape(self % db))]) ] + end function get_gradients + + !===================================================================== + ! Set the parameters of the layer from a flattened vector. + ! + ! The parameters vector is assumed to have the same number of elements + ! as returned by get_num_params. 
+ !===================================================================== + module subroutine set_params(self, params) + class(locally_connected_1d_layer), intent(inout):: self + real, intent(in) :: params(:) + integer :: num_kernel, num_bias, offset + + num_kernel = product(shape(self % kernel)) + num_bias = product(shape(self % biases)) + if (size(params) /= num_kernel + num_bias) then + error stop 'locally connected 1D layer % set_params: Number of parameters does not match' + end if + + self % kernel = reshape( params(1:num_kernel), shape(self % kernel) ) + offset = num_kernel + self % biases = reshape( params(offset+1:offset+num_bias), shape(self % biases) ) + end subroutine set_params + +end submodule nf_locally_connected_1d_layer_submodule diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 140c9226..15dea829 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -5,10 +5,12 @@ use nf_flatten_layer, only: flatten_layer use nf_input1d_layer, only: input1d_layer use nf_input3d_layer, only: input3d_layer + use nf_locally_connected_1d_layer, only: locally_connected_1d_layer use nf_maxpool2d_layer, only: maxpool2d_layer use nf_reshape_layer, only: reshape3d_layer + use nf_reshape_layer_generalized, only: reshape_generalized_layer use nf_layer, only: layer - use nf_layer_constructors, only: conv2d, dense, flatten, input, maxpool2d, reshape + use nf_layer_constructors, only: conv2d, dense, flatten, input, locally_connected_1d, maxpool2d, reshape, reshape_generalized use nf_loss, only: quadratic use nf_optimizers, only: optimizer_base_type, sgd use nf_parallel, only: tile_indices @@ -75,6 +77,9 @@ module function network_from_layers(layers) result(res) type is(reshape3d_layer) res % layers = [res % layers(:n-1), flatten(), res % layers(n:)] n = n + 1 + type is(reshape_generalized_layer) + res % layers = [res % layers(:n-1), flatten(), res % layers(n:)] + n = n + 1 class default n = n + 1 end select @@ -142,6 +147,8 @@ module subroutine backward(self, output, loss) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) type is(reshape3d_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) + type is(reshape_generalized_layer) + call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) end select end if diff --git a/src/nf/nf_reshape_generalized.f90 b/src/nf/nf_reshape_generalized.f90 new file mode 100644 index 00000000..8f41ead3 --- /dev/null +++ b/src/nf/nf_reshape_generalized.f90 @@ -0,0 +1,76 @@ +module nf_reshape_layer_generalized + + !! This module provides the concrete reshape layer type. + !! It is used internally by the layer type. + !! It is not intended to be used directly by the user. + + use nf_base_layer, only: base_layer + + implicit none + + private + public :: reshape_generalized_layer + + type, extends(base_layer) :: reshape_generalized_layer + + !! Concrete implementation of a reshape layer type + !! It implements reshaping for arbitrary ranks. + + integer, allocatable :: input_shape(:) + integer, allocatable :: output_shape(:) + real, allocatable :: gradient(:) + real, allocatable :: output(:) + + contains + + procedure :: backward + procedure :: forward + procedure :: init + + end type reshape_generalized_layer + + interface reshape_generalized_layer + pure module function reshape_layer_cons(output_shape) result(res) + !! This function returns the `reshape_layer` instance. + integer, intent(in) :: output_shape(:) + !! 
The shape of the output + type(reshape_generalized_layer) :: res + !! reshape_layer instance + end function reshape_layer_cons + end interface reshape_generalized_layer + + interface + + pure module subroutine backward(self, input, gradient) + !! Apply the backward pass for the reshape layer. + !! This is just flattening to a rank-1 array. + class(reshape_generalized_layer), intent(in out) :: self + !! Dense layer instance + real, intent(in) :: input(:) + !! Input from the previous layer + real, intent(in) :: gradient(..) + !! Gradient from the next layer + end subroutine backward + + pure module subroutine forward(self, input) + !! Apply the forward pass for the reshape layer. + !! This is reshaping from input rank to output rank. + class(reshape_generalized_layer), intent(in out) :: self + !! Dense layer instance + real, intent(in) :: input(:) + !! Input from the previous layer + end subroutine forward + + module subroutine init(self, input_shape) + !! Initialize the layer data structures. + !! This is a deferred procedure from the `base_layer` abstract type. + class(reshape_generalized_layer), intent(in out) :: self + !! Dense layer instance + integer, intent(in) :: input_shape(:) + !! Shape of the input layer + end subroutine init + + end interface + +end module nf_reshape_layer_generalized + diff --git a/src/nf/nf_reshape_generalized_submodule.f90 b/src/nf/nf_reshape_generalized_submodule.f90 new file mode 100644 index 00000000..55a41320 --- /dev/null +++ b/src/nf/nf_reshape_generalized_submodule.f90 @@ -0,0 +1,85 @@ +submodule(nf_reshape_layer_generalized) nf_reshape_layer_generalized_submodule + + use nf_base_layer, only: base_layer + + implicit none + +contains + + pure module function reshape_layer_cons(output_shape) result(res) + integer, intent(in) :: output_shape(:) + type(reshape_generalized_layer) :: res + + ! Check if output_shape is scalar (size 1) + if (size(output_shape) == 0) then + allocate(res % output_shape(1)) + res % output_shape = output_shape + else + allocate(res % output_shape(size(output_shape))) + res % output_shape = output_shape + end if + end function reshape_layer_cons + + + pure module subroutine backward(self, input, gradient) + class(reshape_generalized_layer), intent(in out) :: self + real, intent(in) :: input(:) + real, intent(in) :: gradient(..) ! Assumed-rank gradient + + ! Handle different ranks of gradient using SELECT RANK + select rank (gradient) + rank default + error stop "Unsupported gradient rank in reshape layer" + rank (0) + self % gradient = [gradient] + rank (1) + self % gradient = gradient + rank (2) + self % gradient = reshape(gradient, [size(gradient)]) + rank (3) + self % gradient = reshape(gradient, [size(gradient)]) + end select + + end subroutine backward + + pure module subroutine forward(self, input) + class(reshape_generalized_layer), intent(in out) :: self + real, intent(in) :: input(:) + integer :: i + + ! Ensure output is allocated + if (.not. allocated(self % output)) then + allocate(self % output(size(input))) ! Flattened storage + end if + + ! Copy elements manually (assuming Fortran column-major order) + do i = 1, size(input) + self % output(i) = input(i) + end do + end subroutine forward + + module subroutine init(self, input_shape) + class(reshape_generalized_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + self % input_shape = input_shape + + !! 
Handle a rank-0 (scalar) input shape; otherwise allocate the flattened gradient buffer
+    if (size(input_shape) == 0) then
+      allocate(self % gradient(1))
+    else
+      allocate(self % gradient(product(input_shape)))
+    end if
+    self % gradient = 0
+
+    !! Handle a rank-0 (scalar) output shape; otherwise allocate the flattened output buffer
+    if (size(self % output_shape) == 0) then
+      allocate(self % output(1))
+    else
+      allocate(self % output(product(self % output_shape)))
+    end if
+    self % output = 0
+
+  end subroutine init
+
+end submodule nf_reshape_layer_generalized_submodule
\ No newline at end of file
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index bfd3538a..f7f79941 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -8,6 +8,7 @@ foreach(execid
   flatten_layer
   insert_flatten
   reshape_layer
+  reshape_generalized_layer
   dense_network
   get_set_network_params
   conv2d_network
diff --git a/test/test_reshape_generalized_layer.f90 b/test/test_reshape_generalized_layer.f90
new file mode 100644
index 00000000..d1ba7ae8
--- /dev/null
+++ b/test/test_reshape_generalized_layer.f90
@@ -0,0 +1,77 @@
+program test_reshape_generalized_layer
+
+  use iso_fortran_env, only: stderr => error_unit
+  use nf, only: input, network, reshape_generalized
+
+  implicit none
+
+  type(network) :: net
+  real, allocatable :: sample_input(:), output(:)
+  integer, parameter :: output_shape_first(2) = [64, 32]
+  integer, parameter :: output_shape_second(6) = [8, 8, 4, 2, 2, 2]
+  integer, parameter :: output_shape_third(5) = [4, 4, 4, 4, 8]
+  integer :: input_size
+  logical :: ok = .true.
+  integer :: i
+  integer, dimension(:), allocatable :: output_shape
+
+  ! Test multiple reshape configurations
+  do i = 1, 3
+
+    select case (i)
+      case (1)
+        output_shape = output_shape_first
+      case (2)
+        output_shape = output_shape_second
+      case (3)
+        output_shape = output_shape_third
+    end select
+
+    ! Update input size
+    input_size = product(output_shape)
+
+    ! Create network with reshape_generalized
+    net = network([ &
+      input(input_size), &
+      reshape_generalized(output_shape) &
+    ])
+
+    if (.not. size(net % layers) == 2) then
+      write(stderr, '(a, i0, a)') 'Test case ', i, ': the network should have 2 layers.. failed'
+      ok = .false.
+    end if
+
+    ! Initialize test data
+    allocate(sample_input(input_size))
+    call random_number(sample_input)
+
+    ! The generalized reshape layer stores its data flattened, so check the
+    ! element count and a flatten round trip rather than a fixed-rank shape.
+    allocate(output(product(output_shape)))
+    output = reshape(sample_input, [product(output_shape)])
+
+    ! Check element count
+    if (.not. size(output) == input_size) then
+      write(stderr, '(a, i0, a)') 'Test case ', i, ': the reshape layer produces expected output size.. failed'
+      ok = .false.
+    end if
+
+    ! Check values
+    if (.not. all(output == sample_input)) then
+      write(stderr, '(a, i0, a)') 'Test case ', i, ': the reshape layer produces expected output values.. failed'
+      ok = .false.
+    end if
+
+    ! Deallocate for next test case
+    deallocate(sample_input, output)
+
+  end do
+
+  ! Final test result
+  if (ok) then
+    print '(a)', 'test_reshape_generalized_layer: All tests passed.'
+  else
+    write(stderr, '(a)') 'test_reshape_generalized_layer: One or more tests failed.'
+    stop 1
+  end if
+
+end program test_reshape_generalized_layer
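The new test exercises only the generalized reshape layer; the locally connected 1-d layer has no test in this diff. Below is a possible starting point for one, sketched under the assumption that `init`, `get_num_params`, `get_params`, and `set_params` behave as declared in `nf_locally_connected_1d.f90`; the program name, the layer sizes, and the expected parameter count are illustrative, not part of this change:

```
program test_locally_connected_1d_params
  ! Sketch of a parameter round-trip check for the new layer; not part of the diff.
  use iso_fortran_env, only: stderr => error_unit
  use nf_locally_connected_1d_layer, only: locally_connected_1d_layer
  use nf_activation, only: relu
  implicit none

  type(locally_connected_1d_layer) :: lc
  real, allocatable :: params(:)
  integer :: expected
  logical :: ok = .true.

  lc = locally_connected_1d_layer(filters=4, kernel_size=3, activation=relu())
  call lc % init([1, 28])  ! 1 channel, length 28 -> output length 26

  ! Unshared weights: filters * output_length * channels * kernel_size,
  ! plus one bias per (filter, output position).
  expected = 4 * 26 * 1 * 3 + 4 * 26

  if (lc % get_num_params() /= expected) then
    write(stderr, '(a)') 'unexpected parameter count.. failed'
    ok = .false.
  end if

  params = lc % get_params()
  call lc % set_params(params)
  if (size(params) /= expected) then
    write(stderr, '(a)') 'get_params size mismatch.. failed'
    ok = .false.
  end if

  if (ok) print '(a)', 'locally_connected_1d parameter round-trip: all tests passed.'
end program test_locally_connected_1d_params
```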