"""CUDNN RNN Test.

Benchmark script (theano/cudnn_rnn.py) that times a cuDNN-backed RNN block
built with Theano's ``gpuarray`` backend.

It builds a forward function and a gradient function on random data, runs
each once (reported as "Setup"), then times ``num_passes`` forward calls
and ``num_passes`` forward + backward calls and prints the throughput.

Required command-line arguments: ``--network``, ``--depth``,
``--batch_size``, ``--hidden`` and ``--seq_len``.
"""
import argparse
import time

import numpy as np
import theano
import theano.tensor as T
from theano.gpuarray import dnn
from theano.gpuarray.type import gpuarray_shared_constructor


def _sync_gpu(shared_var):
    """Wait for pending GPU work on ``shared_var``'s device buffer.

    The original script called ``theano.sandbox.cuda.synchronize()``, which
    belongs to the legacy CUDA backend rather than the ``gpuarray`` backend
    used everywhere else here.  Syncing on the GPU-resident parameter
    array stays within the backend that actually runs the computation.
    NOTE(review): assumes waiting on the parameter buffer is a sufficient
    barrier for the timed calls -- confirm against the pygpu docs.
    """
    shared_var.get_value(borrow=True, return_internal_type=True).sync()


def _time_passes(fn, passes, shared_var):
    """Call ``fn`` ``passes`` times, sync, and return elapsed seconds."""
    begin = time.time()
    for _ in range(passes):
        fn()
    _sync_gpu(shared_var)
    return time.time() - begin


def _report(title, samples, elapsed):
    """Print the throughput summary for one timed phase."""
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(title)
    print(
        "--- %i samples in %s seconds (%f samples/s, %.7f s/sample) ---" % (
            samples,
            elapsed,
            samples / elapsed,
            elapsed / samples
        )
    )


mode_with_gpu = theano.compile.mode.get_default_mode().including(
    'gpuarray'
).excluding('gpu')

parser = argparse.ArgumentParser()
parser.add_argument(
    "-n",
    "--network",
    help="network type rnn/lstm/gru",
    required=True
)
parser.add_argument(
    "-d",
    "--depth",
    help="num layers",
    type=int,
    required=True
)
parser.add_argument(
    "-b",
    "--batch_size",
    type=int,
    help="batch size",
    required=True
)
parser.add_argument(
    "-l",
    "--hidden",
    type=int,
    help="hidden dim",
    required=True
)
parser.add_argument(
    "-s",
    "--seq_len",
    type=int,
    help="time steps",
    required=True
)
args = parser.parse_args()
network_type = args.network
depth = args.depth
batch_size = args.batch_size
hidden_dim = args.hidden
seq_len = args.seq_len
num_passes = 1000

# Random inputs/targets of shape (seq_len, batch_size, hidden_dim) and
# initial states of shape (depth, batch_size, hidden_dim).
x_val = np.random.random((seq_len, batch_size, hidden_dim)).astype(
    theano.config.floatX
)
y_val = np.random.random((seq_len, batch_size, hidden_dim)).astype(
    theano.config.floatX
)
h0_val = np.random.random((depth, batch_size, hidden_dim)).astype(
    theano.config.floatX
)
c0_val = np.random.random((depth, batch_size, hidden_dim)).astype(
    theano.config.floatX
)

# The "Setup" timer covers graph construction, compilation and one
# forward and one forward + backward call.
start = time.time()

X = T.tensor3('X')
Y = T.tensor3('Y')
h0 = T.tensor3('h0')
c0 = T.tensor3('c0')

rnnb = dnn.RNNBlock(
    theano.config.floatX,
    hidden_dim,
    depth,
    network_type,
    input_mode='skip'
)
psize = rnnb.get_param_size([batch_size, hidden_dim])
# All RNN weights live in one flat, zero-initialised GPU shared vector.
params_cudnn = gpuarray_shared_constructor(
    np.zeros((psize,), dtype=theano.config.floatX)
)

output = rnnb.apply(params_cudnn, X, h0, c0)[0]  # Only hidden states
cost = T.mean((Y - output) ** 2)
grads = T.grad(cost, params_cudnn)
cudnn_fn = theano.function(
    inputs=[],
    outputs=output,
    mode=mode_with_gpu,
    givens={X: x_val, h0: h0_val, c0: c0_val}
)
cudnn_grad_fn = theano.function(
    inputs=[],
    outputs=grads,
    mode=mode_with_gpu,
    givens={X: x_val, Y: y_val, h0: h0_val, c0: c0_val}
)

# Warm-up: one call of each function so the timed loops exclude it.
cudnn_fn()
cudnn_grad_fn()
_sync_gpu(params_cudnn)
print("Setup : compile + forward/backward x 1")
print("--- %s seconds" % (time.time() - start))

num_processed = num_passes * batch_size

_report(
    "Forward:",
    num_processed,
    _time_passes(cudnn_fn, num_passes, params_cudnn)
)
_report(
    "Forward + Backward:",
    num_processed,
    _time_passes(cudnn_grad_fn, num_passes, params_cudnn)
)