|
| 1 | +#ifndef CAFFE_MKLDNN_LAYERS_HPP_ |
| 2 | +#define CAFFE_MKLDNN_LAYERS_HPP_ |
| 3 | + |
| 4 | +#include <string> |
| 5 | +#include <vector> |
| 6 | + |
| 7 | +#include "boost/enable_shared_from_this.hpp" |
| 8 | +#include "caffe/blob.hpp" |
| 9 | +#include "caffe/common.hpp" |
| 10 | +#include "caffe/layers/base_conv_layer.hpp" |
| 11 | +#include "caffe/layers/conv_layer.hpp" |
| 12 | +#include "caffe/layers/inner_product_layer.hpp" |
| 13 | +#include "caffe/layers/neuron_layer.hpp" |
| 14 | +#include "caffe/proto/caffe.pb.h" |
| 15 | +#include "caffe/mkldnn_memory.hpp" |
| 16 | +#include "mkldnn.hpp" |
| 17 | + |
| 18 | +using namespace mkldnn; |
| 19 | + |
| 20 | +namespace caffe { |
| 21 | + |
| 22 | +// ===== CpuEngine ======================================= |
| 23 | +// cpu_engine singleton |
| 24 | +class CpuEngine |
| 25 | +{ |
| 26 | +public: |
| 27 | + static CpuEngine & Instance() |
| 28 | + { |
| 29 | + // I's thread-safe in C++11. |
| 30 | + static CpuEngine myInstance; |
| 31 | + return myInstance; |
| 32 | + } |
| 33 | + CpuEngine(CpuEngine const&) = delete; // Copy construct |
| 34 | + CpuEngine(CpuEngine&&) = delete; // Move construct |
| 35 | + CpuEngine& operator=(CpuEngine const&) = delete; // Copy assign |
| 36 | + CpuEngine& operator=(CpuEngine &&) = delete; // Move assign |
| 37 | + |
| 38 | + engine & get_engine() { return _cpu_engine; } |
| 39 | +protected: |
| 40 | + CpuEngine() : _cpu_engine(engine::cpu, 0) {} |
| 41 | + ~CpuEngine() {} |
| 42 | +private: |
| 43 | + engine _cpu_engine; |
| 44 | +}; |
| 45 | + |
| 46 | +// ===== MKLDNNConvolutionLayer ======================================= |
/**
 * @brief Convolution layer whose forward path is meant to be driven by the
 *        MKL-DNN convolution primitive; derives from ConvolutionLayer.
 *
 * Only declarations are visible here — the method bodies live in the
 * corresponding .cpp file.
 */
template <typename Dtype>
class MKLDNNConvolutionLayer : public ConvolutionLayer<Dtype> {
public:
    explicit MKLDNNConvolutionLayer(const LayerParameter& param);
    virtual ~MKLDNNConvolutionLayer() {}
protected:
    virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
    virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
    virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down
                            , const vector<Blob<Dtype>*>& bottom);
    virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down
                            , const vector<Blob<Dtype>*>& bottom);
    // Customized methods
    virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
    void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
private:
    virtual void compute_output_shape();
    // Caches layer geometry from bottom/top — TODO confirm in the .cpp.
    virtual void init_properties(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
    // Builds the MKL-DNN convolution primitive and memory objects.
    void InitConvolution(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);

    // Adapters between Caffe blobs and MKL-DNN memory layouts.
    shared_ptr<MKLDNNData<Dtype> > fwd_bottom_data, fwd_top_data, fwd_weights_data, fwd_bias_data;
    shared_ptr<convolution::primitive_desc> convFwd_pd;

    // Forward primitive and its attached memory objects.
    shared_ptr<convolution> convFwd;
    shared_ptr<memory> input_memory, weights_memory, bias_memory, output_memory;

    // Cached geometry; presumably populated by init_properties() — confirm.
    uint32_t width_, height_, width_out_, height_out_, kernel_w_, kernel_h_, stride_w_, stride_h_;
    int pad_w_, pad_h_;
};
| 76 | + |
| 77 | +// ===== MKLDNNInnerProductLayer ======================================= |
/**
 * @brief Fully-connected (inner product) layer meant to be backed by the
 *        MKL-DNN inner product primitive; derives from InnerProductLayer.
 *
 * Only declarations are visible here — the method bodies live in the
 * corresponding .cpp file.
 */
template <typename Dtype>
class MKLDNNInnerProductLayer : public InnerProductLayer<Dtype> {
public:
    explicit MKLDNNInnerProductLayer(const LayerParameter& param);
    // Destructor is defined out of line — see the .cpp for what it releases.
    virtual ~MKLDNNInnerProductLayer();
protected:
    virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
    virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
    virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down
                            , const vector<Blob<Dtype>*>& bottom);
    virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down
                            , const vector<Blob<Dtype>*>& bottom);
    // Customized methods
    virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
    void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
private:
    // Builds the MKL-DNN inner product primitive and memory objects.
    void InitInnerProduct(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);

    // Adapters between Caffe blobs and MKL-DNN memory layouts.
    shared_ptr<MKLDNNData<Dtype> > fwd_bottom_data, fwd_top_data, fwd_weights_data, fwd_bias_data;
    shared_ptr<inner_product::primitive_desc> ipFwd_pd;

    // Forward primitive and its attached memory objects.
    shared_ptr<inner_product> ipFwd;
    shared_ptr<memory> input_memory, weights_memory, bias_memory, output_memory;

    // Presumably the spatial width/height of the input — TODO confirm in the .cpp.
    uint32_t w_, h_;
};
| 104 | + |
| 105 | + |
| 106 | +/** |
| 107 | + * @brief Normalize the input in a local region across feature maps. |
| 108 | + */ |
| 109 | + |
| 110 | +// ===== MKLDNNLRNLayer ======================================= |
| 111 | +template <typename Dtype> |
| 112 | +class MKLDNNLRNLayer : public Layer<Dtype> { |
| 113 | +public: |
| 114 | + explicit MKLDNNLRNLayer(const LayerParameter& param) |
| 115 | + : Layer<Dtype>(param) |
| 116 | + , fwd_top_data(NULL) |
| 117 | + , fwd_bottom_data(NULL) |
| 118 | + , lrnFwd_pd(NULL) |
| 119 | + , lrnFwd(NULL) |
| 120 | + , input_memory(NULL) |
| 121 | + , output_memory(NULL) |
| 122 | + , scratch_(NULL) {} |
| 123 | + virtual ~MKLDNNLRNLayer() {} |
| 124 | +protected: |
| 125 | + virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); |
| 126 | + virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); |
| 127 | + virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); |
| 128 | + virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down |
| 129 | + , const vector<Blob<Dtype>*>& bottom); |
| 130 | + virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); |
| 131 | + virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down |
| 132 | + , const vector<Blob<Dtype>*>& bottom); |
| 133 | + |
| 134 | + virtual inline const char* type() const { return "LRN"; } |
| 135 | + virtual inline int ExactNumBottomBlobs() const { return 1; } |
| 136 | + virtual inline int ExactNumTopBlobs() const { return 1; } |
| 137 | +private: |
| 138 | + void InitLRN(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); |
| 139 | + |
| 140 | + Dtype alpha_, beta_, k_; |
| 141 | + int size_, num_, width_, height_, channels_; |
| 142 | + |
| 143 | + shared_ptr<MKLDNNData<Dtype> > fwd_top_data, fwd_bottom_data; |
| 144 | + shared_ptr<lrn::primitive_desc> lrnFwd_pd; |
| 145 | + |
| 146 | + shared_ptr<lrn> lrnFwd; |
| 147 | + shared_ptr<memory> input_memory, output_memory; |
| 148 | + |
| 149 | + shared_ptr<memory> scratch_; |
| 150 | +}; |
| 151 | + |
| 152 | +// ===== MKLDNNPoolingLayer ======================================= |
| 153 | +template <typename Dtype> |
| 154 | +class MKLDNNPoolingLayer : public Layer<Dtype> { |
| 155 | +public: |
| 156 | + explicit MKLDNNPoolingLayer(const LayerParameter& param) |
| 157 | + : Layer<Dtype>(param) |
| 158 | + , fwd_top_data(NULL) |
| 159 | + , fwd_bottom_data(NULL) |
| 160 | + , poolingFwd_pd(NULL) |
| 161 | + , poolingFwd(NULL) |
| 162 | + , indices_memory(NULL) |
| 163 | + , input_memory(NULL) |
| 164 | + , output_memory(NULL) |
| 165 | + , indices_pd(NULL) |
| 166 | + {} |
| 167 | + ~MKLDNNPoolingLayer() {} |
| 168 | +protected: |
| 169 | + virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); |
| 170 | + virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); |
| 171 | + |
| 172 | + virtual inline const char* type() const { return "Pooling"; } |
| 173 | + virtual inline int ExactNumBottomBlobs() const { return 1; } |
| 174 | + virtual inline int MinTopBlobs() const { return 1; } |
| 175 | + // MAX POOL layers can output an extra top blob for the mask; |
| 176 | + // others can only output the pooled inputs. |
| 177 | + virtual inline int MaxTopBlobs() const { |
| 178 | + return (this->layer_param_.pooling_param().pool() == PoolingParameter_PoolMethod_MAX) ? 2 : 1; |
| 179 | + } |
| 180 | +protected: |
| 181 | + virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); |
| 182 | + virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); |
| 183 | + virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,const vector<bool>& propagate_down |
| 184 | + ,const vector<Blob<Dtype>*>& bottom); |
| 185 | + virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down |
| 186 | + ,const vector<Blob<Dtype>*>& bottom); |
| 187 | + |
| 188 | +private: |
| 189 | + void InitPooling(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); |
| 190 | + |
| 191 | + uint32_t num_, channels_, width_, height_, width_out_, height_out_; |
| 192 | + uint32_t kernel_w_, kernel_h_; |
| 193 | + uint32_t stride_w_, stride_h_; |
| 194 | + int32_t pad_w_, pad_h_; |
| 195 | + |
| 196 | + Blob<uint32_t> max_idx_; |
| 197 | + bool global_pooling_; |
| 198 | + |
| 199 | + shared_ptr<MKLDNNData<Dtype> > fwd_top_data, fwd_bottom_data; |
| 200 | + shared_ptr<pooling::primitive_desc> poolingFwd_pd; |
| 201 | + shared_ptr<pooling> poolingFwd; |
| 202 | + shared_ptr<memory> indices_memory, input_memory, output_memory; |
| 203 | + shared_ptr<memory::primitive_desc> indices_pd; |
| 204 | + |
| 205 | +}; |
| 206 | + |
| 207 | +// ===== MKLDNNReLULayer ======================================= |
| 208 | +template <typename Dtype> |
| 209 | +class MKLDNNReLULayer : public NeuronLayer<Dtype> { |
| 210 | +public: |
| 211 | + /** |
| 212 | + * @param param provides ReLUParameter relu_param, |
| 213 | + * with ReLULayer options: |
| 214 | + * - negative_slope (\b optional, default 0). |
| 215 | + * the value @f$ \nu @f$ by which negative values are multiplied. |
| 216 | + */ |
| 217 | + explicit MKLDNNReLULayer(const LayerParameter& param) |
| 218 | + : NeuronLayer<Dtype>(param) |
| 219 | + , fwd_top_data (NULL) |
| 220 | + , fwd_bottom_data (NULL) |
| 221 | + , reluFwd_pd(NULL) |
| 222 | + , reluFwd(NULL) |
| 223 | + , input_memory(NULL) |
| 224 | + , output_memory(NULL) |
| 225 | + {} |
| 226 | + |
| 227 | + ~MKLDNNReLULayer() {} |
| 228 | +protected: |
| 229 | + virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); |
| 230 | + virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); |
| 231 | + virtual inline const char* type() const { return "ReLU"; } |
| 232 | + virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); |
| 233 | + virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); |
| 234 | + virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down |
| 235 | + , const vector<Blob<Dtype>*>& bottom); |
| 236 | + virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down |
| 237 | + , const vector<Blob<Dtype>*>& bottom); |
| 238 | +private: |
| 239 | + void InitReLU(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); |
| 240 | + shared_ptr<MKLDNNData<Dtype> > fwd_top_data, fwd_bottom_data; |
| 241 | + shared_ptr<relu::primitive_desc> reluFwd_pd; |
| 242 | + |
| 243 | + shared_ptr<relu> reluFwd; |
| 244 | + shared_ptr<memory> input_memory, output_memory; |
| 245 | + |
| 246 | + uint32_t num_, width_, height_, channels_; |
| 247 | +}; |
| 248 | + |
| 249 | +} // namespace caffe |
| 250 | +#endif // #ifndef CAFFE_MKLDNN_LAYERS_HPP_ |
0 commit comments