| #ifndef CAFFE2_UTILS_MKL_MKL_MEMORY_H_ |
| #define CAFFE2_UTILS_MKL_MKL_MEMORY_H_ |
| |
#include <limits>
#include <mutex>
#include <string>
#include <utility>
#include <vector>
| |
| #include "caffe2/core/flags.h" // for TIndex |
| #include "caffe2/core/tensor.h" // for TIndex |
| #include "caffe2/mkl/utils/mkl_dnn_cppwrapper.h" |
| |
// A global boolean variable that controls the behavior when we call View() on
// an MKLMemory: if it is set to true, the View() function will actually
// change the underlying storage. If it is set to false, an implicit copy is
// triggered but the original storage is not affected.
| CAFFE2_DECLARE_bool(caffe2_mkl_implicit_layout_change); |
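// The flag can be toggled like any other Caffe2 flag in a binary that calls
// caffe2::GlobalInit(), e.g. (illustrative sketch; the binary name is
// hypothetical):
//
//   ./my_mkl_benchmark --caffe2_mkl_implicit_layout_change=true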
| |
| namespace caffe2 { |
| namespace mkl { |
| |
| template <typename T> |
| class PrimitiveWrapper { |
| public: |
| PrimitiveWrapper() {} |
| // Creates a primitive wrapper from an existing primitive. The wrapper |
| // takes over ownership. |
| explicit PrimitiveWrapper(dnnPrimitive_t primitive) : primitive_(primitive) {} |
| |
  // Creates a primitive by invoking an MKL-DNN creator function, such as
  // dnnConversionCreate<T>, with the given arguments.
  template <typename Creator, typename FirstArg, typename... Args>
  PrimitiveWrapper(Creator creator, FirstArg&& arg, Args&&... args) {
    creator(
        &primitive_, std::forward<FirstArg>(arg), std::forward<Args>(args)...);
  }
| |
| ~PrimitiveWrapper() { |
| if (primitive_) { |
| MKLDNN_CHECK(dnnDelete<T>(primitive_)); |
| } |
| } |
| |
  // Deletes the existing primitive (if any) and creates a new one by
  // invoking the given creator function.
  template <typename Creator, typename... Args>
  void Reset(Creator creator, Args&&... args) {
    if (primitive_) {
      MKLDNN_SAFE_CALL(dnnDelete<T>(primitive_));
    }
    creator(&primitive_, std::forward<Args>(args)...);
  }
| |
| operator dnnPrimitive_t() const { |
| return primitive_; |
| } |
| |
| private: |
  dnnPrimitive_t primitive_ = nullptr;
| DISABLE_COPY_AND_ASSIGN(PrimitiveWrapper); |
| }; |
| |
| template <typename T> |
| class LayoutWrapper { |
| public: |
| LayoutWrapper() {} |
| // Create a user layout from a TensorCPU with the given shapes. |
| explicit LayoutWrapper(const TensorCPU& tensor) { |
| Reset(tensor); |
| } |
| |
| // Create an internal layout from the primitive and type. |
| LayoutWrapper(const dnnPrimitive_t primitive, const dnnResourceType_t type) { |
| Reset(primitive, type); |
| } |
| |
| // Create a user layout from the given dimension, size and strides. |
| LayoutWrapper( |
| const size_t dimension, |
| const size_t size[], |
| const size_t strides[]) { |
| Reset(dimension, size, strides); |
| } |
| |
| // Destructs the layout wrapper. |
| ~LayoutWrapper() { |
| if (layout_) |
| MKLDNN_CHECK(dnnLayoutDelete<T>(layout_)); |
| } |
| |
| // Create a user layout from a TensorCPU with the given shapes. |
| void Reset(const TensorCPU& tensor) { |
| if (layout_) |
| MKLDNN_CHECK(dnnLayoutDelete<T>(layout_)); |
| CAFFE_ENFORCE(tensor.size(), "Cannot reset with an empty tensor."); |
    const size_t dimension = tensor.ndim();
    // MKL-DNN expects the fastest-varying dimension first, so the Caffe2
    // (C-contiguous) dims are reversed here. Note: variable-length arrays
    // are a compiler extension, so standard vectors are used instead.
    vector<size_t> size(dimension);
    vector<size_t> strides(dimension);
    for (size_t i = 0; i < dimension; ++i) {
      size[i] = tensor.dim(dimension - i - 1);
      strides[i] = (i == 0) ? 1 : strides[i - 1] * size[i - 1];
    }
    MKLDNN_SAFE_CALL(
        dnnLayoutCreate<T>(&layout_, dimension, size.data(), strides.data()));
| } |
| |
| // Create an internal layout from the primitive and type. |
| void Reset(const dnnPrimitive_t primitive, const dnnResourceType_t type) { |
    CAFFE_ENFORCE(primitive, "Cannot reset with an unknown primitive.");
| CAFFE_ENFORCE( |
| type != dnnResourceNumber, |
| "Cannot reset with an unknown resource number."); |
| if (layout_) { |
| MKLDNN_CHECK(dnnLayoutDelete<T>(layout_)); |
| } |
| MKLDNN_SAFE_CALL( |
| dnnLayoutCreateFromPrimitive<T>(&layout_, primitive, type)); |
| } |
| |
| // Create a user layout from the given dimension, size and strides. |
| void |
| Reset(const size_t dimension, const size_t size[], const size_t strides[]) { |
| if (layout_) |
| MKLDNN_CHECK(dnnLayoutDelete<T>(layout_)); |
| MKLDNN_SAFE_CALL(dnnLayoutCreate<T>(&layout_, dimension, size, strides)); |
| } |
| |
| operator dnnLayout_t() const { |
| return layout_; |
| } |
| |
| private: |
  dnnLayout_t layout_ = nullptr;
| DISABLE_COPY_AND_ASSIGN(LayoutWrapper); |
| }; |
| |
| /** |
 * @brief A wrapper around an opaque MKL internal resource that has certain
 * layouts and conversion primitives set up.
 *
 * Most of the MKLMemory functions are not thread-safe.
| */ |
| template <typename T> |
| class MKLMemory { |
| public: |
| // Initializes an empty MKLMemory. |
| MKLMemory() {} |
| // Initialize an MKLMemory with the given size, strides, dnn |
| // primitive and type. |
| MKLMemory( |
| const size_t dimension, |
| const size_t size[], |
| const size_t strides[], |
| const dnnPrimitive_t primitive = nullptr, |
| const dnnResourceType_t type = dnnResourceNumber, |
| bool share_mem_if_possible = false) { |
| Reset(dimension, size, strides, primitive, type, share_mem_if_possible); |
| } |
| |
  // Initializes an MKLMemory with the given dimensions, assuming C-contiguous
  // storage.
| template <typename IndexType> |
| explicit MKLMemory( |
| const vector<IndexType>& dims, |
| const dnnPrimitive_t primitive = nullptr, |
| const dnnResourceType_t type = dnnResourceNumber, |
| bool share_mem_if_possible = false) { |
| Reset(dims, primitive, type, share_mem_if_possible); |
| } |
| |
| // Initialize an MKLMemory with the given size, strides, dnn |
| // primitive and type. |
| void Reset( |
| const size_t dimension, |
| const size_t size[], |
| const size_t strides[], |
| const dnnPrimitive_t primitive = nullptr, |
| const dnnResourceType_t type = dnnResourceNumber, |
| bool share_mem_if_possible = false) { |
| buffer_.reset(); |
| dims_.resize(dimension); |
    for (size_t i = 0; i < dimension; ++i) {
| dims_[i] = size[dimension - 1 - i]; |
| } |
| user_layout_.Reset(dimension, size, strides); |
| if (primitive) { |
| layout_.Reset(primitive, type); |
| } else { |
| layout_.Reset(dimension, size, strides); |
| } |
| convert_in_.Reset(dnnConversionCreate<T>, user_layout_, layout_); |
| convert_out_.Reset(dnnConversionCreate<T>, layout_, user_layout_); |
| share_mem_if_possible_ = share_mem_if_possible; |
| layout_is_user_layout_ = dnnLayoutCompare<T>(layout_, user_layout_); |
| VLOG(2) << "layout is user layout? " << layout_is_user_layout_; |
| if (!share_mem_if_possible_) { |
| // If we are not going to share memory, we will simply allocate |
| // memory upfront. |
| buffer(); |
| } |
| } |
| |
  // Initializes an MKLMemory with the given dimensions, assuming C-contiguous
  // storage.
| template <typename IndexType> |
| void Reset( |
| const vector<IndexType>& dims, |
| const dnnPrimitive_t primitive = nullptr, |
| const dnnResourceType_t type = dnnResourceNumber, |
| bool share_mem_if_possible = false) { |
| buffer_.reset(); |
| dims_.resize(dims.size()); |
    for (size_t i = 0; i < dims.size(); ++i) {
| dims_[i] = dims[i]; |
| } |
| size_t dimension = dims.size(); |
| vector<size_t> size(dimension); |
| vector<size_t> strides(dimension); |
    for (size_t i = 0; i < dimension; ++i) {
| size[i] = dims[dimension - i - 1]; |
| strides[i] = (i == 0) ? 1 : strides[i - 1] * size[i - 1]; |
| } |
| user_layout_.Reset(dims.size(), size.data(), strides.data()); |
| if (primitive) { |
| layout_.Reset(primitive, type); |
| } else { |
| layout_.Reset(dimension, size.data(), strides.data()); |
| } |
| convert_in_.Reset(dnnConversionCreate<T>, user_layout_, layout_); |
| convert_out_.Reset(dnnConversionCreate<T>, layout_, user_layout_); |
| share_mem_if_possible_ = share_mem_if_possible; |
| layout_is_user_layout_ = dnnLayoutCompare<T>(layout_, user_layout_); |
| VLOG(2) << "layout is user layout? " << layout_is_user_layout_; |
| if (!share_mem_if_possible_) { |
| // If we are not going to share memory, we will simply allocate |
| // memory upfront. |
| buffer(); |
| } |
| } |
| |
| // Destructs the MKLMemory. |
| ~MKLMemory() {} |
| |
| void CopyFrom(const void* ptr) { |
| if (share_mem_if_possible_ && layout_is_user_layout_) { |
| VLOG(2) << "Sharing underlying memory and skip copy."; |
| buffer_.reset(const_cast<void*>(ptr), [](void*) -> void {}); |
| } else { |
| VLOG(2) << "Copying external content."; |
| MKLDNN_SAFE_CALL(dnnConversionExecute<T>( |
| convert_in_, const_cast<void*>(ptr), buffer())); |
| } |
| } |
| |
| void CopyFrom(const TensorCPU& tensor) { |
| CAFFE_ENFORCE_EQ( |
| tensor.dims(), |
| dims_, |
| "Dims does not match the expected dims of the resource."); |
| CopyFrom(tensor.template data<T>()); |
| } |
| |
| void CopyFrom(const MKLMemory<T>& other) { |
    if (share_mem_if_possible_ && dnnLayoutCompare<T>(other.layout_, layout_)) {
| buffer_ = other.buffer_; |
| } else { |
| PrimitiveWrapper<T> convert( |
| dnnConversionCreate<T>, other.layout_, layout_); |
| MKLDNN_SAFE_CALL( |
| dnnConversionExecute<T>(convert, other.buffer_, buffer())); |
| } |
| } |
| |
| bool ShareFromRaw(const void* ptr) { |
| if (share_mem_if_possible_ && layout_is_user_layout_) { |
| buffer_.reset(const_cast<void*>(ptr), [](void*) -> void {}); |
| return true; |
| } else { |
| return false; |
| } |
| } |
| |
| bool ShareFromTensor(const TensorCPU& tensor) { |
| CAFFE_ENFORCE_EQ( |
| tensor.dims(), |
| dims_, |
| "Dims does not match the expected dims of the resource."); |
| return ShareFromRaw(tensor.template data<T>()); |
| } |
| |
| bool ShareFrom(const MKLMemory<T>& other) { |
| if (share_mem_if_possible_ && dnnLayoutCompare<T>(other.layout_, layout_)) { |
| VLOG(2) << "Sharing underlying memory."; |
| buffer_ = other.buffer_; |
| if (!buffer_.get()) { |
| VLOG(2) << "Warning: the source MKLMemory has no content yet, so the " |
| "sharing actually has no effect."; |
| } |
| return true; |
| } else { |
| VLOG(2) << "Not sharing underlying memory."; |
| return false; |
| } |
| } |
| |
| void CopyTo(void* ptr) const { |
| if (buffer_.get() == ptr) { |
| // This is already mapping to the same memory region. Skip copy. |
| VLOG(2) << "CopyTo does not need actual copying, as we are sharing " |
| "memory with the output."; |
| return; |
| } |
| CAFFE_ENFORCE( |
| buffer_.get(), "Canot copy out from an uninitialized MKLMemory."); |
| VLOG(2) << "Copy to external memory."; |
| MKLDNN_SAFE_CALL(dnnConversionExecute<T>(convert_out_, buffer_.get(), ptr)); |
| } |
| |
| void CopyTo(TensorCPU* tensor) const { |
| if (tensor->size() > 0 && buffer_.get() == tensor->mutable_data<T>()) { |
| // This is already mapping to the same memory region. Skip copy. |
| VLOG(2) << "CopyTo does not need actual copying, as we are sharing " |
| "memory with the output."; |
| return; |
| } |
| tensor->Resize(dims_); |
| CopyTo(tensor->mutable_data<T>()); |
| } |
| |
  // Copies to another MKL memory.
  //
  // This function first attempts a direct conversion from this buffer's
  // layout to the target's current layout. If that fails, the target is
  // Reset() with this memory's dims and the given primitive and type, and
  // the copy is performed again.
| void CopyTo( |
| MKLMemory<T>* other, |
| const dnnPrimitive_t primitive = nullptr, |
| const dnnResourceType_t type = dnnResourceNumber) { |
| if (buffer_.get() == other->buffer_.get()) { |
| VLOG(2) << "CopyTo does not need actual copying, as we are sharing " |
| "memory with the output."; |
| // This is already mapping to the same memory region. Skip copy. |
| return; |
| } |
| CAFFE_ENFORCE( |
| buffer_.get(), "Canot copy out from an uninitialized MKLMemory."); |
| // TODO(jiayq): if primitive creation is a big overhead and we will be |
| // consistently copying stuff with fixed src and dst layouts, consider |
| // making a cache for the primitive below. |
| VLOG(2) << "CopyTo requires copying. Performing direct copy."; |
| PrimitiveWrapper<T> convert( |
| dnnConversionCreate<T>, layout_, other->layout_); |
| if (dnnPrimitive_t(convert) == nullptr || |
| dnnConversionExecute<T>(convert, buffer_.get(), other->buffer()) != |
| E_SUCCESS) { |
| VLOG(2) << "Direct copy failed, will need to allocate output."; |
| // If CopyTo directly did not succeed, it could be because the target |
| // MKLMemory is not having the right layout. In this case we will reset |
| // the target and then do another copy. |
| other->Reset(dims_, primitive, type); |
| PrimitiveWrapper<T> convert2( |
| dnnConversionCreate<T>, layout_, other->layout_); |
| MKLDNN_SAFE_CALL( |
| dnnConversionExecute<T>(convert2, buffer_.get(), other->buffer())); |
| } |
| } |
| |
| inline void* buffer() { |
| if (buffer_ == nullptr) { |
| CAFFE_ENFORCE( |
| layout_ != nullptr, "Trying to allocate buffer but layout is empty."); |
| void* allocated = nullptr; |
| MKLDNN_SAFE_CALL(dnnAllocateBuffer<T>(&allocated, layout_)); |
| buffer_.reset(allocated, [](void* ptr) -> void { |
| MKLDNN_CHECK(dnnReleaseBuffer<T>(ptr)); |
| }); |
| } |
| return buffer_.get(); |
| } |
| |
| // MKLDNN does not use const void* even for the inputs, so we will |
| // have to use void* and rely on the underlying implementation to make |
| // sure that the buffer is actually not changed. |
| inline void* buffer() const { |
| CAFFE_ENFORCE( |
| buffer_ != nullptr, "Trying to refer to an unallocated buffer."); |
| return buffer_.get(); |
| } |
| |
| inline const vector<TIndex>& dims() const { |
| return dims_; |
| } |
| |
  inline int ndim() const { return dims_.size(); }
| |
| inline int dim32(const int i) const { |
| CAFFE_ENFORCE_LT(dims_.at(i), std::numeric_limits<int>::max()); |
| return static_cast<int>(dims_[i]); |
| } |
| |
| /** |
| * Returns the i-th dimension of the tensor. Note that the passed in index |
| * must be between 0 (inclusive) and the number of dimensions, otherwise |
| * this function will produce a fatal message. |
| */ |
| inline TIndex dim(const int i) const { |
| return dims_.at(i); |
| } |
| |
| inline const LayoutWrapper<T>& layout() const { |
| return layout_; |
| } |
| |
  // Returns a view of the content. We mark this function const, but note
  // that the returned std::shared_ptr is not const-protected; user
  // discretion is advised for correctness.
| std::shared_ptr<void> View( |
| dnnLayout_t layout_wanted, |
| dnnPrimitive_t primitive, |
| dnnResourceType_t type) const { |
| std::lock_guard<std::mutex> lock(buffer_lock_); |
| if (dnnLayoutCompare<T>(layout_wanted, layout_)) { |
| // If they are the same, return the original content. |
| VLOG(2) << "Creating a view without the need of copying."; |
| return std::shared_ptr<void>(buffer_); |
| } else { |
| void* temp_buffer; |
| VLOG(2) << "Creating a view with copying."; |
| MKLDNN_SAFE_CALL(dnnAllocateBuffer<T>(&temp_buffer, layout_wanted)); |
| PrimitiveWrapper<T> convert( |
| dnnConversionCreate<T>, layout_, layout_wanted); |
| MKLDNN_SAFE_CALL(dnnConversionExecute<T>( |
| convert, buffer_.get(), temp_buffer)); |
| if (FLAGS_caffe2_mkl_implicit_layout_change) { |
| VLOG(2) << "Implicit layout change set. " |
| "Changing the underlying storage."; |
| // We will need to call Reset to set up all the member variables. |
| // This is not thread safe, so we might want to double check if this |
| // makes sense in actual use cases. |
| const_cast<MKLMemory<T>*>(this)->Reset( |
| dims_, primitive, type, share_mem_if_possible_); |
| CAFFE_ENFORCE(dnnLayoutCompare<T>(layout_wanted, layout_), |
| "You passed in a target layout that is not " |
| "generated by the given primitive and type."); |
| buffer_.reset(temp_buffer, [](void* ptr) -> void { |
| MKLDNN_CHECK(dnnReleaseBuffer<T>(ptr)); |
| }); |
| return std::shared_ptr<void>(buffer_); |
| } else { |
| return std::shared_ptr<void>(temp_buffer, [](void* ptr) -> void { |
| MKLDNN_CHECK(dnnReleaseBuffer<T>(ptr)); |
| }); |
| } |
| } |
| } |
| |
| private: |
| bool share_mem_if_possible_; |
| bool layout_is_user_layout_; |
| // The internal buffer in the specific dnn layout. |
| // It is marked mutable but any modification in a const function should |
| // be accompanied by the buffer lock, see the View() function. |
| mutable std::shared_ptr<void> buffer_; |
| // A mutex to control the access of buffer in the View() function. |
| mutable std::mutex buffer_lock_; |
| // The dimensions in the same order as Caffe2 does. This is used to |
| // interface with C2. |
| vector<TIndex> dims_; |
| // The user dnn layout. |
| LayoutWrapper<T> user_layout_; |
| // The internal dnn layout. |
| LayoutWrapper<T> layout_; |
| // The primitive to use to convert from user layout to internal layout |
| PrimitiveWrapper<T> convert_in_; |
| // The primitive to use to convert from internal layout to user layout |
| PrimitiveWrapper<T> convert_out_; |
| |
| DISABLE_COPY_AND_ASSIGN(MKLMemory); |
| }; |
| |
| } // namespace mkl |
| } // namespace caffe2 |
| |
| #endif // CAFFE2_UTILS_MKL_MKL_MEMORY_H_ |