| /*M/////////////////////////////////////////////////////////////////////////////////////// |
| // |
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
| // |
| // By downloading, copying, installing or using the software you agree to this license. |
| // If you do not agree to this license, do not download, install, |
| // copy or use the software. |
| // |
| // |
| // License Agreement |
| // For Open Source Computer Vision Library |
| // |
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
| // Copyright (C) 2013, OpenCV Foundation, all rights reserved. |
| // Third party copyrights are property of their respective owners. |
| // |
| // Redistribution and use in source and binary forms, with or without modification, |
| // are permitted provided that the following conditions are met: |
| // |
| // * Redistribution's of source code must retain the above copyright notice, |
| // this list of conditions and the following disclaimer. |
| // |
| // * Redistribution's in binary form must reproduce the above copyright notice, |
| // this list of conditions and the following disclaimer in the documentation |
| // and/or other materials provided with the distribution. |
| // |
| // * The name of the copyright holders may not be used to endorse or promote products |
| // derived from this software without specific prior written permission. |
| // |
| // This software is provided by the copyright holders and contributors "as is" and |
| // any express or implied warranties, including, but not limited to, the implied |
| // warranties of merchantability and fitness for a particular purpose are disclaimed. |
| // In no event shall the Intel Corporation or contributors be liable for any direct, |
| // indirect, incidental, special, exemplary, or consequential damages |
| // (including, but not limited to, procurement of substitute goods or services; |
| // loss of use, data, or profits; or business interruption) however caused |
| // and on any theory of liability, whether in contract, strict liability, |
| // or tort (including negligence or otherwise) arising in any way out of |
| // the use of this software, even if advised of the possibility of such damage. |
| // |
| //M*/ |
| |
| #include "precomp.hpp" |
| #include <map> |
| |
| using namespace cv; |
| using namespace cv::cuda; |
| |
| #ifdef HAVE_CUDA |
| |
| namespace { |
| |
| class HostMemAllocator : public MatAllocator |
| { |
| public: |
| explicit HostMemAllocator(unsigned int flags) : flags_(flags) |
| { |
| } |
| |
| UMatData* allocate(int dims, const int* sizes, int type, |
| void* data0, size_t* step, |
| int /*flags*/, UMatUsageFlags /*usageFlags*/) const |
| { |
| size_t total = CV_ELEM_SIZE(type); |
| for (int i = dims-1; i >= 0; i--) |
| { |
| if (step) |
| { |
| if (data0 && step[i] != CV_AUTOSTEP) |
| { |
| CV_Assert(total <= step[i]); |
| total = step[i]; |
| } |
| else |
| { |
| step[i] = total; |
| } |
| } |
| |
| total *= sizes[i]; |
| } |
| |
| UMatData* u = new UMatData(this); |
| u->size = total; |
| |
| if (data0) |
| { |
| u->data = u->origdata = static_cast<uchar*>(data0); |
| u->flags |= UMatData::USER_ALLOCATED; |
| } |
| else |
| { |
| void* ptr = 0; |
| cudaSafeCall( cudaHostAlloc(&ptr, total, flags_) ); |
| |
| u->data = u->origdata = static_cast<uchar*>(ptr); |
| } |
| |
| return u; |
| } |
| |
| bool allocate(UMatData* u, int /*accessFlags*/, UMatUsageFlags /*usageFlags*/) const |
| { |
| return (u != NULL); |
| } |
| |
| void deallocate(UMatData* u) const |
| { |
| if (!u) |
| return; |
| |
| CV_Assert(u->urefcount >= 0); |
| CV_Assert(u->refcount >= 0); |
| |
| if (u->refcount == 0) |
| { |
| if ( !(u->flags & UMatData::USER_ALLOCATED) ) |
| { |
| cudaFreeHost(u->origdata); |
| u->origdata = 0; |
| } |
| |
| delete u; |
| } |
| } |
| |
| private: |
| unsigned int flags_; |
| }; |
| |
| } // namespace |
| |
| #endif |
| |
| MatAllocator* cv::cuda::HostMem::getAllocator(AllocType alloc_type) |
| { |
| #ifndef HAVE_CUDA |
| (void) alloc_type; |
| throw_no_cuda(); |
| return NULL; |
| #else |
| static std::map<unsigned int, Ptr<MatAllocator> > allocators; |
| |
| unsigned int flag = cudaHostAllocDefault; |
| |
| switch (alloc_type) |
| { |
| case PAGE_LOCKED: flag = cudaHostAllocDefault; break; |
| case SHARED: flag = cudaHostAllocMapped; break; |
| case WRITE_COMBINED: flag = cudaHostAllocWriteCombined; break; |
| default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type"); |
| } |
| |
| Ptr<MatAllocator>& a = allocators[flag]; |
| |
| if (a.empty()) |
| { |
| a = makePtr<HostMemAllocator>(flag); |
| } |
| |
| return a.get(); |
| #endif |
| } |
| |
| #ifdef HAVE_CUDA |
| namespace |
| { |
// Round 'what' up to the nearest multiple of 'alignment'.
//
// The previous implementation used the bit-mask trick
// (what + alignment-1) & ~(alignment-1), which silently produces a
// wrong result whenever 'alignment' is not a power of two. The
// division form below is correct for any non-zero alignment and
// yields identical results for power-of-two alignments (which is
// what cudaDeviceProp::textureAlignment provides today), so existing
// callers are unaffected.
size_t alignUpStep(size_t what, size_t alignment)
{
    // alignment must be non-zero; callers pass the device texture
    // alignment, which CUDA guarantees to be > 0.
    return ((what + alignment - 1) / alignment) * alignment;
}
| } |
| #endif |
| |
| void cv::cuda::HostMem::create(int rows_, int cols_, int type_) |
| { |
| #ifndef HAVE_CUDA |
| (void) rows_; |
| (void) cols_; |
| (void) type_; |
| throw_no_cuda(); |
| #else |
| if (alloc_type == SHARED) |
| { |
| DeviceInfo devInfo; |
| CV_Assert( devInfo.canMapHostMemory() ); |
| } |
| |
| type_ &= Mat::TYPE_MASK; |
| |
| if (rows == rows_ && cols == cols_ && type() == type_ && data) |
| return; |
| |
| if (data) |
| release(); |
| |
| CV_DbgAssert( rows_ >= 0 && cols_ >= 0 ); |
| |
| if (rows_ > 0 && cols_ > 0) |
| { |
| flags = Mat::MAGIC_VAL + Mat::CONTINUOUS_FLAG + type_; |
| rows = rows_; |
| cols = cols_; |
| step = elemSize() * cols; |
| |
| if (alloc_type == SHARED) |
| { |
| DeviceInfo devInfo; |
| step = alignUpStep(step, devInfo.textureAlignment()); |
| } |
| |
| int64 _nettosize = (int64)step*rows; |
| size_t nettosize = (size_t)_nettosize; |
| |
| if (_nettosize != (int64)nettosize) |
| CV_Error(cv::Error::StsNoMem, "Too big buffer is allocated"); |
| |
| size_t datasize = alignSize(nettosize, (int)sizeof(*refcount)); |
| |
| void* ptr = 0; |
| |
| switch (alloc_type) |
| { |
| case PAGE_LOCKED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocDefault) ); break; |
| case SHARED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocMapped) ); break; |
| case WRITE_COMBINED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocWriteCombined) ); break; |
| default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type"); |
| } |
| |
| datastart = data = (uchar*)ptr; |
| dataend = data + nettosize; |
| |
| refcount = (int*)cv::fastMalloc(sizeof(*refcount)); |
| *refcount = 1; |
| } |
| #endif |
| } |
| |
| HostMem cv::cuda::HostMem::reshape(int new_cn, int new_rows) const |
| { |
| HostMem hdr = *this; |
| |
| int cn = channels(); |
| if (new_cn == 0) |
| new_cn = cn; |
| |
| int total_width = cols * cn; |
| |
| if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0) |
| new_rows = rows * total_width / new_cn; |
| |
| if (new_rows != 0 && new_rows != rows) |
| { |
| int total_size = total_width * rows; |
| |
| if (!isContinuous()) |
| CV_Error(cv::Error::BadStep, "The matrix is not continuous, thus its number of rows can not be changed"); |
| |
| if ((unsigned)new_rows > (unsigned)total_size) |
| CV_Error(cv::Error::StsOutOfRange, "Bad new number of rows"); |
| |
| total_width = total_size / new_rows; |
| |
| if (total_width * new_rows != total_size) |
| CV_Error(cv::Error::StsBadArg, "The total number of matrix elements is not divisible by the new number of rows"); |
| |
| hdr.rows = new_rows; |
| hdr.step = total_width * elemSize1(); |
| } |
| |
| int new_width = total_width / new_cn; |
| |
| if (new_width * new_cn != total_width) |
| CV_Error(cv::Error::BadNumChannels, "The total width is not divisible by the new number of channels"); |
| |
| hdr.cols = new_width; |
| hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT); |
| |
| return hdr; |
| } |
| |
| void cv::cuda::HostMem::release() |
| { |
| #ifdef HAVE_CUDA |
| if (refcount && CV_XADD(refcount, -1) == 1) |
| { |
| cudaFreeHost(datastart); |
| fastFree(refcount); |
| } |
| |
| dataend = data = datastart = 0; |
| step = rows = cols = 0; |
| refcount = 0; |
| #endif |
| } |
| |
| GpuMat cv::cuda::HostMem::createGpuMatHeader() const |
| { |
| #ifndef HAVE_CUDA |
| throw_no_cuda(); |
| return GpuMat(); |
| #else |
| CV_Assert( alloc_type == SHARED ); |
| |
| void *pdev; |
| cudaSafeCall( cudaHostGetDevicePointer(&pdev, data, 0) ); |
| |
| return GpuMat(rows, cols, type(), pdev, step); |
| #endif |
| } |
| |
| void cv::cuda::registerPageLocked(Mat& m) |
| { |
| #ifndef HAVE_CUDA |
| (void) m; |
| throw_no_cuda(); |
| #else |
| CV_Assert( m.isContinuous() ); |
| cudaSafeCall( cudaHostRegister(m.data, m.step * m.rows, cudaHostRegisterPortable) ); |
| #endif |
| } |
| |
| void cv::cuda::unregisterPageLocked(Mat& m) |
| { |
| #ifndef HAVE_CUDA |
| (void) m; |
| #else |
| cudaSafeCall( cudaHostUnregister(m.data) ); |
| #endif |
| } |