| /** | 
 |  * Copyright (c) 2016-present, Facebook, Inc. | 
 |  * | 
 |  * Licensed under the Apache License, Version 2.0 (the "License"); | 
 |  * you may not use this file except in compliance with the License. | 
 |  * You may obtain a copy of the License at | 
 |  * | 
 |  *     http://www.apache.org/licenses/LICENSE-2.0 | 
 |  * | 
 |  * Unless required by applicable law or agreed to in writing, software | 
 |  * distributed under the License is distributed on an "AS IS" BASIS, | 
 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
 |  * See the License for the specific language governing permissions and | 
 |  * limitations under the License. | 
 |  */ | 
 |  | 
/* SampleAs by Kaiming He for Mask R-CNN.
Requires X.dim32(0) == L.dim32(0).
Y consists of the samples of X (slices along dim 0) whose label in L is > 0.
*/
 | #include <cfloat> | 
 |  | 
 | #include "caffe2/core/context_gpu.h" | 
 | #include "modules/detectron/sample_as_op.h" | 
 |  | 
 | #include <stdio.h> | 
 |  | 
 | namespace caffe2 { | 
 |  | 
 | template <> | 
 | bool SampleAsOp<float, CUDAContext>::RunOnDevice() { | 
 |   auto& X = Input(0); // Input data to be sliced | 
 |   auto& L = Input(1); // Target data that provide the identity | 
 |  | 
 |   CAFFE_ENFORCE( | 
 |       X.dim32(0) == L.dim32(0), | 
 |       "X.dim32(0) must be equal to L.dim32(0)", | 
 |       "(", | 
 |       X.dim32(0), | 
 |       " vs. ", | 
 |       L.dim32(0), | 
 |       ")"); | 
 |  | 
 |   // copy L to CPU: | 
 |   std::vector<int> labels(L.dim32(0)); | 
 |   context_.CopyBytes<CUDAContext, CPUContext>( | 
 |       L.dim32(0) * sizeof(int), L.data<int>(), &labels[0]); | 
 |   // Make sure that the copy is finished | 
 |   context_.FinishDeviceComputation(); | 
 |  | 
 |   int count = 0; | 
 |   for (int i = 0; i < L.dim32(0); i++) { | 
 |     if (labels[i] > 0) { | 
 |       count++; | 
 |     } | 
 |   } | 
 |   assert(count > 0); | 
 |  | 
 |   // resize Y | 
 |   vector<int64_t> out_shape(X.sizes().vec()); | 
 |   out_shape[0] = count; | 
 |   auto* Y = Output(0, out_shape, at::dtype<float>()); // Sliced data (Y.dim32(0) = num of (L > 0)) | 
 |  | 
 |   const int len = X.size() / X.dim32(0); | 
 |  | 
 |   float* output = Y->mutable_data<float>(); | 
 |   for (int i = 0; i < L.dim32(0); i++) { | 
 |     if (labels[i] > 0) { | 
 |       context_.CopyBytes<CUDAContext, CUDAContext>( | 
 |           len * sizeof(float), X.data<float>() + i * len, output); | 
 |       output += len; | 
 |     } // if | 
 |   } // i | 
 |  | 
 |   return true; | 
 | } | 
 |  | 
 | template <> | 
 | bool SampleAsGradientOp<float, CUDAContext>::RunOnDevice() { | 
 |   auto& X = Input(0); | 
 |   auto& L = Input(1); | 
 |   auto& dY = Input(2); | 
 |  | 
 |  | 
 |   auto* dX = Output(0, X.sizes(), at::dtype<float>()); | 
 |  | 
 |   // copy L to CPU: | 
 |   std::vector<int> labels(L.dim32(0)); | 
 |   context_.CopyBytes<CUDAContext, CPUContext>( | 
 |       L.dim32(0) * sizeof(int), L.data<int>(), &labels[0]); | 
 |   // Make sure that the copy is finished | 
 |   context_.FinishDeviceComputation(); | 
 |  | 
 |   // zero-out dX | 
 |   math::Set<float, CUDAContext>( | 
 |       dX->size(), 0.f, dX->mutable_data<float>(), &context_); | 
 |  | 
 |   const int len = X.size() / X.dim32(0); | 
 |  | 
 |   const float* input = dY.data<float>(); | 
 |   for (int i = 0; i < L.dim32(0); i++) { | 
 |     if (labels[i] > 0) { | 
 |       context_.CopyBytes<CUDAContext, CUDAContext>( | 
 |           len * sizeof(float), input, dX->mutable_data<float>() + i * len); | 
 |       input += len; | 
 |     } // if | 
 |   } // i | 
 |  | 
 |   return true; | 
 | } | 
 |  | 
 | REGISTER_CUDA_OPERATOR(SampleAs, SampleAsOp<float, CUDAContext>); | 
 | REGISTER_CUDA_OPERATOR( | 
 |     SampleAsGradient, | 
 |     SampleAsGradientOp<float, CUDAContext>); | 
 | } // namespace caffe2 |