/**
 * Copyright (c) 2016-present, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"

extern "C" {
#include <THNN.h>
}

namespace caffe2 {

namespace {

using UniqueTHFloatTensor =
    std::unique_ptr<THFloatTensor, decltype(&THFloatTensor_free)>;

// Wrap a caffe2 TensorCPU's data in a THFloatTensor without copying. The
// returned TH tensor aliases the caffe2 tensor's storage, so it must not
// outlive `tensor`.
UniqueTHFloatTensor aliasFromTensorCPU(TensorCPU* tensor) {
  // A zero-rank tensor has no data to alias, so hand back an empty TH tensor.
  if (!tensor->ndim()) {
    return UniqueTHFloatTensor(THFloatTensor_new(), THFloatTensor_free);
  }

  THLongStorage* thshape = THLongStorage_newWithSize(tensor->ndim());
  for (int i = 0; i < tensor->ndim(); ++i) {
    THLongStorage_set(thshape, i, tensor->dim(i));
  }
  THFloatStorage* storage = THFloatStorage_newWithData(
      tensor->template mutable_data<float>(), tensor->size());
  // Clear TH_STORAGE_FREEMEM so TH never free()s a buffer that caffe2 owns.
  THFloatStorage_clearFlag(storage, TH_STORAGE_FREEMEM);
  auto* th = THFloatTensor_newWithStorage(storage, 0, thshape, nullptr);
  // The TH tensor retains the storage, so drop our local references.
  THFloatStorage_free(storage);
  THLongStorage_free(thshape);
  CAFFE_ENFORCE_EQ(
      THFloatTensor_storage(th)->data, tensor->template mutable_data<float>());
  return UniqueTHFloatTensor(th, THFloatTensor_free);
}
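
// A minimal sketch of the intended round trip (hypothetical tensor `t`; not
// part of this file):
//
//   TensorCPU t(std::vector<TIndex>{2, 3});
//   t.mutable_data<float>();            // allocate the buffer
//   auto th = aliasFromTensorCPU(&t);   // no copy; `th` aliases `t`
//   // ... run a THNN kernel on th.get() ...
//   copyToTensorCPU(std::move(th), &t); // copies only if TH reallocated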

// Copy the contents of a TH tensor back into a caffe2 TensorCPU. When the TH
// tensor still aliases the caffe2 tensor's storage (i.e. THNN never
// reallocated), the copy is skipped.
void copyToTensorCPU(UniqueTHFloatTensor th, TensorCPU* tensor) {
  // TODO - if th and tensor point to the same data and have the same
  // size, elide the copy!
  th = UniqueTHFloatTensor(
      THFloatTensor_newContiguous(th.get()), THFloatTensor_free);
  const auto dims = std::vector<TIndex>(
      th->size, th->size + THFloatTensor_nDimension(th.get()));
  auto* storage = THFloatTensor_storage(th.get());
  // Short-circuit if we never reallocated in TH.
  if (dims == tensor->dims() &&
      storage->data == tensor->template data<float>()) {
    THFloatStorage_clearFlag(storage, TH_STORAGE_FREEMEM);
    return;
  }
  tensor->Resize(dims);
  CPUContext ctx;
  ctx.Copy<float, CPUContext, CPUContext>(
      tensor->size(), storage->data, tensor->mutable_data<float>());
}
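
// Ownership summary for the two helpers above (TH storages are refcounted):
// aliasFromTensorCPU clears TH_STORAGE_FREEMEM so TH frees only its own
// metadata, never caffe2's buffer, and copyToTensorCPU defensively re-clears
// the flag on the aliasing path before the TH tensor is released.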

// _Everything_ below here can be autogenerated with the TBD
// THNN/THCUNN schema. This is just a proof of concept.

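// For reference, ELU (Clevert et al., 2015) computes
//   y = x                       if x > 0
//   y = alpha * (exp(x) - 1)    otherwise,
// and its gradient, written in terms of the saved output y, is
//   dx = dy                     if x > 0
//   dx = dy * (y + alpha)       otherwise,
// which is why updateGradInput below takes the forward output as an input.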
class THNNELUCPUOp final : public Operator<CPUContext> {
 public:
  USE_OPERATOR_FUNCTIONS(CPUContext);
  using Operator<CPUContext>::Operator;
  bool RunOnDevice() override {
    // TODO - we can autogenerate this from a schema.
    auto X = aliasFromTensorCPU(const_cast<TensorCPU*>(&Input(0)));
    auto Y = aliasFromTensorCPU(Output(0));
    THNN_FloatELU_updateOutput(
        nullptr, // no THNNState needed on CPU
        X.get(),
        Y.get(),
        GetSingleArgument<float>("alpha", 1.0f),
        &Input(0) == Output(0) /* inplace */);
    copyToTensorCPU(std::move(Y), Output(0));
    return true;
  }
};

class THNNELUCPUGradientOp final : public Operator<CPUContext> {
 public:
  USE_OPERATOR_FUNCTIONS(CPUContext);
  using Operator<CPUContext>::Operator;

  bool RunOnDevice() override {
    // TODO - we can autogenerate this from a schema.
    // Inputs are (X, Y, dY), matching GetELUGradient below.
    auto X = aliasFromTensorCPU(const_cast<TensorCPU*>(&Input(0)));
    auto Y = aliasFromTensorCPU(const_cast<TensorCPU*>(&Input(1)));
    auto dY = aliasFromTensorCPU(const_cast<TensorCPU*>(&Input(2)));
    auto dX = aliasFromTensorCPU(Output(0));
    THNN_FloatELU_updateGradInput(
        nullptr, // no THNNState needed on CPU
        X.get(),
        dY.get(),
        dX.get(),
        Y.get(),
        GetSingleArgument<float>("alpha", 1.0f),
        &Input(2) == Output(0) /* inplace */);
    copyToTensorCPU(std::move(dX), Output(0));
    return true;
  }
};


REGISTER_CPU_OPERATOR_WITH_ENGINE(ELU, THNN, THNNELUCPUOp);
REGISTER_CPU_OPERATOR_WITH_ENGINE(ELUGradient, THNN, THNNELUCPUGradientOp);
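
// A minimal sketch of selecting this engine from an OperatorDef (blob names
// "X" and "Y" are illustrative):
//
//   OperatorDef def;
//   def.set_type("ELU");
//   def.set_engine("THNN"); // dispatch to THNNELUCPUOp instead of the default
//   def.add_input("X");
//   def.add_output("Y");
//   auto* alpha = def.add_arg();
//   alpha->set_name("alpha");
//   alpha->set_f(1.0f);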

// Hook ELU up to the generic gradient machinery: the gradient op takes
// (X, Y, dY) and produces dX, forwarding the original op's arguments
// (e.g. "alpha") via Def().arg().
class GetELUGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "ELUGradient",
        "",
        vector<string>{I(0), O(0), GO(0)},
        vector<string>{GI(0)},
        Def().arg());
  }
};
REGISTER_GRADIENT(ELU, GetELUGradient);
} // namespace
} // namespace caffe2