/**
 * Copyright (c) 2016-present, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef CAFFE2_OPT_FUSION_H_
#define CAFFE2_OPT_FUSION_H_

|  | #include "caffe2/core/workspace.h" | 
|  | #include "nomnigraph/Representations/NeuralNet.h" | 
|  |  | 
|  | namespace caffe2 { | 
|  | namespace opt { | 
|  |  | 
|  | using namespace nom; | 
|  |  | 
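// Fuses Conv + BatchNorm (SpatialBN) pairs in `nn`, folding the batch-norm
// parameters held in `ws` into the convolution's weight and bias blobs.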
TORCH_API void fuseConvBN(repr::NNModule* nn, caffe2::Workspace* ws);

// Generic activation fusion helper.
//
// \tparam OperationT The operator to be fused.
// \tparam ActivationT The activation to be fused.
// \param nn Neural network module to be modified in place.
// \param should_fuse Predicate that, given an operator (e.g. a conv), decides
// whether it should be fused with the subsequent activation (e.g. a relu).
// \param postprocess Functor to postprocess the fused operator node, attaching
// additional attributes if necessary.
template <typename OperationT, typename ActivationT>
C10_EXPORT void fuseActivation(
    repr::NNModule* nn,
    std::function<bool(const OperationT& conv)> should_fuse,
    std::function<void(repr::NNGraph::NodeRef conv_node)> postprocess) {
  for (auto node_pair : repr::nn::dataIterator<OperationT>(nn->dataFlow)) {
    repr::NNGraph::NodeRef conv_node;
    OperationT* conv;
    std::tie(conv, conv_node) = node_pair;

    // Check topological feasibility
    auto conv_outputs = repr::nn::getOutputs(conv_node);
    if (conv_outputs.size() != 1) {
      continue;
    }
    auto conv_output = conv_outputs.front();

    auto consumers = repr::nn::getConsumers(conv_output);
    if (consumers.size() != 1) {
      continue;
    }
    if (!repr::nn::is<ActivationT>(consumers.front())) {
      continue;
    }
    auto relu_node = consumers.front();

    auto relu_outputs = repr::nn::getOutputs(relu_node);
    if (relu_outputs.size() != 1) {
      continue;
    }

    // Check feasibility with application specific logic
    if (!should_fuse(*conv)) {
      continue;
    }

    // Ready to fuse
    auto relu_output = relu_outputs.front();
    auto output_tensor = repr::nn::get<repr::Tensor>(relu_output);
    auto output_node = relu_output;
    auto input_tensor =
        repr::nn::get<repr::Tensor>(repr::nn::getInputs(conv_node).front());

    // A conv cannot run in-place. Adopt the activation's output tensor as the
    // fused op's output only if doing so would not make the op's output name
    // equal to its input name; otherwise keep the op's own output tensor and
    // drop the activation's.
    if (output_tensor->getName() != input_tensor->getName()) {
      nn->dataFlow.replaceNode(conv_output, relu_output);
      nn->dataFlow.deleteNode(relu_node);
      nn->dataFlow.deleteNode(conv_output);
    } else {
      nn->dataFlow.replaceNode(relu_output, conv_output);
      output_tensor = repr::nn::get<repr::Tensor>(conv_output);
      output_node = conv_output;
      nn->dataFlow.deleteNode(relu_node);
      nn->dataFlow.deleteNode(relu_output);
    }

    // We may have accidentally made the next op in-place.
    // In future iterations of transformations this won't be an issue,
    // but current caffe2 predictor usage requires things like
    // external_input and output to be unchanged.
    bool rectify_inplace = false;
    for (auto& consumer : repr::nn::getConsumers(output_node)) {
      for (auto& consumer_output : repr::nn::getOutputs(consumer)) {
        auto co_name =
            repr::nn::get<repr::Tensor>(consumer_output)->getName();
        if (co_name == output_tensor->getName()) {
          rectify_inplace = true;
        }
      }
    }
    if (rectify_inplace) {
      auto new_output = nn->dataFlow.createNode(
          make_unique<repr::Tensor>(output_tensor->getName() + "_fusion_fix"));
      nn->dataFlow.replaceNode(output_node, new_output);
    }

    // Application specific logic for postprocessing the conv node
    postprocess(conv_node);
  }
}
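
// Example usage (illustrative sketch): assuming repr::Conv and repr::Relu
// from nomnigraph's NeuralNet representation as the operator and activation
// types, a Conv + Relu fusion pass could be invoked as:
//
//   fuseActivation<repr::Conv, repr::Relu>(
//       nn,
//       [](const repr::Conv& /* conv */) { return true; },  // fuse every pair
//       [](repr::NNGraph::NodeRef /* conv_node */) {});     // no postprocessing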

} // namespace opt
} // namespace caffe2

#endif // CAFFE2_OPT_FUSION_H_