kernels/portable/cpu/op_glu.cpp - platform/external/executorch - Git at Google

 /*
  * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */

 #include <executorch/kernels/portable/cpu/util/activation_ops_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <executorch/runtime/platform/assert.h>
 #include <cinttypes>
 #include <cmath>
 #include <cstdint>

 namespace torch {
 namespace executor {
 namespace native {

 using Tensor = exec_aten::Tensor;
 using ScalarType = exec_aten::ScalarType;

 namespace {

 /** Double-precision exponential; chosen when the argument is a `double`. */
 double exp_overload(double d) {
   return std::exp(d);
 }

 /** Single-precision exponential; chosen when the argument is a `float`. */
 float exp_overload(float f) {
   // With a float argument std::exp resolves to its float overload, so the
   // value is not widened to double.
   return std::exp(f);
 }

 /**
  * Applies the logistic sigmoid, f(x) = 1 / (1 + e^{-x}), to every element of
  * `out`, overwriting each element with its result.
  *
  * @tparam CTYPE_OUT Element type used to read and write `out`'s data buffer.
  * @param out Tensor updated in place; all `out.numel()` elements are visited.
  */
 // TODO: T146333648, refactor this as a common helper function
 template <typename CTYPE_OUT>
 void sigmoid_tensor(Tensor& out) {
   CTYPE_OUT* const values = out.mutable_data_ptr<CTYPE_OUT>();
   // The element count does not change while iterating, so read it once
   // instead of calling numel() in every loop condition.
   const size_t count = static_cast<size_t>(out.numel());
   for (size_t idx = 0; idx < count; ++idx) {
     // The literals are doubles, so the sum and the division are evaluated in
     // double and narrowed on the store when CTYPE_OUT is float.
     values[idx] = 1.0 / (1.0 + exp_overload(-values[idx]));
   }
 }

 /**
  * Multiplies `out` in place by the first half of `in` along `dim`.
  *
  * For every leading index, the first `in.size(dim) / 2` slices of `in` are
  * multiplied element-wise into the slices of `out` that start at the
  * beginning of that leading index's region. Each input element is converted
  * to CTYPE_OUT before the multiplication.
  *
  * Offsets are computed as if both buffers are densely packed in
  * leading / dim / trailing order -- presumably guaranteed by the caller.
  *
  * @tparam CTYPE_IN Element type used to read `in`.
  * @tparam CTYPE_OUT Element type used to read and write `out`.
  * @param in Source tensor supplying the multiplicands.
  * @param dim Dimension being halved; used directly as an index, so it is
  *     expected to be non-negative.
  * @param out Tensor that is scaled in place.
  */
 template <typename CTYPE_IN, typename CTYPE_OUT>
 void mul_tensors(const Tensor& in, int64_t dim, Tensor& out) {
   const size_t in_extent = static_cast<size_t>(in.size(dim));
   const size_t out_extent = static_cast<size_t>(out.size(dim));
   const size_t half_extent = in_extent / 2;
   const size_t outer_count = getLeadingDims(in, dim);
   const size_t inner_count = getTrailingDims(in, dim);

   const CTYPE_IN* const src_base = in.const_data_ptr<CTYPE_IN>();
   CTYPE_OUT* const dst_base = out.mutable_data_ptr<CTYPE_OUT>();

   // Within one leading index the selected slices are adjacent in memory, so
   // they can be walked as a single run of half_extent * inner_count elements.
   const size_t run_length = half_extent * inner_count;
   for (size_t outer = 0; outer < outer_count; ++outer) {
     const CTYPE_IN* const src = src_base + outer * in_extent * inner_count;
     CTYPE_OUT* const dst = dst_base + outer * out_extent * inner_count;
     for (size_t pos = 0; pos < run_length; ++pos) {
       dst[pos] = static_cast<CTYPE_OUT>(src[pos]) * dst[pos];
     }
   }
 }

 /**
  * Copies the slices `[start, end)` of `in` along `dim` into the front of
  * `out` along the same dimension, converting each element from CTYPE_IN to
  * CTYPE_OUT.
  *
  * `in` and `out` are addressed with their own extents along `dim`, so the
  * two tensors may differ in size on that dimension. Offsets are computed as
  * if both buffers are densely packed in leading / dim / trailing order --
  * presumably guaranteed by the caller.
  *
  * @tparam CTYPE_IN Element type used to read `in`.
  * @tparam CTYPE_OUT Element type used to write `out`.
  * @param in Source tensor.
  * @param dim Dimension to slice; used directly as an index, so it is
  *     expected to be non-negative.
  * @param start First index along `dim` to copy; expected to be non-negative.
  * @param end One past the last index along `dim` to copy; expected to be
  *     at least `start`.
  * @param out Destination tensor.
  */
 template <typename CTYPE_IN, typename CTYPE_OUT>
 void slice_tensor(
     const Tensor& in,
     int64_t dim,
     int64_t start,
     int64_t end,
     Tensor& out) {
   const size_t slice_count = static_cast<size_t>(end - start);
   const size_t in_extent = static_cast<size_t>(in.size(dim));
   const size_t out_extent = static_cast<size_t>(out.size(dim));
   const size_t first_slice = static_cast<size_t>(start);
   const size_t outer_count = getLeadingDims(in, dim);
   const size_t inner_count = getTrailingDims(in, dim);

   const CTYPE_IN* const src_base = in.const_data_ptr<CTYPE_IN>();
   CTYPE_OUT* const dst_base = out.mutable_data_ptr<CTYPE_OUT>();

   // The requested slices of one leading index sit next to each other in
   // memory, so they are copied as one run of slice_count * inner_count
   // elements.
   const size_t run_length = slice_count * inner_count;
   for (size_t outer = 0; outer < outer_count; ++outer) {
     const CTYPE_IN* const src =
         src_base + (outer * in_extent + first_slice) * inner_count;
     CTYPE_OUT* const dst = dst_base + outer * out_extent * inner_count;
     for (size_t pos = 0; pos < run_length; ++pos) {
       dst[pos] = static_cast<CTYPE_OUT>(src[pos]);
     }
   }
 }

 /**
  * Computes the gated linear unit of `self` along `dim` into `out` for one
  * concrete (input type, output type) pair.
  *
  * The computation runs in three passes over `out`:
  *  1. copy the second half of `self` along `dim` into `out`,
  *  2. replace every element of `out` with its sigmoid,
  *  3. multiply `out` element-wise by the first half of `self` along `dim`.
  *
  * @tparam CTYPE_IN Element type of `self`.
  * @tparam CTYPE_OUT Element type of `out`.
  * @param self Input tensor.
  * @param dim Dimension to split; passed unchanged to the helpers, which use
  *     it directly as an index.
  * @param out Output tensor, overwritten with the result.
  * @return `out`.
  */
 template <typename CTYPE_IN, typename CTYPE_OUT>
 Tensor& glu_out_tensor(const Tensor& self, int64_t dim, Tensor& out) {
   const auto full_extent = self.size(dim);
   const auto gate_begin = full_extent / 2;

   // Gate values: sigmoid of the second half.
   slice_tensor<CTYPE_IN, CTYPE_OUT>(self, dim, gate_begin, full_extent, out);
   sigmoid_tensor<CTYPE_OUT>(out);

   // Scale the gate by the first half.
   mul_tensors<CTYPE_IN, CTYPE_OUT>(self, dim, out);
   return out;
 }
 } // namespace

 /**
  * Out-variant entry point of the gated linear unit (GLU) operator.
  *
  * Steps, in order:
  *  1. resize `out` via `resize_glu_out`,
  *  2. require `self` and `out` to have the same dim order,
  *  3. validate the arguments via `check_glu_args`,
  *  4. require `out` to be Float or Double,
  *  5. map a negative `dim` to its non-negative equivalent and dispatch on
  *     the dtypes of `self` and `out`.
  *
  * Every check is handed `InvalidArgument` and `out` as its failure
  * arguments; presumably ET_KERNEL_CHECK records that error on `ctx` and
  * returns `out` early -- confirm against the macro definition.
  *
  * @param ctx Kernel runtime context used by the checks and the dtype switch.
  * @param self Input tensor; dispatched through ET_SWITCH_FLOAT_TYPES.
  * @param dim Dimension to split in half; may be negative.
  * @param out Output tensor; must be Float or Double.
  * @return `out`.
  */
 Tensor&
 glu_out(RuntimeContext& ctx, const Tensor& self, int64_t dim, Tensor& out) {
   (void)ctx;

   ET_KERNEL_CHECK(
       ctx, resize_glu_out(self, dim, out) == Error::Ok, InvalidArgument, out);

   ET_KERNEL_CHECK(
       ctx, tensors_have_same_dim_order(self, out), InvalidArgument, out);

   ET_KERNEL_CHECK(ctx, check_glu_args(self, dim, out), InvalidArgument, out);

   // The dispatch below only instantiates float and double outputs. Reject
   // anything else explicitly so an unexpected dtype is never written through
   // a double pointer.
   const auto out_dtype = out.scalar_type();
   ET_KERNEL_CHECK(
       ctx,
       out_dtype == ScalarType::Float || out_dtype == ScalarType::Double,
       InvalidArgument,
       out);

   const size_t non_negative_dim = dim < 0 ? dim + self.dim() : dim;
   const auto in_dtype = self.scalar_type();

   ET_SWITCH_FLOAT_TYPES(in_dtype, ctx, "glu", CTYPE_IN, [&]() {
     if (out_dtype == ScalarType::Float) {
       glu_out_tensor<CTYPE_IN, float>(self, non_negative_dim, out);
     } else {
       glu_out_tensor<CTYPE_IN, double>(self, non_negative_dim, out);
     }
   });

   return out;
 }

 } // namespace native
 } // namespace executor
 } // namespace torch
	/*
	* Copyright (c) Meta Platforms, Inc. and affiliates.
	* All rights reserved.
	*
	* This source code is licensed under the BSD-style license found in the
	* LICENSE file in the root directory of this source tree.
	*/

	#include <executorch/kernels/portable/cpu/util/activation_ops_util.h>
	#include <executorch/runtime/kernel/kernel_includes.h>
	#include <executorch/runtime/platform/assert.h>
	#include <cinttypes>
	#include <cmath>
	#include <cstdint>

	namespace torch {
	namespace executor {
	namespace native {

	using Tensor = exec_aten::Tensor;
	using ScalarType = exec_aten::ScalarType;

	namespace {

	/** Double-precision exponential; chosen when the argument is a `double`. */
	double exp_overload(double d) {
	  return std::exp(d);
	}

	/** Single-precision exponential; chosen when the argument is a `float`. */
	float exp_overload(float f) {
	  // With a float argument std::exp resolves to its float overload, so the
	  // value is not widened to double.
	  return std::exp(f);
	}

	/**
	 * Applies the logistic sigmoid, f(x) = 1 / (1 + e^{-x}), to every element of
	 * `out`, overwriting each element with its result.
	 *
	 * @tparam CTYPE_OUT Element type used to read and write `out`'s data buffer.
	 * @param out Tensor updated in place; all `out.numel()` elements are visited.
	 */
	// TODO: T146333648, refactor this as a common helper function
	template <typename CTYPE_OUT>
	void sigmoid_tensor(Tensor& out) {
	  CTYPE_OUT* const values = out.mutable_data_ptr<CTYPE_OUT>();
	  // The element count does not change while iterating, so read it once
	  // instead of calling numel() in every loop condition.
	  const size_t count = static_cast<size_t>(out.numel());
	  for (size_t idx = 0; idx < count; ++idx) {
	    // The literals are doubles, so the sum and the division are evaluated in
	    // double and narrowed on the store when CTYPE_OUT is float.
	    values[idx] = 1.0 / (1.0 + exp_overload(-values[idx]));
	  }
	}

	/**
	 * Multiplies `out` in place by the first half of `in` along `dim`.
	 *
	 * For every leading index, the first `in.size(dim) / 2` slices of `in` are
	 * multiplied element-wise into the slices of `out` that start at the
	 * beginning of that leading index's region. Each input element is converted
	 * to CTYPE_OUT before the multiplication.
	 *
	 * Offsets are computed as if both buffers are densely packed in
	 * leading / dim / trailing order -- presumably guaranteed by the caller.
	 *
	 * @tparam CTYPE_IN Element type used to read `in`.
	 * @tparam CTYPE_OUT Element type used to read and write `out`.
	 * @param in Source tensor supplying the multiplicands.
	 * @param dim Dimension being halved; used directly as an index, so it is
	 *     expected to be non-negative.
	 * @param out Tensor that is scaled in place.
	 */
	template <typename CTYPE_IN, typename CTYPE_OUT>
	void mul_tensors(const Tensor& in, int64_t dim, Tensor& out) {
	  const size_t in_extent = static_cast<size_t>(in.size(dim));
	  const size_t out_extent = static_cast<size_t>(out.size(dim));
	  const size_t half_extent = in_extent / 2;
	  const size_t outer_count = getLeadingDims(in, dim);
	  const size_t inner_count = getTrailingDims(in, dim);

	  const CTYPE_IN* const src_base = in.const_data_ptr<CTYPE_IN>();
	  CTYPE_OUT* const dst_base = out.mutable_data_ptr<CTYPE_OUT>();

	  // Within one leading index the selected slices are adjacent in memory, so
	  // they can be walked as a single run of half_extent * inner_count elements.
	  const size_t run_length = half_extent * inner_count;
	  for (size_t outer = 0; outer < outer_count; ++outer) {
	    const CTYPE_IN* const src = src_base + outer * in_extent * inner_count;
	    CTYPE_OUT* const dst = dst_base + outer * out_extent * inner_count;
	    for (size_t pos = 0; pos < run_length; ++pos) {
	      dst[pos] = static_cast<CTYPE_OUT>(src[pos]) * dst[pos];
	    }
	  }
	}

	/**
	 * Copies the slices `[start, end)` of `in` along `dim` into the front of
	 * `out` along the same dimension, converting each element from CTYPE_IN to
	 * CTYPE_OUT.
	 *
	 * `in` and `out` are addressed with their own extents along `dim`, so the
	 * two tensors may differ in size on that dimension. Offsets are computed as
	 * if both buffers are densely packed in leading / dim / trailing order --
	 * presumably guaranteed by the caller.
	 *
	 * @tparam CTYPE_IN Element type used to read `in`.
	 * @tparam CTYPE_OUT Element type used to write `out`.
	 * @param in Source tensor.
	 * @param dim Dimension to slice; used directly as an index, so it is
	 *     expected to be non-negative.
	 * @param start First index along `dim` to copy; expected to be non-negative.
	 * @param end One past the last index along `dim` to copy; expected to be
	 *     at least `start`.
	 * @param out Destination tensor.
	 */
	template <typename CTYPE_IN, typename CTYPE_OUT>
	void slice_tensor(
	    const Tensor& in,
	    int64_t dim,
	    int64_t start,
	    int64_t end,
	    Tensor& out) {
	  const size_t slice_count = static_cast<size_t>(end - start);
	  const size_t in_extent = static_cast<size_t>(in.size(dim));
	  const size_t out_extent = static_cast<size_t>(out.size(dim));
	  const size_t first_slice = static_cast<size_t>(start);
	  const size_t outer_count = getLeadingDims(in, dim);
	  const size_t inner_count = getTrailingDims(in, dim);

	  const CTYPE_IN* const src_base = in.const_data_ptr<CTYPE_IN>();
	  CTYPE_OUT* const dst_base = out.mutable_data_ptr<CTYPE_OUT>();

	  // The requested slices of one leading index sit next to each other in
	  // memory, so they are copied as one run of slice_count * inner_count
	  // elements.
	  const size_t run_length = slice_count * inner_count;
	  for (size_t outer = 0; outer < outer_count; ++outer) {
	    const CTYPE_IN* const src =
	        src_base + (outer * in_extent + first_slice) * inner_count;
	    CTYPE_OUT* const dst = dst_base + outer * out_extent * inner_count;
	    for (size_t pos = 0; pos < run_length; ++pos) {
	      dst[pos] = static_cast<CTYPE_OUT>(src[pos]);
	    }
	  }
	}

	/**
	 * Computes the gated linear unit of `self` along `dim` into `out` for one
	 * concrete (input type, output type) pair.
	 *
	 * The computation runs in three passes over `out`:
	 *  1. copy the second half of `self` along `dim` into `out`,
	 *  2. replace every element of `out` with its sigmoid,
	 *  3. multiply `out` element-wise by the first half of `self` along `dim`.
	 *
	 * @tparam CTYPE_IN Element type of `self`.
	 * @tparam CTYPE_OUT Element type of `out`.
	 * @param self Input tensor.
	 * @param dim Dimension to split; passed unchanged to the helpers, which use
	 *     it directly as an index.
	 * @param out Output tensor, overwritten with the result.
	 * @return `out`.
	 */
	template <typename CTYPE_IN, typename CTYPE_OUT>
	Tensor& glu_out_tensor(const Tensor& self, int64_t dim, Tensor& out) {
	  const auto full_extent = self.size(dim);
	  const auto gate_begin = full_extent / 2;

	  // Gate values: sigmoid of the second half.
	  slice_tensor<CTYPE_IN, CTYPE_OUT>(self, dim, gate_begin, full_extent, out);
	  sigmoid_tensor<CTYPE_OUT>(out);

	  // Scale the gate by the first half.
	  mul_tensors<CTYPE_IN, CTYPE_OUT>(self, dim, out);
	  return out;
	}
	} // namespace

	/**
	 * Out-variant entry point of the gated linear unit (GLU) operator.
	 *
	 * Steps, in order:
	 *  1. resize `out` via `resize_glu_out`,
	 *  2. require `self` and `out` to have the same dim order,
	 *  3. validate the arguments via `check_glu_args`,
	 *  4. require `out` to be Float or Double,
	 *  5. map a negative `dim` to its non-negative equivalent and dispatch on
	 *     the dtypes of `self` and `out`.
	 *
	 * Every check is handed `InvalidArgument` and `out` as its failure
	 * arguments; presumably ET_KERNEL_CHECK records that error on `ctx` and
	 * returns `out` early -- confirm against the macro definition.
	 *
	 * @param ctx Kernel runtime context used by the checks and the dtype switch.
	 * @param self Input tensor; dispatched through ET_SWITCH_FLOAT_TYPES.
	 * @param dim Dimension to split in half; may be negative.
	 * @param out Output tensor; must be Float or Double.
	 * @return `out`.
	 */
	Tensor&
	glu_out(RuntimeContext& ctx, const Tensor& self, int64_t dim, Tensor& out) {
	  (void)ctx;

	  ET_KERNEL_CHECK(
	      ctx, resize_glu_out(self, dim, out) == Error::Ok, InvalidArgument, out);

	  ET_KERNEL_CHECK(
	      ctx, tensors_have_same_dim_order(self, out), InvalidArgument, out);

	  ET_KERNEL_CHECK(ctx, check_glu_args(self, dim, out), InvalidArgument, out);

	  // The dispatch below only instantiates float and double outputs. Reject
	  // anything else explicitly so an unexpected dtype is never written through
	  // a double pointer.
	  const auto out_dtype = out.scalar_type();
	  ET_KERNEL_CHECK(
	      ctx,
	      out_dtype == ScalarType::Float || out_dtype == ScalarType::Double,
	      InvalidArgument,
	      out);

	  const size_t non_negative_dim = dim < 0 ? dim + self.dim() : dim;
	  const auto in_dtype = self.scalar_type();

	  ET_SWITCH_FLOAT_TYPES(in_dtype, ctx, "glu", CTYPE_IN, [&]() {
	    if (out_dtype == ScalarType::Float) {
	      glu_out_tensor<CTYPE_IN, float>(self, non_negative_dim, out);
	    } else {
	      glu_out_tensor<CTYPE_IN, double>(self, non_negative_dim, out);
	    }
	  });

	  return out;
	}

	} // namespace native
	} // namespace executor
	} // namespace torch