blob: 1ba549bb8d7ad5398edc60d87e92f8b7dc627a19 [file]
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <executorch/backends/xnnpack/runtime/XNNExecutor.h>
namespace executorch {
namespace backends {
namespace xnnpack {
namespace delegate {
using executorch::aten::ScalarType;
using executorch::aten::SizesType;
using executorch::aten::Tensor;
using executorch::runtime::BackendExecutionContext;
using executorch::runtime::Error;
using executorch::runtime::EValue;
using executorch::runtime::is_contiguous_dim_order;
using executorch::runtime::kTensorDimensionLimit;
/**
 * Initializes the XNNExecutor with the runtime and given number of
 * inputs/outputs externals_ is resized to the total number of inputs and
 * outputs
 */
ET_NODISCARD Error XNNExecutor::initialize(
    xnn_runtime_t runtime,
    std::vector<uint32_t>&& input_ids,
    std::vector<uint32_t>&& output_ids) {
  // Take ownership of the runtime handle; it is released with
  // xnn_delete_runtime when this executor is destroyed.
  runtime_ = std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)>(
      runtime, xnn_delete_runtime);

  // A profiler failure is logged but is not fatal to initialization.
  const Error profiler_status = profiler_.initialize(runtime);
  if (profiler_status != Error::Ok) {
    ET_LOG(
        Error,
        "Failed to start profiling: %u.",
        static_cast<unsigned int>(profiler_status));
  }

  // Record the external value IDs for inputs and outputs, kept sorted so
  // each position in externals_ maps deterministically to an external ID.
  input_ids_ = std::move(input_ids);
  output_ids_ = std::move(output_ids);
  std::sort(input_ids_.begin(), input_ids_.end());
  std::sort(output_ids_.begin(), output_ids_.end());

  // One external value slot per input plus per output.
  externals_.resize(input_ids_.size() + output_ids_.size());

  return Error::Ok;
}
/**
 * Prepares the args for XNNPACK Runtime.
 *
 * Creates an array of xnn_externals_values from the EValues passed in.
 * Reshapes all the external input tensors, in case any input shapes have
 * changed. Then reshapes the entire runtime, propagating shape information
 * through the runtime.
 *
 * Note: the external ids given to the external tensors in the XNNPACK
 * runtime correspond to their index in the list of arg passed into
 * delegate->execute()
 */
ET_NODISCARD Error XNNExecutor::prepare_args(EValue** args) {
  // Create xnn_externals_value from evalue args. The first
  // input_ids_.size() slots are inputs; the remainder are outputs.
  xnn_status status;
  for (uint32_t i = 0; i < externals_.size(); ++i) {
    if (i < input_ids_.size()) {
      externals_[i].id = input_ids_[i];
    } else {
      externals_[i].id = output_ids_[i - input_ids_.size()];
    }
    uint32_t ext_id = externals_[i].id;
    ET_CHECK_OR_RETURN_ERROR(
        args[ext_id]->isTensor(),
        InvalidArgument,
        "Expected argument to delegate at index %u to be a Tensor, but got %" PRIu32,
        i,
        static_cast<uint32_t>(args[ext_id]->tag));
    Tensor* tensor = &args[ext_id]->toTensor();
    externals_[i].data = tensor->mutable_data_ptr<float>();
    // Reshape runtime inputs in case input shapes changed since last run.
    if (i < input_ids_.size()) {
      size_t num_dims = tensor->dim();
      ET_CHECK_OR_RETURN_ERROR(
          is_contiguous_dim_order(tensor->dim_order().data(), tensor->dim()),
          Internal,
          "Expecting default dim_order but got a non default dim_order tensor for external input %u",
          i);
      size_t dims[XNN_MAX_TENSOR_DIMS];
      ET_CHECK_OR_RETURN_ERROR(
          num_dims <= XNN_MAX_TENSOR_DIMS,
          InvalidArgument,
          "XNNPACK backend accepts tensors with at most %d dims, but got %zu",
          XNN_MAX_TENSOR_DIMS,
          num_dims);
      // size_t index avoids a signed/unsigned comparison against num_dims.
      for (size_t d = 0; d < num_dims; ++d) {
        dims[d] = tensor->size(d);
      }
      status =
          xnn_reshape_external_value(runtime_.get(), ext_id, num_dims, dims);
      ET_CHECK_OR_RETURN_ERROR(
          status == xnn_status_success,
          Internal,
          "Internal Error: Reshape Input Tensor Failed with code: %s",
          xnn_status_to_string(status));
    }
  }
  // Propagate input shapes through the graph and re-plan memory so
  // intermediate allocations fit the (possibly changed) shapes.
  status = xnn_reshape_runtime(runtime_.get());
  ET_CHECK_OR_RETURN_ERROR(
      status == xnn_status_success,
      Internal,
      "Internal Error: Propagating input shapes failed with code: %s",
      xnn_status_to_string(status));
  return Error::Ok;
}
/**
 * Runs the XNNPACK Runtime.
 *
 * We first setup the runtime by feeding the externals_ to runtime setup.
 * After which we then execute the runtime through invoke_runtime.
 */
ET_NODISCARD Error XNNExecutor::forward(BackendExecutionContext& context) {
  // A null runtime means compilation of the delegate payload failed.
  ET_CHECK_OR_RETURN_ERROR(
      runtime_ != nullptr,
      Internal,
      "XNNPACK Delegate did not compile correctly");

  // Bind the external input/output values gathered by prepare_args().
  xnn_status xnn_code = xnn_setup_runtime_v2(
      runtime_.get(), externals_.size(), externals_.data());
  ET_CHECK_OR_RETURN_ERROR(
      xnn_code == xnn_status_success,
      Internal,
      "Internal Error: Setting up the runtime failed with code: %s",
      xnn_status_to_string(xnn_code));

  // Profiling problems are logged, never fatal to execution.
  Error profiler_err = profiler_.start(context.event_tracer());
  if (profiler_err != Error::Ok) {
    ET_LOG(
        Error,
        "Failed to start profiling: %u.",
        static_cast<unsigned int>(profiler_err));
  }

  xnn_code = xnn_invoke_runtime(runtime_.get());

  profiler_err = profiler_.end();
  if (profiler_err != Error::Ok) {
    ET_LOG(
        Error,
        "Failed to end profiling: %u.",
        static_cast<unsigned int>(profiler_err));
  }

  ET_CHECK_OR_RETURN_ERROR(
      xnn_code == xnn_status_success,
      Internal,
      "XNN Runtime invoke failed with code: %s",
      xnn_status_to_string(xnn_code));

  return Error::Ok;
}
/**
 * Prepares the outputs for ExecuTorch
 *
 * Resizes the output tensors based on the output shapes returned by
 * the xnnpack runtime.
 *
 * Note: For arg_max pooling, we recast the output index tensor. Since
 * XNNPACK gives the index tensor to us as int32, we need to convert it
 * back to int64 for ExecuTorch.
 */
ET_NODISCARD Error XNNExecutor::resize_outputs(EValue** args) const {
  // Outputs occupy the tail of externals_, after all inputs.
  size_t output_idx_start = input_ids_.size();
  for (size_t i = output_idx_start; i < externals_.size(); ++i) {
    uint32_t ext_id = externals_[i].id;
    Tensor* out_tensor = &args[ext_id]->toTensor();
    size_t num_dim;
    size_t dims[XNN_MAX_TENSOR_DIMS];
    // Fetch the updated output shapes from xnnpack runtime
    xnn_status status =
        xnn_get_external_value_shape(runtime_.get(), ext_id, &num_dim, dims);
    ET_CHECK_OR_RETURN_ERROR(
        status == xnn_status_success,
        Internal,
        "Internal Error: Failed to retrieve graph output shapes");
    // Convert new output shape into SizesType
    SizesType expected_output_size[kTensorDimensionLimit];
    for (size_t d = 0; d < num_dim; ++d) {
      expected_output_size[d] = static_cast<SizesType>(dims[d]);
    }
    executorch::aten::ArrayRef<SizesType> output_size{
        expected_output_size, static_cast<size_t>(num_dim)};
    ET_LOG(Debug, "Resizing output tensor to a new shape");
    Error err = resize_tensor(*out_tensor, output_size);
    if (err != Error::Ok) {
      ET_LOG(Error, "Failed to resize output tensor for XNNExecutor");
      return err;
    }
    // Output datatype is int64. However, XNNPACK doesn't support
    // int64. This means that the data was put into this tensor
    // by XNNPACK as int32 and needs to be copied to int64 form
    if (out_tensor->scalar_type() == ScalarType::Long) {
      int64_t* data_64 = out_tensor->mutable_data_ptr<int64_t>();
      const int32_t* data_32 = out_tensor->const_data_ptr<int32_t>();
      // Widen in place from back to front so int32 values aren't
      // overwritten before they are read. The original loop used
      // `size_t j = numel() - 1; j >= 0; --j`, which never terminates
      // (j is unsigned, so j >= 0 is always true) and underflows when
      // numel() == 0; counting down with `j > 0` fixes both.
      for (size_t j = out_tensor->numel(); j > 0; --j) {
        data_64[j - 1] = data_32[j - 1];
      }
    }
  }
  return Error::Ok;
}
} // namespace delegate
} // namespace xnnpack
} // namespace backends
} // namespace executorch