// arm_compute/runtime/IScheduler.h - platform/external/ARMComputeLibrary - Git at Google

 /*
  * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
  * deal in the Software without restriction, including without limitation the
  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  * sell copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in all
  * copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 #ifndef ARM_COMPUTE_ISCHEDULER_H
 #define ARM_COMPUTE_ISCHEDULER_H

 #include "arm_compute/core/CPP/CPPTypes.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/experimental/Types.h"

 #include <functional>
 #include <limits>

 namespace arm_compute
 {
 // Forward declarations; ICPPKernel and Window appear below only as pointer/reference
 // parameters, so their full definitions are not needed in this header.
 class ICPPKernel;
 class ITensor;
 class Window;

 /** Scheduler interface to run kernels */
 class IScheduler
 {
 public:
     /** Policies that can be used to divide a workload between threads */
     enum class StrategyHint
     {
         STATIC,  /**< The workload is divided evenly among the threads */
         DYNAMIC, /**< The workload is divided dynamically through a bucket system */
     };

     /** Callable that maps a thread id onto a logical core id
      *
      * It is invoked with the thread index and the total number of cores,
      * and yields the index of the logical core the thread should be bound to.
      */
     using BindFunc = std::function<int(int, int)>;

     /** Sentinel value for arm_compute::IScheduler::Hints::_split_dimension
      *
      * When the split dimension holds this value the scheduler is free to break
      * the problem space down over as many dimensions as it wishes.
      */
     static constexpr unsigned int split_dimensions_all = std::numeric_limits<unsigned int>::max();

     /** Scheduler hints
      *
      * Bundle of preferences, chosen by the function, describing how a given workload should be split
      */
     class Hints
     {
     public:
         /** Build a set of hints
          *
          * @note Deliberately not explicit: a bare dimension index converts to a Hints object.
          *
          * @param[in] split_dimension Dimension of the kernel's execution window along which to split.
          * @param[in] strategy        (Optional) Strategy used for the split. Defaults to StrategyHint::STATIC.
          * @param[in] threshold       (Optional) Capping threshold for dynamic scheduling. Defaults to 0.
          */
         Hints(unsigned int split_dimension, StrategyHint strategy = StrategyHint::STATIC, int threshold = 0)
             : _split_dimension{ split_dimension },
               _strategy{ strategy },
               _threshold{ threshold }
         {
         }
         /** Update the split dimension hint
          *
          * @param[in] split_dimension Dimension of the kernel's execution window along which to split.
          *
          * @return Reference to this Hints object, so that setters can be chained
          */
         Hints &set_split_dimension(unsigned int split_dimension)
         {
             this->_split_dimension = split_dimension;
             return *this;
         }
         /** Query the preferred split dimension
          *
          * @return The split dimension currently stored
          */
         unsigned int split_dimension() const
         {
             return this->_split_dimension;
         }

         /** Update the strategy hint
          *
          * @param[in] strategy Preferred strategy for splitting the workload
          *
          * @return Reference to this Hints object, so that setters can be chained
          */
         Hints &set_strategy(StrategyHint strategy)
         {
             this->_strategy = strategy;
             return *this;
         }
         /** Query the preferred strategy for splitting the workload.
          *
          * @return The strategy currently stored
          */
         StrategyHint strategy() const
         {
             return this->_strategy;
         }
         /** Query the granule capping threshold used by dynamic scheduling.
          *
          * @return The capping threshold currently stored
          */
         int threshold() const
         {
             return this->_threshold;
         }

     private:
         unsigned int _split_dimension{ 0U };                /**< Dimension along which to split */
         StrategyHint _strategy{ StrategyHint::STATIC };     /**< Split strategy */
         int          _threshold{ 0 };                       /**< Dynamic scheduling capping threshold */
     };
     /** Callable type of a single workload; it receives the ThreadInfo of the executing thread */
     using Workload = std::function<void(const ThreadInfo &)>;
     /** Default constructor. */
     IScheduler();

     /** Virtual destructor so that concrete schedulers can be destroyed through this interface. */
     virtual ~IScheduler() = default;

     /** Configure how many threads the scheduler uses to run the kernels.
      *
      * @param[in] num_threads Number of threads to use; 0 requests one thread per CPU core available on the system.
      */
     virtual void set_num_threads(unsigned int num_threads) = 0;

     /** Configure how many threads the scheduler uses to run the kernels and pin them to logical cores through a binding function
      *
      * @param[in] num_threads Number of threads to use; 0 requests one thread per CPU core available on the system.
      * @param[in] func        Binding function that selects the logical core for each thread.
      */
     virtual void set_num_threads_with_affinity(unsigned int num_threads, BindFunc func);

     /** Query the number of threads the scheduler has in its pool.
      *
      * @return Number of threads available in the scheduler.
      */
     virtual unsigned int num_threads() const = 0;

     /** Run a kernel synchronously.
      *
      * @note Documented as executing in the caller's thread; NOTE(review): confirm this holds for every concrete scheduler.
      *
      * @param[in] kernel Kernel to execute.
      * @param[in] hints  Hints for the scheduler.
      */
     virtual void schedule(ICPPKernel *kernel, const Hints &hints) = 0;

     /** Run a kernel synchronously over an explicit window and tensor pack.
      *
      * @note Documented as executing in the caller's thread; NOTE(review): confirm this holds for every concrete scheduler.
      *
      * @param[in] kernel  Kernel to execute.
      * @param[in] hints   Hints for the scheduler.
      * @param[in] window  Window to use for kernel execution.
      * @param[in] tensors Pack containing the tensors to operate on.
      */
     virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors) = 0;

     /** Run every workload that is passed in
      *
      * @note No ordering is guaranteed between the workloads, nor is it guaranteed that they run in parallel.
      *
      * @param[in] workloads Array of workloads to run
      * @param[in] tag       String profiling tools can use to identify the workloads run by the scheduler (may be null).
      */
     virtual void run_tagged_workloads(std::vector<Workload> &workloads, const char *tag);

     /** Access the CPU information.
      *
      * @return CPU info.
      */
     CPUInfo &cpu_info();
     /** Suggest the best possible number of execution threads
      *
      * @warning When the best number of threads cannot be worked out,
      *          std::thread::hardware_concurrency() is returned, or 1 for bare metal builds
      *
      * @return Best possible number of execution threads to use
      */
     unsigned int num_threads_hint() const;

 protected:
     /** Run every workload that is passed in
      *
      * @note No ordering is guaranteed between the workloads, nor is it guaranteed that they run in parallel.
      *
      * @param[in] workloads Array of workloads to run
      */
     virtual void run_workloads(std::vector<Workload> &workloads) = 0;

     /** Scheduling logic shared by schedulers to execute the given kernel
      *
      * @param[in] kernel  Kernel to execute.
      * @param[in] hints   Hints for the scheduler.
      * @param[in] window  Window to use for kernel execution.
      * @param[in] tensors Pack containing the tensors to operate on.
      */
     void schedule_common(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors);

     /** Tune the number of windows to optimize performance
      * (meant for small workloads, where fewer threads might perform better)
      *
      * @param[in] window           Window to use for kernel execution
      * @param[in] split_dimension  Axis of dimension to split
      * @param[in] init_num_windows Initial number of sub-windows to split
      * @param[in] kernel           Kernel to execute
      * @param[in] cpu_info         The CPU platform used to create the context.
      *
      * @return Adjusted number of windows
      */
     std::size_t adjust_num_of_windows(const Window &window, std::size_t split_dimension, std::size_t init_num_windows, const ICPPKernel &kernel, const CPUInfo &cpu_info);

 private:
     unsigned int _num_threads_hint{}; /**< Presumably the value reported by num_threads_hint(); set outside this header - confirm */
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_ISCHEDULER_H */
	/*
	* Copyright (c) 2017-2021 Arm Limited.
	*
	* SPDX-License-Identifier: MIT
	*
	* Permission is hereby granted, free of charge, to any person obtaining a copy
	* of this software and associated documentation files (the "Software"), to
	* deal in the Software without restriction, including without limitation the
	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
	* sell copies of the Software, and to permit persons to whom the Software is
	* furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice shall be included in all
	* copies or substantial portions of the Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	* SOFTWARE.
	*/
	#ifndef ARM_COMPUTE_ISCHEDULER_H
	#define ARM_COMPUTE_ISCHEDULER_H

	#include "arm_compute/core/CPP/CPPTypes.h"
	#include "arm_compute/core/Types.h"
	#include "arm_compute/core/experimental/Types.h"

	#include <functional>
	#include <limits>

	namespace arm_compute
	{
	// Forward declarations; ICPPKernel and Window appear below only as pointer/reference
	// parameters, so their full definitions are not needed in this header.
	class ICPPKernel;
	class ITensor;
	class Window;

	/** Scheduler interface to run kernels */
	class IScheduler
	{
	public:
	    /** Strategies available to split a workload */
	    enum class StrategyHint
	    {
	        STATIC,  /**< Split the workload evenly among the threads */
	        DYNAMIC, /**< Split the workload dynamically using a bucket system */
	    };

	    /** Function used to map a given thread id to a logical core id
	     *
	     * Mapping function expects the thread index and total number of cores as input,
	     * and returns the logical core index to bind against
	     */
	    using BindFunc = std::function<int(int, int)>;

	    /** When arm_compute::IScheduler::Hints::_split_dimension is initialized with this value
	     * then the scheduler is free to break down the problem space over as many dimensions
	     * as it wishes
	     */
	    static constexpr unsigned int split_dimensions_all = std::numeric_limits<unsigned>::max();

	    /** Scheduler hints
	     *
	     * Collection of preferences set by the function regarding how to split a given workload
	     */
	    class Hints
	    {
	    public:
	        /** Constructor
	         *
	         * @note Intentionally left implicit so that a plain dimension index converts to a Hints object.
	         *
	         * @param[in] split_dimension Dimension along which to split the kernel's execution window.
	         * @param[in] strategy        (Optional) Split strategy.
	         * @param[in] threshold       (Optional) Dynamic scheduling capping threshold.
	         */
	        Hints(unsigned int split_dimension, StrategyHint strategy = StrategyHint::STATIC, int threshold = 0)
	            : _split_dimension(split_dimension), _strategy(strategy), _threshold(threshold)
	        {
	        }
	        /** Set the split_dimension hint
	         *
	         * @param[in] split_dimension Dimension along which to split the kernel's execution window.
	         *
	         * @return the Hints object
	         */
	        Hints &set_split_dimension(unsigned int split_dimension)
	        {
	            _split_dimension = split_dimension;
	            return *this;
	        }
	        /** Return the preferred split dimension
	         *
	         * @return The split dimension
	         */
	        unsigned int split_dimension() const
	        {
	            return _split_dimension;
	        }

	        /** Set the strategy hint
	         *
	         * @param[in] strategy Preferred strategy to use to split the workload
	         *
	         * @return the Hints object
	         */
	        Hints &set_strategy(StrategyHint strategy)
	        {
	            _strategy = strategy;
	            return *this;
	        }
	        /** Return the preferred strategy to use to split workload.
	         *
	         * @return The strategy
	         */
	        StrategyHint strategy() const
	        {
	            return _strategy;
	        }
	        /** Return the granule capping threshold to be used by dynamic scheduling.
	         *
	         * @return The capping threshold
	         */
	        int threshold() const
	        {
	            return _threshold;
	        }

	    private:
	        unsigned int _split_dimension{}; /**< Dimension along which to split */
	        StrategyHint _strategy{};        /**< Split strategy */
	        int          _threshold{};       /**< Dynamic scheduling capping threshold */
	    };
	    /** Signature for the workloads to execute */
	    using Workload = std::function<void(const ThreadInfo &)>;
	    /** Default constructor. */
	    IScheduler();

	    /** Destructor. */
	    virtual ~IScheduler() = default;

	    /** Sets the number of threads the scheduler will use to run the kernels.
	     *
	     * @param[in] num_threads If set to 0, then one thread per CPU core available on the system will be used, otherwise the number of threads specified.
	     */
	    virtual void set_num_threads(unsigned int num_threads) = 0;

	    /** Sets the number of threads the scheduler will use to run the kernels but also using a binding function to pin the threads to given logical cores
	     *
	     * @param[in] num_threads If set to 0, then one thread per CPU core available on the system will be used, otherwise the number of threads specified.
	     * @param[in] func        Binding function to use.
	     */
	    virtual void set_num_threads_with_affinity(unsigned int num_threads, BindFunc func);

	    /** Returns the number of threads that the scheduler has in its pool.
	     *
	     * @return Number of threads available in the scheduler.
	     */
	    virtual unsigned int num_threads() const = 0;

	    /** Runs the kernel in the same thread as the caller synchronously.
	     *
	     * @note NOTE(review): the "same thread" wording may only apply to some concrete schedulers - confirm.
	     *
	     * @param[in] kernel Kernel to execute.
	     * @param[in] hints  Hints for the scheduler.
	     */
	    virtual void schedule(ICPPKernel *kernel, const Hints &hints) = 0;

	    /** Runs the kernel in the same thread as the caller synchronously.
	     *
	     * @note NOTE(review): the "same thread" wording may only apply to some concrete schedulers - confirm.
	     *
	     * @param[in] kernel  Kernel to execute.
	     * @param[in] hints   Hints for the scheduler.
	     * @param[in] window  Window to use for kernel execution.
	     * @param[in] tensors Pack containing the tensors to operate on.
	     */
	    virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors) = 0;

	    /** Execute all the passed workloads
	     *
	     * @note there is no guarantee regarding the order in which the workloads will be executed or whether or not they will be executed in parallel.
	     *
	     * @param[in] workloads Array of workloads to run
	     * @param[in] tag       String that can be used by profiling tools to identify the workloads run by the scheduler (Can be null).
	     */
	    virtual void run_tagged_workloads(std::vector<Workload> &workloads, const char *tag);

	    /** Get CPU info.
	     *
	     * @return CPU info.
	     */
	    CPUInfo &cpu_info();
	    /** Get a hint for the best possible number of execution threads
	     *
	     * @warning In case we can't work out the best number of threads,
	     *          std::thread::hardware_concurrency() is returned else 1 in case of bare metal builds
	     *
	     * @return Best possible number of execution threads to use
	     */
	    unsigned int num_threads_hint() const;

	protected:
	    /** Execute all the passed workloads
	     *
	     * @note there is no guarantee regarding the order in which the workloads will be executed or whether or not they will be executed in parallel.
	     *
	     * @param[in] workloads Array of workloads to run
	     */
	    virtual void run_workloads(std::vector<Workload> &workloads) = 0;

	    /** Common scheduler logic to execute the given kernel
	     *
	     * @param[in] kernel  Kernel to execute.
	     * @param[in] hints   Hints for the scheduler.
	     * @param[in] window  Window to use for kernel execution.
	     * @param[in] tensors Pack containing the tensors to operate on.
	     */
	    void schedule_common(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors);

	    /** Adjust the number of windows to optimize the performance
	     * (used for small workloads where smaller number of threads might improve the performance)
	     *
	     * @param[in] window           Window to use for kernel execution
	     * @param[in] split_dimension  Axis of dimension to split
	     * @param[in] init_num_windows Initial number of sub-windows to split
	     * @param[in] kernel           Kernel to execute
	     * @param[in] cpu_info         The CPU platform used to create the context.
	     *
	     * @return Adjusted number of windows
	     */
	    std::size_t adjust_num_of_windows(const Window &window, std::size_t split_dimension, std::size_t init_num_windows, const ICPPKernel &kernel, const CPUInfo &cpu_info);

	private:
	    unsigned int _num_threads_hint = {}; /**< Presumably the value reported by num_threads_hint(); set outside this header - confirm */
	};
	} // namespace arm_compute
	#endif /* ARM_COMPUTE_ISCHEDULER_H */