// neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstController.h - platform/hardware/interfaces - Git at Google

 /*
  * Copyright (C) 2019 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
 #define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H

 #include "ExecutionBurstUtils.h"

 #include <android-base/macros.h>
 #include <android/hardware/neuralnetworks/1.0/types.h>
 #include <android/hardware/neuralnetworks/1.1/types.h>
 #include <android/hardware/neuralnetworks/1.2/IBurstCallback.h>
 #include <android/hardware/neuralnetworks/1.2/IBurstContext.h>
 #include <android/hardware/neuralnetworks/1.2/IPreparedModel.h>
 #include <android/hardware/neuralnetworks/1.2/types.h>
 #include <fmq/MessageQueue.h>
 #include <hidl/MQDescriptor.h>

 #include <atomic>
 #include <chrono>
 #include <map>
 #include <memory>
 #include <mutex>
 #include <stack>
 #include <tuple>
 #include <utility>
 #include <vector>

 namespace android::nn {

 /**
  * The ExecutionBurstController class manages both the serialization and
  * deserialization of data across FMQ, making it appear to the runtime as a
  * regular synchronous inference. Additionally, this class manages the burst's
  * memory cache.
  */
 class ExecutionBurstController {
     // As the macro name indicates, implicit construction is disallowed: instances are
     // obtained through create() or the explicit constructor declared below.
     DISALLOW_IMPLICIT_CONSTRUCTORS(ExecutionBurstController);

   public:
     /**
      * NN runtime burst callback object and memory cache.
      *
      * ExecutionBurstCallback associates a hidl_memory object with a slot number
      * to be passed across FMQ. The ExecutionBurstServer can use this callback
      * to retrieve the hidl_memory corresponding to the slot via HIDL.
      *
      * Whenever a hidl_memory object is copied, it will duplicate the underlying
      * file descriptor. Because the NN runtime currently copies the hidl_memory
      * on each execution, it is difficult to associate hidl_memory objects with
      * previously cached hidl_memory objects. For this reason, callers of this
      * class must pair each hidl_memory object with an associated key. For
      * efficiency, if two hidl_memory objects represent the same underlying
      * buffer, they must use the same key.
      */
     class ExecutionBurstCallback : public hardware::neuralnetworks::V1_2::IBurstCallback {
         // The callback object is neither copyable nor assignable.
         DISALLOW_COPY_AND_ASSIGN(ExecutionBurstCallback);

       public:
         ExecutionBurstCallback() = default;

         /**
          * IBurstCallback entry point through which the ExecutionBurstServer
          * retrieves the hidl_memory objects bound to the requested slots.
          *
          * NOTE(review): what is reported through "cb" for a slot that is not
          * in the cache is decided by the implementation file -- confirm there.
          *
          * @param slots Slot identifiers whose memory resources are requested.
          * @param cb HIDL callback used to deliver the result.
          */
         hardware::Return<void> getMemories(const hardware::hidl_vec<int32_t>& slots,
                                            getMemories_cb cb) override;

         /**
          * This function performs one of two different actions:
          * 1) If a key corresponding to a memory resource is unrecognized by the
          *    ExecutionBurstCallback object, the ExecutionBurstCallback object
          *    will allocate a slot, bind the memory to the slot, and return the
          *    slot identifier.
          * 2) If a key corresponding to a memory resource is recognized by the
          *    ExecutionBurstCallback object, the ExecutionBurstCallback object
          *    will return the existing slot identifier.
          *
          * @param memories Memory resources used in an inference.
          * @param keys Unique identifiers where each element corresponds to a
          *     memory resource element in "memories".
          * @return Unique slot identifiers where each returned slot element
          *     corresponds to a memory resource element in "memories".
          */
         std::vector<int32_t> getSlots(const hardware::hidl_vec<hardware::hidl_memory>& memories,
                                       const std::vector<intptr_t>& keys);

         /**
          * This function performs two different actions:
          * 1) Removes an entry from the cache (if present), including the local
          *    storage of the hidl_memory object. Note that this call does not
          *    free any corresponding hidl_memory object in ExecutionBurstServer,
          *    which is separately freed via IBurstContext::freeMemory.
          * 2) Returns whether a cache entry was removed and which slot was removed
          *    if found. If the key did not correspond to any entry in the cache, a
          *    slot number of 0 is returned. The slot number and whether the entry
          *    existed are useful so the same slot can be freed in the
          *    ExecutionBurstServer's cache via IBurstContext::freeMemory.
          *
          * @param key Key identifying the memory resource to remove.
          * @return Pair of (whether an entry was removed, slot that was removed).
          */
         std::pair<bool, int32_t> freeMemory(intptr_t key);

       private:
         // NOTE(review): the "Locked" suffix suggests these helpers expect mMutex to
         // already be held by the caller -- confirm against the implementation.
         int32_t getSlotLocked(const hardware::hidl_memory& memory, intptr_t key);
         int32_t allocateSlotLocked();

         // NOTE(review): presumably guards the three cache members below -- confirm.
         std::mutex mMutex;
         // Slot identifiers kept in a vector-backed stack.
         // NOTE(review): presumably slots released by freeMemory that can be reused -- confirm.
         std::stack<int32_t, std::vector<int32_t>> mFreeSlots;
         // Maps a caller-supplied memory key to its slot identifier.
         std::map<intptr_t, int32_t> mMemoryIdToSlot;
         // Locally stored hidl_memory objects.
         // NOTE(review): presumably indexed by slot identifier -- confirm.
         std::vector<hardware::hidl_memory> mMemoryCache;
     };

     /**
      * Creates a burst controller on a prepared model.
      *
      * Prefer this over ExecutionBurstController's constructor.
      *
      * @param preparedModel Model prepared for execution to execute on.
      * @param pollingTimeWindow How much time (in microseconds) the
      *     ExecutionBurstController is allowed to poll the FMQ before waiting on
      *     the blocking futex. Polling may result in lower latencies at the
      *     potential cost of more power usage.
      * @return ExecutionBurstController Execution burst controller object.
      *     NOTE(review): the value returned on failure is not visible from this
      *     header (presumably nullptr) -- confirm against the implementation.
      */
     static std::unique_ptr<ExecutionBurstController> create(
             const sp<hardware::neuralnetworks::V1_2::IPreparedModel>& preparedModel,
             std::chrono::microseconds pollingTimeWindow);

     /**
      * Constructs a burst controller from already-created components.
      *
      * Prefer calling ExecutionBurstController::create.
      *
      * NOTE(review): the parameter roles below are read off the type and member
      * names in this header; confirm against the implementation.
      *
      * @param requestChannelSender Sender side of the request channel.
      * @param resultChannelReceiver Receiver side of the result channel.
      * @param burstContext Burst context object of the service.
      * @param callback Burst callback object, which also acts as the memory cache.
      * @param deathHandler Optional death recipient; defaults to nullptr.
      */
     ExecutionBurstController(const std::shared_ptr<RequestChannelSender>& requestChannelSender,
                              const std::shared_ptr<ResultChannelReceiver>& resultChannelReceiver,
                              const sp<hardware::neuralnetworks::V1_2::IBurstContext>& burstContext,
                              const sp<ExecutionBurstCallback>& callback,
                              const sp<hardware::hidl_death_recipient>& deathHandler = nullptr);

     // explicit destructor to unregister the death recipient
     ~ExecutionBurstController();

     /**
      * Execute a request on a model.
      *
      * @param request Arguments to be executed on a model.
      * @param measure Whether to collect timing measurements, either YES or NO.
      * @param memoryIds Identifiers corresponding to each memory object in the
      *     request's pools.
      * @return A tuple of:
      *     - result code of the execution
      *     - dynamic output shapes from the execution
      *     - any execution time measurements of the execution
      *     - whether or not a failed burst execution should be re-run using a
      *       different path (e.g., IPreparedModel::executeSynchronously)
      */
     std::tuple<int, std::vector<hardware::neuralnetworks::V1_2::OutputShape>,
                hardware::neuralnetworks::V1_2::Timing, bool>
     compute(const hardware::neuralnetworks::V1_0::Request& request,
             hardware::neuralnetworks::V1_2::MeasureTiming measure,
             const std::vector<intptr_t>& memoryIds);

     /**
      * Propagate a user's freeing of memory to the service.
      *
      * @param key Key corresponding to the memory object.
      */
     void freeMemory(intptr_t key);

   private:
     // NOTE(review): presumably serializes use of the controller's channels -- confirm.
     std::mutex mMutex;
     // Request and result channel endpoints supplied at construction.
     const std::shared_ptr<RequestChannelSender> mRequestChannelSender;
     const std::shared_ptr<ResultChannelReceiver> mResultChannelReceiver;
     // Burst context supplied at construction.
     const sp<hardware::neuralnetworks::V1_2::IBurstContext> mBurstContext;
     // Burst callback object, which doubles as the memory cache (key -> slot bookkeeping).
     const sp<ExecutionBurstCallback> mMemoryCache;
     // Death recipient supplied at construction (may be nullptr); see the destructor comment.
     const sp<hardware::hidl_death_recipient> mDeathHandler;
 };

 }  // namespace android::nn

 #endif  // ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
	/*
	* Copyright (C) 2019 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
	#define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H

	#include "ExecutionBurstUtils.h"

	#include <android-base/macros.h>
	#include <android/hardware/neuralnetworks/1.0/types.h>
	#include <android/hardware/neuralnetworks/1.1/types.h>
	#include <android/hardware/neuralnetworks/1.2/IBurstCallback.h>
	#include <android/hardware/neuralnetworks/1.2/IBurstContext.h>
	#include <android/hardware/neuralnetworks/1.2/IPreparedModel.h>
	#include <android/hardware/neuralnetworks/1.2/types.h>
	#include <fmq/MessageQueue.h>
	#include <hidl/MQDescriptor.h>

	#include <atomic>
	#include <chrono>
	#include <map>
	#include <memory>
	#include <mutex>
	#include <stack>
	#include <tuple>
	#include <utility>
	#include <vector>

	namespace android::nn {

	/**
	* The ExecutionBurstController class manages both the serialization and
	* deserialization of data across FMQ, making it appear to the runtime as a
	* regular synchronous inference. Additionally, this class manages the burst's
	* memory cache.
	*/
	class ExecutionBurstController {
	// As the macro name indicates, implicit construction is disallowed: instances are
	// obtained through create() or the explicit constructor declared below.
	DISALLOW_IMPLICIT_CONSTRUCTORS(ExecutionBurstController);

	public:
	/**
	* NN runtime burst callback object and memory cache.
	*
	* ExecutionBurstCallback associates a hidl_memory object with a slot number
	* to be passed across FMQ. The ExecutionBurstServer can use this callback
	* to retrieve the hidl_memory corresponding to the slot via HIDL.
	*
	* Whenever a hidl_memory object is copied, it will duplicate the underlying
	* file descriptor. Because the NN runtime currently copies the hidl_memory
	* on each execution, it is difficult to associate hidl_memory objects with
	* previously cached hidl_memory objects. For this reason, callers of this
	* class must pair each hidl_memory object with an associated key. For
	* efficiency, if two hidl_memory objects represent the same underlying
	* buffer, they must use the same key.
	*/
	class ExecutionBurstCallback : public hardware::neuralnetworks::V1_2::IBurstCallback {
	// The callback object is neither copyable nor assignable.
	DISALLOW_COPY_AND_ASSIGN(ExecutionBurstCallback);

	public:
	ExecutionBurstCallback() = default;

	/**
	* IBurstCallback entry point through which the ExecutionBurstServer
	* retrieves the hidl_memory objects bound to the requested slots.
	*
	* NOTE(review): what is reported through "cb" for a slot that is not
	* in the cache is decided by the implementation file -- confirm there.
	*
	* @param slots Slot identifiers whose memory resources are requested.
	* @param cb HIDL callback used to deliver the result.
	*/
	hardware::Return<void> getMemories(const hardware::hidl_vec<int32_t>& slots,
	getMemories_cb cb) override;

	/**
	* This function performs one of two different actions:
	* 1) If a key corresponding to a memory resource is unrecognized by the
	* ExecutionBurstCallback object, the ExecutionBurstCallback object
	* will allocate a slot, bind the memory to the slot, and return the
	* slot identifier.
	* 2) If a key corresponding to a memory resource is recognized by the
	* ExecutionBurstCallback object, the ExecutionBurstCallback object
	* will return the existing slot identifier.
	*
	* @param memories Memory resources used in an inference.
	* @param keys Unique identifiers where each element corresponds to a
	* memory resource element in "memories".
	* @return Unique slot identifiers where each returned slot element
	* corresponds to a memory resource element in "memories".
	*/
	std::vector<int32_t> getSlots(const hardware::hidl_vec<hardware::hidl_memory>& memories,
	const std::vector<intptr_t>& keys);

	/**
	* This function performs two different actions:
	* 1) Removes an entry from the cache (if present), including the local
	* storage of the hidl_memory object. Note that this call does not
	* free any corresponding hidl_memory object in ExecutionBurstServer,
	* which is separately freed via IBurstContext::freeMemory.
	* 2) Returns whether a cache entry was removed and which slot was removed
	* if found. If the key did not correspond to any entry in the cache, a
	* slot number of 0 is returned. The slot number and whether the entry
	* existed are useful so the same slot can be freed in the
	* ExecutionBurstServer's cache via IBurstContext::freeMemory.
	*
	* @param key Key identifying the memory resource to remove.
	* @return Pair of (whether an entry was removed, slot that was removed).
	*/
	std::pair<bool, int32_t> freeMemory(intptr_t key);

	private:
	// NOTE(review): the "Locked" suffix suggests these helpers expect mMutex to
	// already be held by the caller -- confirm against the implementation.
	int32_t getSlotLocked(const hardware::hidl_memory& memory, intptr_t key);
	int32_t allocateSlotLocked();

	// NOTE(review): presumably guards the three cache members below -- confirm.
	std::mutex mMutex;
	// Slot identifiers kept in a vector-backed stack.
	// NOTE(review): presumably slots released by freeMemory that can be reused -- confirm.
	std::stack<int32_t, std::vector<int32_t>> mFreeSlots;
	// Maps a caller-supplied memory key to its slot identifier.
	std::map<intptr_t, int32_t> mMemoryIdToSlot;
	// Locally stored hidl_memory objects.
	// NOTE(review): presumably indexed by slot identifier -- confirm.
	std::vector<hardware::hidl_memory> mMemoryCache;
	};

	/**
	* Creates a burst controller on a prepared model.
	*
	* Prefer this over ExecutionBurstController's constructor.
	*
	* @param preparedModel Model prepared for execution to execute on.
	* @param pollingTimeWindow How much time (in microseconds) the
	* ExecutionBurstController is allowed to poll the FMQ before waiting on
	* the blocking futex. Polling may result in lower latencies at the
	* potential cost of more power usage.
	* @return ExecutionBurstController Execution burst controller object.
	* NOTE(review): the value returned on failure is not visible from this
	* header (presumably nullptr) -- confirm against the implementation.
	*/
	static std::unique_ptr<ExecutionBurstController> create(
	const sp<hardware::neuralnetworks::V1_2::IPreparedModel>& preparedModel,
	std::chrono::microseconds pollingTimeWindow);

	/**
	* Constructs a burst controller from already-created components.
	*
	* Prefer calling ExecutionBurstController::create.
	*
	* NOTE(review): the parameter roles below are read off the type and member
	* names in this header; confirm against the implementation.
	*
	* @param requestChannelSender Sender side of the request channel.
	* @param resultChannelReceiver Receiver side of the result channel.
	* @param burstContext Burst context object of the service.
	* @param callback Burst callback object, which also acts as the memory cache.
	* @param deathHandler Optional death recipient; defaults to nullptr.
	*/
	ExecutionBurstController(const std::shared_ptr<RequestChannelSender>& requestChannelSender,
	const std::shared_ptr<ResultChannelReceiver>& resultChannelReceiver,
	const sp<hardware::neuralnetworks::V1_2::IBurstContext>& burstContext,
	const sp<ExecutionBurstCallback>& callback,
	const sp<hardware::hidl_death_recipient>& deathHandler = nullptr);

	// explicit destructor to unregister the death recipient
	~ExecutionBurstController();

	/**
	* Execute a request on a model.
	*
	* @param request Arguments to be executed on a model.
	* @param measure Whether to collect timing measurements, either YES or NO.
	* @param memoryIds Identifiers corresponding to each memory object in the
	* request's pools.
	* @return A tuple of:
	* - result code of the execution
	* - dynamic output shapes from the execution
	* - any execution time measurements of the execution
	* - whether or not a failed burst execution should be re-run using a
	* different path (e.g., IPreparedModel::executeSynchronously)
	*/
	std::tuple<int, std::vector<hardware::neuralnetworks::V1_2::OutputShape>,
	hardware::neuralnetworks::V1_2::Timing, bool>
	compute(const hardware::neuralnetworks::V1_0::Request& request,
	hardware::neuralnetworks::V1_2::MeasureTiming measure,
	const std::vector<intptr_t>& memoryIds);

	/**
	* Propagate a user's freeing of memory to the service.
	*
	* @param key Key corresponding to the memory object.
	*/
	void freeMemory(intptr_t key);

	private:
	// NOTE(review): presumably serializes use of the controller's channels -- confirm.
	std::mutex mMutex;
	// Request and result channel endpoints supplied at construction.
	const std::shared_ptr<RequestChannelSender> mRequestChannelSender;
	const std::shared_ptr<ResultChannelReceiver> mResultChannelReceiver;
	// Burst context supplied at construction.
	const sp<hardware::neuralnetworks::V1_2::IBurstContext> mBurstContext;
	// Burst callback object, which doubles as the memory cache (key -> slot bookkeeping).
	const sp<ExecutionBurstCallback> mMemoryCache;
	// Death recipient supplied at construction (may be nullptr); see the destructor comment.
	const sp<hardware::hidl_death_recipient> mDeathHandler;
	};

	} // namespace android::nn

	#endif // ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H