/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "Manager"
#include "Manager.h"
#include <android/hidl/manager/1.2/IServiceManager.h>
#include <build/version.h>
#include <hidl/HidlTransportSupport.h>
#include <hidl/ServiceManagement.h>
#include <algorithm>
#include <functional>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include "Callbacks.h"
#include "CpuExecutor.h"
#include "ExecutionBurstController.h"
#include "HalInterfaces.h"
#include "Memory.h"
#include "MetaModel.h"
#include "ModelArgumentInfo.h"
#include "Tracing.h"
#include "Utils.h"
#include "VersionedInterfaces.h"
namespace android {
namespace nn {
using namespace hal;
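// Timing used when no timing information is available. Per the NN HAL, a value of UINT64_MAX
// means the time was not measured.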
const Timing kNoTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
// A Device with actual underlying driver
class DriverDevice : public Device {
public:
DriverDevice(std::string name, const sp<V1_0::IDevice>& device);
// Returns true if successfully initialized.
bool initialize();
const char* getName() const override { return mName.c_str(); }
const char* getVersionString() const override { return mVersionString.c_str(); }
int64_t getFeatureLevel() const override { return mInterface->getFeatureLevel(); }
int32_t getType() const override { return mInterface->getType(); }
hidl_vec<Extension> getSupportedExtensions() const override;
void getSupportedOperations(const MetaModel& metaModel,
hidl_vec<bool>* supportedOperations) const override;
PerformanceInfo getPerformance(OperandType type) const override;
PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const override {
return mCapabilities.relaxedFloat32toFloat16PerformanceScalar;
}
PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const override {
return mCapabilities.relaxedFloat32toFloat16PerformanceTensor;
}
bool isCachingSupported() const override {
// Caching is supported if either numModelCache or numDataCache is greater than 0.
return mNumCacheFiles.first > 0 || mNumCacheFiles.second > 0;
}
std::pair<int, std::shared_ptr<PreparedModel>> prepareModel(
const Model& model, ExecutionPreference preference, const std::string& cacheDir,
const std::optional<CacheToken>& maybeToken) const override;
std::pair<int, std::shared_ptr<PreparedModel>> prepareModelFromCache(
const std::string& cacheDir, const CacheToken& token) const override;
private:
std::string mName;
std::string mVersionString;
const std::shared_ptr<VersionedIDevice> mInterface;
Capabilities mCapabilities;
hidl_vec<Extension> mSupportedExtensions;
std::pair<uint32_t, uint32_t> mNumCacheFiles;
#ifdef NN_DEBUGGABLE
// For debugging: behavior of IDevice::getSupportedOperations for SampleDriver.
// 0 - all operations reported by IDevice::getSupportedOperations() supported
// 1 - some operations reported by IDevice::getSupportedOperations() supported
uint32_t mSupported = 0;
#endif // NN_DEBUGGABLE
};
// A PreparedModel with an underlying IPreparedModel instance returned by the actual driver.
class DriverPreparedModel : public PreparedModel {
public:
DriverPreparedModel(const std::shared_ptr<VersionedIPreparedModel>& preparedModel)
: mPreparedModel(preparedModel) {}
std::tuple<int, std::vector<OutputShape>, Timing> execute(
const std::vector<ModelArgumentInfo>& inputs,
const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
const std::shared_ptr<ExecutionBurstController>& burstController,
MeasureTiming measure) const override;
std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
bool blocking) const override {
return mPreparedModel->configureExecutionBurst(blocking);
}
private:
const std::shared_ptr<VersionedIPreparedModel> mPreparedModel;
};
DriverDevice::DriverDevice(std::string name, const sp<V1_0::IDevice>& device)
: mName(std::move(name)), mInterface(VersionedIDevice::create(mName, device)) {}
// TODO: handle errors from initialize correctly
bool DriverDevice::initialize() {
#ifdef NN_DEBUGGABLE
static const char samplePrefix[] = "sample";
mSupported = (mName.substr(0, sizeof(samplePrefix) - 1) == samplePrefix)
? getProp("debug.nn.sample.supported")
: 0;
#endif // NN_DEBUGGABLE
ErrorStatus status = ErrorStatus::GENERAL_FAILURE;
if (mInterface == nullptr) {
LOG(ERROR) << "DriverDevice contains invalid interface object.";
return false;
}
std::tie(status, mCapabilities) = mInterface->getCapabilities();
if (status != ErrorStatus::NONE) {
LOG(ERROR) << "IDevice::getCapabilities returned the error " << toString(status);
return false;
}
VLOG(MANAGER) << "Capab " << toString(mCapabilities);
std::tie(status, mVersionString) = mInterface->getVersionString();
// TODO(miaowang): add a validation test case for the error case.
if (status != ErrorStatus::NONE) {
LOG(ERROR) << "IDevice::getVersionString returned the error " << toString(status);
return false;
}
std::tie(status, mSupportedExtensions) = mInterface->getSupportedExtensions();
if (status != ErrorStatus::NONE) {
LOG(ERROR) << "IDevice::getSupportedExtensions returned the error " << toString(status);
return false;
}
std::tie(status, mNumCacheFiles.first, mNumCacheFiles.second) =
mInterface->getNumberOfCacheFilesNeeded();
if (status != ErrorStatus::NONE) {
LOG(WARNING) << "IDevice::getNumberOfCacheFilesNeeded returned the error "
<< toString(status);
mNumCacheFiles = {0, 0};
}
if (mNumCacheFiles.first > static_cast<uint32_t>(Constant::MAX_NUMBER_OF_CACHE_FILES) ||
mNumCacheFiles.second > static_cast<uint32_t>(Constant::MAX_NUMBER_OF_CACHE_FILES)) {
LOG(WARNING)
<< "IDevice::getNumberOfCacheFilesNeeded returned invalid number of cache files "
"numModelCache = "
<< mNumCacheFiles.first << ", numDataCache = " << mNumCacheFiles.second;
mNumCacheFiles = {0, 0};
}
return true;
}
hidl_vec<Extension> DriverDevice::getSupportedExtensions() const {
return mSupportedExtensions;
}
void DriverDevice::getSupportedOperations(const MetaModel& metaModel,
hidl_vec<bool>* outSupportedOperations) const {
// Query the driver for what it can do.
ErrorStatus status = ErrorStatus::GENERAL_FAILURE;
hidl_vec<bool> supportedOperations;
std::tie(status, supportedOperations) = mInterface->getSupportedOperations(metaModel);
const Model& hidlModel = metaModel.getModel();
if (status != ErrorStatus::NONE) {
LOG(ERROR) << "IDevice::getSupportedOperations returned the error " << toString(status);
// Set the supported operation vector to all false, so we won't use this driver.
outSupportedOperations->resize(hidlModel.operations.size());
std::fill(outSupportedOperations->begin(), outSupportedOperations->end(), false);
return;
}
if (supportedOperations.size() != hidlModel.operations.size()) {
LOG(ERROR) << "IDevice::getSupportedOperations returned a vector of length "
<< supportedOperations.size() << " when expecting "
<< hidlModel.operations.size();
// Set the supported operation vector to all false, so we won't use this driver.
outSupportedOperations->resize(hidlModel.operations.size());
std::fill(outSupportedOperations->begin(), outSupportedOperations->end(), false);
return;
}
*outSupportedOperations = std::move(supportedOperations);
#ifdef NN_DEBUGGABLE
if (mSupported != 1) {
return;
}
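// Debug case (mSupported == 1): deterministically report some of the operations the driver
// claims to support as unsupported, based on a stable hash of the device name and each
// operation's signature, so that partitioning across devices gets exercised.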
const uint32_t baseAccumulator = std::hash<std::string>{}(mName);
for (size_t operationIndex = 0; operationIndex < outSupportedOperations->size();
operationIndex++) {
if (!(*outSupportedOperations)[operationIndex]) {
continue;
}
uint32_t accumulator = baseAccumulator;
const Operation& operation = hidlModel.operations[operationIndex];
accumulator ^= static_cast<uint32_t>(operation.type);
auto accumulateOperands = [&hidlModel, &accumulator](const hidl_vec<uint32_t>& operands) {
for (uint32_t operandIndex : operands) {
const Operand& operand = hidlModel.operands[operandIndex];
accumulator ^= static_cast<uint32_t>(operand.type);
accumulator ^= operand.dimensions.size();
for (uint32_t dimension : operand.dimensions) {
accumulator ^= dimension;
if (operand.lifetime == OperandLifeTime::CONSTANT_COPY ||
operand.lifetime == OperandLifeTime::CONSTANT_REFERENCE) {
accumulator ^= 1;
}
}
}
};
accumulateOperands(operation.inputs);
accumulateOperands(operation.outputs);
if (accumulator & 1) {
(*outSupportedOperations)[operationIndex] = false;
}
}
#endif // NN_DEBUGGABLE
}
PerformanceInfo DriverDevice::getPerformance(OperandType type) const {
return lookup(mCapabilities.operandPerformance, type);
}
// Opens the cache file specified by `cache` and sets the handle to the opened fd. Returns false
// on failure. The handle is expected to come in empty, and is only set to an fd when the function
// returns true. The file descriptor is always opened with both read and write permission.
static bool createCacheHandle(const std::string& cache, bool createIfNotExist,
hidl_handle* handle) {
CHECK(handle->getNativeHandle() == nullptr);
int fd = open(cache.c_str(), createIfNotExist ? (O_RDWR | O_CREAT) : O_RDWR, S_IRUSR | S_IWUSR);
NN_RET_CHECK_GE(fd, 0);
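// native_handle_create(numFds, numInts): allocate a native_handle_t that carries one fd and no
// additional ints.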
native_handle_t* cacheNativeHandle = native_handle_create(1, 0);
if (cacheNativeHandle == nullptr) {
close(fd);
return false;
}
cacheNativeHandle->data[0] = fd;
handle->setTo(cacheNativeHandle, /*shouldOwn=*/true);
return true;
}
// Opens a list of cache files and returns the handle vector. Returns an empty vector on failure.
// The file descriptors are always opened with both read and write permission.
static hidl_vec<hidl_handle> createCacheHandleVec(uint32_t numCacheFiles,
const std::string& baseFileName,
bool createIfNotExist) {
CHECK(numCacheFiles <= static_cast<uint32_t>(Constant::MAX_NUMBER_OF_CACHE_FILES));
hidl_vec<hidl_handle> handles(numCacheFiles);
for (uint32_t i = 0; i < numCacheFiles; i++) {
std::string filename = baseFileName + std::to_string(i);
VLOG(COMPILATION) << "Cache " << i << ": " << filename;
if (!createCacheHandle(filename, createIfNotExist, &handles[i])) {
return hidl_vec<hidl_handle>();
}
}
return handles;
}
// Maps the token to cache file names and sets the handle vectors to the opened fds. Returns false
// on failure and leaves the vectors empty. Each vector is expected to come in empty.
static bool getCacheHandles(const std::string& cacheDir, const CacheToken& token,
const std::pair<uint32_t, uint32_t>& numCacheFiles,
bool createIfNotExist, hidl_vec<hidl_handle>* modelCache,
hidl_vec<hidl_handle>* dataCache) {
// The filename includes ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN * 2 characters for token,
// and 1 character for model/data cache identifier.
std::string filename(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN * 2 + 1, '0');
for (uint32_t i = 0; i < ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN; i++) {
filename[i * 2] = 'A' + (token[i] & 0x0F);
filename[i * 2 + 1] = 'A' + (token[i] >> 4);
}
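// Example (illustrative): a token byte of 0x3A maps to "KD" -- low nibble 0xA -> 'A' + 10 = 'K',
// high nibble 0x3 -> 'A' + 3 = 'D'. The final character is set to '1' for the model cache and
// '2' for the data cache below.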
CHECK(cacheDir.empty() || cacheDir.back() == '/');
std::string cacheFileName = cacheDir + filename;
cacheFileName[ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN * 2] = '1';
*modelCache = createCacheHandleVec(numCacheFiles.first, cacheFileName, createIfNotExist);
if (modelCache->size() != numCacheFiles.first) {
return false;
}
cacheFileName[ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN * 2] = '2';
*dataCache = createCacheHandleVec(numCacheFiles.second, cacheFileName, createIfNotExist);
if (dataCache->size() != numCacheFiles.second) {
modelCache->resize(0);
return false;
}
return true;
}
static std::pair<int, std::shared_ptr<PreparedModel>> prepareModelCheck(
ErrorStatus status, const std::shared_ptr<VersionedIPreparedModel>& preparedModel,
const char* prepareName, const char* serviceName) {
if (status != ErrorStatus::NONE) {
LOG(ERROR) << prepareName << " on " << serviceName << " failed: "
<< "prepareReturnStatus=" << toString(status);
return {ANEURALNETWORKS_OP_FAILED, nullptr};
}
if (preparedModel == nullptr) {
LOG(ERROR) << prepareName << " on " << serviceName << " failed: preparedModel is nullptr";
return {ANEURALNETWORKS_OP_FAILED, nullptr};
}
return {ANEURALNETWORKS_NO_ERROR, std::make_shared<DriverPreparedModel>(preparedModel)};
}
std::pair<int, std::shared_ptr<PreparedModel>> DriverDevice::prepareModel(
const Model& model, ExecutionPreference preference, const std::string& cacheDir,
const std::optional<CacheToken>& maybeToken) const {
// Note that some work within VersionedIDevice will be subtracted from the IPC layer
NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "prepareModel");
hidl_vec<hidl_handle> modelCache, dataCache;
if (!maybeToken.has_value() ||
!getCacheHandles(cacheDir, *maybeToken, mNumCacheFiles,
/*createIfNotExist=*/true, &modelCache, &dataCache)) {
modelCache.resize(0);
dataCache.resize(0);
}
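// When the compilation was not given a cache token, pass an all-zero token to the driver.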
static const CacheToken kNullToken{};
const CacheToken token = maybeToken.value_or(kNullToken);
const auto [status, preparedModel] =
mInterface->prepareModel(model, preference, modelCache, dataCache, token);
return prepareModelCheck(status, preparedModel, "prepareModel", getName());
}
std::pair<int, std::shared_ptr<PreparedModel>> DriverDevice::prepareModelFromCache(
const std::string& cacheDir, const CacheToken& token) const {
// Note that some work within VersionedIDevice will be subtracted from the IPC layer
NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "prepareModelFromCache");
VLOG(COMPILATION) << "prepareModelFromCache";
hidl_vec<hidl_handle> modelCache, dataCache;
if (!getCacheHandles(cacheDir, token, mNumCacheFiles,
/*createIfNotExist=*/false, &modelCache, &dataCache)) {
return {ANEURALNETWORKS_OP_FAILED, nullptr};
}
const auto [status, preparedModel] =
mInterface->prepareModelFromCache(modelCache, dataCache, token);
return prepareModelCheck(status, preparedModel, "prepareModelFromCache", getName());
}
// Convert ModelArgumentInfo to HIDL RequestArgument. For pointer arguments, use the location
// information in ptrArgsLocations.
static void setRequestArgumentArray(const std::vector<ModelArgumentInfo>& argumentInfos,
const std::vector<DataLocation>& ptrArgsLocations,
hidl_vec<RequestArgument>* ioInfos) {
size_t count = argumentInfos.size();
ioInfos->resize(count);
uint32_t ptrArgsIndex = 0;
for (size_t i = 0; i < count; i++) {
const auto& info = argumentInfos[i];
(*ioInfos)[i] = {
.hasNoValue = info.state == ModelArgumentInfo::HAS_NO_VALUE,
.location = info.state == ModelArgumentInfo::POINTER
? ptrArgsLocations[ptrArgsIndex++]
: info.locationAndLength,
.dimensions = info.dimensions,
};
}
}
// Figures out how to place each of the inputs or outputs in a buffer. This just
// does the layout and memory allocation; it does not copy data. Aligns each
// argument as needed (see alignBytesNeeded()).
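// Example (illustrative, assuming alignBytesNeeded() pads arguments of four or more bytes to a
// 4-byte boundary): pointer arguments of lengths 5 and 8 would be placed at offsets 0 and 8,
// for a total pool size of 16 bytes.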
static std::tuple<int, std::unique_ptr<MemoryAshmem>, std::vector<DataLocation>>
allocatePointerArgumentsToPool(const std::vector<ModelArgumentInfo>& args,
MemoryTracker* memories) {
CHECK(memories != nullptr);
std::vector<DataLocation> ptrArgsLocations;
const uint32_t nextPoolIndex = memories->size();
int64_t total = 0;
for (const auto& info : args) {
if (info.state == ModelArgumentInfo::POINTER) {
const DataLocation& loc = info.locationAndLength;
// TODO Good enough alignment?
total += alignBytesNeeded(static_cast<uint32_t>(total), loc.length);
ptrArgsLocations.push_back({.poolIndex = nextPoolIndex,
.offset = static_cast<uint32_t>(total),
.length = loc.length});
total += loc.length;
}
}
if (total > 0xFFFFFFFF) {
LOG(ERROR) << "allocatePointerArgumentsToPool: ANeuralNetworksExecution: Size of all "
"inputs or outputs exceeds 2^32.";
return {ANEURALNETWORKS_BAD_DATA, nullptr, std::vector<DataLocation>{}};
}
if (total <= 0) {
return {ANEURALNETWORKS_NO_ERROR, nullptr, std::vector<DataLocation>{}};
}
auto [n, memory] = MemoryAshmem::create(total);
if (n != ANEURALNETWORKS_NO_ERROR) {
return {n, nullptr, std::vector<DataLocation>{}};
}
memories->add(memory.get());
return {ANEURALNETWORKS_NO_ERROR, std::move(memory), std::move(ptrArgsLocations)};
}
// Perform computation on an actual HIDL driver.
//
// Because HIDL cannot take raw pointers, two separate memory pools will be allocated for inputs and
// outputs specified by pointers. The input pointer data will be copied to the input pool prior to
// execution, and the output pointer data will be copied out from the output pool after the
// execution.
//
// The HIDL invocation will choose between sync/async execution according to
// DeviceManager::mSyncExecHal.
std::tuple<int, std::vector<OutputShape>, Timing> DriverPreparedModel::execute(
const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
const MemoryTracker& memories,
const std::shared_ptr<ExecutionBurstController>& burstController,
MeasureTiming measure) const {
NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::execute");
// Make a copy of the memory tracker as we will append memory pools for pointer arguments.
MemoryTracker localMemories = memories;
// We separate the input & output pools so accelerators only need to copy
// the contents of the input pools. We could also use them to set protection
// on read-only memory, but that's not currently done.
// Lay out the input and output data.
const auto [n1, inputPtrArgsMemory, inputPtrArgsLocations] =
allocatePointerArgumentsToPool(inputs, &localMemories);
if (n1 != ANEURALNETWORKS_NO_ERROR) {
return {n1, {}, kNoTiming};
}
const auto [n2, outputPtrArgsMemory, outputPtrArgsLocations] =
allocatePointerArgumentsToPool(outputs, &localMemories);
if (n2 != ANEURALNETWORKS_NO_ERROR) {
return {n2, {}, kNoTiming};
}
// Copy the input data that was specified via a pointer.
if (inputPtrArgsMemory != nullptr) {
uint32_t ptrInputIndex = 0;
for (const auto& info : inputs) {
if (info.state == ModelArgumentInfo::POINTER) {
const DataLocation& loc = inputPtrArgsLocations[ptrInputIndex++];
uint8_t* const data = inputPtrArgsMemory->getPointer();
memcpy(data + loc.offset, info.buffer, loc.length);
}
}
}
Request request;
setRequestArgumentArray(inputs, inputPtrArgsLocations, &request.inputs);
setRequestArgumentArray(outputs, outputPtrArgsLocations, &request.outputs);
uint32_t count = localMemories.size();
request.pools.resize(count);
for (uint32_t i = 0; i < count; i++) {
request.pools[i] = localMemories[i]->getHidlMemory();
}
NNTRACE_FULL_SWITCH(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION,
"DriverPreparedModel::execute::execute");
int n = ANEURALNETWORKS_OP_FAILED;
std::vector<OutputShape> outputShapes;
Timing timing = kNoTiming;
// compute using burst if present
const bool burstCompute = (burstController != nullptr);
bool burstFallback = false;
if (burstCompute) {
std::vector<intptr_t> memoryIds;
memoryIds.reserve(localMemories.size());
for (const Memory* memory : localMemories) {
memory->usedBy(burstController);
memoryIds.push_back(memory->getKey());
}
VLOG(EXECUTION) << "Before ExecutionBurstController->tryCompute() "
<< SHOW_IF_DEBUG(toString(request));
std::tie(n, outputShapes, timing, burstFallback) =
burstController->tryCompute(request, measure, memoryIds);
}
// compute from IPreparedModel if either:
// (1) burst was not supplied, or
// (2) the burst execution failed and requested a fallback execution
if (!burstCompute || burstFallback) {
const bool preferSynchronous = DeviceManager::get()->syncExecHal();
std::tie(n, outputShapes, timing) =
mPreparedModel->execute(request, measure, preferSynchronous);
}
if (n != ANEURALNETWORKS_NO_ERROR) {
VLOG(EXECUTION) << "**Execution failed**";
return {n, std::move(outputShapes), timing};
}
// Copy the output data from shared memory to the output buffers.
NNTRACE_RT_SWITCH(NNTRACE_PHASE_RESULTS, "DriverPreparedModel::execute");
if (outputPtrArgsMemory != nullptr) {
uint32_t ptrOutputIndex = 0;
for (const auto& info : outputs) {
if (info.state == ModelArgumentInfo::POINTER) {
const DataLocation& loc = outputPtrArgsLocations[ptrOutputIndex++];
const uint8_t* const data = outputPtrArgsMemory->getPointer();
memcpy(info.buffer, data + loc.offset, loc.length);
}
}
}
VLOG(EXECUTION) << "DriverPreparedModel::execute completed";
return {ANEURALNETWORKS_NO_ERROR, std::move(outputShapes), timing};
}
// A special abstracted device for the CPU. Only one instance of this class will exist.
// Use get() to retrieve it.
class CpuDevice : public Device {
public:
// Returns the singleton CPU fallback device.
static std::shared_ptr<CpuDevice> get() {
static std::shared_ptr<CpuDevice> instance(new CpuDevice);
return instance;
}
const char* getName() const override { return kName.c_str(); }
const char* getVersionString() const override { return kVersionString.c_str(); }
int64_t getFeatureLevel() const override { return kFeatureLevel; }
int32_t getType() const override { return ANEURALNETWORKS_DEVICE_CPU; }
hidl_vec<Extension> getSupportedExtensions() const override { return {/* No extensions. */}; }
void getSupportedOperations(const MetaModel& metaModel,
hidl_vec<bool>* supportedOperations) const override;
PerformanceInfo getPerformance(OperandType) const override { return kPerformance; }
PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const override {
return kPerformance;
}
PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const override {
return kPerformance;
}
bool isCachingSupported() const override { return false; }
std::pair<int, std::shared_ptr<PreparedModel>> prepareModel(
const Model& model, ExecutionPreference preference, const std::string& cacheDir,
const std::optional<CacheToken>& maybeToken) const override;
std::pair<int, std::shared_ptr<PreparedModel>> prepareModelFromCache(
const std::string& /*cacheDir*/, const CacheToken& /*token*/) const override {
CHECK(false) << "Should never call prepareModelFromCache on CpuDevice";
return {ANEURALNETWORKS_OP_FAILED, nullptr};
}
private:
CpuDevice() = default;
const int64_t kFeatureLevel = __ANDROID_API__;
const std::string kName = "nnapi-reference";
const std::string kVersionString = build::GetBuildNumber();
// Since the performance is a ratio compared to the CPU performance,
// by definition the performance of the CPU is 1.0.
const PerformanceInfo kPerformance = {.execTime = 1.0f, .powerUsage = 1.0f};
};
// A special abstracted PreparedModel for the CPU, constructed by CpuDevice.
class CpuPreparedModel : public PreparedModel {
public:
// Factory method for CpuPreparedModel. Returns ANEURALNETWORKS_NO_ERROR and
// a prepared model object if successfully created. Returns an error code
// and nullptr otherwise.
static std::pair<int, std::shared_ptr<PreparedModel>> create(Model hidlModel);
std::tuple<int, std::vector<OutputShape>, Timing> execute(
const std::vector<ModelArgumentInfo>& inputs,
const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
const std::shared_ptr<ExecutionBurstController>& burstController,
MeasureTiming measure) const override;
std::shared_ptr<ExecutionBurstController> configureExecutionBurst(bool) const override {
return nullptr;
}
// Prefer to use CpuPreparedModel::create.
CpuPreparedModel(Model model, std::vector<RunTimePoolInfo> poolInfos)
: mModel(std::move(model)), mModelPoolInfos(std::move(poolInfos)) {}
private:
const Model mModel;
const std::vector<RunTimePoolInfo> mModelPoolInfos;
};
void CpuDevice::getSupportedOperations(const MetaModel& metaModel,
hidl_vec<bool>* supportedOperations) const {
const Model& hidlModel = metaModel.getModel();
const size_t count = hidlModel.operations.size();
hidl_vec<bool> result(count);
for (size_t i = 0; i < count; i++) {
// TODO(b/119870033): Decide whether and how post-P operations would be supported on CPU.
// We may want to use the slicer for CpuDevice just as we do for
// DriverDevice.
OperationType operationType = hidlModel.operations[i].type;
result[i] = !isExtensionOperationType(operationType) &&
operationType != OperationType::OEM_OPERATION;
}
*supportedOperations = std::move(result);
}
std::pair<int, std::shared_ptr<PreparedModel>> CpuDevice::prepareModel(
const Model& model, ExecutionPreference preference, const std::string& /*cacheDir*/,
const std::optional<CacheToken>& maybeToken) const {
CHECK(!maybeToken.has_value())
<< "Should never call prepareModel with cache information on CpuDevice";
if (!validateModel(model) || !validateExecutionPreference(preference)) {
return {ANEURALNETWORKS_OP_FAILED, nullptr};
}
return CpuPreparedModel::create(model);
}
std::pair<int, std::shared_ptr<PreparedModel>> CpuPreparedModel::create(Model hidlModel) {
std::vector<RunTimePoolInfo> poolInfos;
if (!setRunTimePoolInfosFromHidlMemories(&poolInfos, hidlModel.pools)) {
return {ANEURALNETWORKS_UNMAPPABLE, nullptr};
}
std::shared_ptr<PreparedModel> preparedModel =
std::make_shared<CpuPreparedModel>(std::move(hidlModel), std::move(poolInfos));
return {ANEURALNETWORKS_NO_ERROR, std::move(preparedModel)};
}
static std::tuple<int, std::vector<OutputShape>, Timing> computeOnCpu(
const Model& model, const Request& request,
const std::vector<RunTimePoolInfo>& modelPoolInfos,
const std::vector<RunTimePoolInfo>& requestPoolInfos) {
NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "computeOnCpu");
CpuExecutor executor;
int err = executor.run(model, request, modelPoolInfos, requestPoolInfos);
const auto& outputShapes = executor.getOutputShapes();
return {err, outputShapes, kNoTiming};
}
// Perform computation on NNAPI CPU reference implementation.
//
// Unlike DriverPreparedModel::execute, the NNAPI CPU reference executor lives in the
// same process as the NNAPI runtime and can take raw pointers. We create as many pools as
// there are inputs/outputs in this method to avoid data copying.
//
// Will choose between sync/async execution according to DeviceManager::mSyncExecCpu.
std::tuple<int, std::vector<OutputShape>, Timing> CpuPreparedModel::execute(
const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
const MemoryTracker& memories,
const std::shared_ptr<ExecutionBurstController>& /*burstController*/,
MeasureTiming /*measure*/) const {
std::vector<RunTimePoolInfo> requestPoolInfos;
requestPoolInfos.reserve(memories.size());
for (const Memory* mem : memories) {
if (std::optional<RunTimePoolInfo> poolInfo =
RunTimePoolInfo::createFromHidlMemory(mem->getHidlMemory())) {
requestPoolInfos.emplace_back(*poolInfo);
} else {
return {ANEURALNETWORKS_UNMAPPABLE, {}, kNoTiming};
}
}
// Create as many pools as there are inputs / outputs.
auto fixPointerArguments =
[&requestPoolInfos](const std::vector<ModelArgumentInfo>& argumentInfos) {
std::vector<DataLocation> ptrArgsLocations;
for (const ModelArgumentInfo& argumentInfo : argumentInfos) {
if (argumentInfo.state == ModelArgumentInfo::POINTER) {
ptrArgsLocations.push_back(
{.poolIndex = static_cast<uint32_t>(requestPoolInfos.size()),
.offset = 0,
.length = argumentInfo.locationAndLength.length});
requestPoolInfos.emplace_back(RunTimePoolInfo::createFromExistingBuffer(
static_cast<uint8_t*>(argumentInfo.buffer)));
}
}
return ptrArgsLocations;
};
const std::vector<DataLocation> inputPtrArgsLocations = fixPointerArguments(inputs);
const std::vector<DataLocation> outputPtrArgsLocations = fixPointerArguments(outputs);
Request request;
setRequestArgumentArray(inputs, inputPtrArgsLocations, &request.inputs);
setRequestArgumentArray(outputs, outputPtrArgsLocations, &request.outputs);
if (!DeviceManager::get()->syncExecCpu()) {
// TODO: use a thread pool
// TODO(mikie): this could have NNTRACE so we could measure the overhead
// of spinning up a new thread.
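// Simulate an asynchronous execution: run the computation on a freshly created thread and
// join it immediately, so the execution happens off the caller's thread as it would in a
// true asynchronous path.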
std::tuple<int, std::vector<OutputShape>, Timing> result = {};
std::thread([this, &request, &requestPoolInfos, &result] {
result = computeOnCpu(mModel, request, mModelPoolInfos, requestPoolInfos);
}).join();
return result;
}
return computeOnCpu(mModel, request, mModelPoolInfos, requestPoolInfos);
}
DeviceManager* DeviceManager::get() {
static DeviceManager manager;
return &manager;
}
std::shared_ptr<Device> DeviceManager::getCpuDevice() {
return CpuDevice::get();
}
std::shared_ptr<Device> DeviceManager::forTest_makeDriverDevice(const std::string& name,
const sp<V1_0::IDevice>& device) {
auto driverDevice = std::make_shared<DriverDevice>(name, device);
CHECK(driverDevice->initialize());
return driverDevice;
}
void DeviceManager::findAvailableDevices() {
using ::android::hidl::manager::V1_2::IServiceManager;
VLOG(MANAGER) << "findAvailableDevices";
sp<IServiceManager> manager = hardware::defaultServiceManager1_2();
if (manager == nullptr) {
LOG(ERROR) << "Unable to open defaultServiceManager";
return;
}
manager->listManifestByInterface(
V1_0::IDevice::descriptor, [this](const hidl_vec<hidl_string>& names) {
for (const auto& name : names) {
VLOG(MANAGER) << "Found interface " << name.c_str();
sp<V1_0::IDevice> device = V1_0::IDevice::getService(name);
if (device == nullptr) {
LOG(ERROR) << "Got a null IDEVICE for " << name.c_str();
continue;
}
registerDevice(name.c_str(), device);
}
});
// register CPU fallback device
mDevices.push_back(CpuDevice::get());
mDevicesCpuOnly.push_back(CpuDevice::get());
}
void DeviceManager::registerDevice(const char* name, const sp<V1_0::IDevice>& device) {
auto d = std::make_shared<DriverDevice>(name, device);
if (d->initialize()) {
mDevices.push_back(d);
}
}
DeviceManager::DeviceManager() {
VLOG(MANAGER) << "DeviceManager::DeviceManager";
findAvailableDevices();
#ifdef NN_DEBUGGABLE
mStrictSlicing = (getProp("debug.nn.strict-slicing") != 0);
mPartitioning = getProp("debug.nn.partition", kPartitioningDefault);
mDebugNNCpuOnly = (getProp("debug.nn.cpuonly") != 0);
mSyncExecCpu = (getProp("debug.nn.syncexec-cpu", 1) != 0);
if (!mSyncExecHalSetter) {
mSyncExecHal = (getProp("debug.nn.syncexec-hal", 1) != 0);
}
mSyncExecRuntime = (getProp("debug.nn.syncexec-runtime") != 0);
#endif // NN_DEBUGGABLE
}
} // namespace nn
} // namespace android