blob: 2c7357416081961f7e0dd8b52062d4ee1bd3fbc8 [file] [log] [blame] [edit]
/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Provides C++ classes to more easily use the Neural Networks API.
#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
#include "NeuralNetworks.h"
#include <assert.h>
#include <math.h>
#include <algorithm>
#include <optional>
#include <string>
#include <utility>
#include <vector>
namespace android {
namespace nn {
namespace wrapper {
// Operand type codes as a scoped enum. Every enumerator takes the value of the
// ANEURALNETWORKS_* constant of the same name from NeuralNetworks.h, so a Type
// converts to the C API's integer code with a plain static_cast (this is what
// the OperandType constructors do when filling in the operand descriptor).
enum class Type {
FLOAT32 = ANEURALNETWORKS_FLOAT32,
INT32 = ANEURALNETWORKS_INT32,
UINT32 = ANEURALNETWORKS_UINT32,
TENSOR_FLOAT32 = ANEURALNETWORKS_TENSOR_FLOAT32,
TENSOR_INT32 = ANEURALNETWORKS_TENSOR_INT32,
TENSOR_QUANT8_ASYMM = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
BOOL = ANEURALNETWORKS_BOOL,
TENSOR_QUANT16_SYMM = ANEURALNETWORKS_TENSOR_QUANT16_SYMM,
TENSOR_FLOAT16 = ANEURALNETWORKS_TENSOR_FLOAT16,
TENSOR_BOOL8 = ANEURALNETWORKS_TENSOR_BOOL8,
FLOAT16 = ANEURALNETWORKS_FLOAT16,
TENSOR_QUANT8_SYMM_PER_CHANNEL = ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL,
TENSOR_QUANT16_ASYMM = ANEURALNETWORKS_TENSOR_QUANT16_ASYMM,
TENSOR_QUANT8_SYMM = ANEURALNETWORKS_TENSOR_QUANT8_SYMM,
MODEL = ANEURALNETWORKS_MODEL,
};
// Execution preference codes as a scoped enum. Each enumerator takes the value
// of the matching ANEURALNETWORKS_PREFER_* constant from NeuralNetworks.h.
// Compilation::setPreference() casts the chosen value to int32_t and forwards
// it to ANeuralNetworksCompilation_setPreference().
enum class ExecutePreference {
PREFER_LOW_POWER = ANEURALNETWORKS_PREFER_LOW_POWER,
PREFER_FAST_SINGLE_ANSWER = ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER,
PREFER_SUSTAINED_SPEED = ANEURALNETWORKS_PREFER_SUSTAINED_SPEED
};
// Priority codes as a scoped enum. Each enumerator takes the value of the
// matching ANEURALNETWORKS_PRIORITY_* constant from NeuralNetworks.h.
// Compilation::setPriority() casts the chosen value to int32_t and forwards it
// to ANeuralNetworksCompilation_setPriority().
enum class ExecutePriority {
LOW = ANEURALNETWORKS_PRIORITY_LOW,
MEDIUM = ANEURALNETWORKS_PRIORITY_MEDIUM,
HIGH = ANEURALNETWORKS_PRIORITY_HIGH,
DEFAULT = ANEURALNETWORKS_PRIORITY_DEFAULT,
};
// Status codes as a scoped enum. Each enumerator takes the value of the
// ANEURALNETWORKS_* result constant of the same name from NeuralNetworks.h.
// The wrapper classes in this file produce a Result by static_cast-ing the
// integer returned by the corresponding C API call.
enum class Result {
NO_ERROR = ANEURALNETWORKS_NO_ERROR,
OUT_OF_MEMORY = ANEURALNETWORKS_OUT_OF_MEMORY,
INCOMPLETE = ANEURALNETWORKS_INCOMPLETE,
UNEXPECTED_NULL = ANEURALNETWORKS_UNEXPECTED_NULL,
BAD_DATA = ANEURALNETWORKS_BAD_DATA,
OP_FAILED = ANEURALNETWORKS_OP_FAILED,
UNMAPPABLE = ANEURALNETWORKS_UNMAPPABLE,
BAD_STATE = ANEURALNETWORKS_BAD_STATE,
OUTPUT_INSUFFICIENT_SIZE = ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE,
UNAVAILABLE_DEVICE = ANEURALNETWORKS_UNAVAILABLE_DEVICE,
MISSED_DEADLINE_TRANSIENT = ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT,
MISSED_DEADLINE_PERSISTENT = ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT,
};
struct SymmPerChannelQuantParams {
ANeuralNetworksSymmPerChannelQuantParams params;
std::vector<float> scales;
SymmPerChannelQuantParams(std::vector<float> scalesVec, uint32_t channelDim)
: scales(std::move(scalesVec)) {
params = {
.channelDim = channelDim,
.scaleCount = static_cast<uint32_t>(scales.size()),
.scales = scales.size() > 0 ? scales.data() : nullptr,
};
}
SymmPerChannelQuantParams(const SymmPerChannelQuantParams& other)
: params(other.params), scales(other.scales) {
params.scales = scales.size() > 0 ? scales.data() : nullptr;
}
SymmPerChannelQuantParams& operator=(const SymmPerChannelQuantParams& other) {
if (this != &other) {
params = other.params;
scales = other.scales;
params.scales = scales.size() > 0 ? scales.data() : nullptr;
}
return *this;
}
};
// Bundles the C API's operand descriptor with the storage it points into.
//
// `operandType.dimensions` is a raw pointer into `dimensions`, so the copy
// operations are written by hand to re-aim that pointer at this object's own
// vector after the members have been copied.
struct OperandType {
    ANeuralNetworksOperandType operandType;
    std::vector<uint32_t> dimensions;
    std::optional<SymmPerChannelQuantParams> channelQuant;

    // Describes an operand without per-channel quantization.
    //   type:      operand type code.
    //   d:         dimensions; ownership is taken (may be empty).
    //   scale:     stored verbatim in operandType.scale.
    //   zeroPoint: stored verbatim in operandType.zeroPoint.
    OperandType(Type type, std::vector<uint32_t> d, float scale = 0.0f, int32_t zeroPoint = 0)
        : dimensions(std::move(d)), channelQuant(std::nullopt) {
        // Assigned in the body, not the initializer list: operandType is
        // declared before dimensions, so it would otherwise be built before
        // the vector it must point into exists.
        operandType = describe(type, dimensions, scale, zeroPoint);
    }

    // Describes a per-channel quantized operand. `type` is asserted to be
    // TENSOR_QUANT8_SYMM_PER_CHANNEL; scale and zeroPoint are set to zero.
    //   data:         dimensions; ownership is taken (may be empty).
    //   channelQuant: per-channel parameters stored alongside the operand.
    OperandType(Type type, std::vector<uint32_t> data, SymmPerChannelQuantParams&& channelQuant)
        : dimensions(std::move(data)), channelQuant(std::move(channelQuant)) {
        assert(type == Type::TENSOR_QUANT8_SYMM_PER_CHANNEL);
        operandType = describe(type, dimensions, 0.0f, 0);
    }

    // Deep copy: duplicates the members and points the descriptor at the
    // copied dimensions.
    OperandType(const OperandType& other)
        : operandType(other.operandType),
          dimensions(other.dimensions),
          channelQuant(other.channelQuant) {
        operandType.dimensions = ownDimensions();
    }

    // Deep copy assignment; self-assignment is a no-op.
    OperandType& operator=(const OperandType& other) {
        if (this == &other) {
            return *this;
        }
        operandType = other.operandType;
        dimensions = other.dimensions;
        channelQuant = other.channelQuant;
        operandType.dimensions = ownDimensions();
        return *this;
    }

   private:
    // Pointer to this object's dimension storage, or nullptr when empty.
    const uint32_t* ownDimensions() const {
        return dimensions.empty() ? nullptr : dimensions.data();
    }

    // Builds a descriptor whose dimension pointer refers to `dims`.
    static ANeuralNetworksOperandType describe(Type type, const std::vector<uint32_t>& dims,
                                               float scale, int32_t zeroPoint) {
        ANeuralNetworksOperandType desc = {};
        desc.type = static_cast<int32_t>(type);
        desc.dimensionCount = static_cast<uint32_t>(dims.size());
        desc.dimensions = dims.empty() ? nullptr : dims.data();
        desc.scale = scale;
        desc.zeroPoint = zeroPoint;
        return desc;
    }
};
// Move-only owner of an ANeuralNetworksMemory handle.
//
// Construction never throws; whether the runtime object was created
// successfully is reported by isValid().
class Memory {
   public:
    // Creates the memory object from a file descriptor. The arguments are
    // forwarded unchanged to ANeuralNetworksMemory_createFromFd().
    Memory(size_t size, int protect, int fd, size_t offset) {
        const int status = ANeuralNetworksMemory_createFromFd(size, protect, fd, offset, &mMemory);
        mValid = (status == ANEURALNETWORKS_NO_ERROR);
    }

    // Creates the memory object from an AHardwareBuffer via
    // ANeuralNetworksMemory_createFromAHardwareBuffer().
    Memory(AHardwareBuffer* buffer) {
        const int status = ANeuralNetworksMemory_createFromAHardwareBuffer(buffer, &mMemory);
        mValid = (status == ANEURALNETWORKS_NO_ERROR);
    }

    ~Memory() { ANeuralNetworksMemory_free(mMemory); }

    // Copying is disabled so the runtime object is freed exactly once.
    // It could be enabled later if reference counting or deep copying of
    // runtime objects is introduced.
    Memory(const Memory&) = delete;
    Memory& operator=(const Memory&) = delete;

    // Moving hands the runtime object over and strips the source of its
    // handle, so only the destination frees it.
    Memory(Memory&& other) { *this = std::move(other); }
    Memory& operator=(Memory&& other) {
        if (this == &other) {
            return *this;
        }
        // Release whatever this wrapper held before adopting the new handle.
        ANeuralNetworksMemory_free(mMemory);
        mMemory = std::exchange(other.mMemory, nullptr);
        mValid = std::exchange(other.mValid, false);
        return *this;
    }

    // Returns the wrapped handle; ownership stays with this object.
    ANeuralNetworksMemory* get() const { return mMemory; }

    // True when the creation call returned ANEURALNETWORKS_NO_ERROR and the
    // object has not been moved from.
    bool isValid() const { return mValid; }

   private:
    ANeuralNetworksMemory* mMemory = nullptr;
    bool mValid = true;
};
// Move-only owner of an ANeuralNetworksModel handle that forwards
// model-building calls to the C API.
//
// The building methods do not return a status. Instead, a failed C API call
// clears an internal validity flag (see each method), which callers query
// with isValid() and which makes finish() return BAD_STATE.
class Model {
   public:
    // Creates the underlying runtime model. If creation fails the wrapper is
    // marked invalid instead of silently pretending to be usable.
    Model() {
        if (ANeuralNetworksModel_create(&mModel) != ANEURALNETWORKS_NO_ERROR) {
            mValid = false;
        }
    }

    ~Model() { ANeuralNetworksModel_free(mModel); }

    // Disallow copy semantics to ensure the runtime object can only be freed
    // once. Copy semantics could be enabled if some sort of reference counting
    // or deep-copy system for runtime objects is added later.
    Model(const Model&) = delete;
    Model& operator=(const Model&) = delete;

    // Move semantics to remove access to the runtime object from the wrapper
    // object that is being moved. This ensures the runtime object will be
    // freed only once.
    Model(Model&& other) { *this = std::move(other); }
    Model& operator=(Model&& other) {
        if (this != &other) {
            // Release the model this wrapper currently owns, if any.
            ANeuralNetworksModel_free(mModel);

            // Take over every piece of state that describes the moved model.
            // mRelaxed must travel with the handle; otherwise isRelaxed()
            // would report this wrapper's stale value for the adopted model.
            mModel = other.mModel;
            mNextOperandId = other.mNextOperandId;
            mValid = other.mValid;
            mRelaxed = other.mRelaxed;

            // Leave the source empty and invalid.
            other.mModel = nullptr;
            other.mNextOperandId = 0;
            other.mValid = false;
            other.mRelaxed = false;
        }
        return *this;
    }

    // Finishes the model via ANeuralNetworksModel_finish().
    // Returns BAD_STATE without calling the C API if the wrapper is already
    // invalid; otherwise returns the C API's result, marking the wrapper
    // invalid when that result is not NO_ERROR.
    Result finish() {
        if (mValid) {
            auto result = static_cast<Result>(ANeuralNetworksModel_finish(mModel));
            if (result != Result::NO_ERROR) {
                mValid = false;
            }
            return result;
        } else {
            return Result::BAD_STATE;
        }
    }

    // Adds an operand described by `type` and, when `type` carries
    // per-channel quantization parameters, attaches them to the new operand.
    // Returns the index assigned to the operand by this wrapper's counter.
    // The counter advances even if a C API call fails; failure is signalled
    // only through isValid().
    uint32_t addOperand(const OperandType* type) {
        if (ANeuralNetworksModel_addOperand(mModel, &(type->operandType)) !=
            ANEURALNETWORKS_NO_ERROR) {
            mValid = false;
        }
        if (type->channelQuant) {
            if (ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
                        mModel, mNextOperandId, &type->channelQuant.value().params) !=
                ANEURALNETWORKS_NO_ERROR) {
                mValid = false;
            }
        }
        return mNextOperandId++;
    }

    // Forwards to ANeuralNetworksModel_setOperandValue(); marks the wrapper
    // invalid on failure.
    void setOperandValue(uint32_t index, const void* buffer, size_t length) {
        if (ANeuralNetworksModel_setOperandValue(mModel, index, buffer, length) !=
            ANEURALNETWORKS_NO_ERROR) {
            mValid = false;
        }
    }

    // Forwards to ANeuralNetworksModel_setOperandValueFromMemory() using the
    // handle held by `memory`; marks the wrapper invalid on failure.
    void setOperandValueFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
                                   size_t length) {
        if (ANeuralNetworksModel_setOperandValueFromMemory(mModel, index, memory->get(), offset,
                                                           length) != ANEURALNETWORKS_NO_ERROR) {
            mValid = false;
        }
    }

    // Adds an operation of the given type with the given input and output
    // operand indexes; marks the wrapper invalid on failure.
    void addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs,
                      const std::vector<uint32_t>& outputs) {
        if (ANeuralNetworksModel_addOperation(mModel, type, static_cast<uint32_t>(inputs.size()),
                                              inputs.data(), static_cast<uint32_t>(outputs.size()),
                                              outputs.data()) != ANEURALNETWORKS_NO_ERROR) {
            mValid = false;
        }
    }

    // Declares which operand indexes are the model's inputs and outputs;
    // marks the wrapper invalid on failure.
    void identifyInputsAndOutputs(const std::vector<uint32_t>& inputs,
                                  const std::vector<uint32_t>& outputs) {
        if (ANeuralNetworksModel_identifyInputsAndOutputs(
                    mModel, static_cast<uint32_t>(inputs.size()), inputs.data(),
                    static_cast<uint32_t>(outputs.size()),
                    outputs.data()) != ANEURALNETWORKS_NO_ERROR) {
            mValid = false;
        }
    }

    // Forwards to ANeuralNetworksModel_relaxComputationFloat32toFloat16().
    // The cached flag returned by isRelaxed() is updated only when the call
    // succeeds; a failure here does not affect isValid().
    void relaxComputationFloat32toFloat16(bool isRelax) {
        if (ANeuralNetworksModel_relaxComputationFloat32toFloat16(mModel, isRelax) ==
            ANEURALNETWORKS_NO_ERROR) {
            mRelaxed = isRelax;
        }
    }

    // Returns the wrapped handle; ownership stays with this object.
    ANeuralNetworksModel* getHandle() const { return mModel; }
    // False once creation or any building call has failed, or after the
    // object has been moved from.
    bool isValid() const { return mValid; }
    // Last value successfully passed to relaxComputationFloat32toFloat16().
    bool isRelaxed() const { return mRelaxed; }

   protected:
    ANeuralNetworksModel* mModel = nullptr;
    // We keep track of the operand ID as a convenience to the caller.
    uint32_t mNextOperandId = 0;
    bool mValid = true;
    bool mRelaxed = false;
};
class Event {
public:
Event() {}
~Event() { ANeuralNetworksEvent_free(mEvent); }
// Disallow copy semantics to ensure the runtime object can only be freed
// once. Copy semantics could be enabled if some sort of reference counting
// or deep-copy system for runtime objects is added later.
Event(const Event&) = delete;
Event& operator=(const Event&) = delete;
// Move semantics to remove access to the runtime object from the wrapper
// object that is being moved. This ensures the runtime object will be
// freed only once.
Event(Event&& other) { *this = std::move(other); }
Event& operator=(Event&& other) {
if (this != &other) {
ANeuralNetworksEvent_free(mEvent);
mEvent = other.mEvent;
other.mEvent = nullptr;
}
return *this;
}
Result wait() { return static_cast<Result>(ANeuralNetworksEvent_wait(mEvent)); }
// Only for use by Execution
void set(ANeuralNetworksEvent* newEvent) {
ANeuralNetworksEvent_free(mEvent);
mEvent = newEvent;
}
// Only for use by Execution
ANeuralNetworksEvent* getHandle() const { return mEvent; }
private:
ANeuralNetworksEvent* mEvent = nullptr;
};
// Move-only owner of an ANeuralNetworksCompilation handle. Each setter
// forwards to the matching C API call and returns its status as a Result.
class Compilation {
   public:
    // Creates a compilation for the runtime model held by `model`.
    Compilation(const Model* model) {
        // TODO Handle the error
        const int status = ANeuralNetworksCompilation_create(model->getHandle(), &mCompilation);
        (void)status;
    }

    ~Compilation() { ANeuralNetworksCompilation_free(mCompilation); }

    // Copying is disabled so the runtime object is freed exactly once.
    // It could be enabled later if reference counting or deep copying of
    // runtime objects is introduced.
    Compilation(const Compilation&) = delete;
    Compilation& operator=(const Compilation&) = delete;

    // Moving hands the runtime object over and strips the source of its
    // handle, so only the destination frees it.
    Compilation(Compilation&& other) { *this = std::move(other); }
    Compilation& operator=(Compilation&& other) {
        if (this == &other) {
            return *this;
        }
        // Release whatever this wrapper held before adopting the new handle.
        ANeuralNetworksCompilation_free(mCompilation);
        mCompilation = std::exchange(other.mCompilation, nullptr);
        return *this;
    }

    // Forwards `preference` (as int32_t) to
    // ANeuralNetworksCompilation_setPreference().
    Result setPreference(ExecutePreference preference) {
        const auto code = static_cast<int32_t>(preference);
        return static_cast<Result>(ANeuralNetworksCompilation_setPreference(mCompilation, code));
    }

    // Forwards `priority` (as int32_t) to
    // ANeuralNetworksCompilation_setPriority().
    Result setPriority(ExecutePriority priority) {
        const auto code = static_cast<int32_t>(priority);
        return static_cast<Result>(ANeuralNetworksCompilation_setPriority(mCompilation, code));
    }

    // Configures caching with the given directory and token.
    // Returns BAD_DATA without calling the C API unless `token` holds exactly
    // ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN bytes.
    Result setCaching(const std::string& cacheDir, const std::vector<uint8_t>& token) {
        if (token.size() == ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN) {
            return static_cast<Result>(ANeuralNetworksCompilation_setCaching(
                    mCompilation, cacheDir.c_str(), token.data()));
        }
        return Result::BAD_DATA;
    }

    // Forwards to ANeuralNetworksCompilation_finish() and returns its status.
    Result finish() {
        const auto status = ANeuralNetworksCompilation_finish(mCompilation);
        return static_cast<Result>(status);
    }

    // Returns the wrapped handle; ownership stays with this object.
    ANeuralNetworksCompilation* getHandle() const { return mCompilation; }

   private:
    ANeuralNetworksCompilation* mCompilation = nullptr;
};
// Move-only owner of an ANeuralNetworksExecution handle. Each method forwards
// to the matching C API call and returns its status as a Result.
class Execution {
   public:
    // Creates an execution for the runtime compilation held by `compilation`.
    Execution(const Compilation* compilation) {
        // TODO Handle the error
        const int status = ANeuralNetworksExecution_create(compilation->getHandle(), &mExecution);
        (void)status;
    }

    ~Execution() { ANeuralNetworksExecution_free(mExecution); }

    // Copying is disabled so the runtime object is freed exactly once.
    // It could be enabled later if reference counting or deep copying of
    // runtime objects is introduced.
    Execution(const Execution&) = delete;
    Execution& operator=(const Execution&) = delete;

    // Moving hands the runtime object over and strips the source of its
    // handle, so only the destination frees it.
    Execution(Execution&& other) { *this = std::move(other); }
    Execution& operator=(Execution&& other) {
        if (this == &other) {
            return *this;
        }
        // Release whatever this wrapper held before adopting the new handle.
        ANeuralNetworksExecution_free(mExecution);
        mExecution = std::exchange(other.mExecution, nullptr);
        return *this;
    }

    // Binds a caller-owned buffer as input `index`. `type` is passed through
    // to the C API and defaults to nullptr.
    Result setInput(uint32_t index, const void* buffer, size_t length,
                    const ANeuralNetworksOperandType* type = nullptr) {
        const auto status =
                ANeuralNetworksExecution_setInput(mExecution, index, type, buffer, length);
        return static_cast<Result>(status);
    }

    // Binds a region of `memory` as input `index`.
    Result setInputFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
                              uint32_t length, const ANeuralNetworksOperandType* type = nullptr) {
        const auto status = ANeuralNetworksExecution_setInputFromMemory(
                mExecution, index, type, memory->get(), offset, length);
        return static_cast<Result>(status);
    }

    // Binds a caller-owned buffer as output `index`.
    Result setOutput(uint32_t index, void* buffer, size_t length,
                     const ANeuralNetworksOperandType* type = nullptr) {
        const auto status =
                ANeuralNetworksExecution_setOutput(mExecution, index, type, buffer, length);
        return static_cast<Result>(status);
    }

    // Binds a region of `memory` as output `index`.
    Result setOutputFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
                               uint32_t length, const ANeuralNetworksOperandType* type = nullptr) {
        const auto status = ANeuralNetworksExecution_setOutputFromMemory(
                mExecution, index, type, memory->get(), offset, length);
        return static_cast<Result>(status);
    }

    // Calls ANeuralNetworksExecution_startCompute() and hands the event it
    // produces to `event`, which frees any event it held before. `event` is
    // updated regardless of the returned status.
    Result startCompute(Event* event) {
        ANeuralNetworksEvent* started = nullptr;
        const Result status =
                static_cast<Result>(ANeuralNetworksExecution_startCompute(mExecution, &started));
        event->set(started);
        return status;
    }

    // Like startCompute(), but passes the handles of `dependencies` and
    // `duration` to ANeuralNetworksExecution_startComputeWithDependencies().
    Result startComputeWithDependencies(const std::vector<const Event*>& dependencies,
                                        uint64_t duration, Event* event) {
        // Collect the raw handles in the order the dependencies were given.
        std::vector<const ANeuralNetworksEvent*> handles;
        handles.reserve(dependencies.size());
        for (const Event* dependency : dependencies) {
            handles.push_back(dependency->getHandle());
        }

        ANeuralNetworksEvent* started = nullptr;
        const Result status =
                static_cast<Result>(ANeuralNetworksExecution_startComputeWithDependencies(
                        mExecution, handles.data(), handles.size(), duration, &started));
        event->set(started);
        return status;
    }

    // Forwards to ANeuralNetworksExecution_compute() and returns its status.
    Result compute() {
        const auto status = ANeuralNetworksExecution_compute(mExecution);
        return static_cast<Result>(status);
    }

    // Queries the rank of output `index`, resizes `*dimensions` to that rank,
    // and then fills it with the output's dimensions.
    // The dimension query is skipped, and the rank query's status returned,
    // when the rank is 0 or when the rank query returned anything other than
    // NO_ERROR or OUTPUT_INSUFFICIENT_SIZE.
    Result getOutputOperandDimensions(uint32_t index, std::vector<uint32_t>* dimensions) {
        uint32_t rank = 0;
        const Result rankStatus = static_cast<Result>(
                ANeuralNetworksExecution_getOutputOperandRank(mExecution, index, &rank));
        // Resized unconditionally; rank stays 0 unless the C API wrote to it.
        dimensions->resize(rank);

        const bool rankQueryUsable = rankStatus == Result::NO_ERROR ||
                                     rankStatus == Result::OUTPUT_INSUFFICIENT_SIZE;
        if (!rankQueryUsable || rank == 0) {
            return rankStatus;
        }
        return static_cast<Result>(ANeuralNetworksExecution_getOutputOperandDimensions(
                mExecution, index, dimensions->data()));
    }

   private:
    ANeuralNetworksExecution* mExecution = nullptr;
};
} // namespace wrapper
} // namespace nn
} // namespace android
#endif // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H