//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "ArmnnDriverImpl.hpp"
#include "ArmnnPreparedModel.hpp"
#include "ModelToINetworkTransformer.hpp"
#include "SystemPropertiesUtils.hpp"
#include <armnnDeserializer/IDeserializer.hpp>
#include <log/log.h>
#include <algorithm>
#include <cstring>
#include <set>
#include <sstream>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
namespace
{
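// Build the Capabilities structure advertised to the NNAPI runtime. The driver reports the same
// default performance figures (execTime and powerUsage) for every supported operand type rather
// than per-type measured values.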
Capabilities GenerateCapabilities()
{
VLOG(DRIVER) << "ArmnnDriverImpl::GenerateCapabilities()";
float defaultPerfValue = 0.1f;
const Capabilities::PerformanceInfo defaultPerfInfo = { /* execTime */ defaultPerfValue,
/* powerUsage */ defaultPerfValue
};
std::vector<OperandType> operandsTypes({
OperandType::FLOAT32,
OperandType::INT32,
OperandType::UINT32,
OperandType::TENSOR_FLOAT32,
OperandType::TENSOR_INT32,
OperandType::TENSOR_QUANT8_ASYMM,
OperandType::BOOL,
OperandType::TENSOR_QUANT16_SYMM,
OperandType::TENSOR_FLOAT16,
OperandType::TENSOR_BOOL8,
OperandType::FLOAT16,
OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
OperandType::TENSOR_QUANT16_ASYMM,
OperandType::TENSOR_QUANT8_SYMM,
OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
});
std::vector<Capabilities::OperandPerformance> operandPerformances;
operandPerformances.reserve(operandsTypes.size());
for (auto opType : operandsTypes)
{
operandPerformances.push_back(
Capabilities::OperandPerformance{ /* type */ opType, /* info */ defaultPerfInfo });
}
auto operandPerformanceTable =
Capabilities::OperandPerformanceTable::create(std::move(operandPerformances)).value();
return { /* relaxedFloat32toFloat16PerformanceScalar */ defaultPerfInfo,
/* relaxedFloat32toFloat16PerformanceTensor */ defaultPerfInfo,
/* operandPerformance */ std::move(operandPerformanceTable),
/* ifPerformance */ defaultPerfInfo,
/* whilePerformance */ defaultPerfInfo };
}
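// Simple multiplicative hash used to fingerprint cache contents: seeded with the buffer size,
// then updated as hash = hash * 31 + byte for each byte ((hash << 5) - hash is hash * 31).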
size_t Hash(const std::vector<uint8_t>& cacheData)
{
std::size_t hash = cacheData.size();
for (auto& i : cacheData)
{
hash = ((hash << 5) - hash) + i;
}
return hash;
}
} // anonymous namespace
using namespace android::nn;
namespace armnn_driver
{
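// A shared cache handle is considered valid only if it wraps a non-negative file descriptor
// that was opened with read-write access.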
bool ArmnnDriverImpl::ValidateSharedHandle(const SharedHandle& sharedHandle)
{
if (*sharedHandle < 0)
{
return false;
}
int fileAccessMode = fcntl(*sharedHandle, F_GETFL) & O_ACCMODE;
return fileAccessMode == O_RDWR;
}
GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModel(
const armnn::IRuntimePtr& runtime,
const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
const DriverOptions& options,
const Model& model,
const std::vector<SharedHandle>& modelCacheHandle,
const std::vector<SharedHandle>& dataCacheHandle,
const CacheToken& token,
bool float32ToFloat16,
Priority priority)
{
VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModel()";
if (!runtime)
{
return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE) << "Device unavailable";
}
if (const auto result = validate(model); !result.ok())
{
return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << "Invalid model passed as input";
}
// Deliberately ignore any unsupported operations requested by the options -
// at this point we're being asked to prepare a model that we've already declared support for
// and the operation indices may be different to those in getSupportedOperations anyway.
std::set<unsigned int> unsupportedOperations;
ModelToINetworkTransformer modelConverter(options.GetBackends(),
model,
unsupportedOperations);
if (modelConverter.GetConversionResult() != ConversionResult::Success)
{
return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "ModelToINetworkConverter failed";
}
// Serialize the network graph to a .armnn file if an output directory
// has been specified in the driver's arguments.
std::vector<uint8_t> dataCacheData;
bool serializeToFile = !dataCacheHandle.empty();
auto serializedNetworkFileName =
SerializeNetwork(*modelConverter.GetINetwork(),
options.GetRequestInputsAndOutputsDumpDir(),
dataCacheData,
serializeToFile);
// Optimize the network
armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
armnn::OptimizerOptionsOpaque OptOptions;
OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());
int cachedFd = -1;
bool saveCachedNetwork = options.SaveCachedNetwork();
unsigned int numberOfCachedModelFiles = 0;
if (modelCacheHandle.size() > 0)
{
unsigned int index = 0;
for (auto& backend : options.GetBackends())
{
// The modelCacheHandle size should be equal to numberOfCachedModelFiles
// and the modelCacheHandle vector should be in the same order as the backends
auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
if (numberOfCacheFiles > 0)
{
numberOfCachedModelFiles += numberOfCacheFiles;
// For GpuAcc numberOfCacheFiles is 1
if (backend == armnn::Compute::GpuAcc)
{
cachedFd = *modelCacheHandle[index];
saveCachedNetwork = true;
}
// Advance by this backend's cache file count, not the running total
index += numberOfCacheFiles;
}
}
}
armnn::BackendOptions gpuAcc("GpuAcc",
{
{ "FastMathEnabled", options.IsFastMathEnabled() },
{ "SaveCachedNetwork", saveCachedNetwork },
{ "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
{ "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
{ "CachedFileDescriptor", cachedFd }
});
armnn::BackendOptions cpuAcc("CpuAcc",
{
{ "FastMathEnabled", options.IsFastMathEnabled() },
{ "NumberOfThreads", options.GetNumberOfThreads() }
});
OptOptions.AddModelOption(gpuAcc);
OptOptions.AddModelOption(cpuAcc);
std::vector<std::string> errMessages;
try
{
optNet = armnn::Optimize(*modelConverter.GetINetwork(),
options.GetBackends(),
runtime->GetDeviceSpec(),
OptOptions,
errMessages);
}
catch (std::exception& e)
{
return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
}
// Check that the optimized network is valid.
if (!optNet)
{
std::stringstream message;
message << "Invalid optimized network";
for (const std::string& msg : errMessages)
{
message << "\n" << msg;
}
return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
}
// Export the optimized network graph to a dot file if an output dump directory
// has been specified in the driver's arguments.
std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
options.GetRequestInputsAndOutputsDumpDir());
// Load it into the runtime.
armnn::NetworkId netId = 0;
std::string msg;
armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
MemorySource::Undefined,
MemorySource::Undefined,
options.IsGpuProfilingEnabled());
auto numInputs = getMainModel(model).inputIndexes.size();
auto numOutputs = getMainModel(model).outputIndexes.size();
try
{
if (runtime->LoadNetwork(netId, std::move(optNet), msg, networkProperties) != armnn::Status::Success)
{
return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
}
}
catch (std::exception& e)
{
std::stringstream message;
message << "Exception (" << e.what()<< ") caught from LoadNetwork.";
return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
}
// Now that we have a networkId for the graph, rename the exported files to use it
// so that we can associate the graph file with the exported input/output tensor files
RenameExportedFiles(serializedNetworkFileName,
dotGraphFileName,
options.GetRequestInputsAndOutputsDumpDir(),
netId);
// Compute a hash of the serialized network for the data cache
size_t hashValue = 0;
if (dataCacheHandle.size() == 1)
{
hashValue = Hash(dataCacheData);
}
// Fold the contents of the backend model cache files into the hash
if (modelCacheHandle.size() > 0)
{
if (modelCacheHandle.size() == numberOfCachedModelFiles)
{
for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
{
int modelCacheFileAccessMode = fcntl(*modelCacheHandle[i], F_GETFL) & O_ACCMODE;
if (modelCacheFileAccessMode != O_RDONLY)
{
struct stat statBuffer;
if (fstat(*modelCacheHandle[i], &statBuffer) == 0)
{
long modelDataSize = statBuffer.st_size;
if (modelDataSize > 0)
{
std::vector<uint8_t> modelData(modelDataSize);
pread(*modelCacheHandle[i], modelData.data(), modelData.size(), 0);
hashValue ^= Hash(modelData);
}
}
}
}
}
}
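// Data cache layout: the hash value is written first, followed by the serialized network at
// offset sizeof(hashValue). PrepareArmnnModelFromCache() reads the entry back in the same order.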
if (dataCacheHandle.size() == 1 && hashValue != 0)
{
std::vector<uint8_t> theHashValue(sizeof(hashValue));
::memcpy(theHashValue.data(), &hashValue, sizeof(hashValue));
write(*dataCacheHandle[0], theHashValue.data(), theHashValue.size());
pwrite(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size(), theHashValue.size());
}
bool executeWithDummyInputs = (std::find(options.GetBackends().begin(),
options.GetBackends().end(),
armnn::Compute::GpuAcc) != options.GetBackends().end());
auto preparedModel = std::make_shared<const ArmnnPreparedModel>(netId,
runtime.get(),
model,
options.GetRequestInputsAndOutputsDumpDir(),
options.IsGpuProfilingEnabled(),
priority);
// Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
// this is enabled) before the first 'real' inference, which removes the overhead of the first inference.
// Only run this if the GpuAcc backend has been added to options
if (executeWithDummyInputs)
{
if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
{
return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be executed";
}
if (clTunedParameters &&
options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
{
// Now that we've done one inference the CL kernel parameters will have been tuned,
// so save the updated file.
try
{
clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
}
catch (std::exception& error)
{
VLOG(DRIVER) << "ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file"
<< options.GetClTunedParametersFile().c_str() << error.what();
}
}
}
return std::move(preparedModel);
}
GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModelFromCache(
const armnn::IRuntimePtr& runtime,
const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
const DriverOptions& options,
const std::vector<SharedHandle>& modelCacheHandle,
const std::vector<SharedHandle>& dataCacheHandle,
const CacheToken& token,
bool float32ToFloat16)
{
VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModelFromCache()";
if (!runtime)
{
return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE)
<< "ArmnnDriverImpl::prepareModelFromCache(): Device unavailable";
}
if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
{
return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
<< "ArmnnDriverImpl::prepareModelFromCache(): Token size does not match!";
}
// Validate dataCacheHandle
if (dataCacheHandle.size() != 1)
{
return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
<< "ArmnnDriverImpl::prepareModelFromCache(): Not valid data cache handle!";
}
if (!ValidateSharedHandle(dataCacheHandle[0]))
{
return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
<< "ArmnnDriverImpl::prepareModelFromCache(): Not valid data cache handle!";
}
size_t cachedDataSize = 0;
struct stat dataStatBuffer;
if (fstat(*dataCacheHandle[0], &dataStatBuffer) == 0)
{
cachedDataSize = dataStatBuffer.st_size;
}
if (cachedDataSize == 0)
{
return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
<< "ArmnnDriverImpl::prepareModelFromCache(): Not valid cached data!";
}
// Check that the number of cached model files matches the expected value
unsigned int numberOfCachedModelFiles = 0;
for (auto& backend : options.GetBackends())
{
numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
}
if (modelCacheHandle.size() != numberOfCachedModelFiles)
{
return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
<< "ArmnnDriverImpl::prepareModelFromCache(): Model cache handle size does not match.";
}
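// The data cache entry was written by PrepareArmnnModel() as [hash | serialized network],
// so read the hash from offset 0 and the network data from offset sizeof(size_t).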
// Read the hashValue
std::vector<uint8_t> hashValue(sizeof(size_t));
pread(*dataCacheHandle[0], hashValue.data(), hashValue.size(), 0);
// Read the model
std::vector<uint8_t> dataCacheData(cachedDataSize - hashValue.size());
pread(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size(), hashValue.size());
auto calculatedHashValue = Hash(dataCacheData);
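// Mirror the write path: XOR the data hash with the hash of each backend's model cache file,
// so a change to any cached artefact invalidates the whole entry.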
int gpuAccCachedFd = -1;
if (modelCacheHandle.size() > 0)
{
unsigned int index = 0;
for (auto& backend : options.GetBackends())
{
// The modelCacheHandle size should be equal to numberOfCachedModelFiles
// and the modelCacheHandle vector should be in the same order as the backends
auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
if (numberOfCacheFiles > 0)
{
if (!ValidateSharedHandle(modelCacheHandle[index]))
{
return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
<< "ArmnnDriverImpl::prepareModelFromCache(): Invalid model cache handle!";
}
int cachedFd = *modelCacheHandle[index];
struct stat statBuffer;
if (fstat(cachedFd, &statBuffer) == 0)
{
long modelDataSize = statBuffer.st_size;
if (modelDataSize > 0)
{
std::vector<uint8_t> modelData(modelDataSize);
pread(cachedFd, modelData.data(), modelData.size(), 0);
calculatedHashValue ^= Hash(modelData);
if (backend == armnn::Compute::GpuAcc)
{
gpuAccCachedFd = cachedFd;
}
}
}
index += numberOfCacheFiles;
}
}
}
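// Serialize the recomputed hash to bytes and compare it with the stored hash read from the cache.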
std::vector<uint8_t> calculatedHashData(sizeof(calculatedHashValue));
::memcpy(calculatedHashData.data(), &calculatedHashValue, sizeof(calculatedHashValue));
if (hashValue != calculatedHashData)
{
return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
<< "ArmnnDriverImpl::prepareModelFromCache(): ValidateHash() failed!";
}
// Deserialize the network.
armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
try
{
network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
}
catch (std::exception&)
{
return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
<< "ArmnnDriverImpl::prepareModelFromCache(): Exception caught from Deserializer!";
}
// Optimize the network
armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
armnn::OptimizerOptionsOpaque OptOptions;
OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());
armnn::BackendOptions gpuAcc("GpuAcc",
{
{ "FastMathEnabled", options.IsFastMathEnabled() },
{ "SaveCachedNetwork", false },
{ "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
{ "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
{ "CachedFileDescriptor", gpuAccCachedFd }
});
armnn::BackendOptions cpuAcc("CpuAcc",
{
{ "FastMathEnabled", options.IsFastMathEnabled() },
{ "NumberOfThreads", options.GetNumberOfThreads() }
});
OptOptions.AddModelOption(gpuAcc);
OptOptions.AddModelOption(cpuAcc);
std::vector<std::string> errMessages;
try
{
optNet = armnn::Optimize(*network,
options.GetBackends(),
runtime->GetDeviceSpec(),
OptOptions,
errMessages);
}
catch (std::exception& e)
{
return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
}
// Check that the optimized network is valid.
if (!optNet)
{
std::stringstream message;
message << "Invalid optimized network";
for (const std::string& msg : errMessages)
{
message << "\n" << msg;
}
return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
}
// Export the optimized network graph to a dot file if an output dump directory
// has been specified in the driver's arguments.
std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
options.GetRequestInputsAndOutputsDumpDir());
// Load it into the runtime.
armnn::NetworkId netId = 0;
std::string msg;
armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
MemorySource::Undefined,
MemorySource::Undefined,
options.IsGpuProfilingEnabled());
try
{
if (runtime->LoadNetwork(netId, std::move(optNet), msg, networkProperties) != armnn::Status::Success)
{
return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
}
}
catch (std::exception& e)
{
std::stringstream message;
message << "Exception (" << e.what()<< ") caught from LoadNetwork.";
return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
}
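// This ArmnnPreparedModel overload takes no Model; the trailing 'true' flags that the model
// was prepared from the cache.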
auto preparedModel = std::make_shared<const ArmnnPreparedModel>(netId,
runtime.get(),
options.GetRequestInputsAndOutputsDumpDir(),
options.IsGpuProfilingEnabled(),
Priority::MEDIUM,
true);
return std::move(preparedModel);
}
const Capabilities& ArmnnDriverImpl::GetCapabilities(const armnn::IRuntimePtr& runtime)
{
VLOG(DRIVER) << "ArmnnDriverImpl::GetCapabilities()";
static const Capabilities theCapabilities = GenerateCapabilities();
return theCapabilities;
}
} // namespace armnn_driver