IVGCVSW-2773 Integrate new CL tuner into ArmNN
!armnn:1037
Change-Id: Ice9c0fba405859d97aa848daa67c609166f2a8dc
Signed-off-by: Ruomei Yan <ruomei.yan@arm.com>
diff --git a/ArmnnDevice.cpp b/ArmnnDevice.cpp
index 60bfa74..216b010 100644
--- a/ArmnnDevice.cpp
+++ b/ArmnnDevice.cpp
@@ -57,7 +57,8 @@
armnn::IRuntime::CreationOptions options;
if (!m_Options.GetClTunedParametersFile().empty())
{
- m_ClTunedParameters = armnn::IGpuAccTunedParameters::Create(m_Options.GetClTunedParametersMode());
+ m_ClTunedParameters = armnn::IGpuAccTunedParameters::Create(m_Options.GetClTunedParametersMode(),
+ m_Options.GetClTuningLevel());
try
{
m_ClTunedParameters->Load(m_Options.GetClTunedParametersFile().c_str());
diff --git a/DriverOptions.cpp b/DriverOptions.cpp
index cd4b6bf..6615e51 100644
--- a/DriverOptions.cpp
+++ b/DriverOptions.cpp
@@ -32,6 +32,7 @@
: m_Backends({computeDevice})
, m_VerboseLogging(false)
, m_ClTunedParametersMode(armnn::IGpuAccTunedParameters::Mode::UseTunedParameters)
+ , m_ClTuningLevel(armnn::IGpuAccTunedParameters::TuningLevel::Rapid)
, m_EnableGpuProfiling(false)
, m_fp16Enabled(fp16Enabled)
{
@@ -41,6 +42,7 @@
: m_Backends(backends)
, m_VerboseLogging(false)
, m_ClTunedParametersMode(armnn::IGpuAccTunedParameters::Mode::UseTunedParameters)
+ , m_ClTuningLevel(armnn::IGpuAccTunedParameters::TuningLevel::Rapid)
, m_EnableGpuProfiling(false)
, m_fp16Enabled(fp16Enabled)
{
@@ -49,6 +51,7 @@
DriverOptions::DriverOptions(int argc, char** argv)
: m_VerboseLogging(false)
, m_ClTunedParametersMode(armnn::IGpuAccTunedParameters::Mode::UseTunedParameters)
+ , m_ClTuningLevel(armnn::IGpuAccTunedParameters::TuningLevel::Rapid)
, m_EnableGpuProfiling(false)
, m_fp16Enabled(false)
{
@@ -56,6 +59,7 @@
std::string unsupportedOperationsAsString;
std::string clTunedParametersModeAsString;
+ std::string clTuningLevelAsString;
po::options_description optionsDesc("Options");
optionsDesc.add_options()
@@ -89,6 +93,13 @@
"If 'UpdateTunedParameters', will also find the optimum parameters when preparing new networks and update "
"the file accordingly.")
+ ("cl-tuning-level,o",
+ po::value<std::string>(&clTuningLevelAsString)->default_value("rapid"),
+ "exhaustive: all lws values are tested "
+ "normal: reduced number of lws values but enough to still have the performance really close to the "
+ "exhaustive approach "
+ "rapid: only 3 lws values should be tested for each kernel ")
+
("gpu-profiling,p",
po::bool_switch(&m_EnableGpuProfiling),
"Turns GPU profiling on")
@@ -166,6 +177,24 @@
ALOGW("Requested unknown cl-tuned-parameters-mode '%s'. Defaulting to UseTunedParameters",
clTunedParametersModeAsString.c_str());
}
+
+ if (clTuningLevelAsString == "exhaustive") // translate the --cl-tuning-level text into the enum
+ {
+ m_ClTuningLevel = armnn::IGpuAccTunedParameters::TuningLevel::Exhaustive;
+ }
+ else if (clTuningLevelAsString == "normal")
+ {
+ m_ClTuningLevel = armnn::IGpuAccTunedParameters::TuningLevel::Normal;
+ }
+ else if (clTuningLevelAsString == "rapid")
+ {
+ m_ClTuningLevel = armnn::IGpuAccTunedParameters::TuningLevel::Rapid;
+ }
+ else // unrecognised text: m_ClTuningLevel is left at the Rapid value set in the init list
+ {
+ ALOGW("Requested unknown cl-tuning-level '%s'. Defaulting to rapid",
+ clTuningLevelAsString.c_str());
+ }
}
}
diff --git a/DriverOptions.hpp b/DriverOptions.hpp
index 637ccd6..895af57 100644
--- a/DriverOptions.hpp
+++ b/DriverOptions.hpp
@@ -28,6 +28,7 @@
const std::set<unsigned int>& GetForcedUnsupportedOperations() const { return m_ForcedUnsupportedOperations; }
const std::string& GetClTunedParametersFile() const { return m_ClTunedParametersFile; }
armnn::IGpuAccTunedParameters::Mode GetClTunedParametersMode() const { return m_ClTunedParametersMode; }
+ armnn::IGpuAccTunedParameters::TuningLevel GetClTuningLevel() const { return m_ClTuningLevel; }
bool IsGpuProfilingEnabled() const { return m_EnableGpuProfiling; }
bool GetFp16Enabled() const { return m_fp16Enabled; }
@@ -38,6 +39,7 @@
std::set<unsigned int> m_ForcedUnsupportedOperations;
std::string m_ClTunedParametersFile;
armnn::IGpuAccTunedParameters::Mode m_ClTunedParametersMode;
+ armnn::IGpuAccTunedParameters::TuningLevel m_ClTuningLevel;
bool m_EnableGpuProfiling;
bool m_fp16Enabled;
};