// blob: 3287de2aeb2cb8632bb2c728aafdeb09879d9167 [file] [log] [blame]
/* Copyright 2019 Google LLC. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_RUY_PATH_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_RUY_PATH_H_
#include <cstdint>
#include "size_util.h"
// Identify which ARM flavor, if any, is being targeted: RUY_ARM_64 when the
// compiler predefines __aarch64__, otherwise RUY_ARM_32 when it predefines
// __arm__. Neither macro is defined by this block on other targets.
#if defined(__aarch64__)
#define RUY_ARM_64
#elif defined(__arm__)
#define RUY_ARM_32
#endif

// RUY_NEON is defined when the compiler advertises NEON through either
// spelling of its feature-test macro.
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#define RUY_NEON
#endif

// Combine the two answers above into per-width NEON macros:
// RUY_NEON_32 for NEON on 32-bit ARM, RUY_NEON_64 for NEON on 64-bit ARM.
#ifdef RUY_NEON
#ifdef RUY_ARM_32
#define RUY_NEON_32
#endif
#ifdef RUY_ARM_64
#define RUY_NEON_64
#endif
#endif  // RUY_NEON
namespace ruy {
// A Path is a choice of implementation path, e.g. between reference code
// and optimized code, or between different optimized code paths using different
// instruction sets.
//
// It's important that any symbol that depends on such implementation
// details, is somehow templatized in such a Path, so that different Path values
// yield different symbols, so we never have the situation where a symbol has
// multiple inequivalent definitions based on which code paths are compiled.
// That would be a violation of the ODR (One Definition Rule) which is Undefined
// Behavior, and one of the most serious issues plaguing both Eigen and
// gemmlowp.
//
// This enum is actually a bit-field: aside from kNone, all other values are
// powers of two, thus are one bit each. We define bit-wise operators below
// for this enum. Some places in Ruy accept a Path bit-field where multiple
// Paths may be selected, while some other places require a single Path (i.e.
// just one of the enum values here). Typically, user-facing parts of Ruy
// accept arbitrary bit-fields, allowing the user to compile support for
// multiple paths and to inform Ruy of all the paths that are to be enabled
// at runtime; then, typically in dispatch.h, we internally pick one
// specific path and from there on, internal Ruy code deals with only one
// path.
//
// When a user selects a set of compiled paths, Ruy internally dispatches to the
// "best" one, which typically means the newest optimized instructions for a
// given base architecture (such as ARM). Higher values of this enum correspond
// to "better" code paths within a given base architecture for which Ruy has
// optimized code paths.
enum class Path : std::uint8_t {
  // Null value: no path at all. This is the only enumerator that is not a
  // single bit.
  kNone = 0,

  // Reference multiplication code: a very simple standalone Mul
  // implementation to check other paths against. It bypasses almost all of
  // Ruy's internal implementation details.
  //
  // Intended for testing/development.
  kReference = 1 << 0,

  // Standard C++ implementation of Ruy's architecture-specific parts. In
  // contrast to kReference, this path exercises most of Ruy's internal logic.
  //
  // Intended for testing/development.
  kStandardCpp = 1 << 1,

  // Optimized path using a widely available subset of ARM NEON instructions.
  kNeon = 1 << 2,

  // Optimized path using the ARM NEON dot product instructions available on
  // newer ARM cores.
  kNeonDotprod = 1 << 3,
};
// Bitwise OR of two Path bit-fields: the result has every bit that is set in
// either operand.
inline constexpr Path operator|(Path p, Path q) {
  using Bits = std::uint32_t;
  return static_cast<Path>(static_cast<Bits>(p) | static_cast<Bits>(q));
}
// Bitwise AND of two Path bit-fields: the result keeps only the bits that are
// set in both operands.
inline constexpr Path operator&(Path p, Path q) {
  using Bits = std::uint32_t;
  return static_cast<Path>(static_cast<Bits>(p) & static_cast<Bits>(q));
}
// Bitwise XOR of two Path bit-fields: the result has the bits that are set in
// exactly one of the operands.
inline constexpr Path operator^(Path p, Path q) {
  using Bits = std::uint32_t;
  return static_cast<Path>(static_cast<Bits>(p) ^ static_cast<Bits>(q));
}
// Bitwise complement of a Path bit-field, taken over the 8 bits of Path's
// underlying type.
//
// The complement is computed in 32 bits, which leaves the upper 24 bits set.
// That intermediate value does not fit in Path's std::uint8_t underlying type,
// so it is narrowed to std::uint8_t explicitly (a well-defined modular
// conversion) before being converted to Path, instead of relying on how an
// out-of-range integer-to-enum conversion is treated by the language revision
// in use.
inline constexpr Path operator~(Path p) {
  return static_cast<Path>(
      static_cast<std::uint8_t>(~static_cast<std::uint32_t>(p)));
}
// Converts path_mask to int, passes it through round_down_pot() and converts
// the result back to a Path.
//
// round_down_pot() is declared outside this file; presumably it rounds down to
// the nearest power of two, which would make the result the single
// highest-valued Path bit set in path_mask -- confirm against size_util.h.
// NOTE(review): the result for Path::kNone (0) depends entirely on how
// round_down_pot() treats 0 -- verify before passing an empty mask.
inline Path GetMostSignificantPath(Path path_mask) {
  const int mask_bits = static_cast<int>(path_mask);
  const int top_bit = round_down_pot(mask_bits);
  return static_cast<Path>(top_bit);
}
// ruy::kAllPaths represents all Paths that make sense on a given base
// architecture.
//
// Path::kNeonDotprod is only included for 64-bit NEON builds on Linux: we
// don't know how to do runtime dotprod detection outside of linux for now.
#if defined(__linux__) && defined(RUY_NEON_64)
constexpr Path kAllPaths =
    Path::kReference | Path::kStandardCpp | Path::kNeon | Path::kNeonDotprod;
#elif defined(RUY_NEON_64) || defined(RUY_NEON_32)
// Any other NEON build (32-bit anywhere, or 64-bit outside of Linux).
constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp | Path::kNeon;
#else
// No NEON: only the portable paths are available.
constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp;
#endif
} // namespace ruy
#endif // TENSORFLOW_LITE_EXPERIMENTAL_RUY_PATH_H_