blob: d3c5b06307d130c5999809b0a63127894c4ddc7e [file] [log] [blame]
/* Copyright 2019 Google LLC. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef RUY_RUY_PATH_H_
#define RUY_RUY_PATH_H_
#include <cstdint>
#include "ruy/platform.h"
#include "ruy/size_util.h"
namespace ruy {
// A Path is an implementation path, typically corresponding to a SIMD
// instruction set being targetted. For example, on the ARM architecture,
// Path::kNeon means using NEON instructions, and Path::kNeonDotprod means
// also using the newer NEON dot-product instructions.
//
// Different Path enum values are defined on different CPU architectures,
// corresponding to different SIMD ISA extensions available there.
//
// Path::kStandardCpp is the one Path that is always available.
//
// Path enum values are bits and may be OR-ed to form "sets of Paths".
// Ruy entry points such as ruy::Mul either implicitly use such a set of Paths,
// or allow passing an explicit one as a template parameter. The meaning of such
// an OR-ed Path combination is "compile all of
// these paths; which path is used will be determined at runtime". This is why
// for most users, it is enough to call ruy::Mul(...), which will compile a
// reasonable selection of paths for the target CPU architecture's various
// SIMD ISA extensions, and let ruy determine at runtime which one to use.
// Internally, after the actual path has been resolved, ruy's internal functions
// templatized on a Path tend to require that to be a single bit.
//
// An element of ruy's internal design was to allow for code compiled for
// multiple such paths to coexist without violating the C++ One Definition Rule
// (ODR). This is achieved by having all ruy internal functions, whose
// definition depends on a choice of Path, be templatized on a Path, so that
// each path-specific specialization is a separate symbol. There is never
// a need to compile ruy code with different compilation flags to enable
// different SIMD extensions and dispatch at runtime between them, as this is
// taken care of internally by ruy in an ODR-correct way.
enum class Path : std::uint8_t {
// This is a special null value, representing the absence of any path.
kNone = 0,
// Standard C++ implementation of Ruy's architecture-specific parts.
//
// This is intended for testing/development, and as a fallback for when
// the SIMD ISA extensions required by other paths are unavailable at runtime.
kStandardCpp = 0x1,
// Internal, test-only variants of StandardCpp used to exercise more corners
// of internal ruy logic.
// They are intentionally omitted from ruy::kAllPaths and ruy::kNonArchPaths,
// and are only ever used in dedicated ruy tests explicitly referencing them.
kInternalStandardCppVariant1 = 0x2,
kInternalStandardCppVariant2 = 0x4,
kInternalStandardCppVariant3 = 0x8,
#if RUY_PLATFORM_ARM
// Optimized path using a widely available subset of ARM NEON instructions.
kNeon = 0x10,
// Optimized path making use of ARM NEON dot product instructions that are
// available on newer ARM cores.
kNeonDotprod = 0x20,
#endif // RUY_PLATFORM_ARM
#if RUY_PLATFORM_X86
// Optimized for AVX
// Compiled with -mavx
kAvx = 0x10,
// Optimized for AVX2+FMA.
// Compiled with -mavx2 -mfma.
kAvx2Fma = 0x20,
// Optimized for AVX-512.
// Compiled with -mavx512f -mavx512vl -mavx512cd -mavx512bw -mavx512dq.
kAvx512 = 0x40,
#endif // RUY_PLATFORM_X86
};
inline constexpr Path operator|(Path p, Path q) {
return static_cast<Path>(static_cast<std::uint32_t>(p) |
static_cast<std::uint32_t>(q));
}
inline constexpr Path operator&(Path p, Path q) {
return static_cast<Path>(static_cast<std::uint32_t>(p) &
static_cast<std::uint32_t>(q));
}
inline constexpr Path operator^(Path p, Path q) {
return static_cast<Path>(static_cast<std::uint32_t>(p) ^
static_cast<std::uint32_t>(q));
}
inline constexpr Path operator~(Path p) {
return static_cast<Path>(~static_cast<std::uint32_t>(p));
}
inline constexpr bool Disjoint(Path p, Path q) {
return (p & q) == Path::kNone;
}
inline Path GetMostSignificantPath(Path path_mask) {
return static_cast<Path>(round_down_pot(static_cast<int>(path_mask)));
}
// We define three disjoint sets of paths.
//
// kNonArchPaths is the set of paths that are defined regardless of
// the CPU architecture (excluding some internal test-only paths).
// These paths are slow, but portable. At the moment,
// that is only kStandardCpp. In the past, that used to also include a
// kReference path providing an even more basic implementation, but that has
// been split out into a separate library, see the ReferenceMul function.
constexpr Path kNonArchPaths = Path::kStandardCpp;
// The other two are specific to each CPU architecture. Note that these sets
// do NOT include a fallback for when none of these architecture paths are
// supported at runtime by the CPU. For that, see the other constants defined
// further below.
//
// kDefaultArchPaths is the set of architecture-specific paths that
// we recommend for most users. It is part of kDefaultPaths defined
// below.
//
// kExtraArchPaths is the set of all other architecture-specific paths
// that for whatever reason we're not recommending to most users at the moment.
// Typically that would include work-in-progress paths, or paths targeting
// minority hardware that isn't the best compromise of code size to performance
// for most users.
#if RUY_PLATFORM_NEON_64
constexpr Path kDefaultArchPaths = Path::kNeon | Path::kNeonDotprod;
constexpr Path kExtraArchPaths = Path::kNone;
#elif RUY_PLATFORM_NEON_32
constexpr Path kDefaultArchPaths = Path::kNeon;
constexpr Path kExtraArchPaths = Path::kNone;
#elif RUY_PLATFORM_X86
constexpr Path kDefaultArchPaths = Path::kAvx | Path::kAvx2Fma | Path::kAvx512;
constexpr Path kExtraArchPaths = Path::kNone;
#else
constexpr Path kDefaultArchPaths = Path::kNone;
constexpr Path kExtraArchPaths = Path::kNone;
#endif
// kNonArchPathsIncludingInternalVariants is the set of all
// non-architecture-specific paths without exception. This includes some paths
// that are internal-only and test-only and not useful to any user.
static constexpr Path kNonArchPathsIncludingInternalVariants =
kNonArchPaths | Path::kInternalStandardCppVariant1 |
Path::kInternalStandardCppVariant2 | Path::kInternalStandardCppVariant3;
// Enforce that kDefaultArchPaths, kExtraArchPaths and
// kNonArchPathsIncludingInternalVariants are mutually disjoint,
// and that kNonArchPaths is a subset of kNonArchPathsIncludingInternalVariants.
static_assert(Disjoint(kDefaultArchPaths, kExtraArchPaths), "");
static_assert(Disjoint(kDefaultArchPaths,
kNonArchPathsIncludingInternalVariants),
"");
static_assert(Disjoint(kExtraArchPaths, kNonArchPathsIncludingInternalVariants),
"");
static_assert(Disjoint(kNonArchPaths, ~kNonArchPathsIncludingInternalVariants),
"");
// We now define two aggregate sets of paths for convenience, including
// both architecture-specific paths and some portable fallbacks.
//
// kDefaultPaths is the set of paths that we recommend most users to use.
// It is what ruy::Mul(...), the entry point not taking an explicit Path value,
// uses.
constexpr Path kDefaultPaths = Path::kStandardCpp | kDefaultArchPaths;
// kAllPaths is the set of all paths that are available to compile, except
// some some internal test-only paths that no user would ever want to use.
// In addition to the Default paths, it also includes the extra
// architecture paths, as well as any other non-arch path besides kStandardCpp
// (there is none at the moment).
constexpr Path kAllPaths = kNonArchPaths | kDefaultArchPaths | kExtraArchPaths;
// kAllPathsIncludingInternalVariants is the set of all paths without exception.
// This includes some paths that are internal-only and test-only and not useful
// to any user.
static constexpr Path kAllPathsIncludingInternalVariants =
kAllPaths | kNonArchPathsIncludingInternalVariants;
static_assert(Disjoint(kDefaultPaths, ~kAllPaths), "");
static_assert(Disjoint(kAllPaths, ~kAllPathsIncludingInternalVariants), "");
} // namespace ruy
#endif // RUY_RUY_PATH_H_