blob: 5cb23d38bc7d809803fe9865939726d880b87361 [file] [log] [blame]
/*
* Copyright (C) 2016 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef BERBERIS_INTRINSICS_SIMD_REGISTER_H_
#define BERBERIS_INTRINSICS_SIMD_REGISTER_H_
#include <stdint.h>
#include <string.h>
#include "berberis/intrinsics/intrinsics_float.h"
namespace berberis {
class SIMD128Register;
/*
* We want to use partial specialization for SIMD128Register::[GS]et, but it's
* it's not allowed for class members. Use helper functions instead.
*/
template <typename T>
T SIMD128RegisterGet(const SIMD128Register* reg, int index);
template <typename T>
T SIMD128RegisterSet(SIMD128Register* reg, T elem, int index);
class SIMD128Register {
public:
template <typename T>
explicit SIMD128Register(T elem) {
Set<T>(elem, 0);
}
SIMD128Register() = default;
SIMD128Register(const SIMD128Register&) = default;
SIMD128Register(SIMD128Register&&) = default;
SIMD128Register& operator=(const SIMD128Register&) = default;
SIMD128Register& operator=(SIMD128Register&&) = default;
template <typename T>
T Get(int index) const {
return SIMD128RegisterGet<T>(this, index);
}
template <typename T>
T Set(T elem, int index) {
return SIMD128RegisterSet<T>(this, elem, index);
}
template <typename T>
auto Get() const -> std::enable_if_t<sizeof(T) == 16, T> {
return SIMD128RegisterGet<T>(this, 0);
}
template <typename T>
auto Set(T elem) -> std::enable_if_t<sizeof(T) == 16, T> {
return SIMD128RegisterSet<T>(this, elem, 0);
}
private:
union {
#ifdef __GNUC__
// Note: we are violating strict aliasing rules in the code below (Get and Set function) thus we
// need to mask these fields "may_alias". Unknown attributes could be silently ignored by the
// compiler. We protect definitions with #ifdef __GNU__ to make sure may_alias is not ignored.
[[gnu::vector_size(16), gnu::may_alias]] int8_t int8;
[[gnu::vector_size(16), gnu::may_alias]] uint8_t uint8;
[[gnu::vector_size(16), gnu::may_alias]] int16_t int16;
[[gnu::vector_size(16), gnu::may_alias]] uint16_t uint16;
[[gnu::vector_size(16), gnu::may_alias]] int32_t int32;
[[gnu::vector_size(16), gnu::may_alias]] uint32_t uint32;
[[gnu::vector_size(16), gnu::may_alias]] int64_t int64;
[[gnu::vector_size(16), gnu::may_alias]] uint64_t uint64;
#if defined(__x86_64)
[[gnu::vector_size(16), gnu::may_alias]] __uint128_t uint128;
#endif
// Note: we couldn't use Float32/Float64 here because [[gnu::vector]] only works with
// raw integer or FP-types.
[[gnu::vector_size(16), gnu::may_alias]] float float32;
[[gnu::vector_size(16), gnu::may_alias]] double float64;
#else
#error Unsupported compiler.
#endif
};
template <typename T>
friend T SIMD128RegisterGet(const SIMD128Register* reg, int index);
template <typename T>
friend T SIMD128RegisterSet(SIMD128Register* reg, T elem, int index);
};
static_assert(sizeof(SIMD128Register) == 16, "Unexpected size of SIMD128Register");
#if defined(__i386__)
static_assert(alignof(SIMD128Register) == 16, "Unexpected align of SIMD128Register");
#elif defined(__x86_64)
static_assert(alignof(SIMD128Register) == 16, "Unexpected align of SIMD128Register");
#else
#error Unsupported architecture
#endif
/*
* Partial specializations of SIMD128Register getters/setters for most types
*
* GNU C makes it possible to use unions to quickly and efficiently
* operate with subvalues of different types:
* http://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html#Type-punning
* Unfortunately it's not a valid ANSI C code thus we always do that via
* Get<type>(index) and Set<type>(value, index) accessors.
*
* For other compilers one will need to use memcpy to guarantee safety.
*/
#ifdef __GNUC__
#define SIMD_128_REGISTER_GETTER_SETTER(TYPE, MEMBER) \
template <> \
inline TYPE SIMD128RegisterGet<TYPE>(const SIMD128Register* reg, int index) { \
CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE)); \
return reg->MEMBER[index]; \
} \
template <> \
inline TYPE SIMD128RegisterSet<TYPE>(SIMD128Register * reg, TYPE elem, int index) { \
CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE)); \
return reg->MEMBER[index] = elem; \
}
#define SIMD_128_REGISTER_GETTER_SETTЕR(TYPE, MEMBER_TYPE, MEMBER) \
template <> \
inline TYPE SIMD128RegisterGet<TYPE>(const SIMD128Register* reg, int index) { \
CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE)); \
static_assert(sizeof(TYPE) == sizeof(MEMBER_TYPE)); \
/* Don't use bit_cast because it's unsafe if -O0 is used. */ \
/* See intrinsics_float.h for explanation. */ \
TYPE elem; \
MEMBER_TYPE melem; \
melem = reg->MEMBER[index]; \
memcpy(&elem, &melem, sizeof(TYPE)); \
return elem; \
} \
template <> \
inline TYPE SIMD128RegisterSet<TYPE>(SIMD128Register * reg, TYPE elem, int index) { \
CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE)); \
static_assert(sizeof(TYPE) == sizeof(MEMBER_TYPE)); \
/* Don't use bit_cast because it's unsafe if -O0 is used. */ \
/* See intrinsics_float.h for explanation. */ \
MEMBER_TYPE melem; \
memcpy(&melem, &elem, sizeof(TYPE)); \
reg->MEMBER[index] = melem; \
return elem; \
}
#endif
SIMD_128_REGISTER_GETTER_SETTER(int8_t, int8);
SIMD_128_REGISTER_GETTER_SETTER(uint8_t, uint8);
SIMD_128_REGISTER_GETTER_SETTER(int16_t, int16);
SIMD_128_REGISTER_GETTER_SETTER(uint16_t, uint16);
SIMD_128_REGISTER_GETTER_SETTER(int32_t, int32);
SIMD_128_REGISTER_GETTER_SETTER(uint32_t, uint32);
SIMD_128_REGISTER_GETTER_SETTER(int64_t, int64);
SIMD_128_REGISTER_GETTER_SETTER(uint64_t, uint64);
#if defined(__x86_64)
SIMD_128_REGISTER_GETTER_SETTER(__uint128_t, uint128);
#endif
SIMD_128_REGISTER_GETTER_SETTЕR(intrinsics::Float32, float, float32);
SIMD_128_REGISTER_GETTER_SETTЕR(intrinsics::Float64, double, float64);
#undef SIMD_128_REGISTER_GETTER_SETTER
} // namespace berberis
#endif // BERBERIS_INTRINSICS_SIMD_REGISTER_H_