Add CountRZero function to bit_util.h
Use it to implement the vfirst.m instruction.
Test: berberis_all
Change-Id: Id5680fe76e23fb1349fcc1c649a6817c033e3b96
diff --git a/base/bit_util_test.cc b/base/bit_util_test.cc
index bd96fb5..10ce4ce 100644
--- a/base/bit_util_test.cc
+++ b/base/bit_util_test.cc
@@ -56,19 +56,34 @@
static_assert(BitUtilLog2(16) == 4);
static_assert(BitUtilLog2(sizeof(void*)) > 0);
-static_assert(Popcount(~int32_t{1}) == 31);
-static_assert(Popcount(RawInt32{~Int32{1}}) == RawInt32{31});
-static_assert(Popcount(SatInt32{~Int32{1}}) == SatInt32{31});
-static_assert(Popcount(~Int32{1}) == Int32{31});
-static_assert(Popcount(~int64_t{1}) == 63);
-static_assert(Popcount(RawInt64{~Int64{1}}) == RawInt64{63});
-static_assert(Popcount(SatInt64{~Int64{1}}) == SatInt64{63});
-static_assert(Popcount(~Int64{1}) == Int64{63});
+static_assert(CountRZero(~uint32_t{1}) == 1);
+static_assert(CountRZero(RawInt32{~UInt32{1}}) == RawInt32{1});
+static_assert(CountRZero(SatUInt32{~Int32{1}}) == SatUInt32{1});
+static_assert(CountRZero(~UInt32{1}) == UInt32{1});
+static_assert(CountRZero(~uint64_t{1}) == 1);
+static_assert(CountRZero(RawInt64{~UInt64{1}}) == RawInt64{1});
+static_assert(CountRZero(SatUInt64{~Int64{1}}) == SatUInt64{1});
+static_assert(CountRZero(~UInt64{1}) == UInt64{1});
#if defined(__x86_64__)
-static_assert(Popcount(~__int128_t{1}) == 127);
-static_assert(Popcount(RawInt128{~Int128{1}}) == RawInt128{127});
-static_assert(Popcount(SatInt128{~Int128{1}}) == SatInt128{127});
-static_assert(Popcount(~Int128{1}) == Int128{127});
+static_assert(CountRZero(~static_cast<unsigned __int128>(1) << 64) == 65);
+static_assert(CountRZero(RawInt128{~UInt128{1}}) == RawInt128{1});
+static_assert(CountRZero(SatUInt128{~Int128{1}}) == SatUInt128{1});
+static_assert(CountRZero(~UInt128{1} << UInt128{64}) == UInt128{65});
+#endif
+
+static_assert(Popcount(~uint32_t{1}) == 31);
+static_assert(Popcount(RawInt32{~UInt32{1}}) == RawInt32{31});
+static_assert(Popcount(SatUInt32{~Int32{1}}) == SatUInt32{31});
+static_assert(Popcount(~UInt32{1}) == UInt32{31});
+static_assert(Popcount(~uint64_t{1}) == 63);
+static_assert(Popcount(RawInt64{~UInt64{1}}) == RawInt64{63});
+static_assert(Popcount(SatUInt64{~Int64{1}}) == SatUInt64{63});
+static_assert(Popcount(~UInt64{1}) == UInt64{63});
+#if defined(__x86_64__)
+static_assert(Popcount(~static_cast<unsigned __int128>(1)) == 127);
+static_assert(Popcount(RawInt128{~UInt128{1}}) == RawInt128{127});
+static_assert(Popcount(SatUInt128{~Int128{1}}) == SatUInt128{127});
+static_assert(Popcount(~UInt128{1}) == UInt128{127});
#endif
static_assert(Add(SatInt8{126}, SatInt8{1}) == std::tuple{SatInt8{127}, false});
diff --git a/base/include/berberis/base/bit_util.h b/base/include/berberis/base/bit_util.h
index 0ed05a3..19be49c 100644
--- a/base/include/berberis/base/bit_util.h
+++ b/base/include/berberis/base/bit_util.h
@@ -180,6 +180,7 @@
// TODO(b/260725458): Use std::countr_zero after C++20 becomes available
return __builtin_ctz(x);
}
+
// Signextend bits from size to the corresponding signed type of sizeof(Type) size.
// If the result of this function is assigned to a wider signed type it'll automatically
// sign-extend.
@@ -217,9 +218,47 @@
}
template <typename T>
-[[nodiscard]] constexpr T Popcount(T x) {
- // We couldn't use std::popcount yet ( http://b/318678905 ) for __uint128_t .
+[[nodiscard]] constexpr T CountRZero(T x) {
+ // We couldn't use C++20 std::countr_zero yet ( http://b/318678905 ) for __uint128_t .
- // Switch to std::popcount when/if that bug would be fixed.
+ // Switch to std::countr_zero when/if that bug is fixed.
+ static_assert(!std::is_signed_v<T>);
+#if defined(__x86_64__)
+ if constexpr (sizeof(T) == sizeof(unsigned __int128)) {
+ if (static_cast<uint64_t>(x) == 0) {
+ return __builtin_ctzll(x >> 64) + 64;
+ }
+ return __builtin_ctzll(x);
+ } else
+#endif
+ if constexpr (sizeof(T) == sizeof(uint64_t)) {
+ return __builtin_ctzll(x);
+ } else if constexpr (sizeof(T) == sizeof(uint32_t)) {
+ return __builtin_ctz(x);
+ } else {
+ static_assert(kDependentTypeFalse<T>);
+ }
+}
+
+template <typename T>
+[[nodiscard]] constexpr Raw<T> CountRZero(Raw<T> x) {
+ return {CountRZero(x.value)};
+}
+
+template <typename T>
+[[nodiscard]] constexpr Saturating<T> CountRZero(Saturating<T> x) {
+ return {CountRZero(x.value)};
+}
+
+template <typename T>
+[[nodiscard]] constexpr Wrapping<T> CountRZero(Wrapping<T> x) {
+ return {CountRZero(x.value)};
+}
+
+template <typename T>
+[[nodiscard]] constexpr T Popcount(T x) {
+ // We couldn't use C++20 std::popcount yet ( http://b/318678905 ) for __uint128_t .
+ // Switch to std::popcount when/if that bug would be fixed.
+ static_assert(!std::is_signed_v<T>);
#if defined(__x86_64__)
if constexpr (sizeof(T) == sizeof(unsigned __int128)) {
return __builtin_popcountll(x) + __builtin_popcountll(x >> 64);
@@ -901,8 +940,8 @@
}
template <typename BaseType>
-[[nodiscard]] constexpr auto Widen(intrinsics::WrappedFloatType<BaseType> source)
- -> Wrapping<typename TypeTraits<intrinsics::WrappedFloatType<BaseType>>::Wide> {
+[[nodiscard]] constexpr auto Widen(intrinsics::WrappedFloatType<BaseType> source) ->
+ typename TypeTraits<intrinsics::WrappedFloatType<BaseType>>::Wide {
return {source.value};
}
@@ -930,8 +969,8 @@
}
template <typename BaseType>
-[[nodiscard]] constexpr auto Narrow(intrinsics::WrappedFloatType<BaseType> source)
- -> Wrapping<typename TypeTraits<intrinsics::WrappedFloatType<BaseType>>::Narrow> {
+[[nodiscard]] constexpr auto Narrow(intrinsics::WrappedFloatType<BaseType> source) ->
+ typename TypeTraits<intrinsics::WrappedFloatType<BaseType>>::Narrow {
return {source.value};
}
diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
index 0888944..a5a666d 100644
--- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
+++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
@@ -455,17 +455,17 @@
template <enum PreferredIntrinsicsImplementation = kUseAssemblerImplementationIfPossible>
inline std::tuple<SIMD128Register> Vcpopm(SIMD128Register simd_src) {
- Int128 src = simd_src.Get<Int128>();
+ UInt128 src = simd_src.Get<UInt128>();
return Popcount(src);
}
template <enum PreferredIntrinsicsImplementation = kUseAssemblerImplementationIfPossible>
inline std::tuple<SIMD128Register> Vfirstm(SIMD128Register simd_src) {
- Int128 src = simd_src.Get<Int128>();
+ UInt128 src = simd_src.Get<UInt128>();
- if (src == Int128{0}) {
+ if (src == UInt128{0}) {  // CountRZero(0) is undefined, so handle zero first.
- return Int128{-1};
+ return ~UInt128{0};  // All ones (i.e. -1): no bit is set in src.
}
- return Popcount(src ^ (src - Int128{1})) - Int128{1};
+ return CountRZero(src);  // Index of the lowest set bit.
}
#ifndef __x86_64__