Project import generated by Copybara.

GitOrigin-RevId: 71dd4e17dd6493986e4b3647288d8e4c22a86c44
Change-Id: I30e7247ab9113149501f3962be08fde38dc961b0
diff --git a/src/__support/math_extras.h b/src/__support/math_extras.h
index 47df2a4..954bcb1 100644
--- a/src/__support/math_extras.h
+++ b/src/__support/math_extras.h
@@ -66,7 +66,7 @@
 
 #define RETURN_IF(TYPE, BUILTIN)                                               \
   if constexpr (cpp::is_same_v<T, TYPE>)                                       \
-    return BUILTIN(a, b, carry_in, carry_out);
+    return BUILTIN(a, b, carry_in, &carry_out);
 
 // Returns the result of 'a + b' taking into account 'carry_in'.
 // The carry out is stored in 'carry_out' it not 'nullptr', dropped otherwise.
@@ -74,7 +74,7 @@
 template <typename T>
 [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, T>
 add_with_carry(T a, T b, T carry_in, T &carry_out) {
-  if constexpr (!cpp::is_constant_evaluated()) {
+  if (!cpp::is_constant_evaluated()) {
 #if __has_builtin(__builtin_addcb)
     RETURN_IF(unsigned char, __builtin_addcb)
 #elif __has_builtin(__builtin_addcs)
@@ -100,7 +100,7 @@
 template <typename T>
 [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, T>
 sub_with_borrow(T a, T b, T carry_in, T &carry_out) {
-  if constexpr (!cpp::is_constant_evaluated()) {
+  if (!cpp::is_constant_evaluated()) {
 #if __has_builtin(__builtin_subcb)
     RETURN_IF(unsigned char, __builtin_subcb)
 #elif __has_builtin(__builtin_subcs)
diff --git a/src/__support/threads/linux/barrier.h b/src/__support/threads/linux/barrier.h
index f0655bf..a632aa4 100644
--- a/src/__support/threads/linux/barrier.h
+++ b/src/__support/threads/linux/barrier.h
@@ -36,14 +36,19 @@
   int wait();
 };
 
-static_assert(
-    sizeof(Barrier) == sizeof(pthread_barrier_t),
-    "The public pthread_barrier_t type cannot accommodate the internal "
-    "barrier type.");
+static_assert(sizeof(Barrier) <= sizeof(pthread_barrier_t),
+              "The public pthread_barrier_t type cannot accommodate the "
+              "internal barrier type.");
 
-static_assert(alignof(Barrier) == alignof(pthread_barrier_t),
-              "The public pthread_barrier_t type has a different alignment "
-              "than the internal barrier type.");
+static_assert(alignof(Barrier) <= alignof(pthread_barrier_t),
+              "The public pthread_barrier_t type has insufficient alignment "
+              "for the internal barrier type.");
+
+static_assert(sizeof(CndVar) <= 24,
+              "CndVar size exceeds the size in __barrier_type.h");
+
+static_assert(sizeof(Mutex) <= 24,
+              "Mutex size exceeds the size in __barrier_type.h");
 
 } // namespace LIBC_NAMESPACE_DECL
 
diff --git a/src/string/memory_utils/aarch64/inline_strlen.h b/src/string/memory_utils/aarch64/inline_strlen.h
new file mode 100644
index 0000000..36fd1aa
--- /dev/null
+++ b/src/string/memory_utils/aarch64/inline_strlen.h
@@ -0,0 +1,53 @@
+//===-- Strlen implementation for aarch64 ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
+
+#if defined(__ARM_NEON)
+#include "src/__support/CPP/bit.h" // countr_zero
+
+#include <arm_neon.h>
+#include <stddef.h> // size_t
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace neon {
+[[gnu::no_sanitize_address]] [[maybe_unused]] LIBC_INLINE static size_t
+string_length(const char *src) {
+  using Vector __attribute__((may_alias)) = uint8x8_t;
+
+  uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
+  const Vector *block_ptr =
+      reinterpret_cast<const Vector *>(src - misalign_bytes);
+  Vector v = *block_ptr;
+  Vector vcmp = vceqz_u8(v);
+  uint64x1_t cmp_mask = vreinterpret_u64_u8(vcmp);
+  uint64_t cmp = vget_lane_u64(cmp_mask, 0);
+  cmp = cmp >> (misalign_bytes << 3);
+  if (cmp)
+    return cpp::countr_zero(cmp) >> 3;
+
+  while (true) {
+    ++block_ptr;
+    v = *block_ptr;
+    vcmp = vceqz_u8(v);
+    cmp_mask = vreinterpret_u64_u8(vcmp);
+    cmp = vget_lane_u64(cmp_mask, 0);
+    if (cmp)
+      return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
+                                 reinterpret_cast<uintptr_t>(src) +
+                                 (cpp::countr_zero(cmp) >> 3));
+  }
+}
+} // namespace neon
+
+namespace string_length_impl = neon;
+
+} // namespace LIBC_NAMESPACE_DECL
+#endif // __ARM_NEON
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
diff --git a/src/string/memory_utils/x86_64/inline_strlen.h b/src/string/memory_utils/x86_64/inline_strlen.h
new file mode 100644
index 0000000..739f8c1
--- /dev/null
+++ b/src/string/memory_utils/x86_64/inline_strlen.h
@@ -0,0 +1,107 @@
+//===-- Strlen implementation for x86_64 ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H
+
+#include "src/__support/CPP/bit.h" // countr_zero
+
+#include <immintrin.h>
+#include <stddef.h> // size_t
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace string_length_internal {
+// Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero.
+template <typename Vector, typename Mask>
+[[gnu::no_sanitize_address]] LIBC_INLINE static Mask
+compare_and_mask(const Vector *block_ptr);
+
+template <typename Vector, typename Mask,
+          decltype(compare_and_mask<Vector, Mask>)>
+[[gnu::no_sanitize_address]] LIBC_INLINE static size_t
+string_length_vector(const char *src) {
+  uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
+
+  const Vector *block_ptr =
+      reinterpret_cast<const Vector *>(src - misalign_bytes);
+  auto cmp = compare_and_mask<Vector, Mask>(block_ptr) >> misalign_bytes;
+  if (cmp)
+    return cpp::countr_zero(cmp);
+
+  while (true) {
+    block_ptr++;
+    cmp = compare_and_mask<Vector, Mask>(block_ptr);
+    if (cmp)
+      return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
+                                 reinterpret_cast<uintptr_t>(src) +
+                                 cpp::countr_zero(cmp));
+  }
+}
+
+template <>
+LIBC_INLINE uint32_t
+compare_and_mask<__m128i, uint32_t>(const __m128i *block_ptr) {
+  __m128i v = _mm_load_si128(block_ptr);
+  __m128i z = _mm_setzero_si128();
+  __m128i c = _mm_cmpeq_epi8(z, v);
+  return _mm_movemask_epi8(c);
+}
+
+namespace sse2 {
+[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
+  return string_length_vector<__m128i, uint32_t,
+                              compare_and_mask<__m128i, uint32_t>>(src);
+}
+} // namespace sse2
+
+#if defined(__AVX2__)
+template <>
+LIBC_INLINE uint32_t
+compare_and_mask<__m256i, uint32_t>(const __m256i *block_ptr) {
+  __m256i v = _mm256_load_si256(block_ptr);
+  __m256i z = _mm256_setzero_si256();
+  __m256i c = _mm256_cmpeq_epi8(z, v);
+  return _mm256_movemask_epi8(c);
+}
+
+namespace avx2 {
+[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
+  return string_length_vector<__m256i, uint32_t,
+                              compare_and_mask<__m256i, uint32_t>>(src);
+}
+} // namespace avx2
+#endif
+
+#if defined(__AVX512F__)
+template <>
+LIBC_INLINE __mmask64
+compare_and_mask<__m512i, __mmask64>(const __m512i *block_ptr) {
+  __m512i v = _mm512_load_si512(block_ptr);
+  __m512i z = _mm512_setzero_si512();
+  return _mm512_cmp_epu8_mask(z, v, _MM_CMPINT_EQ);
+}
+namespace avx512 {
+[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
+  return string_length_vector<__m512i, __mmask64,
+                              compare_and_mask<__m512i, __mmask64>>(src);
+}
+} // namespace avx512
+#endif
+} // namespace string_length_internal
+
+#if defined(__AVX512F__)
+namespace string_length_impl = string_length_internal::avx512;
+#elif defined(__AVX2__)
+namespace string_length_impl = string_length_internal::avx2;
+#else
+namespace string_length_impl = string_length_internal::sse2;
+#endif
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H
diff --git a/src/string/string_utils.h b/src/string/string_utils.h
index 80e5783..ce46158 100644
--- a/src/string/string_utils.h
+++ b/src/string/string_utils.h
@@ -22,6 +22,16 @@
 #include "src/__support/macros/config.h"
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
 
+#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
+#if defined(LIBC_TARGET_ARCH_IS_X86)
+#include "src/string/memory_utils/x86_64/inline_strlen.h"
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_NEON)
+#include "src/string/memory_utils/aarch64/inline_strlen.h"
+#else
+namespace string_length_impl = LIBC_NAMESPACE::wide_read;
+#endif
+#endif
+
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
@@ -53,7 +63,7 @@
 // high bit set will no longer have it set, narrowing the list of bytes which
 // result in non-zero values to just the zero byte.
 template <typename Word> LIBC_INLINE constexpr bool has_zeroes(Word block) {
-  constexpr Word LOW_BITS = repeat_byte<Word>(0x01);
+  constexpr unsigned int LOW_BITS = repeat_byte<Word>(0x01);
   constexpr Word HIGH_BITS = repeat_byte<Word>(0x80);
   Word subtracted = block - LOW_BITS;
   Word inverted = ~block;
@@ -81,16 +91,23 @@
   return static_cast<size_t>(char_ptr - src);
 }
 
-// Returns the length of a string, denoted by the first occurrence
-// of a null terminator.
-template <typename T> LIBC_INLINE size_t string_length(const T *src) {
-#ifdef LIBC_COPT_STRING_UNSAFE_WIDE_READ
+namespace wide_read {
+LIBC_INLINE size_t string_length(const char *src) {
   // Unsigned int is the default size for most processors, and on x86-64 it
   // performs better than larger sizes when the src pointer can't be assumed to
   // be aligned to a word boundary, so it's the size we use for reading the
   // string a block at a time.
+  return string_length_wide_read<unsigned int>(src);
+}
+
+} // namespace wide_read
+
+// Returns the length of a string, denoted by the first occurrence
+// of a null terminator.
+template <typename T> LIBC_INLINE size_t string_length(const T *src) {
+#ifdef LIBC_COPT_STRING_UNSAFE_WIDE_READ
   if constexpr (cpp::is_same_v<T, char>)
-    return string_length_wide_read<unsigned int>(src);
+    return string_length_impl::string_length(src);
 #endif
   size_t length;
   for (length = 0; *src; ++src, ++length)
@@ -99,8 +116,9 @@
 }
 
 template <typename Word>
-LIBC_INLINE void *find_first_character_wide_read(const unsigned char *src,
-                                                 unsigned char ch, size_t n) {
+[[gnu::no_sanitize_address]] LIBC_INLINE void *
+find_first_character_wide_read(const unsigned char *src, unsigned char ch,
+                               size_t n) {
   const unsigned char *char_ptr = src;
   size_t cur = 0;
 
@@ -184,33 +202,36 @@
 template <bool SkipDelim = true>
 LIBC_INLINE char *string_token(char *__restrict src,
                                const char *__restrict delimiter_string,
-                               char **__restrict saveptr) {
-  // Return nullptr immediately if both src AND saveptr are nullptr
-  if (LIBC_UNLIKELY(src == nullptr && ((src = *saveptr) == nullptr)))
+                               char **__restrict context) {
+  // Return nullptr immediately if both src AND context are nullptr
+  if (LIBC_UNLIKELY(src == nullptr && ((src = *context) == nullptr)))
     return nullptr;
 
   static_assert(CHAR_BIT == 8, "bitset of 256 assumes char is 8 bits");
-  cpp::bitset<256> delimiter_set;
+  cpp::bitset<256> delims;
   for (; *delimiter_string != '\0'; ++delimiter_string)
-    delimiter_set.set(static_cast<size_t>(*delimiter_string));
+    delims.set(static_cast<size_t>(*delimiter_string));
 
+  char *tok_start = src;
   if constexpr (SkipDelim)
-    for (; *src != '\0' && delimiter_set.test(static_cast<size_t>(*src)); ++src)
-      ;
-  if (*src == '\0') {
-    *saveptr = src;
+    while (*tok_start != '\0' && delims.test(static_cast<size_t>(*tok_start)))
+      ++tok_start;
+  if (*tok_start == '\0' && SkipDelim) {
+    *context = nullptr;
     return nullptr;
   }
-  char *token = src;
-  for (; *src != '\0'; ++src) {
-    if (delimiter_set.test(static_cast<size_t>(*src))) {
-      *src = '\0';
-      ++src;
-      break;
-    }
+
+  char *tok_end = tok_start;
+  while (*tok_end != '\0' && !delims.test(static_cast<size_t>(*tok_end)))
+    ++tok_end;
+
+  if (*tok_end == '\0') {
+    *context = nullptr;
+  } else {
+    *tok_end = '\0';
+    *context = tok_end + 1;
   }
-  *saveptr = src;
-  return token;
+  return tok_start;
 }
 
 LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src,
diff --git a/test/src/string/strsep_test.cpp b/test/src/string/strsep_test.cpp
index 06318de..e2a5d52 100644
--- a/test/src/string/strsep_test.cpp
+++ b/test/src/string/strsep_test.cpp
@@ -18,21 +18,21 @@
 TEST(LlvmLibcStrsepTest, NoTokenFound) {
   {
     char s[] = "";
-    char *string = s, *orig = s;
+    char *string = s;
     EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, ""), nullptr);
-    EXPECT_EQ(orig, string);
+    EXPECT_TRUE(string == nullptr);
   }
   {
     char s[] = "abcde";
     char *string = s, *orig = s;
     EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, ""), orig);
-    EXPECT_EQ(string, orig + 5);
+    EXPECT_TRUE(string == nullptr);
   }
   {
     char s[] = "abcde";
     char *string = s, *orig = s;
     EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, "fghijk"), orig);
-    EXPECT_EQ(string, orig + 5);
+    EXPECT_TRUE(string == nullptr);
   }
 }
 
@@ -53,6 +53,14 @@
   }
 }
 
+TEST(LlvmLibcStrsepTest, SubsequentSearchesReturnNull) {
+  char s[] = "a";
+  char *string = s;
+  ASSERT_STREQ(LIBC_NAMESPACE::strsep(&string, ":"), "a");
+  ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
+}
+
 #if defined(LIBC_ADD_NULL_CHECKS)
 
 TEST(LlvmLibcStrsepTest, CrashOnNullPtr) {
diff --git a/test/src/string/strtok_r_test.cpp b/test/src/string/strtok_r_test.cpp
index fdc27ba..a19390d 100644
--- a/test/src/string/strtok_r_test.cpp
+++ b/test/src/string/strtok_r_test.cpp
@@ -122,3 +122,12 @@
   token = LIBC_NAMESPACE::strtok_r(nullptr, "_:,_", &reserve);
   ASSERT_STREQ(token, nullptr);
 }
+
+TEST(LlvmLibcStrTokReentrantTest, SubsequentSearchesReturnNull) {
+  char src[] = "a";
+  char *reserve = nullptr;
+  char *token = LIBC_NAMESPACE::strtok_r(src, ":", &reserve);
+  ASSERT_STREQ(token, "a");
+  ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
+}
diff --git a/test/src/string/strtok_test.cpp b/test/src/string/strtok_test.cpp
index b820653..76efedd 100644
--- a/test/src/string/strtok_test.cpp
+++ b/test/src/string/strtok_test.cpp
@@ -76,3 +76,10 @@
   token = LIBC_NAMESPACE::strtok(nullptr, "_:,_");
   ASSERT_STREQ(token, nullptr);
 }
+
+TEST(LlvmLibcStrTokTest, SubsequentSearchesReturnNull) {
+  char src[] = "a";
+  ASSERT_STREQ("a", LIBC_NAMESPACE::strtok(src, ":"));
+  ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
+}