Upgrade FXdiv to 561254d968e5679460e6a0a743206410284d9f46

Exempt-From-Owner-Approval: upgrade
Change-Id: I7a7b45fc6c860fb0ecdd58baf7a1e700ce16e229
diff --git a/.gitignore b/.gitignore
index 73b2998..c10cb60 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,10 +2,17 @@
 build.ninja
 
 # Build objects and artifacts
-deps/
-build/
+bazel-bin
+bazel-genfiles
+bazel-out
+bazel-testlogs
+bazel-FXdiv
 bin/
+build/
+build-*/
+deps/
 lib/
+libs/
 *.pyc
 *.pyo
 
diff --git a/BUILD.bazel b/BUILD.bazel
new file mode 100644
index 0000000..7b0ba72
--- /dev/null
+++ b/BUILD.bazel
@@ -0,0 +1,85 @@
+load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test")
+
+licenses(["notice"])
+
+################################# FXdiv library ################################
+
+cc_library(
+    name = "FXdiv",
+    hdrs = [
+        "include/fxdiv.h",
+    ],
+    includes = [
+        "include",
+    ],
+    strip_include_prefix = "include",
+    deps = [],
+    visibility = ["//visibility:public"],
+)
+
+################################## Unit tests ##################################
+
+cc_test(
+    name = "multiply_high_test",
+    srcs = ["test/multiply-high.cc"],
+    deps = [
+        ":FXdiv",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+cc_test(
+    name = "quotient_test",
+    srcs = ["test/quotient.cc"],
+    deps = [
+        ":FXdiv",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+################################## Benchmarks ##################################
+
+cc_binary(
+    name = "init_bench",
+    srcs = ["bench/init.cc"],
+    deps = [
+        ":FXdiv",
+        "@com_google_benchmark//:benchmark",
+    ],
+)
+
+cc_binary(
+    name = "multiply_bench",
+    srcs = ["bench/multiply.cc"],
+    deps = [
+        ":FXdiv",
+        "@com_google_benchmark//:benchmark",
+    ],
+)
+
+cc_binary(
+    name = "divide_bench",
+    srcs = ["bench/divide.cc"],
+    deps = [
+        ":FXdiv",
+        "@com_google_benchmark//:benchmark",
+    ],
+)
+
+cc_binary(
+    name = "quotient_bench",
+    srcs = ["bench/quotient.cc"],
+    deps = [
+        ":FXdiv",
+        "@com_google_benchmark//:benchmark",
+    ],
+)
+
+cc_binary(
+    name = "round_down_bench",
+    srcs = ["bench/round-down.cc"],
+    deps = [
+        ":FXdiv",
+        "@com_google_benchmark//:benchmark",
+    ],
+)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a74d59d..bcae6b5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,4 @@
-CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12 FATAL_ERROR)
-
-INCLUDE(GNUInstallDirs)
+CMAKE_MINIMUM_REQUIRED(VERSION 3.5 FATAL_ERROR)
 
 # ---[ Project
 PROJECT(FXdiv C CXX)
@@ -16,45 +14,39 @@
 ENDIF()
 
 # ---[ CMake options
+INCLUDE(GNUInstallDirs)
+
 IF(FXDIV_BUILD_TESTS)
   ENABLE_TESTING()
 ENDIF()
 
 # ---[ Download deps
-SET(CONFU_DEPENDENCIES_SOURCE_DIR ${CMAKE_SOURCE_DIR}/deps
-  CACHE PATH "Confu-style dependencies source directory")
-SET(CONFU_DEPENDENCIES_BINARY_DIR ${CMAKE_BINARY_DIR}/deps
-  CACHE PATH "Confu-style dependencies binary directory")
-
 IF(FXDIV_BUILD_TESTS AND NOT DEFINED GOOGLETEST_SOURCE_DIR)
-  MESSAGE(STATUS "Downloading Google Test to ${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest (define GOOGLETEST_SOURCE_DIR to avoid it)")
-  CONFIGURE_FILE(cmake/DownloadGoogleTest.cmake "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download/CMakeLists.txt")
+  MESSAGE(STATUS "Downloading Google Test to ${CMAKE_BINARY_DIR}/googletest-source (define GOOGLETEST_SOURCE_DIR to avoid it)")
+  CONFIGURE_FILE(cmake/DownloadGoogleTest.cmake "${CMAKE_BINARY_DIR}/googletest-download/CMakeLists.txt")
   EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
-    WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download")
+    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/googletest-download")
   EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build .
-    WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download")
-  SET(GOOGLETEST_SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest" CACHE STRING "Google Test source directory")
+    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/googletest-download")
+  SET(GOOGLETEST_SOURCE_DIR "${CMAKE_BINARY_DIR}/googletest-source" CACHE STRING "Google Test source directory")
 ENDIF()
 
 IF(FXDIV_BUILD_BENCHMARKS AND NOT DEFINED GOOGLEBENCHMARK_SOURCE_DIR)
-  MESSAGE(STATUS "Downloading Google Benchmark to ${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark (define GOOGLEBENCHMARK_SOURCE_DIR to avoid it)")
-  CONFIGURE_FILE(cmake/DownloadGoogleBenchmark.cmake "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download/CMakeLists.txt")
+  MESSAGE(STATUS "Downloading Google Benchmark to ${CMAKE_BINARY_DIR}/googlebenchmark-source (define GOOGLEBENCHMARK_SOURCE_DIR to avoid it)")
+  CONFIGURE_FILE(cmake/DownloadGoogleBenchmark.cmake "${CMAKE_BINARY_DIR}/googlebenchmark-download/CMakeLists.txt")
   EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
-    WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download")
+    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/googlebenchmark-download")
   EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build .
-    WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download")
-  SET(GOOGLEBENCHMARK_SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark" CACHE STRING "Google Benchmark source directory")
+    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/googlebenchmark-download")
+  SET(GOOGLEBENCHMARK_SOURCE_DIR "${CMAKE_BINARY_DIR}/googlebenchmark-source" CACHE STRING "Google Benchmark source directory")
 ENDIF()
 
 # ---[ FXdiv library
-IF(${CMAKE_VERSION} VERSION_LESS "3.0")
-  ADD_LIBRARY(fxdiv STATIC include/fxdiv.h)
-  SET_TARGET_PROPERTIES(fxdiv PROPERTIES LINKER_LANGUAGE C)
-ELSE()
-  ADD_LIBRARY(fxdiv INTERFACE)
-ENDIF()
+ADD_LIBRARY(fxdiv INTERFACE)
 TARGET_INCLUDE_DIRECTORIES(fxdiv INTERFACE include)
-IF(NOT FXDIV_USE_INLINE_ASSEMBLY)
+IF(FXDIV_USE_INLINE_ASSEMBLY)
+  TARGET_COMPILE_DEFINITIONS(fxdiv INTERFACE FXDIV_USE_INLINE_ASSEMBLY=1)
+ELSE()
   TARGET_COMPILE_DEFINITIONS(fxdiv INTERFACE FXDIV_USE_INLINE_ASSEMBLY=0)
 ENDIF()
 
@@ -66,14 +58,20 @@
     SET(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
     ADD_SUBDIRECTORY(
       "${GOOGLETEST_SOURCE_DIR}"
-      "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest")
+      "${CMAKE_BINARY_DIR}/googletest")
   ENDIF()
 
   ADD_EXECUTABLE(multiply-high-test test/multiply-high.cc)
+  SET_TARGET_PROPERTIES(multiply-high-test PROPERTIES
+    CXX_STANDARD 11
+    CXX_EXTENSIONS YES)
   TARGET_LINK_LIBRARIES(multiply-high-test fxdiv gtest gtest_main)
   ADD_TEST(multiply-high multiply-high-test)
 
   ADD_EXECUTABLE(quotient-test test/quotient.cc)
+  SET_TARGET_PROPERTIES(quotient-test PROPERTIES
+    CXX_STANDARD 11
+    CXX_EXTENSIONS YES)
   TARGET_LINK_LIBRARIES(quotient-test fxdiv gtest gtest_main)
   ADD_TEST(quotient quotient-test)
 ENDIF()
@@ -84,21 +82,36 @@
     SET(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE)
     ADD_SUBDIRECTORY(
       "${GOOGLEBENCHMARK_SOURCE_DIR}"
-      "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark")
+      "${CMAKE_BINARY_DIR}/googlebenchmark")
   ENDIF()
 
   ADD_EXECUTABLE(init-bench bench/init.cc)
+  SET_TARGET_PROPERTIES(init-bench PROPERTIES
+    CXX_STANDARD 11
+    CXX_EXTENSIONS YES)
   TARGET_LINK_LIBRARIES(init-bench fxdiv benchmark)
 
   ADD_EXECUTABLE(multiply-bench bench/multiply.cc)
+  SET_TARGET_PROPERTIES(multiply-bench PROPERTIES
+    CXX_STANDARD 11
+    CXX_EXTENSIONS YES)
   TARGET_LINK_LIBRARIES(multiply-bench fxdiv benchmark)
 
   ADD_EXECUTABLE(divide-bench bench/divide.cc)
+  SET_TARGET_PROPERTIES(divide-bench PROPERTIES
+    CXX_STANDARD 11
+    CXX_EXTENSIONS YES)
   TARGET_LINK_LIBRARIES(divide-bench fxdiv benchmark)
 
   ADD_EXECUTABLE(quotient-bench bench/quotient.cc)
+  SET_TARGET_PROPERTIES(quotient-bench PROPERTIES
+    CXX_STANDARD 11
+    CXX_EXTENSIONS YES)
   TARGET_LINK_LIBRARIES(quotient-bench fxdiv benchmark)
 
   ADD_EXECUTABLE(round-down-bench bench/round-down.cc)
+  SET_TARGET_PROPERTIES(round-down-bench PROPERTIES
+    CXX_STANDARD 11
+    CXX_EXTENSIONS YES)
   TARGET_LINK_LIBRARIES(round-down-bench fxdiv benchmark)
 ENDIF()
diff --git a/METADATA b/METADATA
index 8360525..f0b139b 100644
--- a/METADATA
+++ b/METADATA
@@ -1,13 +1,5 @@
 name: "FXdiv"
-description:
-    "Header-only library for division via fixed-point multiplication by inverse "
-    " "
-    "On modern CPUs and GPUs integer division is several times slower than "
-    "multiplication. FXdiv implements an algorithm to replace an integer "
-    "division with a multiplication and two shifts. This algorithm improves "
-    "performance when an application performs repeated divisions by the same "
-    "divisor."
-
+description: "Header-only library for division via fixed-point multiplication by inverse. On modern CPUs and GPUs integer division is several times slower than multiplication. FXdiv implements an algorithm to replace an integer division with a multiplication and two shifts. This algorithm improves performance when an application performs repeated divisions by the same divisor."
 third_party {
   url {
     type: HOMEPAGE
@@ -17,7 +9,11 @@
     type: GIT
     value: "https://github.com/Maratyszcza/FXdiv"
   }
-  version: "fd804a929fc64be9e40ee58bb51ed9f9cac98244"
-  last_upgrade_date { year: 2020 month: 2 day: 3 }
+  version: "561254d968e5679460e6a0a743206410284d9f46"
   license_type: NOTICE
+  last_upgrade_date {
+    year: 2020
+    month: 4
+    day: 13
+  }
 }
diff --git a/README.md b/README.md
index 2e9e231..b8ef0d3 100644
--- a/README.md
+++ b/README.md
@@ -40,21 +40,31 @@
 
 ## Status
 
-Project is in alpha stage. API is unstable. Currently working features:
+Currently working features:
 
 | Platform        | uint32_t | uint64_t | size_t   |
 | --------------- |:--------:|:--------:|:--------:|
 | x86-64 gcc      | Works    | Works    | Works    |
+| x86-64 clang    | Works    | Works    | Works    |
 | x86-64 MSVC     | Works    | Works    | Works    |
 | x86 gcc         | Works    | Works    | Works    |
+| x86 clang       | Works    | Works    | Works    |
 | x86 MSVC        | Works    | Works    | Works    |
 | ARMv7 gcc       | Works    | Works    | Works    |
+| ARMv7 clang     | Works    | Works    | Works    |
+| ARMv7 MSVC*     | Compiles | Compiles | Compiles |
+| ARM64 gcc       | Works    | Works    | Works    |
+| ARM64 clang     | Works    | Works    | Works    |
+| ARM64 MSVC*     | Compiles | Compiles | Compiles |
 | PPC64 gcc       | Works    | Works    | Works    |
-| PNaCl clang     | Works    | Works    | Works    |
+| WAsm clang      | Works    | Works    | Works    |
 | Asm.js clang    | Works    | Works    | Works    |
+| PNaCl clang     | Works    | Works    | Works    |
 | CUDA            | Untested | Untested | Untested |
 | OpenCL          | Untested | Untested | Untested |
 
+*ARMv7 and ARM64 builds with MSVC are presumed to work, but were only verified to compile successfully
+
 ## References
 
 - Granlund, Torbjörn, and Peter L. Montgomery. "Division by invariant integers using multiplication." In ACM SIGPLAN Notices, vol. 29, no. 6, pp. 61-72. ACM, 1994. Available: [gmplib.org/~tege/divcnst-pldi94.pdf](https://gmplib.org/~tege/divcnst-pldi94.pdf)
diff --git a/WORKSPACE b/WORKSPACE
new file mode 100644
index 0000000..4fbe23d
--- /dev/null
+++ b/WORKSPACE
@@ -0,0 +1,30 @@
+workspace(name = "FXdiv")
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+# Bazel rule definitions
+http_archive(
+    name = "rules_cc",
+    strip_prefix = "rules_cc-master",
+    urls = ["https://github.com/bazelbuild/rules_cc/archive/master.zip"],
+)
+
+# Google Test framework, used by most unit-tests.
+http_archive(
+    name = "com_google_googletest",
+    strip_prefix = "googletest-master",
+    urls = ["https://github.com/google/googletest/archive/master.zip"],
+)
+
+# Google Benchmark library, used in micro-benchmarks.
+http_archive(
+    name = "com_google_benchmark",
+    strip_prefix = "benchmark-master",
+    urls = ["https://github.com/google/benchmark/archive/master.zip"],
+)
+
+# Android NDK location and version is auto-detected from $ANDROID_NDK_HOME environment variable
+android_ndk_repository(name = "androidndk")
+
+# Android SDK location and API is auto-detected from $ANDROID_HOME environment variable
+android_sdk_repository(name = "androidsdk")
diff --git a/cmake/DownloadGoogleBenchmark.cmake b/cmake/DownloadGoogleBenchmark.cmake
index 349e7cb..d042e07 100644
--- a/cmake/DownloadGoogleBenchmark.cmake
+++ b/cmake/DownloadGoogleBenchmark.cmake
@@ -1,13 +1,13 @@
-CMAKE_MINIMUM_REQUIRED(VERSION 2.8.2)
+CMAKE_MINIMUM_REQUIRED(VERSION 3.5 FATAL_ERROR)
 
 PROJECT(googlebenchmark-download NONE)
 
 INCLUDE(ExternalProject)
 ExternalProject_Add(googlebenchmark
-	URL https://github.com/google/benchmark/archive/v1.2.0.zip
-	URL_HASH SHA256=cc463b28cb3701a35c0855fbcefb75b29068443f1952b64dd5f4f669272e95ea
-	SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark"
-	BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark"
+	URL https://github.com/google/benchmark/archive/v1.5.0.zip
+	URL_HASH SHA256=2d22dd3758afee43842bb504af1a8385cccb3ee1f164824e4837c1c1b04d92a0
+	SOURCE_DIR "${CMAKE_BINARY_DIR}/googlebenchmark-source"
+	BINARY_DIR "${CMAKE_BINARY_DIR}/googlebenchmark"
 	CONFIGURE_COMMAND ""
 	BUILD_COMMAND ""
 	INSTALL_COMMAND ""
diff --git a/cmake/DownloadGoogleTest.cmake b/cmake/DownloadGoogleTest.cmake
index 19f5eb1..1a0c152 100644
--- a/cmake/DownloadGoogleTest.cmake
+++ b/cmake/DownloadGoogleTest.cmake
@@ -1,13 +1,13 @@
-CMAKE_MINIMUM_REQUIRED(VERSION 2.8.2)
+CMAKE_MINIMUM_REQUIRED(VERSION 3.5 FATAL_ERROR)
 
 PROJECT(googletest-download NONE)
 
 INCLUDE(ExternalProject)
 ExternalProject_Add(googletest
-	URL https://github.com/google/googletest/archive/release-1.8.0.zip
-	URL_HASH SHA256=f3ed3b58511efd272eb074a3a6d6fb79d7c2e6a0e374323d1e6bcbcc1ef141bf
-	SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest"
-	BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest"
+	URL https://github.com/google/googletest/archive/release-1.10.0.zip
+	URL_HASH SHA256=94c634d499558a76fa649edb13721dce6e98fb1e7018dfaeba3cd7a083945e91
+	SOURCE_DIR "${CMAKE_BINARY_DIR}/googletest-source"
+	BINARY_DIR "${CMAKE_BINARY_DIR}/googletest"
 	CONFIGURE_COMMAND ""
 	BUILD_COMMAND ""
 	INSTALL_COMMAND ""
diff --git a/include/fxdiv.h b/include/fxdiv.h
index 21a3dc1..f5a09d0 100644
--- a/include/fxdiv.h
+++ b/include/fxdiv.h
@@ -14,10 +14,13 @@
 
 #if defined(_MSC_VER)
 	#include <intrin.h>
+	#if defined(_M_IX86) || defined(_M_X64)
+		#include <immintrin.h>
+	#endif
 #endif
 
 #ifndef FXDIV_USE_INLINE_ASSEMBLY
-	#define FXDIV_USE_INLINE_ASSEMBLY 1
+	#define FXDIV_USE_INLINE_ASSEMBLY 0
 #endif
 
 static inline uint64_t fxdiv_mulext_uint32_t(uint32_t a, uint32_t b) {
@@ -121,14 +124,15 @@
 			const uint32_t l_minus_1 = 31 - clz(d - 1);
 		#elif defined(__CUDA_ARCH__)
 			const uint32_t l_minus_1 = 31 - __clz((int) (d - 1));
-		#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
+		#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64))
 			unsigned long l_minus_1;
 			_BitScanReverse(&l_minus_1, (unsigned long) (d - 1));
 		#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && FXDIV_USE_INLINE_ASSEMBLY
 			uint32_t l_minus_1;
 			__asm__("BSRL %[d_minus_1], %[l_minus_1]"
 				: [l_minus_1] "=r" (l_minus_1)
-				: [d_minus_1] "r" (d - 1));
+				: [d_minus_1] "r" (d - 1)
+				: "cc");
 		#elif defined(__GNUC__)
 			const uint32_t l_minus_1 = 31 - __builtin_clz(d - 1);
 		#else
@@ -167,7 +171,11 @@
 			uint32_t q;
 			__asm__("DIVL %[d]"
 				: "=a" (q), "+d" (u_hi)
-				: [d] "r" (d), "a" (0));
+				: [d] "r" (d), "a" (0)
+				: "cc");
+		#elif (defined(_MSC_VER) && _MSC_VER >= 1920) && (defined(_M_IX86) || defined(_M_X64))
+			unsigned int remainder;
+			const uint32_t q = (uint32_t) _udiv64((unsigned __int64) ((uint64_t) u_hi << 32), (unsigned int) d, &remainder);
 		#else
 			const uint32_t q = ((uint64_t) u_hi << 32) / d;
 		#endif
@@ -192,13 +200,13 @@
 		#elif defined(__CUDA_ARCH__)
 			const uint32_t nlz_d = __clzll((long long) d);
 			const uint32_t l_minus_1 = 63 - __clzll((long long) (d - 1));
-		#elif defined(_MSC_VER) && defined(_M_X64)
+		#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
 			unsigned long l_minus_1;
 			_BitScanReverse64(&l_minus_1, (unsigned __int64) (d - 1));
 			unsigned long bsr_d;
 			_BitScanReverse64(&bsr_d, (unsigned __int64) d);
 			const uint32_t nlz_d = bsr_d ^ 0x3F;
-		#elif defined(_MSC_VER) && defined(_M_IX86)
+		#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_ARM))
 			const uint64_t d_minus_1 = d - 1;
 			const uint8_t d_is_power_of_2 = (d & d_minus_1) == 0;
 			unsigned long l_minus_1;
@@ -213,7 +221,8 @@
 			uint64_t l_minus_1;
 			__asm__("BSRQ %[d_minus_1], %[l_minus_1]"
 				: [l_minus_1] "=r" (l_minus_1)
-				: [d_minus_1] "r" (d - 1));
+				: [d_minus_1] "r" (d - 1)
+				: "cc");
 		#elif defined(__GNUC__)
 			const uint32_t l_minus_1 = 63 - __builtin_clzll(d - 1);
 			const uint32_t nlz_d = __builtin_clzll(d);
@@ -221,8 +230,8 @@
 			/* Based on Algorithm 2 from Hacker's delight */
 			const uint64_t d_minus_1 = d - 1;
 			const uint32_t d_is_power_of_2 = (d & d_minus_1) == 0;
-			uint64_t l_minus_1 = 0;
-			uint32_t x = d_minus_1;
+			uint32_t l_minus_1 = 0;
+			uint32_t x = (uint32_t) d_minus_1;
 			uint32_t y = d_minus_1 >> 32;
 			if (y != 0) {
 				l_minus_1 += 32;
@@ -260,7 +269,14 @@
 			uint64_t q;
 			__asm__("DIVQ %[d]"
 				: "=a" (q), "+d" (u_hi)
-				: [d] "r" (d), "a" (UINT64_C(0)));
+				: [d] "r" (d), "a" (UINT64_C(0))
+				: "cc");
+		#elif 0 && defined(__GNUC__) && defined(__SIZEOF_INT128__)
+			/* GCC, Clang, and Intel Compiler fail to inline optimized implementation and call into support library for 128-bit division */
+			const uint64_t q = (uint64_t) (((unsigned __int128) u_hi << 64) / ((unsigned __int128) d));
+		#elif (defined(_MSC_VER) && _MSC_VER >= 1920) && defined(_M_X64)
+			unsigned __int64 remainder;
+			const uint64_t q = (uint64_t) _udiv128((unsigned __int64) u_hi, 0, (unsigned __int64) d, &remainder);
 		#else
 			/* Implementation based on code from Hacker's delight */