swr: add missing _BitScanForward64/_BitScanReverse64 on 32-bit Windows

The code does not compile on 32-bit systems, where _BitScanForward64 and _BitScanReverse64 are missing.
For MinGW we can use the GCC builtins, as on the Unix side.
For MSVC a generic implementation is provided.

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Reviewed-by: Krzysztof Raszkowski <krzysztof.raszkowski@intel.com>
Reviewed-by: Jan Zielinski <jan.zielinski@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6705>
diff --git a/src/gallium/drivers/swr/rasterizer/common/os.h b/src/gallium/drivers/swr/rasterizer/common/os.h
index f48ed64..14a613e 100644
--- a/src/gallium/drivers/swr/rasterizer/common/os.h
+++ b/src/gallium/drivers/swr/rasterizer/common/os.h
@@ -103,6 +103,34 @@
 #define _mm_popcount_sizeT _mm_popcnt_u32
 #endif
 
+#if !defined(_WIN64)
+/* Store the index of the lowest set bit of Mask in *Index; returns 0 (and *Index = 0) if Mask == 0. */
+inline unsigned char _BitScanForward64(unsigned long* Index, uint64_t Mask)
+{
+    *Index = 0;
+    if (Mask == 0) { return 0; } /* __builtin_ctzll(0) is undefined, and the scan below would not terminate */
+#ifdef __GNUC__
+    *Index = __builtin_ctzll(Mask);
+#else
+    while (((Mask >> *Index) & 1) == 0) { ++*Index; } /* stop at the FIRST (lowest) set bit */
+#endif
+    return 1;
+}
+
+/* Store the index of the highest set bit of Mask in *Index; returns 0 (and *Index = 0) if Mask == 0. */
+inline unsigned char _BitScanReverse64(unsigned long* Index, uint64_t Mask)
+{
+    *Index = 0;
+    if (Mask == 0) { return 0; } /* __builtin_clzll(0) is undefined */
+#ifdef __GNUC__
+    *Index = 63 - __builtin_clzll(Mask);
+#else
+    while ((Mask >>= 1) != 0) { ++*Index; } /* number of shifts needed to clear Mask == top bit index */
+#endif
+    return 1;
+}
+#endif
+
 #elif defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
 
 #define SWR_API