Make find_first_set work on x86 MSVC (#38637)

Summary:
Fixes https://github.com/pytorch/pytorch/issues/38322#issuecomment-630031072.
Tested locally.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/38637

Differential Revision: D21620059

Pulled By: ezyang

fbshipit-source-id: 50af50ce29e46759f11a196fa0fedca2740214bb
diff --git a/c10/util/Bitset.h b/c10/util/Bitset.h
index 5510cfc..835fe6d 100644
--- a/c10/util/Bitset.h
+++ b/c10/util/Bitset.h
@@ -77,13 +77,29 @@
   // (i.e. if the very first bit is set, this function returns '1'), and a return
   // of '0' means that there was no bit set.
   size_t find_first_set() const {
-    #if defined(_MSC_VER)
+    #if defined(_MSC_VER) && defined(_M_X64)
       unsigned long result;
       bool has_bits_set = (0 != _BitScanForward64(&result, bitset_));
       if (!has_bits_set) {
         return 0;
       }
       return result + 1;
+    #elif defined(_MSC_VER) && defined(_M_IX86)
+      unsigned long result;
+      if (static_cast<uint32_t>(bitset_) != 0) {
+        bool has_bits_set = (0 != _BitScanForward(&result, static_cast<uint32_t>(bitset_)));
+        if (!has_bits_set) {
+          return 0;
+        }
+        return result + 1;
+      }
+      else {
+        bool has_bits_set = (0 != _BitScanForward(&result, static_cast<uint32_t>(bitset_ >> 32)));
+        if (!has_bits_set) {
+          return 0;
+        }
+        return result + 33;
+      }
     #else
       return __builtin_ffsll(bitset_);
     #endif