Fix FP16 bitcasts for ClangCL on ARM/ARM64
When using ClangCL, _MSC_VER is defined just as it is for MSVC, but
intrin.h is missing. Adding !defined(__clang__) to the preprocessor
checks was required to distinguish MSVC from ClangCL.
diff --git a/include/fp16/bitcasts.h b/include/fp16/bitcasts.h
index 86a4e22..7dbb516 100644
--- a/include/fp16/bitcasts.h
+++ b/include/fp16/bitcasts.h
@@ -12,7 +12,7 @@
#include <immintrin.h>
#endif
-#if defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+#if defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
#include <intrin.h>
#endif
@@ -24,7 +24,7 @@
return __uint_as_float((unsigned int) w);
#elif defined(__INTEL_COMPILER)
return _castu32_f32(w);
-#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
return _CopyFloatFromInt32((__int32) w);
#else
union {
@@ -42,7 +42,7 @@
return (uint32_t) __float_as_uint(f);
#elif defined(__INTEL_COMPILER)
return _castf32_u32(f);
-#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
return (uint32_t) _CopyInt32FromFloat(f);
#else
union {
@@ -60,7 +60,7 @@
return __longlong_as_double((long long) w);
#elif defined(__INTEL_COMPILER)
return _castu64_f64(w);
-#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
return _CopyDoubleFromInt64((__int64) w);
#else
union {
@@ -78,7 +78,7 @@
return (uint64_t) __double_as_longlong(f);
#elif defined(__INTEL_COMPILER)
return _castf64_u64(f);
-#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
return (uint64_t) _CopyInt64FromDouble(f);
#else
union {