Fix FP16 bitcasts for ClangCL on ARM/ARM64

When using ClangCL _MSC_VER is defined just like for MSVC but intrin.h
is missing. Adding not defined(__clang__) was required to distinct MSVC
and ClangCL.
diff --git a/include/fp16/bitcasts.h b/include/fp16/bitcasts.h
index 86a4e22..7dbb516 100644
--- a/include/fp16/bitcasts.h
+++ b/include/fp16/bitcasts.h
@@ -12,7 +12,7 @@
 	#include <immintrin.h>
 #endif
 
-#if defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+#if defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
 	#include <intrin.h>
 #endif
 
@@ -24,7 +24,7 @@
 	return __uint_as_float((unsigned int) w);
 #elif defined(__INTEL_COMPILER)
 	return _castu32_f32(w);
-#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
 	return _CopyFloatFromInt32((__int32) w);
 #else
 	union {
@@ -42,7 +42,7 @@
 	return (uint32_t) __float_as_uint(f);
 #elif defined(__INTEL_COMPILER)
 	return _castf32_u32(f);
-#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
 	return (uint32_t) _CopyInt32FromFloat(f);
 #else
 	union {
@@ -60,7 +60,7 @@
 	return __longlong_as_double((long long) w);
 #elif defined(__INTEL_COMPILER)
 	return _castu64_f64(w);
-#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
 	return _CopyDoubleFromInt64((__int64) w);
 #else
 	union {
@@ -78,7 +78,7 @@
 	return (uint64_t) __double_as_longlong(f);
 #elif defined(__INTEL_COMPILER)
 	return _castf64_u64(f);
-#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
 	return (uint64_t) _CopyInt64FromDouble(f);
 #else
 	union {