Clean up non-C++14 code (#28443)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/28443

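We're now on C++14, so the #else fallback branches of these `#if __cpp_binary_literals >= 201304L` checks are no longer needed; the binary literals are now used unconditionally.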
ghstack-source-id: 94904074

Test Plan: waitforsandcastle

Differential Revision: D18069136

fbshipit-source-id: f1613cab9a99ee30f99775e4a60a1b06fd0a03ff
diff --git a/aten/src/ATen/cpu/vec256/vec256.h b/aten/src/ATen/cpu/vec256/vec256.h
index ef4452d..cdec71b 100644
--- a/aten/src/ATen/cpu/vec256/vec256.h
+++ b/aten/src/ATen/cpu/vec256/vec256.h
@@ -145,27 +145,14 @@
   // swap lanes:
   //   a_swapped = {a0, a1, b0, b1}
   //   b_swapped = {a2, a3, b2, b3}
-#if __cpp_binary_literals >= 201304L
-  auto a_swapped = _mm256_permute2f128_pd(a, b, 0b0100000);
-  auto b_swapped = _mm256_permute2f128_pd(a, b, 0b0110001);
-#else  // TODO Remove else case once switch to C++14 is finished
-  static constexpr int swap_ctrl_a = 0 | (2 << 4);  // 0, 2.   4 bits apart
-  static constexpr int swap_ctrl_b = 1 | (3 << 4);  // 1, 3.   4 bits apart
-  auto a_swapped = _mm256_permute2f128_pd(a, b, swap_ctrl_a);
-  auto b_swapped = _mm256_permute2f128_pd(a, b, swap_ctrl_b);
-#endif
+  auto a_swapped = _mm256_permute2f128_pd(a, b, 0b0100000);  // 0, 2.   4 bits apart
+  auto b_swapped = _mm256_permute2f128_pd(a, b, 0b0110001);  // 1, 3.   4 bits apart
 
   // group cols crossing lanes:
   //   return {a0, b0, a1, b1}
   //          {a2, b2, a3, b3}
-#if __cpp_binary_literals >= 201304L
-  return std::make_pair(_mm256_permute4x64_pd(a_swapped, 0b11011000),
-                        _mm256_permute4x64_pd(b_swapped, 0b11011000));
-#else  // TODO Remove else case once switch to C++14 is finished
-  static constexpr int group_ctrl = 0 | (2 << 2) | (1 << 4) | (3 << 6);  // 0, 2, 1, 3
-  return std::make_pair(_mm256_permute4x64_pd(a_swapped, group_ctrl),
-                        _mm256_permute4x64_pd(b_swapped, group_ctrl));
-#endif
+  return std::make_pair(_mm256_permute4x64_pd(a_swapped, 0b11011000),  // 0, 2, 1, 3
+                        _mm256_permute4x64_pd(b_swapped, 0b11011000)); // 0, 2, 1, 3
 }
 
 template <>
@@ -179,15 +166,8 @@
   //   a_swapped = {a0, a1, a2, a3, b0, b1, b2, b3}
   //   b_swapped = {a4, a5, a6, a7, b4, b5, b6, b7}
   // TODO: can we support caching this?
-#if __cpp_binary_literals >= 201304L
-  auto a_swapped = _mm256_permute2f128_ps(a, b, 0b0100000);
-  auto b_swapped = _mm256_permute2f128_ps(a, b, 0b0110001);
-#else  // TODO Remove else case once switch to C++14 is finished
-  static constexpr int swap_ctrl_a = 0 | (2 << 4);  // 0, 2.   4 bits apart
-  static constexpr int swap_ctrl_b = 1 | (3 << 4);  // 1, 3.   4 bits apart
-  auto a_swapped = _mm256_permute2f128_ps(a, b, swap_ctrl_a);
-  auto b_swapped = _mm256_permute2f128_ps(a, b, swap_ctrl_b);
-#endif
+  auto a_swapped = _mm256_permute2f128_ps(a, b, 0b0100000);  // 0, 2.   4 bits apart
+  auto b_swapped = _mm256_permute2f128_ps(a, b, 0b0110001);  // 1, 3.   4 bits apart
 
   // group cols crossing lanes:
   //   return {a0, b0, a1, b1, a2, b2, a3, b3}
@@ -209,27 +189,14 @@
   // group cols crossing lanes:
   //   a_grouped = {a0, a1, b0, b1}
   //   b_grouped = {a2, a3, b2, b3}
-#if __cpp_binary_literals >= 201304L
-  auto a_grouped = _mm256_permute4x64_pd(a, 0b11011000);
-  auto b_grouped = _mm256_permute4x64_pd(b, 0b11011000);
-#else  // TODO Remove else case once switch to C++14 is finished
-  static constexpr int group_ctrl = 0 | (2 << 2) | (1 << 4) | (3 << 6);  // 0, 2, 1, 3
-  auto a_grouped = _mm256_permute4x64_pd(a, group_ctrl);
-  auto b_grouped = _mm256_permute4x64_pd(b, group_ctrl);
-#endif
+  auto a_grouped = _mm256_permute4x64_pd(a, 0b11011000);  // 0, 2, 1, 3
+  auto b_grouped = _mm256_permute4x64_pd(b, 0b11011000);  // 0, 2, 1, 3
 
   // swap lanes:
   //   return {a0, a1, a2, a3}
   //          {b0, b1, b2, b3}
-#if __cpp_binary_literals >= 201304L
-  return std::make_pair(_mm256_permute2f128_pd(a_grouped, b_grouped, 0b0100000),
-                        _mm256_permute2f128_pd(a_grouped, b_grouped, 0b0110001));
-#else  // TODO Remove else case once switch to C++14 is finished
-  static constexpr int swap_ctrl_a = 0 | (2 << 4);  // 0, 2.   4 bits apart
-  static constexpr int swap_ctrl_b = 1 | (3 << 4);  // 1, 3.   4 bits apart
-  return std::make_pair(_mm256_permute2f128_pd(a_grouped, b_grouped, swap_ctrl_a),
-                        _mm256_permute2f128_pd(a_grouped, b_grouped, swap_ctrl_b));
-#endif
+  return std::make_pair(_mm256_permute2f128_pd(a_grouped, b_grouped, 0b0100000),  // 0, 2.   4 bits apart
+                        _mm256_permute2f128_pd(a_grouped, b_grouped, 0b0110001)); // 1, 3.   4 bits apart
 }
 
 template <>
@@ -250,15 +217,8 @@
   // swap lanes:
   //   return {a0, a1, a2, a3, a4, a5, a6, a7}
   //          {b0, b1, b2, b3, b4, b5, b6, b7}
-#if __cpp_binary_literals >= 201304L
-  return std::make_pair(_mm256_permute2f128_ps(a_grouped, b_grouped, 0b0100000),
-                        _mm256_permute2f128_ps(a_grouped, b_grouped, 0b0110001));
-#else  // TODO Remove else case once switch to C++14 is finished
-  static constexpr int swap_ctrl_a = 0 | (2 << 4);  // 0, 2.   4 bits apart
-  static constexpr int swap_ctrl_b = 1 | (3 << 4);  // 1, 3.   4 bits apart
-  return std::make_pair(_mm256_permute2f128_ps(a_grouped, b_grouped, swap_ctrl_a),
-                        _mm256_permute2f128_ps(a_grouped, b_grouped, swap_ctrl_b));
-#endif
+  return std::make_pair(_mm256_permute2f128_ps(a_grouped, b_grouped, 0b0100000),  // 0, 2.   4 bits apart
+                        _mm256_permute2f128_ps(a_grouped, b_grouped, 0b0110001)); // 1, 3.   4 bits apart
 }
 
 #endif  // defined(__AVX2__)