Revert F16C series (MR 6774)

This reverts commit 4fb2eddfdf9adafde2e6f94de23202ee44123d59.
This reverts commit 7a1deb16f8af4e0ae4ed64511cbfcc606087f0ee.
This reverts commit 2b6a17234376817e75d1f81edf5bd1b28eefb374.
This reverts commit 5af81393e419eaf086e4de2a1d149af78cd1f54d.
This reverts commit 87900afe5bbe90c5f3ad0921b28ae1c889029ada.

A couple of problems were discovered after this series was merged that
cause breakage in different configurations:

   (1) It seems that using -mf16c also enables AVX, leading to SIGILL on
   platforms that do not support AVX.
   (2) Since clang only warns about unknown flags, and as I understand
   it Meson's handling in cc.has_argument() is broken, the F16C code is
   wrongly enabled when clang is used, even for example on ARM, leading
   to a compilation error.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3583
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6969>
diff --git a/.gitlab-ci/piglit/quick_gl.txt b/.gitlab-ci/piglit/quick_gl.txt
index 104d2f6..a24ae0d 100644
--- a/.gitlab-ci/piglit/quick_gl.txt
+++ b/.gitlab-ci/piglit/quick_gl.txt
@@ -738,6 +738,7 @@
 spec/arb_sparse_buffer/minmax: skip
 spec/arb_tessellation_shader/arb_tessellation_shader-immediate-mode-draw-patches: skip
 spec/arb_texture_buffer_object/negative-unsupported: skip
+spec/arb_texture_compression_bptc/bptc-float-modes: fail
 spec/arb_texture_cube_map/copyteximage cube samples=16: skip
 spec/arb_texture_cube_map/copyteximage cube samples=2: skip
 spec/arb_texture_cube_map/copyteximage cube samples=32: skip
@@ -1655,8 +1656,8 @@
 summary:
        name:  results
        ----  --------
-       pass:    23075
-       fail:      197
+       pass:    23074
+       fail:      198
       crash:        0
        skip:     1433
     timeout:        0
diff --git a/meson.build b/meson.build
index b855710..76f72a9 100644
--- a/meson.build
+++ b/meson.build
@@ -1111,19 +1111,6 @@
   sse41_args = []
 endif
 
-if cc.has_argument('-mf16c') and cpp.has_argument('-mf16c')
-  pre_args += '-DUSE_F16C'
-  c_args += '-mf16c'
-  cpp_args += '-mf16c'
-
-  # GCC on x86 (not x86_64) with -msse* assumes a 16 byte aligned stack, but
-  # that's not guaranteed (not sure if this also applies to -mf16c)
-  if host_machine.cpu_family() == 'x86'
-    c_args += '-mstackrealign'
-    cpp_args += '-mstackrealign'
-  endif
-endif
-
 # Check for GCC style atomics
 dep_atomic = null_dep
 
diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c
index bf76b6b..ed9e30f 100644
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -29,7 +29,7 @@
 
 #include "vk_util.h"
 
-#include "util/half_float.h"
+#include "util/u_half.h"
 #include "util/format_srgb.h"
 #include "util/format_r11g11b10f.h"
 #include "util/format_rgb9e5.h"
@@ -1079,7 +1079,7 @@
 			if (channel->size == 32) {
 				memcpy(&v, &value->float32[c], 4);
 			} else if(channel->size == 16) {
-				v = _mesa_float_to_float16_rtz(value->float32[c]);
+				v = util_float_to_half_rtz(value->float32[c]);
 			} else {
 				fprintf(stderr, "failed to fast clear for unhandled float size in format %d\n", format);
 				return false;
diff --git a/src/freedreno/fdl/fd5_layout_test.c b/src/freedreno/fdl/fd5_layout_test.c
index e849699..71ed259 100644
--- a/src/freedreno/fdl/fd5_layout_test.c
+++ b/src/freedreno/fdl/fd5_layout_test.c
@@ -24,8 +24,7 @@
 #include "freedreno_layout.h"
 #include "fd_layout_test.h"
 #include "adreno_common.xml.h"
-#include "util/half_float.h"
-#include "util/u_math.h"
+#include "util/u_half.h"
 #include "a5xx.xml.h"
 
 #include <stdio.h>
diff --git a/src/freedreno/perfcntrs/fd2_perfcntr.c b/src/freedreno/perfcntrs/fd2_perfcntr.c
index a0f1ef8..eac2de0 100644
--- a/src/freedreno/perfcntrs/fd2_perfcntr.c
+++ b/src/freedreno/perfcntrs/fd2_perfcntr.c
@@ -25,8 +25,7 @@
  *    Rob Clark <robclark@freedesktop.org>
  */
 
-#include "util/half_float.h"
-#include "util/u_math.h"
+#include "util/u_half.h"
 #include "adreno_common.xml.h"
 #include "adreno_pm4.xml.h"
 #include "a2xx.xml.h"
diff --git a/src/freedreno/perfcntrs/fd5_perfcntr.c b/src/freedreno/perfcntrs/fd5_perfcntr.c
index 0d8d2ae..2d0579c 100644
--- a/src/freedreno/perfcntrs/fd5_perfcntr.c
+++ b/src/freedreno/perfcntrs/fd5_perfcntr.c
@@ -27,8 +27,7 @@
 #ifndef FD5_PERFCNTR_H_
 #define FD5_PERFCNTR_H_
 
-#include "util/half_float.h"
-#include "util/u_math.h"
+#include "util/u_half.h"
 #include "adreno_common.xml.h"
 #include "a5xx.xml.h"
 
diff --git a/src/freedreno/perfcntrs/fd6_perfcntr.c b/src/freedreno/perfcntrs/fd6_perfcntr.c
index f50a72e..02e55f8 100644
--- a/src/freedreno/perfcntrs/fd6_perfcntr.c
+++ b/src/freedreno/perfcntrs/fd6_perfcntr.c
@@ -27,8 +27,7 @@
 #ifndef FD6_PERFCNTR_H_
 #define FD6_PERFCNTR_H_
 
-#include "util/half_float.h"
-#include "util/u_math.h"
+#include "util/u_half.h"
 #include "adreno_common.xml.h"
 #include "adreno_pm4.xml.h"
 #include "a6xx.xml.h"
diff --git a/src/freedreno/perfcntrs/freedreno_perfcntr.h b/src/freedreno/perfcntrs/freedreno_perfcntr.h
index 250ea92..867048a 100644
--- a/src/freedreno/perfcntrs/freedreno_perfcntr.h
+++ b/src/freedreno/perfcntrs/freedreno_perfcntr.h
@@ -27,8 +27,6 @@
 #ifndef FREEDRENO_PERFCNTR_H_
 #define FREEDRENO_PERFCNTR_H_
 
-#include "util/macros.h"
-
 /*
  * Mapping very closely to the AMD_performance_monitor extension, adreno has
  * groups of performance counters where each group has N counters, which can
diff --git a/src/freedreno/registers/gen_header.py b/src/freedreno/registers/gen_header.py
index 480911a..9657dff 100644
--- a/src/freedreno/registers/gen_header.py
+++ b/src/freedreno/registers/gen_header.py
@@ -78,7 +78,7 @@
 			val = "fui(%s)" % var_name
 		elif self.type == "float" and self.high - self.low == 15:
 			type = "float"
-			val = "_mesa_float_to_half(%s)" % var_name
+			val = "util_float_to_half(%s)" % var_name
 		elif self.type in [ "address", "waddress" ]:
 			type = "uint64_t"
 			val = var_name
diff --git a/src/freedreno/rnn/headergen2.c b/src/freedreno/rnn/headergen2.c
index 478dde3..d5a3eb7 100644
--- a/src/freedreno/rnn/headergen2.c
+++ b/src/freedreno/rnn/headergen2.c
@@ -201,7 +201,7 @@
 			if (width == 32)
 				fprintf(dst, "fui(val)");
 			else if (width == 16)
-				fprintf(dst, "_mesa_float_to_half(val)");
+				fprintf(dst, "util_float_to_half(val)");
 			else
 				assert(!"invalid float size");
 		} else {
diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index f1134cb..a645831 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -14,7 +14,7 @@
 #include "util/format_r11g11b10f.h"
 #include "util/format_rgb9e5.h"
 #include "util/format_srgb.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 
 static uint32_t
 tu_pack_float32_for_unorm(float val, int bits)
@@ -122,7 +122,7 @@
             else
                clear_value[i] = tu_pack_float32_for_unorm(linear, 8);
          } else if (ifmt == R2D_FLOAT16) {
-            clear_value[i] = _mesa_float_to_half(val->color.float32[i]);
+            clear_value[i] = util_float_to_half(val->color.float32[i]);
          } else {
             assert(ifmt == R2D_FLOAT32 || ifmt == R2D_INT32 ||
                    ifmt == R2D_INT16 || ifmt == R2D_INT8);
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index f8ef382..e665488 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -248,6 +248,7 @@
 	util/u_framebuffer.h \
 	util/u_gen_mipmap.c \
 	util/u_gen_mipmap.h \
+	util/u_half.h \
 	util/u_handle_table.c \
 	util/u_handle_table.h \
 	util/u_hash_table.c \
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c
index 18ece73..58fdcc9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c
@@ -37,7 +37,7 @@
 
 #include "util/u_debug.h"
 #include "util/u_math.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 
 #include "lp_bld_type.h"
 #include "lp_bld_const.h"
@@ -257,7 +257,7 @@
    elem_type = lp_build_elem_type(gallivm, type);
 
    if(type.floating && type.width == 16)
-      elems[0] = LLVMConstInt(elem_type, _mesa_float_to_half(1.0f), 0);
+      elems[0] = LLVMConstInt(elem_type, util_float_to_half(1.0f), 0);
    else if(type.floating)
       elems[0] = LLVMConstReal(elem_type, 1.0);
    else if(type.fixed)
@@ -304,7 +304,7 @@
    LLVMValueRef elem;
 
    if(type.floating && type.width == 16) {
-      elem = LLVMConstInt(elem_type, _mesa_float_to_half((float)val), 0);
+      elem = LLVMConstInt(elem_type, util_float_to_half((float)val), 0);
    } else if(type.floating) {
       elem = LLVMConstReal(elem_type, val);
    }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index c68b885..2079a2a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -63,7 +63,7 @@
 
 #include "util/u_debug.h"
 #include "util/u_math.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 #include "util/u_cpu_detect.h"
 
 #include "lp_bld_type.h"
@@ -204,8 +204,8 @@
      unsigned i;
 
      LLVMTypeRef func_type = LLVMFunctionType(i16t, &f32t, 1, 0);
-     LLVMValueRef func = lp_build_const_int_pointer(gallivm, func_to_pointer((func_pointer)_mesa_float_to_half));
-     func = LLVMBuildBitCast(builder, func, LLVMPointerType(func_type, 0), "_mesa_float_to_half");
+     LLVMValueRef func = lp_build_const_int_pointer(gallivm, func_to_pointer((func_pointer)util_float_to_half));
+     func = LLVMBuildBitCast(builder, func, LLVMPointerType(func_type, 0), "util_float_to_half");
 
      for (i = 0; i < length; ++i) {
         LLVMValueRef index = LLVMConstInt(i32t, i, 0);
diff --git a/src/gallium/auxiliary/meson.build b/src/gallium/auxiliary/meson.build
index 37a8e01..72f569a 100644
--- a/src/gallium/auxiliary/meson.build
+++ b/src/gallium/auxiliary/meson.build
@@ -264,6 +264,7 @@
   'util/u_framebuffer.h',
   'util/u_gen_mipmap.c',
   'util/u_gen_mipmap.h',
+  'util/u_half.h',
   'util/u_handle_table.c',
   'util/u_handle_table.h',
   'util/u_hash_table.c',
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 4a009f7..ab17c1f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -58,7 +58,7 @@
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi_exec.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
 #include "util/rounding.h"
@@ -3325,8 +3325,8 @@
    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
    fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
    for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
-      dst.u[chan] = _mesa_float_to_half(arg[0].f[chan]) |
-         (_mesa_float_to_half(arg[1].f[chan]) << 16);
+      dst.u[chan] = util_float_to_half(arg[0].f[chan]) |
+         (util_float_to_half(arg[1].f[chan]) << 16);
    }
    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
@@ -3344,8 +3344,8 @@
 
    fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
    for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
-      dst[0].f[chan] = _mesa_half_to_float(arg.u[chan] & 0xffff);
-      dst[1].f[chan] = _mesa_half_to_float(arg.u[chan] >> 16);
+      dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff);
+      dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16);
    }
    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 3f07a58..d48c8f9 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -32,7 +32,7 @@
 
 #include "util/u_memory.h"
 #include "util/format/u_format.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 #include "util/u_math.h"
 #include "pipe/p_state.h"
 #include "translate.h"
@@ -110,7 +110,7 @@
 
 #define TO_64_FLOAT(x)   ((double) x)
 #define TO_32_FLOAT(x)   (x)
-#define TO_16_FLOAT(x)   _mesa_float_to_half(x)
+#define TO_16_FLOAT(x)   util_float_to_half(x)
 
 #define TO_8_USCALED(x)  ((unsigned char) x)
 #define TO_16_USCALED(x) ((unsigned short) x)
diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h
new file mode 100644
index 0000000..bbcc843
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_half.h
@@ -0,0 +1,143 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef U_HALF_H
+#define U_HALF_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_math.h"
+#include "util/half_float.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * References for float <-> half conversions
+ *
+ *  http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
+ *  https://gist.github.com/2156668
+ *  https://gist.github.com/2144712
+ */
+
+static inline uint16_t
+util_float_to_half(float f)
+{
+   return _mesa_float_to_half(f);
+}
+
+static inline uint16_t
+util_float_to_half_rtz(float f)
+{
+   uint32_t sign_mask  = 0x80000000;
+   uint32_t round_mask = ~0xfff;
+   uint32_t f32inf = 0xff << 23;
+   uint32_t f16inf = 0x1f << 23;
+   uint32_t sign;
+   union fi magic;
+   union fi f32;
+   uint16_t f16;
+
+   magic.ui = 0xf << 23;
+
+   f32.f = f;
+
+   /* Sign */
+   sign = f32.ui & sign_mask;
+   f32.ui ^= sign;
+
+   if (f32.ui == f32inf) {
+      /* Inf */
+      f16 = 0x7c00;
+   } else if (f32.ui > f32inf) {
+      /* NaN */
+      f16 = 0x7e00;
+   } else {
+      /* Number */
+      f32.ui &= round_mask;
+      f32.f  *= magic.f;
+      f32.ui -= round_mask;
+      /*
+       * XXX: The magic mul relies on denorms being available, otherwise
+       * all f16 denorms get flushed to zero - hence when this is used
+       * for tgsi_exec in softpipe we won't get f16 denorms.
+       */
+      /*
+       * Clamp to max finite value if overflowed.
+       * OpenGL has completely undefined rounding behavior for float to
+       * half-float conversions, and this matches what is mandated for float
+       * to fp11/fp10, which recommend round-to-nearest-finite too.
+       * (d3d10 is deeply unhappy about flushing such values to infinity, and
+       * while it also mandates round-to-zero it doesn't care nearly as much
+       * about that.)
+       */
+      if (f32.ui > f16inf)
+         f32.ui = f16inf - 1;
+
+      f16 = f32.ui >> 13;
+   }
+
+   /* Sign */
+   f16 |= sign >> 16;
+
+   return f16;
+}
+
+static inline float
+util_half_to_float(uint16_t f16)
+{
+   union fi infnan;
+   union fi magic;
+   union fi f32;
+
+   infnan.ui = 0x8f << 23;
+   infnan.f = 65536.0f;
+   magic.ui  = 0xef << 23;
+
+   /* Exponent / Mantissa */
+   f32.ui = (f16 & 0x7fff) << 13;
+
+   /* Adjust */
+   f32.f *= magic.f;
+   /* XXX: The magic mul relies on denorms being available */
+
+   /* Inf / NaN */
+   if (f32.f >= infnan.f)
+      f32.ui |= 0xff << 23;
+
+   /* Sign */
+   f32.ui |= (uint32_t)(f16 & 0x8000) << 16;
+
+   return f32.f;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_HALF_H */
+
diff --git a/src/gallium/drivers/etnaviv/etnaviv_blend.c b/src/gallium/drivers/etnaviv/etnaviv_blend.c
index 3662426..644409a 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_blend.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_blend.c
@@ -32,7 +32,7 @@
 #include "hw/common.xml.h"
 #include "pipe/p_defines.h"
 #include "util/u_memory.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 
 void *
 etna_blend_state_create(struct pipe_context *pctx,
@@ -180,11 +180,11 @@
       VIVS_PE_ALPHA_BLEND_COLOR_A(etna_cfloat_to_uint8(cs->color[3]));
 
    cs->PE_ALPHA_COLOR_EXT0 =
-      VIVS_PE_ALPHA_COLOR_EXT0_B(_mesa_float_to_half(cs->color[rb_swap ? 2 : 0])) |
-      VIVS_PE_ALPHA_COLOR_EXT0_G(_mesa_float_to_half(cs->color[1]));
+      VIVS_PE_ALPHA_COLOR_EXT0_B(util_float_to_half(cs->color[rb_swap ? 2 : 0])) |
+      VIVS_PE_ALPHA_COLOR_EXT0_G(util_float_to_half(cs->color[1]));
    cs->PE_ALPHA_COLOR_EXT1 =
-      VIVS_PE_ALPHA_COLOR_EXT1_R(_mesa_float_to_half(cs->color[rb_swap ? 0 : 2])) |
-      VIVS_PE_ALPHA_COLOR_EXT1_A(_mesa_float_to_half(cs->color[3]));
+      VIVS_PE_ALPHA_COLOR_EXT1_R(util_float_to_half(cs->color[rb_swap ? 0 : 2])) |
+      VIVS_PE_ALPHA_COLOR_EXT1_A(util_float_to_half(cs->color[3]));
 
    return true;
 }
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
index 27aa2c2..8b3296a 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
@@ -42,7 +42,7 @@
 #include "compiler/nir/nir_builder.h"
 
 #include "tgsi/tgsi_strings.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 
 static bool
 etna_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
diff --git a/src/gallium/drivers/etnaviv/etnaviv_disasm.c b/src/gallium/drivers/etnaviv/etnaviv_disasm.c
index 67eb400..56d94e2 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_disasm.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_disasm.c
@@ -34,7 +34,7 @@
 
 #include "hw/isa.xml.h"
 #include "util/u_math.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 
 struct instr {
    /* dword0: */
@@ -379,7 +379,7 @@
             printf("%d", src->imm_val);
             break;
          case 3: /* 16-bit */
-            printf("%f/%.5X", _mesa_half_to_float(src->imm_val), src->imm_val);
+            printf("%f/%.5X", util_half_to_float(src->imm_val), src->imm_val);
             break;
          }
       } else {
diff --git a/src/gallium/drivers/etnaviv/etnaviv_zsa.c b/src/gallium/drivers/etnaviv/etnaviv_zsa.c
index 2f9f2cb..7cac316 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_zsa.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_zsa.c
@@ -29,7 +29,7 @@
 #include "etnaviv_context.h"
 #include "etnaviv_screen.h"
 #include "etnaviv_translate.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 #include "util/u_memory.h"
 
 #include "hw/common.xml.h"
@@ -98,7 +98,7 @@
    uint32_t extra_reference = 0;
 
    if (VIV_FEATURE(screen, chipMinorFeatures1, HALF_FLOAT))
-      extra_reference = _mesa_float_to_half(SATURATE(so->alpha.ref_value));
+      extra_reference = util_float_to_half(SATURATE(so->alpha.ref_value));
 
    cs->PE_STENCIL_CONFIG_EXT =
       VIVS_PE_STENCIL_CONFIG_EXT_EXTRA_ALPHA_REF(extra_reference);
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
index 1711fd6..d1e86a5 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
@@ -276,8 +276,8 @@
 				float f_s = CLAMP(f, -1, 1);
 
 				e->fp32[c] = fui(f);
-				e->fp16[c] = _mesa_float_to_half(f);
-				e->srgb[c] = _mesa_float_to_half(f_u);
+				e->fp16[c] = util_float_to_half(f);
+				e->srgb[c] = util_float_to_half(f_u);
 				e->ui16[c] = f_u * 0xffff;
 				e->si16[c] = f_s * 0x7fff;
 				e->ui8[c]  = f_u * 0xff;
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
index d4f73c4..ff44112 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
@@ -183,8 +183,8 @@
 				float f_s = CLAMP(f, -1, 1);
 
 				e->fp32[c] = fui(f);
-				e->fp16[c] = _mesa_float_to_half(f);
-				e->srgb[c] = _mesa_float_to_half(f_u);
+				e->fp16[c] = util_float_to_half(f);
+				e->srgb[c] = util_float_to_half(f_u);
 				e->ui16[c] = f_u * 0xffff;
 				e->si16[c] = f_s * 0x7fff;
 				e->ui8[c]  = f_u * 0xff;
diff --git a/src/gallium/drivers/freedreno/freedreno_texture.c b/src/gallium/drivers/freedreno/freedreno_texture.c
index 7ffa5c1..5b0bec1 100644
--- a/src/gallium/drivers/freedreno/freedreno_texture.c
+++ b/src/gallium/drivers/freedreno/freedreno_texture.c
@@ -167,7 +167,7 @@
 				} else {
 					bcolor32[desc->swizzle[j]] = fui(sampler->border_color.f[j]);
 					bcolor[desc->swizzle[j]] =
-						_mesa_float_to_half(sampler->border_color.f[j]);
+						util_float_to_half(sampler->border_color.f[j]);
 				}
 			}
 		}
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index d17d80d..248d655 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -34,7 +34,7 @@
 #include "pipe/p_state.h"
 #include "util/u_debug.h"
 #include "util/u_math.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 #include "util/u_dynarray.h"
 #include "util/u_pack_color.h"
 
diff --git a/src/gallium/drivers/lima/ir/pp/codegen.c b/src/gallium/drivers/lima/ir/pp/codegen.c
index 11d4e1a..55d3489 100644
--- a/src/gallium/drivers/lima/ir/pp/codegen.c
+++ b/src/gallium/drivers/lima/ir/pp/codegen.c
@@ -23,7 +23,7 @@
  */
 
 #include "util/ralloc.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 #include "util/bitscan.h"
 
 #include "ppir.h"
@@ -568,7 +568,7 @@
 static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
 {
    for (int i = 0; i < constant->num; i++)
-      code[i] = _mesa_float_to_half(constant->value[i].f);
+      code[i] = util_float_to_half(constant->value[i].f);
 }
 
 static void ppir_codegen_encode_discard(ppir_node *node, void *code)
diff --git a/src/gallium/drivers/lima/ir/pp/disasm.c b/src/gallium/drivers/lima/ir/pp/disasm.c
index 50aa4cb..a45ab85 100644
--- a/src/gallium/drivers/lima/ir/pp/disasm.c
+++ b/src/gallium/drivers/lima/ir/pp/disasm.c
@@ -24,7 +24,7 @@
  *
  */
 
-#include "util/half_float.h"
+#include "util/u_half.h"
 
 #include "ppir.h"
 #include "codegen.h"
@@ -174,7 +174,7 @@
 {
    printf("const%u", const_num);
    for (unsigned i = 0; i < 4; i++)
-      printf(" %f", _mesa_half_to_float(val[i]));
+      printf(" %f", util_half_to_float(val[i]));
 }
 
 static void
diff --git a/src/gallium/drivers/lima/lima_draw.c b/src/gallium/drivers/lima/lima_draw.c
index 5c60eee..8737e5e 100644
--- a/src/gallium/drivers/lima/lima_draw.c
+++ b/src/gallium/drivers/lima/lima_draw.c
@@ -25,7 +25,7 @@
 
 #include "util/format/u_format.h"
 #include "util/u_debug.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 #include "util/u_helpers.h"
 #include "util/u_inlines.h"
 #include "util/u_pack_color.h"
@@ -898,7 +898,7 @@
       lima_ctx_buff_alloc(ctx, lima_ctx_buff_pp_uniform_array, 4);
 
    for (int i = 0; i < const_buff_size; i++)
-       fp16_const_buff[i] = _mesa_float_to_half(const_buff[i]);
+       fp16_const_buff[i] = util_float_to_half(const_buff[i]);
 
    *array = lima_ctx_buff_va(ctx, lima_ctx_buff_pp_uniform);
 
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
index 1276561..c67004b 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
@@ -25,7 +25,7 @@
 
 #include "util/format/u_format.h"
 #include "util/u_math.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 
 #include "nv_object.xml.h"
 #include "nv30/nv30-40_3d.xml.h"
@@ -182,11 +182,11 @@
       case PIPE_FORMAT_R16G16B16A16_FLOAT:
       case PIPE_FORMAT_R32G32B32A32_FLOAT:
          BEGIN_NV04(push, NV30_3D(BLEND_COLOR), 1);
-         PUSH_DATA (push, (_mesa_float_to_half(rgba[0]) <<  0) |
-                          (_mesa_float_to_half(rgba[1]) << 16));
+         PUSH_DATA (push, (util_float_to_half(rgba[0]) <<  0) |
+                          (util_float_to_half(rgba[1]) << 16));
          BEGIN_NV04(push, SUBC_3D(0x037c), 1);
-         PUSH_DATA (push, (_mesa_float_to_half(rgba[2]) <<  0) |
-                          (_mesa_float_to_half(rgba[3]) << 16));
+         PUSH_DATA (push, (util_float_to_half(rgba[2]) <<  0) |
+                          (util_float_to_half(rgba[3]) << 16));
          break;
       default:
          break;
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index 8d3ba13..b627e01 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -26,7 +26,7 @@
 #include "r300_reg.h"
 
 #include "util/format/u_format.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 #include "util/u_pack_color.h"
 #include "util/u_surface.h"
 
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 7f120f0..2d8e5b1 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -24,7 +24,7 @@
 #include "draw/draw_context.h"
 
 #include "util/u_framebuffer.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 #include "util/u_helpers.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
@@ -620,10 +620,10 @@
         switch (cb ? cb->format : 0) {
         case PIPE_FORMAT_R16G16B16A16_FLOAT:
         case PIPE_FORMAT_R16G16B16X16_FLOAT:
-            OUT_CB(_mesa_float_to_half(c.color[2]) |
-                   (_mesa_float_to_half(c.color[3]) << 16));
-            OUT_CB(_mesa_float_to_half(c.color[0]) |
-                   (_mesa_float_to_half(c.color[1]) << 16));
+            OUT_CB(util_float_to_half(c.color[2]) |
+                   (util_float_to_half(c.color[3]) << 16));
+            OUT_CB(util_float_to_half(c.color[0]) |
+                   (util_float_to_half(c.color[1]) << 16));
             break;
 
         default:
@@ -753,7 +753,7 @@
             R300_FG_ALPHA_FUNC_ENABLE;
 
         dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value);
-        alpha_value_fp16 = _mesa_float_to_half(state->alpha.ref_value);
+        alpha_value_fp16 = util_float_to_half(state->alpha.ref_value);
     }
 
     BEGIN_CB(&dsa->cb_begin, 8);
diff --git a/src/gallium/drivers/v3d/v3dx_emit.c b/src/gallium/drivers/v3d/v3dx_emit.c
index 3f0675a..7a1d04e 100644
--- a/src/gallium/drivers/v3d/v3dx_emit.c
+++ b/src/gallium/drivers/v3d/v3dx_emit.c
@@ -22,7 +22,7 @@
  */
 
 #include "util/format/u_format.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 #include "v3d_context.h"
 #include "broadcom/common/v3d_macros.h"
 #include "broadcom/cle/v3dx_pack.h"
@@ -101,11 +101,11 @@
 
         switch (swiz) {
         case PIPE_SWIZZLE_0:
-                return _mesa_float_to_half(0.0);
+                return util_float_to_half(0.0);
         case PIPE_SWIZZLE_1:
-                return _mesa_float_to_half(1.0);
+                return util_float_to_half(1.0);
         default:
-                return _mesa_float_to_half(sampler->border_color.f[swiz]);
+                return util_float_to_half(sampler->border_color.f[swiz]);
         }
 }
 
diff --git a/src/gallium/drivers/v3d/v3dx_state.c b/src/gallium/drivers/v3d/v3dx_state.c
index 06dccfc..ee41b9f 100644
--- a/src/gallium/drivers/v3d/v3dx_state.c
+++ b/src/gallium/drivers/v3d/v3dx_state.c
@@ -28,7 +28,7 @@
 #include "util/u_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 #include "util/u_helpers.h"
 #include "util/u_upload_mgr.h"
 
@@ -52,7 +52,7 @@
         v3d->blend_color.f = *blend_color;
         for (int i = 0; i < 4; i++) {
                 v3d->blend_color.hf[i] =
-                        _mesa_float_to_half(blend_color->color[i]);
+                        util_float_to_half(blend_color->color[i]);
         }
         v3d->dirty |= VC5_DIRTY_BLEND_COLOR;
 }
@@ -691,13 +691,13 @@
                                 sampler.border_color_word_3 = border.ui[3];
                         } else {
                                 sampler.border_color_word_0 =
-                                        _mesa_float_to_half(border.f[0]);
+                                        util_float_to_half(border.f[0]);
                                 sampler.border_color_word_1 =
-                                        _mesa_float_to_half(border.f[1]);
+                                        util_float_to_half(border.f[1]);
                                 sampler.border_color_word_2 =
-                                        _mesa_float_to_half(border.f[2]);
+                                        util_float_to_half(border.f[2]);
                                 sampler.border_color_word_3 =
-                                        _mesa_float_to_half(border.f[3]);
+                                        util_float_to_half(border.f[3]);
                         }
                 }
         }
diff --git a/src/gallium/frontends/nine/vertexshader9.h b/src/gallium/frontends/nine/vertexshader9.h
index 3c1b9ea..cbbd349 100644
--- a/src/gallium/frontends/nine/vertexshader9.h
+++ b/src/gallium/frontends/nine/vertexshader9.h
@@ -23,7 +23,7 @@
 #ifndef _NINE_VERTEXSHADER9_H_
 #define _NINE_VERTEXSHADER9_H_
 
-#include "util/half_float.h"
+#include "util/u_half.h"
 
 #include "iunknown.h"
 #include "device9.h"
@@ -112,8 +112,8 @@
      * Use compressed float16 values for the pointsize min/max in the key.
      * Shaders do not usually output psize.*/
     if (vs->point_size) {
-        key |= ((uint64_t)_mesa_float_to_half(asfloat(context->rs[D3DRS_POINTSIZE_MIN]))) << 32;
-        key |= ((uint64_t)_mesa_float_to_half(asfloat(context->rs[D3DRS_POINTSIZE_MAX]))) << 48;
+        key |= ((uint64_t)util_float_to_half(asfloat(context->rs[D3DRS_POINTSIZE_MIN]))) << 32;
+        key |= ((uint64_t)util_float_to_half(asfloat(context->rs[D3DRS_POINTSIZE_MAX]))) << 48;
     }
 
     res = vs->last_key != key;
diff --git a/src/gallium/tests/unit/translate_test.c b/src/gallium/tests/unit/translate_test.c
index 4d9c4e2..b07db7f 100644
--- a/src/gallium/tests/unit/translate_test.c
+++ b/src/gallium/tests/unit/translate_test.c
@@ -27,7 +27,7 @@
 #include "translate/translate.h"
 #include "util/u_memory.h"
 #include "util/format/u_format.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 #include "util/u_cpu_detect.h"
 #include "rtasm/rtasm_cpu.h"
 
@@ -164,7 +164,7 @@
       double_buffer[i] = rand_double();
 
    for (i = 0; i < buffer_size / sizeof(double); ++i)
-      half_buffer[i] = _mesa_float_to_half((float) rand_double());
+      half_buffer[i] = util_float_to_half((float) rand_double());
 
    for (i = 0; i < count; ++i)
       elts[i] = i;
diff --git a/src/gallium/tests/unit/u_half_test.c b/src/gallium/tests/unit/u_half_test.c
index 7f2eba9..48a9a2d 100644
--- a/src/gallium/tests/unit/u_half_test.c
+++ b/src/gallium/tests/unit/u_half_test.c
@@ -3,11 +3,10 @@
 #include <float.h>
 
 #include "util/u_math.h"
-#include "util/half_float.h"
-#include "util/u_cpu_detect.h"
+#include "util/u_half.h"
 
-static void
-test(void)
+int
+main(int argc, char **argv)
 {
    unsigned i;
    unsigned roundtrip_fails = 0;
@@ -18,8 +17,8 @@
       union fi f;
       uint16_t rh;
 
-      f.f = _mesa_half_to_float(h);
-      rh = _mesa_float_to_half(f.f);
+      f.f = util_half_to_float(h);
+      rh = util_float_to_half(f.f);
 
       if (h != rh && !(util_is_half_nan(h) && util_is_half_nan(rh))) {
          printf("Roundtrip failed: %x -> %x = %f -> %x\n", h, f.ui, f.f, rh);
@@ -29,21 +28,9 @@
 
    if(roundtrip_fails) {
       printf("Failure! %u/65536 half floats failed a conversion to float and back.\n", roundtrip_fails);
-      exit(1);
+      return 1;
+   } else {
+      printf("Success!\n");
+      return 0;
    }
 }
-
-int
-main(int argc, char **argv)
-{
-   assert(!util_cpu_caps.has_f16c);
-   test();
-
-   /* Test f16c. */
-   util_cpu_detect();
-   if (util_cpu_caps.has_f16c)
-      test();
-
-   printf("Success!\n");
-   return 0;
-}
diff --git a/src/mesa/vbo/vbo_attrib_tmp.h b/src/mesa/vbo/vbo_attrib_tmp.h
index df61622..bfc3a31 100644
--- a/src/mesa/vbo/vbo_attrib_tmp.h
+++ b/src/mesa/vbo/vbo_attrib_tmp.h
@@ -28,7 +28,7 @@
 #include "util/format_r11g11b10f.h"
 #include "main/varray.h"
 #include "vbo_util.h"
-#include "util/half_float.h"
+#include "util/u_half.h"
 
 
 /* ATTR */
@@ -61,28 +61,28 @@
 
 
 /* half */
-#define ATTR1HV( A, V ) ATTRF( A, 1, _mesa_half_to_float((uint16_t)(V)[0]), \
+#define ATTR1HV( A, V ) ATTRF( A, 1, util_half_to_float((uint16_t)(V)[0]), \
                                0, 0, 1 )
-#define ATTR2HV( A, V ) ATTRF( A, 2, _mesa_half_to_float((uint16_t)(V)[0]), \
-                               _mesa_half_to_float((uint16_t)(V)[1]), 0, 1 )
-#define ATTR3HV( A, V ) ATTRF( A, 3, _mesa_half_to_float((uint16_t)(V)[0]), \
-                               _mesa_half_to_float((uint16_t)(V)[1]), \
-                               _mesa_half_to_float((uint16_t)(V)[2]), 1 )
-#define ATTR4HV( A, V ) ATTRF( A, 4, _mesa_half_to_float((uint16_t)(V)[0]), \
-                               _mesa_half_to_float((uint16_t)(V)[1]), \
-                               _mesa_half_to_float((uint16_t)(V)[2]), \
-                               _mesa_half_to_float((uint16_t)(V)[3]) )
+#define ATTR2HV( A, V ) ATTRF( A, 2, util_half_to_float((uint16_t)(V)[0]), \
+                               util_half_to_float((uint16_t)(V)[1]), 0, 1 )
+#define ATTR3HV( A, V ) ATTRF( A, 3, util_half_to_float((uint16_t)(V)[0]), \
+                               util_half_to_float((uint16_t)(V)[1]), \
+                               util_half_to_float((uint16_t)(V)[2]), 1 )
+#define ATTR4HV( A, V ) ATTRF( A, 4, util_half_to_float((uint16_t)(V)[0]), \
+                               util_half_to_float((uint16_t)(V)[1]), \
+                               util_half_to_float((uint16_t)(V)[2]), \
+                               util_half_to_float((uint16_t)(V)[3]) )
 
-#define ATTR1H( A, X )          ATTRF( A, 1, _mesa_half_to_float(X), 0, 0, 1 )
-#define ATTR2H( A, X, Y )       ATTRF( A, 2, _mesa_half_to_float(X), \
-                                       _mesa_half_to_float(Y), 0, 1 )
-#define ATTR3H( A, X, Y, Z )    ATTRF( A, 3, _mesa_half_to_float(X), \
-                                       _mesa_half_to_float(Y), \
-                                       _mesa_half_to_float(Z), 1 )
-#define ATTR4H( A, X, Y, Z, W ) ATTRF( A, 4, _mesa_half_to_float(X), \
-                                       _mesa_half_to_float(Y), \
-                                       _mesa_half_to_float(Z), \
-                                       _mesa_half_to_float(W) )
+#define ATTR1H( A, X )          ATTRF( A, 1, util_half_to_float(X), 0, 0, 1 )
+#define ATTR2H( A, X, Y )       ATTRF( A, 2, util_half_to_float(X), \
+                                       util_half_to_float(Y), 0, 1 )
+#define ATTR3H( A, X, Y, Z )    ATTRF( A, 3, util_half_to_float(X), \
+                                       util_half_to_float(Y), \
+                                       util_half_to_float(Z), 1 )
+#define ATTR4H( A, X, Y, Z, W ) ATTRF( A, 4, util_half_to_float(X), \
+                                       util_half_to_float(Y), \
+                                       util_half_to_float(Z), \
+                                       util_half_to_float(W) )
 
 
 /* int */
diff --git a/src/util/format/u_format_pack.py b/src/util/format/u_format_pack.py
index 3545f9a..a9a7792 100644
--- a/src/util/format/u_format_pack.py
+++ b/src/util/format/u_format_pack.py
@@ -360,7 +360,7 @@
 
     # Promote half to float
     if src_type == FLOAT and src_size == 16:
-        value = '_mesa_half_to_float(%s)' % value
+        value = 'util_half_to_float(%s)' % value
         src_size = 32
 
     # Special case for float <-> ubytes for more accurate results
@@ -436,7 +436,7 @@
             src_size = 32
 
         if dst_channel.size == 16:
-            value = '_mesa_float_to_float16_rtz(%s)' % value
+            value = 'util_float_to_half_rtz(%s)' % value
         elif dst_channel.size == 64 and src_size < 64:
             value = '(double)%s' % value
 
@@ -715,7 +715,7 @@
     print()
     print('#include "pipe/p_compiler.h"')
     print('#include "util/u_math.h"')
-    print('#include "util/half_float.h"')
+    print('#include "util/u_half.h"')
     print('#include "u_format.h"')
     print('#include "u_format_other.h"')
     print('#include "util/format_srgb.h"')
diff --git a/src/util/format/u_format_tests.c b/src/util/format/u_format_tests.c
index 0cc4de8..0c34a82 100644
--- a/src/util/format/u_format_tests.c
+++ b/src/util/format/u_format_tests.c
@@ -914,11 +914,8 @@
    {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x03FF), UNPACKED_1x1( 6.09756E-5, 0.0, 0.0, 1.0)},
 #endif
 
-   /* This fails with _mesa_float_to_float16_rtz, but passes with _mesa_float_to_float16_rtne. */
-#if 0
    /* Minimum positive denormal */
    {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x0001), UNPACKED_1x1( 5.96046E-8, 0.0, 0.0, 1.0)},
-#endif
 
    /* Min representable value */
    {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xfbff), UNPACKED_1x1(   -65504.0, 0.0, 0.0, 1.0)},
diff --git a/src/util/half_float.c b/src/util/half_float.c
index 05aeac1..aae690a 100644
--- a/src/util/half_float.c
+++ b/src/util/half_float.c
@@ -28,10 +28,10 @@
 #include <math.h>
 #include <assert.h>
 #include "half_float.h"
+#include "util/u_half.h"
 #include "rounding.h"
 #include "softfloat.h"
 #include "macros.h"
-#include "u_math.h"
 
 typedef union { float f; int32_t i; uint32_t u; } fi_type;
 
@@ -54,7 +54,7 @@
  *     result in the same value as if the expression were executed on the GPU.
  */
 uint16_t
-_mesa_float_to_half_slow(float val)
+_mesa_float_to_half(float val)
 {
    const fi_type fi = {val};
    const int flt_m = fi.i & 0x7fffff;
@@ -129,9 +129,9 @@
 }
 
 uint16_t
-_mesa_float_to_float16_rtz_slow(float val)
+_mesa_float_to_float16_rtz(float val)
 {
-    return _mesa_float_to_half_rtz_slow(val);
+    return _mesa_float_to_half_rtz(val);
 }
 
 /**
@@ -140,31 +140,9 @@
  * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
  */
 float
-_mesa_half_to_float_slow(uint16_t val)
+_mesa_half_to_float(uint16_t val)
 {
-   union fi infnan;
-   union fi magic;
-   union fi f32;
-
-   infnan.ui = 0x8f << 23;
-   infnan.f = 65536.0f;
-   magic.ui  = 0xef << 23;
-
-   /* Exponent / Mantissa */
-   f32.ui = (val & 0x7fff) << 13;
-
-   /* Adjust */
-   f32.f *= magic.f;
-   /* XXX: The magic mul relies on denorms being available */
-
-   /* Inf / NaN */
-   if (f32.f >= infnan.f)
-      f32.ui |= 0xff << 23;
-
-   /* Sign */
-   f32.ui |= (uint32_t)(val & 0x8000) << 16;
-
-   return f32.f;
+   return util_half_to_float(val);
 }
 
 /**
diff --git a/src/util/half_float.h b/src/util/half_float.h
index 6f9a405..c9fad9a 100644
--- a/src/util/half_float.h
+++ b/src/util/half_float.h
@@ -28,14 +28,6 @@
 
 #include <stdbool.h>
 #include <stdint.h>
-#include <string.h>
-#include "util/u_cpu_detect.h"
-
-#ifdef USE_F16C
-#include <immintrin.h>
-#define F16C_NEAREST 0
-#define F16C_TRUNCATE 3
-#endif
 
 #ifdef __cplusplus
 extern "C" {
@@ -44,48 +36,18 @@
 #define FP16_ONE     ((uint16_t) 0x3c00)
 #define FP16_ZERO    ((uint16_t) 0)
 
-uint16_t _mesa_float_to_half_slow(float val);
-float _mesa_half_to_float_slow(uint16_t val);
+uint16_t _mesa_float_to_half(float val);
+float _mesa_half_to_float(uint16_t val);
 uint8_t _mesa_half_to_unorm8(uint16_t v);
 uint16_t _mesa_uint16_div_64k_to_half(uint16_t v);
 
 /*
- * _mesa_float_to_float16_rtz_slow is no more than a wrapper to the counterpart
+ * _mesa_float_to_float16_rtz is no more than a wrapper to the counterpart
  * softfloat.h call. Still, softfloat.h conversion API is meant to be kept
  * private. In other words, only use the API published here, instead of
  * calling directly the softfloat.h one.
  */
-uint16_t _mesa_float_to_float16_rtz_slow(float val);
-
-static inline uint16_t
-_mesa_float_to_half(float val)
-{
-#ifdef USE_F16C
-   if (util_cpu_caps.has_f16c)
-      return _cvtss_sh(val, F16C_NEAREST);
-#endif
-   return _mesa_float_to_half_slow(val);
-}
-
-static inline float
-_mesa_half_to_float(uint16_t val)
-{
-#ifdef USE_F16C
-   if (util_cpu_caps.has_f16c)
-      return _cvtsh_ss(val);
-#endif
-   return _mesa_half_to_float_slow(val);
-}
-
-static inline uint16_t
-_mesa_float_to_float16_rtz(float val)
-{
-#ifdef USE_F16C
-   if (util_cpu_caps.has_f16c)
-      return _cvtss_sh(val, F16C_TRUNCATE);
-#endif
-   return _mesa_float_to_float16_rtz_slow(val);
-}
+uint16_t _mesa_float_to_float16_rtz(float val);
 
 static inline uint16_t
 _mesa_float_to_float16_rtne(float val)
diff --git a/src/util/softfloat.c b/src/util/softfloat.c
index 50cf098..365b15b 100644
--- a/src/util/softfloat.c
+++ b/src/util/softfloat.c
@@ -1435,7 +1435,7 @@
  * From f32_to_f16()
  */
 uint16_t
-_mesa_float_to_half_rtz_slow(float val)
+_mesa_float_to_half_rtz(float val)
 {
     const fi_type fi = {val};
     const uint32_t flt_m = fi.u & 0x7fffff;
diff --git a/src/util/softfloat.h b/src/util/softfloat.h
index 2e254e2..4e48c65 100644
--- a/src/util/softfloat.h
+++ b/src/util/softfloat.h
@@ -56,7 +56,7 @@
 double _mesa_double_fma_rtz(double a, double b, double c);
 float _mesa_float_fma_rtz(float a, float b, float c);
 float _mesa_double_to_f32(double x, bool rtz);
-uint16_t _mesa_float_to_half_rtz_slow(float x);
+uint16_t _mesa_float_to_half_rtz(float x);
 
 #ifdef __cplusplus
 } /* extern C */
diff --git a/src/util/tests/format/u_format_test.c b/src/util/tests/format/u_format_test.c
index f4a62a5..d33b41a 100644
--- a/src/util/tests/format/u_format_test.c
+++ b/src/util/tests/format/u_format_test.c
@@ -30,8 +30,7 @@
 #include <stdio.h>
 #include <float.h>
 
-#include "util/half_float.h"
-#include "util/u_math.h"
+#include "util/u_half.h"
 #include "util/format/u_format.h"
 #include "util/format/u_format_tests.h"
 #include "util/format/u_format_s3tc.h"