Make sure poly* types are handled as unsigned when they get printed.
diff --git a/ref_vget_lane.c b/ref_vget_lane.c
index ac3cde8..c41d285 100644
--- a/ref_vget_lane.c
+++ b/ref_vget_lane.c
@@ -38,6 +38,12 @@
   VAR(var, T1, W) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \
   fprintf(ref_file, "%s: %" PRIx##W "\n", "vget"STR(Q)"_lane_"STR(T2##W), VAR(var, T1, W))
 
+  /* Special variant for poly* types, to clear sign bits in output.  */
+#define TEST_VGET_LANE_POLY(Q, T1, T2, W, N, L)				\
+  VAR(var, T1, W) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \
+    fprintf(ref_file, "%s: %" PRIx##W "\n", "vget"STR(Q)"_lane_"STR(T2##W), \
+	    (uint##W##_t)VAR(var, T1, W))
+
   /* Special variant for floating-point */
   union {
     uint32_t var_int32;
@@ -83,8 +89,8 @@
   TEST_VGET_LANE(, uint, u, 16, 4, 2);
   TEST_VGET_LANE(, uint, u, 32, 2, 1);
   TEST_VGET_LANE(, uint, u, 64, 1, 0);
-  TEST_VGET_LANE(, poly, p, 8, 8, 6);
-  TEST_VGET_LANE(, poly, p, 16, 4, 2);
+  TEST_VGET_LANE_POLY(, poly, p, 8, 8, 6);
+  TEST_VGET_LANE_POLY(, poly, p, 16, 4, 2);
   TEST_VGET_LANE_F(, float, f, 32, 2, 1);
 
   TEST_VGET_LANE(q, int, s, 8, 16, 15);
@@ -95,8 +101,8 @@
   TEST_VGET_LANE(q, uint, u, 16, 8, 6);
   TEST_VGET_LANE(q, uint, u, 32, 4, 2);
   TEST_VGET_LANE(q, uint, u, 64, 2, 1);
-  TEST_VGET_LANE(q, poly, p, 8, 16, 14);
-  TEST_VGET_LANE(q, poly, p, 16, 8, 6);
+  TEST_VGET_LANE_POLY(q, poly, p, 8, 16, 14);
+  TEST_VGET_LANE_POLY(q, poly, p, 16, 8, 6);
   TEST_VGET_LANE_F(q, float, f, 32, 4, 3);
 
   fprintf(ref_file, "\n");
diff --git a/ref_vreinterpret.c b/ref_vreinterpret.c
index 925c9ec..e4da704 100644
--- a/ref_vreinterpret.c
+++ b/ref_vreinterpret.c
@@ -45,6 +45,13 @@
 		    VECT_VAR(vector_res, T1, W, N));			\
   DUMP(TEST_MSG, T1, W, N, PRIx##W);
 
+#define TEST_VREINTERPRET_POLY(Q, T1, T2, W, N, TS1, TS2, WS, NS)	\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vector, TS1, WS, NS)); \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),				\
+		    VECT_VAR(vector_res, T1, W, N));			\
+  DUMP_POLY(TEST_MSG, T1, W, N, PRIx##W);
+
 #define TEST_VREINTERPRET_FP(Q, T1, T2, W, N, TS1, TS2, WS, NS)		\
   VECT_VAR(vector_res, T1, W, N) =					\
     vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vector, TS1, WS, NS)); \
@@ -178,26 +185,26 @@
   TEST_VREINTERPRET(, uint, u, 64, 1, poly, p, 16, 4);
 
   /* vreinterpret_p8_xx */
-  TEST_VREINTERPRET(, poly, p, 8, 8, int, s, 8, 8);
-  TEST_VREINTERPRET(, poly, p, 8, 8, int, s, 16, 4);
-  TEST_VREINTERPRET(, poly, p, 8, 8, int, s, 32, 2);
-  TEST_VREINTERPRET(, poly, p, 8, 8, int, s, 64, 1);
-  TEST_VREINTERPRET(, poly, p, 8, 8, uint, u, 8, 8);
-  TEST_VREINTERPRET(, poly, p, 8, 8, uint, u, 16, 4);
-  TEST_VREINTERPRET(, poly, p, 8, 8, uint, u, 32, 2);
-  TEST_VREINTERPRET(, poly, p, 8, 8, uint, u, 64, 1);
-  TEST_VREINTERPRET(, poly, p, 8, 8, poly, p, 16, 4);
+  TEST_VREINTERPRET_POLY(, poly, p, 8, 8, int, s, 8, 8);
+  TEST_VREINTERPRET_POLY(, poly, p, 8, 8, int, s, 16, 4);
+  TEST_VREINTERPRET_POLY(, poly, p, 8, 8, int, s, 32, 2);
+  TEST_VREINTERPRET_POLY(, poly, p, 8, 8, int, s, 64, 1);
+  TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 8, 8);
+  TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 16, 4);
+  TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 32, 2);
+  TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 64, 1);
+  TEST_VREINTERPRET_POLY(, poly, p, 8, 8, poly, p, 16, 4);
 
   /* vreinterpret_p16_xx */
-  TEST_VREINTERPRET(, poly, p, 16, 4, int, s, 8, 8);
-  TEST_VREINTERPRET(, poly, p, 16, 4, int, s, 16, 4);
-  TEST_VREINTERPRET(, poly, p, 16, 4, int, s, 32, 2);
-  TEST_VREINTERPRET(, poly, p, 16, 4, int, s, 64, 1);
-  TEST_VREINTERPRET(, poly, p, 16, 4, uint, u, 8, 8);
-  TEST_VREINTERPRET(, poly, p, 16, 4, uint, u, 16, 4);
-  TEST_VREINTERPRET(, poly, p, 16, 4, uint, u, 32, 2);
-  TEST_VREINTERPRET(, poly, p, 16, 4, uint, u, 64, 1);
-  TEST_VREINTERPRET(, poly, p, 16, 4, poly, p, 8, 8);
+  TEST_VREINTERPRET_POLY(, poly, p, 16, 4, int, s, 8, 8);
+  TEST_VREINTERPRET_POLY(, poly, p, 16, 4, int, s, 16, 4);
+  TEST_VREINTERPRET_POLY(, poly, p, 16, 4, int, s, 32, 2);
+  TEST_VREINTERPRET_POLY(, poly, p, 16, 4, int, s, 64, 1);
+  TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 8, 8);
+  TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 16, 4);
+  TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 32, 2);
+  TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 64, 1);
+  TEST_VREINTERPRET_POLY(, poly, p, 16, 4, poly, p, 8, 8);
 
   /* vreinterpretq_s8_xx */
   TEST_VREINTERPRET(q, int, s, 8, 16, int, s, 16, 8);
@@ -320,8 +327,8 @@
   TEST_VREINTERPRET(, uint, u, 16, 4, float, f, 32, 2);
   TEST_VREINTERPRET(, uint, u, 32, 2, float, f, 32, 2);
   TEST_VREINTERPRET(, uint, u, 64, 1, float, f, 32, 2);
-  TEST_VREINTERPRET(, poly, p, 8, 8, float, f, 32, 2);
-  TEST_VREINTERPRET(, poly, p, 16, 4, float, f, 32, 2);
+  TEST_VREINTERPRET_POLY(, poly, p, 8, 8, float, f, 32, 2);
+  TEST_VREINTERPRET_POLY(, poly, p, 16, 4, float, f, 32, 2);
 
   /* vreinterpretq_xx_f32 */
   TEST_VREINTERPRET(q, int, s, 8, 16, float, f, 32, 4);
@@ -332,8 +339,8 @@
   TEST_VREINTERPRET(q, uint, u, 16, 8, float, f, 32, 4);
   TEST_VREINTERPRET(q, uint, u, 32, 4, float, f, 32, 4);
   TEST_VREINTERPRET(q, uint, u, 64, 2, float, f, 32, 4);
-  TEST_VREINTERPRET(q, poly, p, 8, 16, float, f, 32, 4);
-  TEST_VREINTERPRET(q, poly, p, 16, 8, float, f, 32, 4);
+  TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, float, f, 32, 4);
+  TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, float, f, 32, 4);
 
 #if __ARM_NEON_FP16_INTRINSICS
   /* vreinterpret_f16_xx */
@@ -371,8 +378,8 @@
   TEST_VREINTERPRET(, uint, u, 16, 4, float, f, 16, 4);
   TEST_VREINTERPRET(, uint, u, 32, 2, float, f, 16, 4);
   TEST_VREINTERPRET(, uint, u, 64, 1, float, f, 16, 4);
-  TEST_VREINTERPRET(, poly, p, 8, 8, float, f, 16, 4);
-  TEST_VREINTERPRET(, poly, p, 16, 4, float, f, 16, 4);
+  TEST_VREINTERPRET_POLY(, poly, p, 8, 8, float, f, 16, 4);
+  TEST_VREINTERPRET_POLY(, poly, p, 16, 4, float, f, 16, 4);
   TEST_VREINTERPRET_FP(, float, f, 32, 2, float, f, 16, 4);
 
   /* vreinterpretq_xx_f16 */
@@ -384,8 +391,8 @@
   TEST_VREINTERPRET(q, uint, u, 16, 8, float, f, 16, 8);
   TEST_VREINTERPRET(q, uint, u, 32, 4, float, f, 16, 8);
   TEST_VREINTERPRET(q, uint, u, 64, 2, float, f, 16, 8);
-  TEST_VREINTERPRET(q, poly, p, 8, 16, float, f, 16, 8);
-  TEST_VREINTERPRET(q, poly, p, 16, 8, float, f, 16, 8);
+  TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, float, f, 16, 8);
+  TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, float, f, 16, 8);
   TEST_VREINTERPRET_FP(q, float, f, 32, 4, float, f, 16, 8);
 #endif
 }
diff --git a/stm-arm-neon-ref.h b/stm-arm-neon-ref.h
index debffb6..1500905 100644
--- a/stm-arm-neon-ref.h
+++ b/stm-arm-neon-ref.h
@@ -85,6 +85,18 @@
   fprintf(ref_file, " }\n");						\
   DUMP4GCC(MSG,T,W,N,FMT);
 
+/* Use casts to remove sign bits when printing poly* types.  */
+#define DUMP_POLY(MSG,T,W,N,FMT)					\
+  fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++,		\
+	  STR(VECT_VAR(result, T, W, N)));				\
+  for(i=0; i<N ; i++)							\
+    {									\
+      fprintf(ref_file, "%" FMT ", ",					\
+	      (uint##W##_t)VECT_VAR(result, T, W, N)[i]);		\
+    }									\
+  fprintf(ref_file, " }\n");						\
+  DUMP4GCC(MSG,T,W,N,FMT);
+
 #define DUMP_FP(MSG,T,W,N,FMT)						\
   fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++,		\
 	  STR(VECT_VAR(result, T, W, N)));				\
@@ -450,8 +462,8 @@
   DUMP(test_name, uint, 16, 4, PRIu16);
   DUMP(test_name, uint, 32, 2, PRIu32);
   DUMP(test_name, uint, 64, 1, PRIu64);
-  DUMP(test_name, poly, 8, 8, PRIu8);
-  DUMP(test_name, poly, 16, 4, PRIu16);
+  DUMP_POLY(test_name, poly, 8, 8, PRIu8);
+  DUMP_POLY(test_name, poly, 16, 4, PRIu16);
   DUMP_FP(test_name, float, 32, 2, PRIx32);
 #if __ARM_NEON_FP16_INTRINSICS
   DUMP_FP16(test_name, float, 16, 4, PRIu16);
@@ -465,8 +477,8 @@
   DUMP(test_name, uint, 16, 8, PRIu16);
   DUMP(test_name, uint, 32, 4, PRIu32);
   DUMP(test_name, uint, 64, 2, PRIu64);
-  DUMP(test_name, poly, 8, 16, PRIu8);
-  DUMP(test_name, poly, 16, 8, PRIu16);
+  DUMP_POLY(test_name, poly, 8, 16, PRIu8);
+  DUMP_POLY(test_name, poly, 16, 8, PRIu16);
   DUMP_FP(test_name, float, 32, 4, PRIx32);
 #if __ARM_NEON_FP16_INTRINSICS
   DUMP_FP16(test_name, float, 16, 8, PRIu16);
@@ -489,8 +501,8 @@
   DUMP(test_name, uint, 16, 4, PRIx16);
   DUMP(test_name, uint, 32, 2, PRIx32);
   DUMP(test_name, uint, 64, 1, PRIx64);
-  DUMP(test_name, poly, 8, 8, PRIx8);
-  DUMP(test_name, poly, 16, 4, PRIx16);
+  DUMP_POLY(test_name, poly, 8, 8, PRIx8);
+  DUMP_POLY(test_name, poly, 16, 4, PRIx16);
   DUMP_FP(test_name, float, 32, 2, PRIx32);
 #if __ARM_NEON_FP16_INTRINSICS
   DUMP_FP16(test_name, float, 16, 4, PRIx16);
@@ -504,8 +516,8 @@
   DUMP(test_name, uint, 16, 8, PRIx16);
   DUMP(test_name, uint, 32, 4, PRIx32);
   DUMP(test_name, uint, 64, 2, PRIx64);
-  DUMP(test_name, poly, 8, 16, PRIx8);
-  DUMP(test_name, poly, 16, 8, PRIx16);
+  DUMP_POLY(test_name, poly, 8, 16, PRIx8);
+  DUMP_POLY(test_name, poly, 16, 8, PRIx16);
   DUMP_FP(test_name, float, 32, 4, PRIx32);
 #if __ARM_NEON_FP16_INTRINSICS
   DUMP_FP16(test_name, float, 16, 8, PRIx16);