[builtins] Rounding mode support for addxf3/subxf3

Implement rounding mode support for addxf3/subxf3.

On architectures that implement this support, the builtins read the
corresponding floating-point environment register to apply the correct
rounding. On other architectures, the current behaviour is kept and the
IEEE-754 default rounding mode (to nearest, ties to even) is used.

ARM32/AArch64 support implemented in this change. i386 and AMD64 will
be added in a follow up change.

Change-Id: I422d0840af94285aabdceddc1c20dc14bac7a8ab
diff --git a/lib/builtins/CMakeLists.txt b/lib/builtins/CMakeLists.txt
index 168f3e9..a58bd37 100644
--- a/lib/builtins/CMakeLists.txt
+++ b/lib/builtins/CMakeLists.txt
@@ -94,6 +94,7 @@
   floatunsisf.c
   floatuntidf.c
   floatuntisf.c
+  fp_mode.c
   int_util.c
   lshrdi3.c
   lshrti3.c
@@ -290,6 +291,7 @@
 set(i686_SOURCES ${i686_SOURCES} ${x86_ARCH_SOURCES})
 
 set(arm_SOURCES
+  arm/fp_mode.c
   arm/bswapdi2.S
   arm/bswapsi2.S
   arm/clzdi2.S
@@ -441,7 +443,8 @@
 
 set(aarch64_SOURCES
   ${GENERIC_TF_SOURCES}
-  ${GENERIC_SOURCES})
+  ${GENERIC_SOURCES}
+  aarch64/fp_mode.c)
 
 if (MINGW)
   set(aarch64_SOURCES
diff --git a/lib/builtins/aarch64/fp_mode.c b/lib/builtins/aarch64/fp_mode.c
new file mode 100644
index 0000000..aa81fbc
--- /dev/null
+++ b/lib/builtins/aarch64/fp_mode.c
@@ -0,0 +1,61 @@
+//===----- lib/aarch64/fp_mode.c - Floating-point mode utilities --*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+#include "../fp_mode.h"
+
+// Encodings of the two-bit rounding-mode field read from FPCR at bit 22.
+#define AARCH64_TONEAREST  0x0
+#define AARCH64_UPWARD     0x1
+#define AARCH64_DOWNWARD   0x2
+#define AARCH64_TOWARDZERO 0x3
+#define AARCH64_RMODE_MASK (AARCH64_TONEAREST | AARCH64_UPWARD | \
+                            AARCH64_DOWNWARD | AARCH64_TOWARDZERO)
+#define AARCH64_RMODE_SHIFT 22
+
+// Inexact flag bit that __fe_raise_inexact() sets in FPSR.
+#define AARCH64_INEXACT     0x10
+
+// Returns the rounding mode currently selected in FPCR, or FE_TONEAREST when
+// __ARM_FP is not defined.
+FE_ROUND_MODE __fe_getround(void) {
+#ifdef __ARM_FP
+  uint64_t fpcr;
+  __asm__ __volatile__("mrs  %0, fpcr" : "=r" (fpcr));
+  fpcr = (fpcr >> AARCH64_RMODE_SHIFT) & AARCH64_RMODE_MASK;
+  switch (fpcr) {
+    case AARCH64_UPWARD:
+      return FE_UPWARD;
+    case AARCH64_DOWNWARD:
+      return FE_DOWNWARD;
+    case AARCH64_TOWARDZERO:
+      return FE_TOWARDZERO;
+    case AARCH64_TONEAREST:
+    default:
+      return FE_TONEAREST;
+  }
+#else
+  return FE_TONEAREST;
+#endif
+}
+
+// Sets the inexact flag in FPSR, writing the other bits back as they were
+// read. Does nothing when __ARM_FP is not defined. Always returns 0.
+int __fe_raise_inexact(void) {
+#ifdef __ARM_FP
+  uint64_t fpsr;
+  __asm__ __volatile__("mrs  %0, fpsr" : "=r" (fpsr));
+  // MSR to a system register only accepts a register operand, so the
+  // constraint must not offer the compiler an immediate alternative.
+  __asm__ __volatile__("msr  fpsr, %0" : : "r" (fpsr | AARCH64_INEXACT));
+  return 0;
+#else
+  return 0;
+#endif
+}
diff --git a/lib/builtins/adddf3.c b/lib/builtins/adddf3.c
index 9a39013..f616192 100644
--- a/lib/builtins/adddf3.c
+++ b/lib/builtins/adddf3.c
@@ -7,8 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements double-precision soft-float addition with the IEEE-754
-// default rounding (to nearest, ties to even).
+// This file implements double-precision soft-float addition.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/builtins/addsf3.c b/lib/builtins/addsf3.c
index c5c1a41..af92223 100644
--- a/lib/builtins/addsf3.c
+++ b/lib/builtins/addsf3.c
@@ -7,8 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements single-precision soft-float addition with the IEEE-754
-// default rounding (to nearest, ties to even).
+// This file implements single-precision soft-float addition.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/builtins/addtf3.c b/lib/builtins/addtf3.c
index e4bbe02..8e21da6 100644
--- a/lib/builtins/addtf3.c
+++ b/lib/builtins/addtf3.c
@@ -7,8 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements quad-precision soft-float addition with the IEEE-754
-// default rounding (to nearest, ties to even).
+// This file implements quad-precision soft-float addition.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/builtins/arm/fp_mode.c b/lib/builtins/arm/fp_mode.c
new file mode 100644
index 0000000..8fecee8
--- /dev/null
+++ b/lib/builtins/arm/fp_mode.c
@@ -0,0 +1,62 @@
+//===----- lib/arm/fp_mode.c - Floating-point mode utilities ------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+#include "../fp_mode.h"
+
+// Encodings of the two-bit rounding-mode field read from FPSCR at bit 22.
+#define ARM_TONEAREST  0x0
+#define ARM_UPWARD     0x1
+#define ARM_DOWNWARD   0x2
+#define ARM_TOWARDZERO 0x3
+#define ARM_RMODE_MASK (ARM_TONEAREST | ARM_UPWARD | \
+                        ARM_DOWNWARD | ARM_TOWARDZERO)
+#define ARM_RMODE_SHIFT 22
+
+// FPSCR.IXC, the cumulative inexact flag, is bit 4. (Bit 12, 0x1000, is IXE,
+// the inexact trap-enable bit, and must not be set here.)
+#define ARM_INEXACT     0x10
+
+// Returns the rounding mode currently selected in FPSCR, or FE_TONEAREST when
+// __ARM_FP is not defined.
+FE_ROUND_MODE __fe_getround(void) {
+#ifdef __ARM_FP
+  uint32_t fpscr;
+  __asm__ __volatile__("vmrs  %0, fpscr" : "=r" (fpscr));
+  fpscr = (fpscr >> ARM_RMODE_SHIFT) & ARM_RMODE_MASK;
+  switch (fpscr) {
+    case ARM_UPWARD:
+      return FE_UPWARD;
+    case ARM_DOWNWARD:
+      return FE_DOWNWARD;
+    case ARM_TOWARDZERO:
+      return FE_TOWARDZERO;
+    case ARM_TONEAREST:
+    default:
+      return FE_TONEAREST;
+  }
+#else
+  return FE_TONEAREST;
+#endif
+}
+
+// Sets the inexact flag in FPSCR, writing the other bits back as they were
+// read. Does nothing when __ARM_FP is not defined. Always returns 0.
+int __fe_raise_inexact(void) {
+#ifdef __ARM_FP
+  uint32_t fpscr;
+  __asm__ __volatile__("vmrs  %0, fpscr" : "=r" (fpscr));
+  // VMSR only accepts a core register as its source, so the constraint must
+  // not offer the compiler an immediate alternative.
+  __asm__ __volatile__("vmsr  fpscr, %0" : : "r" (fpscr | ARM_INEXACT));
+  return 0;
+#else
+  return 0;
+#endif
+}
diff --git a/lib/builtins/fp_add_impl.inc b/lib/builtins/fp_add_impl.inc
index b47be1b..5d006dc 100644
--- a/lib/builtins/fp_add_impl.inc
+++ b/lib/builtins/fp_add_impl.inc
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "fp_lib.h"
+#include "fp_mode.h"
 
 static __inline fp_t __addXf3__(fp_t a, fp_t b) {
     rep_t aRep = toRep(a);
@@ -138,7 +139,21 @@
 
     // Final rounding.  The result may overflow to infinity, but that is the
     // correct result in that case.
-    if (roundGuardSticky > 0x4) result++;
-    if (roundGuardSticky == 0x4) result += result & 1;
+    // Round according to the mode reported by __fe_getround(). A nonzero
+    // roundGuardSticky is treated as an inexact result.
+    const FE_ROUND_MODE roundingMode = __fe_getround();
+    const int isInexact = roundGuardSticky != 0;
+    if (roundingMode == FE_TONEAREST) {
+      // Above the halfway point round up; exactly halfway rounds to even.
+      if (roundGuardSticky > 0x4) result++;
+      else if (roundGuardSticky == 0x4) result += result & 1;
+    } else if (roundingMode == FE_DOWNWARD) {
+      // Bump the magnitude only when resultSign is set.
+      if (resultSign && isInexact) result++;
+    } else if (roundingMode == FE_UPWARD) {
+      // Bump the magnitude only when resultSign is clear.
+      if (!resultSign && isInexact) result++;
+    } // FE_TOWARDZERO: keep the truncated result as is.
+    if (isInexact) __fe_raise_inexact();
     return fromRep(result);
 }
diff --git a/lib/builtins/fp_mode.c b/lib/builtins/fp_mode.c
new file mode 100644
index 0000000..9b82830
--- /dev/null
+++ b/lib/builtins/fp_mode.c
@@ -0,0 +1,25 @@
+//===----- lib/fp_mode.c - Floating-point environment utilities ---*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a default implementation of fp_mode.h for architectures
+// that do not support or do not have an implementation of floating point
+// environment mode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_mode.h"
+
+// IEEE-754 default rounding (to nearest, ties to even).
+FE_ROUND_MODE __fe_getround(void) {
+  return FE_TONEAREST;
+}
+
+// No floating-point status flag is touched here; always returns 0.
+int __fe_raise_inexact(void) {
+  return 0;
+}
diff --git a/lib/builtins/fp_mode.h b/lib/builtins/fp_mode.h
new file mode 100644
index 0000000..51bec04
--- /dev/null
+++ b/lib/builtins/fp_mode.h
@@ -0,0 +1,36 @@
+//===----- lib/fp_mode.h - Floating-point environment utilities ---*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is not part of the interface of this library.
+//
+// This file defines an interface for accessing hardware floating point
+// environment mode.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_MODE_H
+#define FP_MODE_H
+
+// Rounding directions reported by __fe_getround().
+// NOTE(review): these enumerator names are also macro names in <fenv.h>, so
+// including <fenv.h> before this header in the same translation unit breaks
+// the enum definition. Consider a library-specific prefix.
+typedef enum {
+  FE_TONEAREST,
+  FE_DOWNWARD,
+  FE_UPWARD,
+  FE_TOWARDZERO
+} FE_ROUND_MODE;
+
+// Returns the rounding mode currently in effect.
+FE_ROUND_MODE __fe_getround(void);
+
+// Raises the inexact floating-point exception flag where the target
+// implementation supports it.
+int __fe_raise_inexact(void);
+
+#endif // FP_MODE_H
diff --git a/lib/builtins/subdf3.c b/lib/builtins/subdf3.c
index a892fa6..72b3b1a 100644
--- a/lib/builtins/subdf3.c
+++ b/lib/builtins/subdf3.c
@@ -7,8 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements double-precision soft-float subtraction with the
-// IEEE-754 default rounding (to nearest, ties to even).
+// This file implements double-precision soft-float subtraction.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/builtins/subsf3.c b/lib/builtins/subsf3.c
index 4b27861..9974f21 100644
--- a/lib/builtins/subsf3.c
+++ b/lib/builtins/subsf3.c
@@ -7,8 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements single-precision soft-float subtraction with the
-// IEEE-754 default rounding (to nearest, ties to even).
+// This file implements single-precision soft-float subtraction.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/builtins/subtf3.c b/lib/builtins/subtf3.c
index 609b816..22871cb 100644
--- a/lib/builtins/subtf3.c
+++ b/lib/builtins/subtf3.c
@@ -7,8 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements quad-precision soft-float subtraction with the
-// IEEE-754 default rounding (to nearest, ties to even).
+// This file implements quad-precision soft-float subtraction.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/test/builtins/Unit/addtf3_test.c b/test/builtins/Unit/addtf3_test.c
index 57a4729..afbca12 100644
--- a/test/builtins/Unit/addtf3_test.c
+++ b/test/builtins/Unit/addtf3_test.c
@@ -12,11 +12,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "int_lib.h"
+#include <fenv.h>
 #include <stdio.h>
 
 #if __LDBL_MANT_DIG__ == 113
 
+#include "int_lib.h"
 #include "fp_test.h"
 
 // Returns: a + b
@@ -75,6 +76,26 @@
                      UINT64_C(0x61e58dd6c51eb77c)))
         return 1;
 
+#if (defined(__arm__) || defined(__aarch64__)) && defined(__ARM_FP)
+    // Rounding mode tests on supported architectures. 1234 + 0.01 is inexact
+    // in binary128 and lies below the midpoint of its two neighbours, so the
+    // expected values (exact hex-float literals) differ only in the last bit.
+    long double m = 1234.0L, n = 0.01L;
+    fesetround(FE_UPWARD);
+    if (__addtf3(m, n) != 0x1.3480a3d70a3d70a3d70a3d70a3d8p+10L)
+        return 1;
+    fesetround(FE_DOWNWARD);
+    if (__addtf3(m, n) != 0x1.3480a3d70a3d70a3d70a3d70a3d7p+10L)
+        return 1;
+    fesetround(FE_TOWARDZERO);
+    if (__addtf3(m, n) != 0x1.3480a3d70a3d70a3d70a3d70a3d7p+10L)
+        return 1;
+
+    fesetround(FE_TONEAREST);
+    if (__addtf3(m, n) != 0x1.3480a3d70a3d70a3d70a3d70a3d7p+10L)
+        return 1;
+#endif
+
 #else
     printf("skipped\n");
 
diff --git a/test/builtins/Unit/subtf3_test.c b/test/builtins/Unit/subtf3_test.c
index 771242b..96afd57 100644
--- a/test/builtins/Unit/subtf3_test.c
+++ b/test/builtins/Unit/subtf3_test.c
@@ -12,6 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <fenv.h>
 #include <stdio.h>
 
 #if __LDBL_MANT_DIG__ == 113
@@ -68,6 +69,26 @@
                      UINT64_C(0xa44a7bca780a166c)))
         return 1;
 
+#if (defined(__arm__) || defined(__aarch64__)) && defined(__ARM_FP)
+    // Rounding mode tests on supported architectures. 1234 - 0.01 is inexact
+    // in binary128 and lies above the midpoint of its two neighbours, so the
+    // expected values (exact hex-float literals) differ only in the last bit.
+    long double m = 1234.0L, n = 0.01L;
+    fesetround(FE_UPWARD);
+    if (__subtf3(m, n) != 0x1.347f5c28f5c28f5c28f5c28f5c29p+10L)
+        return 1;
+    fesetround(FE_DOWNWARD);
+    if (__subtf3(m, n) != 0x1.347f5c28f5c28f5c28f5c28f5c28p+10L)
+        return 1;
+    fesetround(FE_TOWARDZERO);
+    if (__subtf3(m, n) != 0x1.347f5c28f5c28f5c28f5c28f5c28p+10L)
+        return 1;
+
+    fesetround(FE_TONEAREST);
+    if (__subtf3(m, n) != 0x1.347f5c28f5c28f5c28f5c28f5c29p+10L)
+        return 1;
+#endif
+
 #else
     printf("skipped\n");