Recognize signed saturation in single clipping.

Rationale:
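More saturation is better! A signed add/sub with a constant that is
clipped on one side only (e.g. MIN(r + c, 127) for c > 0, or
MAX(r + c, -128) for c < 0) is still a saturating operation, since
the other bound can never be exceeded. Recognizing these forms (also
when a redundant second clip is present) lets such loops vectorize.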

Bug: b/74026074

Test: test-art-host,target
Change-Id: Ib99e8965f26d96d956bcd3dbc7eb17b6c0050a24
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index e1fb7ac..758aca2 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -358,40 +358,92 @@
   return instruction;
 }
 
-// Accept various saturated addition forms.
-static bool IsSaturatedAdd(DataType::Type type, int64_t lo, int64_t hi, bool is_unsigned) {
-  //     MIN(r + s, 255)        => SAT_ADD_unsigned
-  // MAX(MIN(r + s, 127), -128) => SAT_ADD_signed etc.
+// Set value range for type (or fail).
+static bool CanSetRange(DataType::Type type,
+                        /*out*/ int64_t* uhi,
+                        /*out*/ int64_t* slo,
+                        /*out*/ int64_t* shi) {
   if (DataType::Size(type) == 1) {
-    return is_unsigned
-        ? (lo <= 0 && hi == std::numeric_limits<uint8_t>::max())
-        : (lo == std::numeric_limits<int8_t>::min() &&
-           hi == std::numeric_limits<int8_t>::max());
+    *uhi = std::numeric_limits<uint8_t>::max();
+    *slo = std::numeric_limits<int8_t>::min();
+    *shi = std::numeric_limits<int8_t>::max();
+    return true;
   } else if (DataType::Size(type) == 2) {
-    return is_unsigned
-        ? (lo <= 0 && hi == std::numeric_limits<uint16_t>::max())
-        : (lo == std::numeric_limits<int16_t>::min() &&
-           hi == std::numeric_limits<int16_t>::max());
+    *uhi = std::numeric_limits<uint16_t>::max();
+    *slo = std::numeric_limits<int16_t>::min();
+    *shi = std::numeric_limits<int16_t>::max();
+    return true;
   }
   return false;
 }
 
-// Accept various saturated subtraction forms.
-static bool IsSaturatedSub(DataType::Type type, int64_t lo, int64_t hi, bool is_unsigned) {
-  //     MAX(r - s, 0)          => SAT_SUB_unsigned
-  // MIN(MAX(r - s, -128), 127) => SAT_ADD_signed etc.
-  if (DataType::Size(type) == 1) {
-    return is_unsigned
-        ? (lo == 0 && hi >= std::numeric_limits<uint8_t>::max())
-        : (lo == std::numeric_limits<int8_t>::min() &&
-           hi == std::numeric_limits<int8_t>::max());
-  } else if (DataType::Size(type) == 2) {
-    return is_unsigned
-        ? (lo == 0 && hi >= std::numeric_limits<uint16_t>::min())
-        : (lo == std::numeric_limits<int16_t>::min() &&
-           hi == std::numeric_limits<int16_t>::max());
+// Accept various saturated addition forms.
+static bool IsSaturatedAdd(HInstruction* clippee,
+                           DataType::Type type,
+                           int64_t lo,
+                           int64_t hi,
+                           bool is_unsigned) {
+  int64_t ulo = 0, uhi = 0, slo = 0, shi = 0;
+  if (!CanSetRange(type, &uhi, &slo, &shi)) {
+    return false;
   }
-  return false;
+  // Tighten the range for signed single clipping on constant.
+  if (!is_unsigned) {
+    int64_t c = 0;
+    HInstruction* notused = nullptr;
+    if (IsAddConst(clippee, &notused, &c)) {
+      // For c in proper range and narrower operand r:
+      //    MIN(r + c,  127) c > 0
+      // or MAX(r + c, -128) c < 0 (and possibly redundant bound).
+      if (0 < c && c <= shi && hi == shi) {
+        if (lo <= (slo + c)) {
+          return true;
+        }
+      } else if (slo <= c && c < 0 && lo == slo) {
+        if (hi >= (shi + c)) {
+          return true;
+        }
+      }
+    }
+  }
+  // Detect for narrower operands r and s:
+  //     MIN(r + s, 255)        => SAT_ADD_unsigned
+  // MAX(MIN(r + s, 127), -128) => SAT_ADD_signed.
+  return is_unsigned ? (lo <= ulo && hi == uhi) : (lo == slo && hi == shi);
+}
+
+// Accept various saturated subtraction forms.
+static bool IsSaturatedSub(HInstruction* clippee,
+                           DataType::Type type,
+                           int64_t lo,
+                           int64_t hi,
+                           bool is_unsigned) {
+  int64_t ulo = 0, uhi = 0, slo = 0, shi = 0;
+  if (!CanSetRange(type, &uhi, &slo, &shi)) {
+    return false;
+  }
+  // Tighten the range for signed single clipping on constant.
+  if (!is_unsigned) {
+    int64_t c = 0;
+    if (IsInt64AndGet(clippee->InputAt(0), /*out*/ &c)) {
+      // For c in proper range and narrower operand r:
+      //    MIN(c - r,  127) c > 0
+      // or MAX(c - r, -128) c < 0 (and possibly redundant bound).
+      if (0 < c && c <= shi && hi == shi) {
+        if (lo <= (c - shi)) {
+          return true;
+        }
+      } else if (slo <= c && c < 0 && lo == slo) {
+        if (hi >= (c - slo)) {
+          return true;
+        }
+      }
+    }
+  }
+  // Detect for narrower operands r and s:
+  //     MAX(r - s, 0)          => SAT_SUB_unsigned
+  // MIN(MAX(r - s, -128), 127) => SAT_SUB_signed.
+  return is_unsigned ? (lo == ulo && hi >= uhi) : (lo == slo && hi == shi);
 }
 
 // Detect reductions of the following forms,
@@ -1909,8 +1961,8 @@
   HInstruction* s = nullptr;
   bool is_unsigned = false;
   if (IsNarrowerOperands(clippee->InputAt(0), clippee->InputAt(1), type, &r, &s, &is_unsigned) &&
-      (is_add ? IsSaturatedAdd(type, lo, hi, is_unsigned)
-              : IsSaturatedSub(type, lo, hi, is_unsigned))) {
+      (is_add ? IsSaturatedAdd(clippee, type, lo, hi, is_unsigned)
+              : IsSaturatedSub(clippee, type, lo, hi, is_unsigned))) {
     DCHECK(r != nullptr);
     DCHECK(s != nullptr);
   } else {
diff --git a/test/678-checker-simd-saturation/src/Main.java b/test/678-checker-simd-saturation/src/Main.java
index 33a6f5e..d123cc2 100644
--- a/test/678-checker-simd-saturation/src/Main.java
+++ b/test/678-checker-simd-saturation/src/Main.java
@@ -200,6 +200,110 @@
   }
 
   //
+  // Single clipping signed 8-bit saturation.
+  //
+
+  /// CHECK-START-{ARM,ARM64}: void Main.satAddPConstSByte(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar                   loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>]      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>  VecSaturationAdd [<<Get2>>,<<Get1>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Add>>]  loop:<<Loop>> outer_loop:none
+  public static void satAddPConstSByte(byte[] a, byte[] b) {
+    int n = Math.min(a.length, b.length);
+    for (int i = 0; i < n; i++) {
+      b[i] = (byte) Math.min(a[i] + 15, 127);
+    }
+  }
+
+  /// CHECK-START-{ARM,ARM64}: void Main.satAddNConstSByte(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar                   loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>]      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>  VecSaturationAdd [<<Get2>>,<<Get1>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Add>>]  loop:<<Loop>> outer_loop:none
+  public static void satAddNConstSByte(byte[] a, byte[] b) {
+    int n = Math.min(a.length, b.length);
+    for (int i = 0; i < n; i++) {
+      b[i] = (byte) Math.max(a[i] - 15, -128);
+    }
+  }
+
+  /// CHECK-START-{ARM,ARM64}: void Main.satSubPConstSByte(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar                   loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>]      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>  VecSaturationSub [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Add>>]  loop:<<Loop>> outer_loop:none
+  public static void satSubPConstSByte(byte[] a, byte[] b) {
+    int n = Math.min(a.length, b.length);
+    for (int i = 0; i < n; i++) {
+      b[i] = (byte) Math.min(15 - a[i], 127);
+    }
+  }
+
+  /// CHECK-START-{ARM,ARM64}: void Main.satSubNConstSByte(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar                   loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>]      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>  VecSaturationSub [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Add>>]  loop:<<Loop>> outer_loop:none
+  public static void satSubNConstSByte(byte[] a, byte[] b) {
+    int n = Math.min(a.length, b.length);
+    for (int i = 0; i < n; i++) {
+      b[i] = (byte) Math.max(-15 - a[i], -128);
+    }
+  }
+
+  //
+  // Single clipping signed 16-bit saturation.
+  //
+
+  /// CHECK-START-{ARM,ARM64}: void Main.satAddPConstSShort(short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar                   loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>]      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>  VecSaturationAdd [<<Get2>>,<<Get1>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Add>>]  loop:<<Loop>> outer_loop:none
+  public static void satAddPConstSShort(short[] a, short[] b) {
+    int n = Math.min(a.length, b.length);
+    for (int i = 0; i < n; i++) {
+      b[i] = (short) Math.min(a[i] + 15, 32767);
+    }
+  }
+
+  /// CHECK-START-{ARM,ARM64}: void Main.satAddNConstSShort(short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar                   loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>]      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>  VecSaturationAdd [<<Get2>>,<<Get1>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Add>>]  loop:<<Loop>> outer_loop:none
+  public static void satAddNConstSShort(short[] a, short[] b) {
+    int n = Math.min(a.length, b.length);
+    for (int i = 0; i < n; i++) {
+      b[i] = (short) Math.max(a[i] - 15, -32768);
+    }
+  }
+
+  /// CHECK-START-{ARM,ARM64}: void Main.satSubPConstSShort(short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar                   loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>]      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>  VecSaturationSub [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Add>>]  loop:<<Loop>> outer_loop:none
+  public static void satSubPConstSShort(short[] a, short[] b) {
+    int n = Math.min(a.length, b.length);
+    for (int i = 0; i < n; i++) {
+      b[i] = (short) Math.min(15 - a[i], 32767);
+    }
+  }
+
+  /// CHECK-START-{ARM,ARM64}: void Main.satSubNConstSShort(short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar                   loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>]      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>  VecSaturationSub [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Add>>]  loop:<<Loop>> outer_loop:none
+  public static void satSubNConstSShort(short[] a, short[] b) {
+    int n = Math.min(a.length, b.length);
+    for (int i = 0; i < n; i++) {
+      b[i] = (short) Math.max(-15 - a[i], -32768);
+    }
+  }
+
+  //
   // Alternatives.
   //
 
@@ -257,6 +361,87 @@
     }
   }
 
+  /// CHECK-START-{ARM,ARM64}: void Main.usatAlt1(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>]      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>  VecSaturationAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Add>>]  loop:<<Loop>>      outer_loop:none
+  public static void usatAlt1(short[] a, short[] b, short[] c) {
+    int n = Math.min(a.length, Math.min(b.length, c.length));
+    for (int i = 0; i < n; i++) {
+      int t = (0xffff & a[i]) + (0xffff & b[i]);
+      c[i] = (short) (t <= 65535 ? t : 65535);
+    }
+  }
+
+  /// CHECK-START-{ARM,ARM64}: void Main.usatAlt2(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>]      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>  VecSaturationAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Add>>]  loop:<<Loop>>      outer_loop:none
+  public static void usatAlt2(short[] a, short[] b, short[] c) {
+    int n = Math.min(a.length, Math.min(b.length, c.length));
+    for (int i = 0; i < n; i++) {
+      int t = (a[i] & 0xffff) + (b[i] & 0xffff);
+      c[i] = (short) (t < 65535 ? t : 65535);
+    }
+  }
+
+  /// CHECK-START-{ARM,ARM64}: void Main.usatAlt3(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>]      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>  VecSaturationAdd [<<Get2>>,<<Get1>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Add>>]  loop:<<Loop>>      outer_loop:none
+  public static void usatAlt3(short[] a, short[] b, short[] c) {
+    int n = Math.min(a.length, Math.min(b.length, c.length));
+    for (int i = 0; i < n; i++) {
+      int x = (a[i] & 0xffff);
+      int y = (b[i] & 0xffff);
+      int t = y + x ;
+      if (t >= 65535) t = 65535;
+      c[i] = (short) t;
+    }
+  }
+
+  /// CHECK-START-{ARM,ARM64}: void Main.usatAlt4(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>]      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>  VecSaturationAdd [<<Get2>>,<<Get1>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Add>>]  loop:<<Loop>>      outer_loop:none
+  public static void usatAlt4(short[] a, short[] b, short[] c) {
+    int n = Math.min(a.length, Math.min(b.length, c.length));
+    for (int i = 0; i < n; i++) {
+      int x = (a[i] & 0xffff);
+      int y = (b[i] & 0xffff);
+      int t = y + x ;
+      if (t > 65535) t = 65535;
+      c[i] = (short) t;
+    }
+  }
+
+  /// CHECK-START-{ARM,ARM64}: void Main.satRedundantClip(short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar                   loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>]      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>  VecSaturationAdd [<<Get2>>,<<Get1>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Add>>]  loop:<<Loop>> outer_loop:none
+  public static void satRedundantClip(short[] a, short[] b) {
+    int n = Math.min(a.length, b.length);
+    for (int i = 0; i < n; i++) {
+      // Max clipping redundant.
+      b[i] = (short) Math.max(Math.min(a[i] + 15, 32767), -32768 + 15);
+    }
+  }
+
+  /// CHECK-START: void Main.satNonRedundantClip(short[], short[]) loop_optimization (after)
+  /// CHECK-NOT: VecSaturationAdd
+  public static void satNonRedundantClip(short[] a, short[] b) {
+    int n = Math.min(a.length, b.length);
+    for (int i = 0; i < n; i++) {
+      // Max clipping not redundant (one off).
+      b[i] = (short) Math.max(Math.min(a[i] + 15, 32767), -32768 + 16);
+    }
+  }
+
   //
   // Test drivers.
   //
@@ -297,6 +482,27 @@
       byte e = (byte) Math.max(Math.min(b1[i] - b2[i], 127), -128);
       expectEquals(e, out[i]);
     }
+    // Single clipping.
+    satAddPConstSByte(b1, out);
+    for (int i = 0; i < m; i++) {
+      byte e = (byte) Math.min(b1[i] + 15, 127);
+      expectEquals(e, out[i]);
+    }
+    satAddNConstSByte(b1, out);
+    for (int i = 0; i < m; i++) {
+      byte e = (byte) Math.max(b1[i] - 15, -128);
+      expectEquals(e, out[i]);
+    }
+    satSubPConstSByte(b1, out);
+    for (int i = 0; i < m; i++) {
+      byte e = (byte) Math.min(15 - b1[i], 127);
+      expectEquals(e, out[i]);
+    }
+    satSubNConstSByte(b1, out);
+    for (int i = 0; i < m; i++) {
+      byte e = (byte) Math.max(-15 - b1[i], -128);
+      expectEquals(e, out[i]);
+    }
   }
 
   private static void test16Bit() {
@@ -357,6 +563,27 @@
       short e = (short) Math.max(Math.min(s1[i] - s2[i], 32767), -32768);
       expectEquals(e, out[i]);
     }
+    // Single clipping.
+    satAddPConstSShort(s1, out);
+    for (int i = 0; i < m; i++) {
+      short e = (short) Math.min(s1[i] + 15, 32767);
+      expectEquals(e, out[i]);
+    }
+    satAddNConstSShort(s1, out);
+    for (int i = 0; i < m; i++) {
+      short e = (short) Math.max(s1[i] - 15, -32768);
+      expectEquals(e, out[i]);
+    }
+    satSubPConstSShort(s1, out);
+    for (int i = 0; i < m; i++) {
+      short e = (short) Math.min(15 - s1[i], 32767);
+      expectEquals(e, out[i]);
+    }
+    satSubNConstSShort(s1, out);
+    for (int i = 0; i < m; i++) {
+      short e = (short) Math.max(-15 - s1[i], -32768);
+      expectEquals(e, out[i]);
+    }
     // Alternatives.
     satAlt1(s1, s2, out);
     for (int i = 0; i < m; i++) {
@@ -373,6 +600,36 @@
       short e = (short) Math.max(Math.min(s1[i] + s2[i], 32767), -32768);
       expectEquals(e, out[i]);
     }
+    usatAlt1(s1, s2, out);
+    for (int i = 0; i < m; i++) {
+      short e = (short) Math.min((s1[i] & 0xffff) + (s2[i] & 0xffff), 65535);
+      expectEquals(e, out[i]);
+    }
+    usatAlt2(s1, s2, out);
+    for (int i = 0; i < m; i++) {
+      short e = (short) Math.min((s1[i] & 0xffff) + (s2[i] & 0xffff), 65535);
+      expectEquals(e, out[i]);
+    }
+    usatAlt3(s1, s2, out);
+    for (int i = 0; i < m; i++) {
+      short e = (short) Math.min((s1[i] & 0xffff) + (s2[i] & 0xffff), 65535);
+      expectEquals(e, out[i]);
+    }
+    usatAlt4(s1, s2, out);
+    for (int i = 0; i < m; i++) {
+      short e = (short) Math.min((s1[i] & 0xffff) + (s2[i] & 0xffff), 65535);
+      expectEquals(e, out[i]);
+    }
+    satRedundantClip(s1, out);
+    for (int i = 0; i < m; i++) {
+      short e = (short) Math.min(s1[i] + 15, 32767);
+      expectEquals(e, out[i]);
+    }
+    satNonRedundantClip(s1, out);
+    for (int i = 0; i < m; i++) {
+      short e = (short) Math.max(Math.min(s1[i] + 15, 32767), -32752);
+      expectEquals(e, out[i]);
+    }
   }
 
   public static void main(String[] args) {