8265325: Optimize StubRoutines::dpow() for Math.pow(x, 0.5)

Reviewed-by: kvn, neliasso
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_pow.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_pow.cpp
index 61f1865..af8c85b 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86_pow.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86_pow.cpp
@@ -1,5 +1,6 @@
 /*
 * Copyright (c) 2016, Intel Corporation.
+* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
 * Intel Math Library (LIBM) Source Code
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -765,6 +766,16 @@
     0x00000000UL, 0x40000000UL
 };
 
+ATTRIBUTE_ALIGNED(8) juint _DOUBLE0[] =
+{
+    0x00000000UL, 0x00000000UL // two 32-bit words, all bits zero: the 64-bit pattern of +0.0
+};
+
+ATTRIBUTE_ALIGNED(8) juint _DOUBLE0DOT5[] =
+{
+    0x00000000UL, 0x3fe00000UL // low word, high word: 0x3fe0000000000000 is the bit pattern of 0.5
+};
+
 //registers,
 // input: xmm0, xmm1
 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
@@ -789,6 +800,7 @@
   Label L_2TAG_PACKET_52_0_2, L_2TAG_PACKET_53_0_2, L_2TAG_PACKET_54_0_2, L_2TAG_PACKET_55_0_2;
   Label L_2TAG_PACKET_56_0_2;
   Label B1_2, B1_3, B1_5, start;
+  Label L_POW;
 
   assert_different_registers(tmp1, tmp2, eax, ecx, edx);
   jmp(start);
@@ -804,6 +816,8 @@
   address HALFMASK = (address)_HALFMASK;
   address log2 = (address)_log2_pow;
   address DOUBLE2 = (address)_DOUBLE2;
+  address DOUBLE0 = (address)_DOUBLE0;
+  address DOUBLE0DOT5 = (address)_DOUBLE0DOT5;
 
 
   bind(start);
@@ -818,7 +832,17 @@
   mulsd(xmm0, xmm0);
   jmp(B1_5);
 
+  // Special case: pow(x, 0.5) => sqrt(x)
   bind(B1_2);
+  cmp64(tmp1, ExternalAddress(DOUBLE0DOT5)); // NOTE(review): tmp1 presumably holds the raw bits of y (set before B1_2) - confirm
+  jccb(Assembler::notEqual, L_POW); // For pow(x, y), check whether y == 0.5
+  movdq(tmp2, xmm0); // tmp2 = raw 64-bit pattern of x
+  cmp64(tmp2, ExternalAddress(DOUBLE0));
+  jccb(Assembler::less, L_POW); // signed compare of x's bits with 0: sign bit set (x < 0.0, -0.0, -inf) => general pow path
+  sqrtsd(xmm0, xmm0); // reached only when x's sign bit is clear: +0.0, x > 0.0, +inf, or a NaN without the sign bit
+  jmp(B1_5);
+
+  bind(L_POW);
   pextrw(eax, xmm0, 3);
   xorpd(xmm2, xmm2);
   mov64(tmp2, 0x3ff0000000000000);
diff --git a/test/hotspot/jtreg/compiler/intrinsics/math/TestPow0Dot5Opt.java b/test/hotspot/jtreg/compiler/intrinsics/math/TestPow0Dot5Opt.java
new file mode 100644
index 0000000..9ba042e
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/intrinsics/math/TestPow0Dot5Opt.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8265325
+ * @summary test the optimization of pow(x, 0.5)
+ * @requires os.arch=="amd64" | os.arch=="x86_64"
+ * @run main/othervm TestPow0Dot5Opt
+ * @run main/othervm -Xint TestPow0Dot5Opt
+ * @run main/othervm -Xbatch -XX:TieredStopAtLevel=1 TestPow0Dot5Opt
+ * @run main/othervm -Xbatch -XX:-TieredCompilation  TestPow0Dot5Opt
+ */
+
+public class TestPow0Dot5Opt {
+
+  static void test(double a) throws Exception {
+    // Negative inputs are skipped: pow(x, 0.5) isn't replaced with sqrt(x) for x < 0.0
+    if (a < 0.0) return;
+
+    double r1 = Math.sqrt(a);
+    double r2 = Math.pow(a, 0.5);
+    if (r1 != r2) { // numeric comparison, so it does not distinguish -0.0 from +0.0
+      throw new RuntimeException("pow(" + a + ", 0.5), expected: " + r1 + ", actual: " + r2);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    for (int i = 0; i < 10; i++) { // NOTE(review): the sweep is repeated, presumably so test() gets hot enough to be compiled - confirm
+      for (int j = 1; j < 100000; j++) {
+        test(j * 1.0); // whole-number inputs 1.0 .. 99999.0
+        test(1.0 / j); // reciprocals in (0.0, 1.0]
+      }
+    }
+
+    test(0.0);
+
+    // Special case: pow(+0.0, 0.5) = +0.0 (checked on the raw bits, so the sign of zero matters)
+    double r = Math.pow(+0.0, 0.5);
+    if (Double.doubleToRawLongBits(r) != Double.doubleToRawLongBits(0.0)) {
+      throw new RuntimeException("pow(+0.0, 0.5), expected: 0.0, actual: " + r);
+    }
+
+    // Special case: pow(-0.0, 0.5) = +0.0 (checked on the raw bits, so a -0.0 result would fail)
+    r = Math.pow(-0.0, 0.5);
+    if (Double.doubleToRawLongBits(r) != Double.doubleToRawLongBits(0.0)) {
+      throw new RuntimeException("pow(-0.0, 0.5), expected: 0.0, actual: " + r);
+    }
+
+    // Special case: pow(Double.POSITIVE_INFINITY, 0.5) = +Infinity
+    r = Math.pow(Double.POSITIVE_INFINITY, 0.5);
+    if (!(r > 0 && Double.isInfinite(r))) {
+      throw new RuntimeException("pow(+Infinity, 0.5), expected: Infinity, actual: " + r);
+    }
+
+    // Special case: pow(Double.NEGATIVE_INFINITY, 0.5) = +Infinity (the result must be positive)
+    r = Math.pow(Double.NEGATIVE_INFINITY, 0.5);
+    if (!(r > 0 && Double.isInfinite(r))) {
+      throw new RuntimeException("pow(-Infinity, 0.5), expected: Infinity, actual: " + r);
+    }
+
+    // Special case: pow(Double.NaN, 0.5) = NaN
+    r = Math.pow(Double.NaN, 0.5);
+    if (!Double.isNaN(r)) {
+      throw new RuntimeException("pow(NaN, 0.5), expected: NaN, actual: " + r);
+    }
+  }
+
+}
diff --git a/test/micro/org/openjdk/bench/java/lang/MathBench.java b/test/micro/org/openjdk/bench/java/lang/MathBench.java
index b010fcd..27d8033 100644
--- a/test/micro/org/openjdk/bench/java/lang/MathBench.java
+++ b/test/micro/org/openjdk/bench/java/lang/MathBench.java
@@ -60,7 +60,7 @@
     public int int1 = 1, int2 = 2, int42 = 42, int5 = 5;
     public long long1 = 1L, long2 = 2L, long747 = 747L, long13 = 13L;
     public float float1 = 1.0f, float2 = 2.0f, floatNegative99 = -99.0f, float7 = 7.0f, eFloat = 2.718f;
-    public double double1 = 1.0d, double2 = 2.0d, double81 = 81.0d, doubleNegative12 = -12.0d, double4Dot1 = 4.1d;
+    public double double1 = 1.0d, double2 = 2.0d, double81 = 81.0d, doubleNegative12 = -12.0d, double4Dot1 = 4.1d, double0Dot5 = 0.5d;
 
     @Setup
     public void setupValues() {
@@ -409,6 +409,38 @@
     }
 
     @Benchmark
+    public double  powDoubleLoop() { // Math.pow over a 1000 x 1000 grid of bases and exponents
+        double sum = 0.0;
+        for (int i = 0; i < 1000; i++) {
+            for (int j = 0; j < 1000; j++) {
+                sum += i + Math.pow(j * 1.0, i * 1.0); // base j, exponent i: whole numbers only, so the exponent is never 0.5
+            }
+        }
+        return sum; // the accumulated total is the benchmark result
+    }
+
+    @Benchmark
+    public double  powDouble0Dot5() { // single Math.pow call with the exponent read from a field
+        return  Math.pow(double4Dot1, double0Dot5); // fields initialized to 4.1d and 0.5d
+    }
+
+    @Benchmark
+    public double  powDouble0Dot5Const() { // single Math.pow call with the exponent written as a literal
+        return  Math.pow(double4Dot1, 0.5); // base from a field (4.1d), exponent is the constant 0.5
+    }
+
+    @Benchmark
+    public double  powDouble0Dot5Loop() { // same loop shape as powDoubleLoop, but with the exponent fixed at 0.5
+        double sum = 0.0;
+        for (int i = 0; i < 1000; i++) {
+            for (int j = 0; j < 1000; j++) {
+                sum += i + Math.pow(j * 1.0, 0.5); // base j runs over 0.0 .. 999.0, exponent is the literal 0.5
+            }
+        }
+        return sum; // the accumulated total is the benchmark result
+    }
+
+    @Benchmark
     public double  random() {
         return  Math.random();
     }