8178800: compiler/c2/PolynomialRoot.java fails on Xeon Phi linux host with UseAVX=3
Summary: upper register bank support added for novl machines that emit EVEX
Reviewed-by: kvn, thartmann
diff --git a/src/cpu/x86/vm/macroAssembler_x86.cpp b/src/cpu/x86/vm/macroAssembler_x86.cpp
index f2c8393..6b7f947 100644
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp
@@ -4134,28 +4134,33 @@
if ((dst_enc < 16) && (nds_enc < 16)) {
vandps(dst, nds, negate_field, vector_len);
} else if ((src_enc < 16) && (dst_enc < 16)) {
- movss(src, nds);
+ evmovdqul(src, nds, Assembler::AVX_512bit);
vandps(dst, src, negate_field, vector_len);
} else if (src_enc < 16) {
- movss(src, nds);
+ evmovdqul(src, nds, Assembler::AVX_512bit);
vandps(src, src, negate_field, vector_len);
- movss(dst, src);
+ evmovdqul(dst, src, Assembler::AVX_512bit);
} else if (dst_enc < 16) {
- movdqu(src, xmm0);
- movss(xmm0, nds);
+ evmovdqul(src, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, nds, Assembler::AVX_512bit);
vandps(dst, xmm0, negate_field, vector_len);
- movdqu(xmm0, src);
- } else if (nds_enc < 16) {
- movdqu(src, xmm0);
- vandps(xmm0, nds, negate_field, vector_len);
- movss(dst, xmm0);
- movdqu(xmm0, src);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
} else {
- movdqu(src, xmm0);
- movss(xmm0, nds);
- vandps(xmm0, xmm0, negate_field, vector_len);
- movss(dst, xmm0);
- movdqu(xmm0, src);
+ if (src_enc != dst_enc) {
+ evmovdqul(src, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+ vandps(xmm0, xmm0, negate_field, vector_len);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+ vandps(xmm0, xmm0, negate_field, vector_len);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
}
}
@@ -4166,28 +4171,33 @@
if ((dst_enc < 16) && (nds_enc < 16)) {
vandpd(dst, nds, negate_field, vector_len);
} else if ((src_enc < 16) && (dst_enc < 16)) {
- movsd(src, nds);
+ evmovdqul(src, nds, Assembler::AVX_512bit);
vandpd(dst, src, negate_field, vector_len);
} else if (src_enc < 16) {
- movsd(src, nds);
+ evmovdqul(src, nds, Assembler::AVX_512bit);
vandpd(src, src, negate_field, vector_len);
- movsd(dst, src);
+ evmovdqul(dst, src, Assembler::AVX_512bit);
} else if (dst_enc < 16) {
- movdqu(src, xmm0);
- movsd(xmm0, nds);
+ evmovdqul(src, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, nds, Assembler::AVX_512bit);
vandpd(dst, xmm0, negate_field, vector_len);
- movdqu(xmm0, src);
- } else if (nds_enc < 16) {
- movdqu(src, xmm0);
- vandpd(xmm0, nds, negate_field, vector_len);
- movsd(dst, xmm0);
- movdqu(xmm0, src);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
} else {
- movdqu(src, xmm0);
- movsd(xmm0, nds);
- vandpd(xmm0, xmm0, negate_field, vector_len);
- movsd(dst, xmm0);
- movdqu(xmm0, src);
+ if (src_enc != dst_enc) {
+ evmovdqul(src, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+ vandpd(xmm0, xmm0, negate_field, vector_len);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+ vandpd(xmm0, xmm0, negate_field, vector_len);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
}
}
@@ -4934,6 +4944,24 @@
}
}
+void MacroAssembler::pshufd(XMMRegister dst, Address src, int mode) {
+ if (VM_Version::supports_avx512vl()) {
+ Assembler::pshufd(dst, src, mode);
+ } else {
+ int dst_enc = dst->encoding();
+ if (dst_enc < 16) {
+ Assembler::pshufd(dst, src, mode);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ Assembler::pshufd(xmm0, src, mode);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+ }
+}
+
// This instruction exists within macros, ergo we cannot control its input
// when emitted through those patterns.
void MacroAssembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
diff --git a/src/cpu/x86/vm/macroAssembler_x86.hpp b/src/cpu/x86/vm/macroAssembler_x86.hpp
index 39b2e1a..2c2b17c 100644
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp
@@ -1232,6 +1232,9 @@
void punpcklbw(XMMRegister dst, XMMRegister src);
void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }
+ void pshufd(XMMRegister dst, Address src, int mode);
+ void pshufd(XMMRegister dst, XMMRegister src, int mode) { Assembler::pshufd(dst, src, mode); }
+
void pshuflw(XMMRegister dst, XMMRegister src, int mode);
void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); }