| // |
| // Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved. |
| // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| // |
| // This code is free software; you can redistribute it and/or modify it |
| // under the terms of the GNU General Public License version 2 only, as |
| // published by the Free Software Foundation. |
| // |
| // This code is distributed in the hope that it will be useful, but WITHOUT |
| // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| // version 2 for more details (a copy is included in the LICENSE file that |
| // accompanied this code). |
| // |
| // You should have received a copy of the GNU General Public License version |
| // 2 along with this work; if not, write to the Free Software Foundation, |
| // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| // |
| // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| // or visit www.oracle.com if you need additional information or have any |
| // questions. |
| // |
| // |
| |
| // X86 Common Architecture Description File |
| |
| //----------REGISTER DEFINITION BLOCK------------------------------------------ |
| // This information is used by the matcher and the register allocator to |
| // describe individual registers and classes of registers within the target |
// architecture.
| |
| register %{ |
| //----------Architecture Description Register Definitions---------------------- |
| // General Registers |
| // "reg_def" name ( register save type, C convention save type, |
| // ideal register type, encoding ); |
| // Register Save Types: |
| // |
| // NS = No-Save: The register allocator assumes that these registers |
| // can be used without saving upon entry to the method, & |
| // that they do not need to be saved at call sites. |
| // |
| // SOC = Save-On-Call: The register allocator assumes that these registers |
| // can be used without saving upon entry to the method, |
| // but that they must be saved at call sites. |
| // |
| // SOE = Save-On-Entry: The register allocator assumes that these registers |
| // must be saved before using them upon entry to the |
| // method, but they do not need to be saved at call |
| // sites. |
| // |
| // AS = Always-Save: The register allocator assumes that these registers |
| // must be saved before using them upon entry to the |
| // method, & that they must be saved at call sites. |
| // |
| // Ideal Register Type is used to determine how to save & restore a |
| // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get |
| // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. |
| // |
| // The encoding number is the actual bit-pattern placed into the opcodes. |
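//
// For example, the first definition below,
//
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
//
// declares a save-on-call float slot named XMM0 with opcode encoding 0,
// mapped to the first 32-bit VMReg slot of xmm0; the sliced names that
// follow (XMM0b .. XMM0p) step through the remaining slots of the same
// physical register via ->next(n).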
| |
// XMM registers. 512-bit registers, i.e. 16 words each, labeled (a)-p.
| // Word a in each register holds a Float, words ab hold a Double. |
| // The whole registers are used in SSE4.2 version intrinsics, |
| // array copy stubs and superword operations (see UseSSE42Intrinsics, |
| // UseXMMForArrayCopy and UseSuperword flags). |
// For pre-EVEX architectures:
//     XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// For EVEX-enabled architectures:
//     XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
| // |
| // Linux ABI: No register preserved across function calls |
| // XMM0-XMM7 might hold parameters |
// Windows ABI: XMM6-XMM15 preserved across function calls
| // XMM0-XMM3 might hold parameters |
| |
| reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); |
| reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); |
| reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); |
| reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); |
| reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); |
| reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); |
| reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); |
| reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); |
| reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); |
| reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); |
| reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); |
| reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); |
| reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); |
| reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); |
| reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); |
| reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); |
| |
| reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); |
| reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); |
| reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); |
| reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); |
| reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); |
| reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); |
| reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); |
| reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); |
| reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); |
| reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); |
| reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); |
| reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); |
| reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); |
| reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); |
| reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); |
| reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); |
| |
| reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); |
| reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); |
| reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); |
| reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); |
| reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); |
| reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); |
| reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); |
| reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); |
| reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); |
| reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); |
| reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); |
| reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); |
| reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); |
| reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); |
| reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); |
| reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); |
| |
| reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); |
| reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); |
| reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); |
| reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); |
| reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); |
| reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); |
| reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); |
| reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); |
| reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); |
| reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); |
| reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); |
| reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); |
| reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); |
| reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); |
| reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); |
| reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); |
| |
| reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); |
| reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); |
| reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); |
| reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); |
| reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); |
| reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); |
| reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); |
| reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); |
| reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); |
| reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); |
| reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); |
| reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); |
| reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); |
| reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); |
| reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); |
| reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); |
| |
| reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); |
| reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); |
| reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); |
| reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); |
| reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); |
| reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); |
| reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); |
| reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); |
| reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); |
| reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); |
| reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); |
| reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); |
| reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); |
| reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); |
| reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); |
| reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); |
| |
| reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); |
| reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); |
| reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); |
| reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); |
| reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); |
| reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); |
| reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); |
| reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); |
| reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); |
| reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); |
| reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); |
| reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); |
| reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); |
| reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); |
| reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); |
| reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); |
| |
| reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); |
| reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); |
| reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); |
| reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); |
| reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); |
| reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); |
| reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); |
| reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); |
| reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); |
| reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); |
| reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); |
| reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); |
| reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); |
| reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); |
| reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); |
| reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); |
| |
| #ifdef _LP64 |
| |
| reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); |
| reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); |
| reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); |
| reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); |
| reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); |
| reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); |
| reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); |
| reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); |
| reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); |
| reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); |
| reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); |
| reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); |
| reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); |
| reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); |
| reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); |
| reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); |
| |
| reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); |
| reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); |
| reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); |
| reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); |
| reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); |
| reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); |
| reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); |
| reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); |
| reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); |
| reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); |
| reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); |
| reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); |
| reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); |
| reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); |
| reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); |
| reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); |
| |
| reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); |
| reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); |
| reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); |
| reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); |
| reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); |
| reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); |
| reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); |
| reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); |
| reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); |
| reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); |
| reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); |
| reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); |
| reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); |
| reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); |
| reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); |
| reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); |
| |
| reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); |
| reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); |
| reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); |
| reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); |
| reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); |
| reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); |
| reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); |
| reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); |
| reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); |
| reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); |
| reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); |
| reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); |
| reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); |
| reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); |
| reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); |
| reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); |
| |
| reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); |
| reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); |
| reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); |
| reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); |
| reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); |
| reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); |
| reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); |
| reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); |
| reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); |
| reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); |
| reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); |
| reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); |
| reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); |
| reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); |
| reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); |
| reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); |
| |
| reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); |
| reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); |
| reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); |
| reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); |
| reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); |
| reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); |
| reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); |
| reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); |
| reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); |
| reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); |
| reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); |
| reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); |
| reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); |
| reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); |
| reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); |
| reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); |
| |
| reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); |
| reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); |
| reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); |
| reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); |
| reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); |
| reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); |
| reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); |
| reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); |
| reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); |
| reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); |
| reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); |
| reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); |
| reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); |
| reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); |
| reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); |
| reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); |
| |
| reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); |
| reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); |
| reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); |
| reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); |
| reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); |
| reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); |
| reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); |
| reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); |
| reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); |
| reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); |
| reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); |
| reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); |
| reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); |
| reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); |
| reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); |
| reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); |
| |
| reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); |
| reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); |
| reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); |
| reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); |
| reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); |
| reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); |
| reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); |
| reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); |
| reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); |
| reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); |
| reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); |
| reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); |
| reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); |
| reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); |
| reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); |
| reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); |
| |
| reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); |
| reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); |
| reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); |
| reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); |
| reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); |
| reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); |
| reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); |
| reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); |
| reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); |
| reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); |
| reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); |
| reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); |
| reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); |
| reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); |
| reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); |
| reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); |
| |
| reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); |
| reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); |
| reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); |
| reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); |
| reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); |
| reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); |
| reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); |
| reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); |
| reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); |
| reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); |
| reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); |
| reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11)); |
| reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); |
| reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); |
| reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); |
| reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); |
| |
| reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); |
| reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); |
| reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); |
| reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); |
| reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); |
| reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); |
| reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); |
| reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); |
| reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); |
| reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); |
| reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); |
| reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); |
| reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); |
| reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); |
| reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); |
| reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); |
| |
| reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); |
| reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); |
| reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); |
| reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); |
| reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); |
| reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); |
| reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); |
| reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); |
| reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); |
| reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); |
| reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); |
| reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); |
| reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); |
| reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); |
| reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); |
| reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); |
| |
| reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); |
| reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); |
| reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); |
| reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); |
| reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); |
| reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); |
| reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); |
| reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); |
| reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); |
| reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); |
| reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); |
| reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); |
| reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12)); |
| reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); |
| reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); |
| reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); |
| |
| reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); |
| reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); |
| reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); |
| reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); |
| reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); |
| reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); |
| reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); |
| reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); |
| reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); |
| reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); |
| reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); |
| reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); |
| reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); |
| reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); |
| reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); |
| reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); |
| |
| reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); |
| reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); |
| reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); |
| reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); |
| reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); |
| reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); |
| reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); |
| reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); |
| reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); |
| reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); |
| reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); |
| reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); |
| reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); |
| reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); |
| reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); |
| reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); |
| |
| reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); |
| reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); |
| reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); |
| reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); |
| reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); |
| reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); |
| reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); |
| reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); |
| reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); |
| reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); |
| reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); |
| reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); |
| reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); |
| reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); |
| reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); |
| reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); |
| |
| reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); |
| reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); |
| reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); |
| reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); |
| reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); |
| reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); |
| reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); |
| reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); |
| reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); |
| reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); |
| reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); |
| reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); |
| reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); |
| reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); |
| reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); |
| reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); |
| |
| reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); |
| reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); |
| reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); |
| reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); |
| reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); |
| reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); |
| reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); |
| reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); |
| reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); |
| reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); |
| reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); |
| reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); |
| reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); |
| reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); |
| reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); |
| reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); |
| |
| reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); |
| reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); |
| reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); |
| reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); |
| reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); |
| reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); |
| reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); |
| reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); |
| reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); |
| reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); |
| reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); |
| reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); |
| reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); |
| reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); |
| reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); |
| reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); |
| |
| reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); |
| reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); |
| reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); |
| reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); |
| reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); |
| reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); |
| reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); |
| reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); |
| reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); |
| reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); |
| reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); |
| reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); |
| reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); |
| reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); |
| reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); |
| reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); |
| |
| reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); |
| reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); |
| reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); |
| reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); |
| reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); |
| reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); |
| reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); |
| reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); |
| reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); |
| reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); |
| reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); |
| reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); |
| reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); |
| reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); |
| reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); |
| reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); |
| |
| reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); |
| reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); |
| reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); |
| reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); |
| reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); |
| reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); |
| reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); |
| reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); |
| reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); |
| reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); |
| reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); |
| reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); |
| reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); |
| reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); |
| reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); |
| reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); |
| |
| reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); |
| reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); |
| reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); |
| reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); |
| reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); |
| reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); |
| reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); |
| reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); |
| reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); |
| reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); |
| reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); |
| reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); |
| reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); |
| reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); |
| reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); |
| reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); |
| |
| #endif // _LP64 |
| |
| #ifdef _LP64 |
| reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); |
| #else |
| reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); |
| #endif // _LP64 |
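// The flags register has no VMReg/stack mapping, hence VMRegImpl::Bad();
// its encoding (16 on LP64, 8 otherwise) matches the number of
// general-purpose registers available in each mode.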
| |
| alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, |
| XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, |
| XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, |
| XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, |
| XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, |
| XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, |
| XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, |
| XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p |
| #ifdef _LP64 |
| ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, |
| XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, |
| XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, |
| XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, |
| XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, |
| XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, |
| XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, |
| XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p |
| ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, |
| XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, |
| XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, |
| XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, |
| XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, |
| XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, |
| XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, |
| XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, |
| XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, |
| XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, |
| XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, |
| XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, |
| XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, |
| XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, |
| XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, |
| XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p |
| #endif |
| ); |
| |
| // flags allocation class should be last. |
| alloc_class chunk2(RFLAGS); |
| |
| // Singleton class for condition codes |
| reg_class int_flags(RFLAGS); |
| |
// Class for pre-EVEX float registers
| reg_class float_reg_legacy(XMM0, |
| XMM1, |
| XMM2, |
| XMM3, |
| XMM4, |
| XMM5, |
| XMM6, |
| XMM7 |
| #ifdef _LP64 |
| ,XMM8, |
| XMM9, |
| XMM10, |
| XMM11, |
| XMM12, |
| XMM13, |
| XMM14, |
| XMM15 |
| #endif |
| ); |
| |
// Class for EVEX float registers
| reg_class float_reg_evex(XMM0, |
| XMM1, |
| XMM2, |
| XMM3, |
| XMM4, |
| XMM5, |
| XMM6, |
| XMM7 |
| #ifdef _LP64 |
| ,XMM8, |
| XMM9, |
| XMM10, |
| XMM11, |
| XMM12, |
| XMM13, |
| XMM14, |
| XMM15, |
| XMM16, |
| XMM17, |
| XMM18, |
| XMM19, |
| XMM20, |
| XMM21, |
| XMM22, |
| XMM23, |
| XMM24, |
| XMM25, |
| XMM26, |
| XMM27, |
| XMM28, |
| XMM29, |
| XMM30, |
| XMM31 |
| #endif |
| ); |
| |
| reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); |
| reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); |
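// A reg_class_dynamic selects between its two underlying classes via the
// trailing predicate: when the guard (e.g. VM_Version::supports_evex())
// holds, the EVEX class (XMM0-XMM31) is used, otherwise the legacy class
// (XMM0-XMM15). The same pattern is repeated for the double and vector
// classes below.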
| |
// Class for pre-EVEX double registers
| reg_class double_reg_legacy(XMM0, XMM0b, |
| XMM1, XMM1b, |
| XMM2, XMM2b, |
| XMM3, XMM3b, |
| XMM4, XMM4b, |
| XMM5, XMM5b, |
| XMM6, XMM6b, |
| XMM7, XMM7b |
| #ifdef _LP64 |
| ,XMM8, XMM8b, |
| XMM9, XMM9b, |
| XMM10, XMM10b, |
| XMM11, XMM11b, |
| XMM12, XMM12b, |
| XMM13, XMM13b, |
| XMM14, XMM14b, |
| XMM15, XMM15b |
| #endif |
| ); |
| |
// Class for EVEX double registers
| reg_class double_reg_evex(XMM0, XMM0b, |
| XMM1, XMM1b, |
| XMM2, XMM2b, |
| XMM3, XMM3b, |
| XMM4, XMM4b, |
| XMM5, XMM5b, |
| XMM6, XMM6b, |
| XMM7, XMM7b |
| #ifdef _LP64 |
| ,XMM8, XMM8b, |
| XMM9, XMM9b, |
| XMM10, XMM10b, |
| XMM11, XMM11b, |
| XMM12, XMM12b, |
| XMM13, XMM13b, |
| XMM14, XMM14b, |
| XMM15, XMM15b, |
| XMM16, XMM16b, |
| XMM17, XMM17b, |
| XMM18, XMM18b, |
| XMM19, XMM19b, |
| XMM20, XMM20b, |
| XMM21, XMM21b, |
| XMM22, XMM22b, |
| XMM23, XMM23b, |
| XMM24, XMM24b, |
| XMM25, XMM25b, |
| XMM26, XMM26b, |
| XMM27, XMM27b, |
| XMM28, XMM28b, |
| XMM29, XMM29b, |
| XMM30, XMM30b, |
| XMM31, XMM31b |
| #endif |
| ); |
| |
| reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); |
| reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); |
| |
// Class for pre-EVEX 32-bit vector registers
| reg_class vectors_reg_legacy(XMM0, |
| XMM1, |
| XMM2, |
| XMM3, |
| XMM4, |
| XMM5, |
| XMM6, |
| XMM7 |
| #ifdef _LP64 |
| ,XMM8, |
| XMM9, |
| XMM10, |
| XMM11, |
| XMM12, |
| XMM13, |
| XMM14, |
| XMM15 |
| #endif |
| ); |
| |
// Class for EVEX 32-bit vector registers
| reg_class vectors_reg_evex(XMM0, |
| XMM1, |
| XMM2, |
| XMM3, |
| XMM4, |
| XMM5, |
| XMM6, |
| XMM7 |
| #ifdef _LP64 |
| ,XMM8, |
| XMM9, |
| XMM10, |
| XMM11, |
| XMM12, |
| XMM13, |
| XMM14, |
| XMM15, |
| XMM16, |
| XMM17, |
| XMM18, |
| XMM19, |
| XMM20, |
| XMM21, |
| XMM22, |
| XMM23, |
| XMM24, |
| XMM25, |
| XMM26, |
| XMM27, |
| XMM28, |
| XMM29, |
| XMM30, |
| XMM31 |
| #endif |
| ); |
| |
| reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); |
| reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); |
| |
// Class for pre-EVEX 64-bit vector registers
| reg_class vectord_reg_legacy(XMM0, XMM0b, |
| XMM1, XMM1b, |
| XMM2, XMM2b, |
| XMM3, XMM3b, |
| XMM4, XMM4b, |
| XMM5, XMM5b, |
| XMM6, XMM6b, |
| XMM7, XMM7b |
| #ifdef _LP64 |
| ,XMM8, XMM8b, |
| XMM9, XMM9b, |
| XMM10, XMM10b, |
| XMM11, XMM11b, |
| XMM12, XMM12b, |
| XMM13, XMM13b, |
| XMM14, XMM14b, |
| XMM15, XMM15b |
| #endif |
| ); |
| |
// Class for EVEX 64-bit vector registers
| reg_class vectord_reg_evex(XMM0, XMM0b, |
| XMM1, XMM1b, |
| XMM2, XMM2b, |
| XMM3, XMM3b, |
| XMM4, XMM4b, |
| XMM5, XMM5b, |
| XMM6, XMM6b, |
| XMM7, XMM7b |
| #ifdef _LP64 |
| ,XMM8, XMM8b, |
| XMM9, XMM9b, |
| XMM10, XMM10b, |
| XMM11, XMM11b, |
| XMM12, XMM12b, |
| XMM13, XMM13b, |
| XMM14, XMM14b, |
| XMM15, XMM15b, |
| XMM16, XMM16b, |
| XMM17, XMM17b, |
| XMM18, XMM18b, |
| XMM19, XMM19b, |
| XMM20, XMM20b, |
| XMM21, XMM21b, |
| XMM22, XMM22b, |
| XMM23, XMM23b, |
| XMM24, XMM24b, |
| XMM25, XMM25b, |
| XMM26, XMM26b, |
| XMM27, XMM27b, |
| XMM28, XMM28b, |
| XMM29, XMM29b, |
| XMM30, XMM30b, |
| XMM31, XMM31b |
| #endif |
| ); |
| |
| reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); |
| reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); |
| |
// Class for pre-EVEX 128-bit vector registers
| reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, |
| XMM1, XMM1b, XMM1c, XMM1d, |
| XMM2, XMM2b, XMM2c, XMM2d, |
| XMM3, XMM3b, XMM3c, XMM3d, |
| XMM4, XMM4b, XMM4c, XMM4d, |
| XMM5, XMM5b, XMM5c, XMM5d, |
| XMM6, XMM6b, XMM6c, XMM6d, |
| XMM7, XMM7b, XMM7c, XMM7d |
| #ifdef _LP64 |
| ,XMM8, XMM8b, XMM8c, XMM8d, |
| XMM9, XMM9b, XMM9c, XMM9d, |
| XMM10, XMM10b, XMM10c, XMM10d, |
| XMM11, XMM11b, XMM11c, XMM11d, |
| XMM12, XMM12b, XMM12c, XMM12d, |
| XMM13, XMM13b, XMM13c, XMM13d, |
| XMM14, XMM14b, XMM14c, XMM14d, |
| XMM15, XMM15b, XMM15c, XMM15d |
| #endif |
| ); |
| |
// Class for EVEX 128-bit vector registers
| reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, |
| XMM1, XMM1b, XMM1c, XMM1d, |
| XMM2, XMM2b, XMM2c, XMM2d, |
| XMM3, XMM3b, XMM3c, XMM3d, |
| XMM4, XMM4b, XMM4c, XMM4d, |
| XMM5, XMM5b, XMM5c, XMM5d, |
| XMM6, XMM6b, XMM6c, XMM6d, |
| XMM7, XMM7b, XMM7c, XMM7d |
| #ifdef _LP64 |
| ,XMM8, XMM8b, XMM8c, XMM8d, |
| XMM9, XMM9b, XMM9c, XMM9d, |
| XMM10, XMM10b, XMM10c, XMM10d, |
| XMM11, XMM11b, XMM11c, XMM11d, |
| XMM12, XMM12b, XMM12c, XMM12d, |
| XMM13, XMM13b, XMM13c, XMM13d, |
| XMM14, XMM14b, XMM14c, XMM14d, |
| XMM15, XMM15b, XMM15c, XMM15d, |
| XMM16, XMM16b, XMM16c, XMM16d, |
| XMM17, XMM17b, XMM17c, XMM17d, |
| XMM18, XMM18b, XMM18c, XMM18d, |
| XMM19, XMM19b, XMM19c, XMM19d, |
| XMM20, XMM20b, XMM20c, XMM20d, |
| XMM21, XMM21b, XMM21c, XMM21d, |
| XMM22, XMM22b, XMM22c, XMM22d, |
| XMM23, XMM23b, XMM23c, XMM23d, |
| XMM24, XMM24b, XMM24c, XMM24d, |
| XMM25, XMM25b, XMM25c, XMM25d, |
| XMM26, XMM26b, XMM26c, XMM26d, |
| XMM27, XMM27b, XMM27c, XMM27d, |
| XMM28, XMM28b, XMM28c, XMM28d, |
| XMM29, XMM29b, XMM29c, XMM29d, |
| XMM30, XMM30b, XMM30c, XMM30d, |
| XMM31, XMM31b, XMM31c, XMM31d |
| #endif |
| ); |
| |
| reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); |
| reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); |
| |
// Class for pre-EVEX 256-bit vector registers
| reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, |
| XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, |
| XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, |
| XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, |
| XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, |
| XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, |
| XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, |
| XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h |
| #ifdef _LP64 |
| ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, |
| XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, |
| XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, |
| XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, |
| XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, |
| XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, |
| XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, |
| XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h |
| #endif |
| ); |
| |
// Class for EVEX 256-bit vector registers
| reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, |
| XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, |
| XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, |
| XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, |
| XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, |
| XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, |
| XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, |
| XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h |
| #ifdef _LP64 |
| ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, |
| XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, |
| XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, |
| XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, |
| XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, |
| XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, |
| XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, |
| XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, |
| XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, |
| XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, |
| XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, |
| XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, |
| XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, |
| XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, |
| XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, |
| XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, |
| XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, |
| XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, |
| XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, |
| XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, |
| XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, |
| XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, |
| XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, |
| XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h |
| #endif |
| ); |
| |
| reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); |
| reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); |
| |
// Class for EVEX 512-bit vector registers
| reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, |
| XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, |
| XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, |
| XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, |
| XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, |
| XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, |
| XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, |
| XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p |
| #ifdef _LP64 |
| ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, |
| XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, |
| XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, |
| XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, |
| XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, |
| XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, |
| XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, |
| XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p |
| ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, |
| XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, |
| XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, |
| XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, |
| XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, |
| XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, |
| XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, |
| XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, |
| XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, |
| XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, |
| XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, |
| XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, |
| XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, |
| XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, |
| XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, |
| XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p |
| #endif |
| ); |
| |
| // Class for restricted 512bit vector registers |
| reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, |
| XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, |
| XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, |
| XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, |
| XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, |
| XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, |
| XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, |
| XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p |
| #ifdef _LP64 |
| ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, |
| XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, |
| XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, |
| XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, |
| XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, |
| XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, |
| XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, |
| XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p |
| #endif |
| ); |
| |
| reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); |
| reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); |
| |
| reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); |
| %} |
| |
| |
| //----------SOURCE BLOCK------------------------------------------------------- |
| // This is a block of C++ code which provides values, functions, and |
| // definitions necessary in the rest of the architecture description |
| |
| source_hpp %{ |
| // Header information of the source block. |
| // Method declarations/definitions which are used outside |
| // the ad-scope can conveniently be defined here. |
| // |
| // To keep related declarations/definitions/uses close together, |
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
| |
| #include "runtime/vm_version.hpp" |
| |
| class NativeJump; |
| |
| class CallStubImpl { |
| |
| //-------------------------------------------------------------- |
| //---< Used for optimization in Compile::shorten_branches >--- |
| //-------------------------------------------------------------- |
| |
| public: |
| // Size of call trampoline stub. |
| static uint size_call_trampoline() { |
| return 0; // no call trampolines on this platform |
| } |
| |
| // number of relocations needed by a call trampoline stub |
| static uint reloc_call_trampoline() { |
| return 0; // no call trampolines on this platform |
| } |
| }; |
| |
| class HandlerImpl { |
| |
| public: |
| |
| static int emit_exception_handler(CodeBuffer &cbuf); |
| static int emit_deopt_handler(CodeBuffer& cbuf); |
| |
| static uint size_exception_handler() { |
| // NativeCall instruction size is the same as NativeJump. |
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
| // Note that this value is also credited (in output.cpp) to |
| // the size of the code section. |
| return NativeJump::instruction_size; |
| } |
| |
| #ifdef _LP64 |
| static uint size_deopt_handler() { |
    // Three 5-byte instructions plus one move for the unreachable address.
| return 15+3; |
| } |
| #else |
| static uint size_deopt_handler() { |
| // NativeCall instruction size is the same as NativeJump. |
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
| // Note that this value is also credited (in output.cpp) to |
| // the size of the code section. |
| return 5 + NativeJump::instruction_size; // pushl(); jmp; |
| } |
| #endif |
| }; |
| |
| |
| inline uint vector_length(const Node* n) { |
| const TypeVect* vt = n->bottom_type()->is_vect(); |
| return vt->length(); |
| } |
| |
| inline uint vector_length(const MachNode* use, MachOper* opnd) { |
| uint def_idx = use->operand_index(opnd); |
| Node* def = use->in(def_idx); |
| return def->bottom_type()->is_vect()->length(); |
| } |
| |
| inline uint vector_length_in_bytes(const Node* n) { |
| const TypeVect* vt = n->bottom_type()->is_vect(); |
| return vt->length_in_bytes(); |
| } |
| |
| inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) { |
| uint def_idx = use->operand_index(opnd); |
| Node* def = use->in(def_idx); |
| return def->bottom_type()->is_vect()->length_in_bytes(); |
| } |
| |
| inline BasicType vector_element_basic_type(const Node *n) { |
| return n->bottom_type()->is_vect()->element_basic_type(); |
| } |
| |
| inline BasicType vector_element_basic_type(const MachNode *use, MachOper* opnd) { |
| uint def_idx = use->operand_index(opnd); |
| Node* def = use->in(def_idx); |
| return def->bottom_type()->is_vect()->element_basic_type(); |
| } |
| |
| inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { |
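  // Maps a vector size in bytes to the AVX length encoding used by the assembler;
  // e.g. a 32-byte (8 x float) vector encodes as AVX_256bit, while 4-, 8- and
  // 16-byte vectors all share the 128-bit encoding.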
| switch(bytes) { |
| case 4: // fall-through |
| case 8: // fall-through |
| case 16: return Assembler::AVX_128bit; |
| case 32: return Assembler::AVX_256bit; |
| case 64: return Assembler::AVX_512bit; |
| |
| default: { |
| ShouldNotReachHere(); |
| return Assembler::AVX_NoVec; |
| } |
| } |
| } |
| |
| static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) { |
| return vector_length_encoding(vector_length_in_bytes(n)); |
| } |
| |
| static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) { |
| uint def_idx = use->operand_index(opnd); |
| Node* def = use->in(def_idx); |
| return vector_length_encoding(def); |
| } |
| |
| class Node::PD { |
| public: |
| enum NodeFlags { |
| Flag_intel_jcc_erratum = Node::_last_flag << 1, |
| _last_flag = Flag_intel_jcc_erratum |
| }; |
| }; |
| |
| %} // end source_hpp |
| |
| source %{ |
| |
| #include "opto/addnode.hpp" |
| #include "c2_intelJccErratum_x86.hpp" |
| |
| void PhaseOutput::pd_perform_mach_node_analysis() { |
| if (VM_Version::has_intel_jcc_erratum()) { |
| int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); |
| _buf_sizes._code += extra_padding; |
| } |
| } |
| |
| int MachNode::pd_alignment_required() const { |
| if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { |
| // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86. |
| return IntelJccErratum::largest_jcc_size() + 1; |
| } else { |
| return 1; |
| } |
| } |
| |
| int MachNode::compute_padding(int current_offset) const { |
| if (flags() & Node::PD::Flag_intel_jcc_erratum) { |
| Compile* C = Compile::current(); |
| PhaseOutput* output = C->output(); |
| Block* block = output->block(); |
| int index = output->index(); |
| return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); |
| } else { |
| return 0; |
| } |
| } |
| |
| // Emit exception handler code. |
| // Stuff framesize into a register and call a VM stub routine. |
| int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { |
| |
| // Note that the code buffer's insts_mark is always relative to insts. |
| // That's why we must use the macroassembler to generate a handler. |
| C2_MacroAssembler _masm(&cbuf); |
| address base = __ start_a_stub(size_exception_handler()); |
| if (base == NULL) { |
| ciEnv::current()->record_failure("CodeCache is full"); |
| return 0; // CodeBuffer::expand failed |
| } |
| int offset = __ offset(); |
| __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); |
| assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); |
| __ end_a_stub(); |
| return offset; |
| } |
| |
| // Emit deopt handler code. |
| int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { |
| |
| // Note that the code buffer's insts_mark is always relative to insts. |
| // That's why we must use the macroassembler to generate a handler. |
| C2_MacroAssembler _masm(&cbuf); |
| address base = __ start_a_stub(size_deopt_handler()); |
| if (base == NULL) { |
| ciEnv::current()->record_failure("CodeCache is full"); |
| return 0; // CodeBuffer::expand failed |
| } |
| int offset = __ offset(); |
| |
| #ifdef _LP64 |
| address the_pc = (address) __ pc(); |
| Label next; |
  // Push "the_pc" on the stack without destroying any registers,
  // as they may all be live.
| |
| // push address of "next" |
| __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 |
| __ bind(next); |
| // adjust it so it matches "the_pc" |
| __ subptr(Address(rsp, 0), __ offset() - offset); |
| #else |
| InternalAddress here(__ pc()); |
| __ pushptr(here.addr()); |
| #endif |
| |
| __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); |
| assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); |
| __ end_a_stub(); |
| return offset; |
| } |
| |
| Assembler::Width widthForType(BasicType bt) { |
| if (bt == T_BYTE) { |
| return Assembler::B; |
| } else if (bt == T_SHORT) { |
| return Assembler::W; |
| } else if (bt == T_INT) { |
| return Assembler::D; |
| } else { |
| assert(bt == T_LONG, "not a long: %s", type2name(bt)); |
| return Assembler::Q; |
| } |
| } |
| |
| //============================================================================= |
| |
| // Float masks come from different places depending on platform. |
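// On LP64 they come from the StubRoutines::x86 entries; on 32-bit they come
// from the *_pool constants referenced below.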
| #ifdef _LP64 |
| static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } |
| static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } |
| static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } |
| static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } |
| #else |
| static address float_signmask() { return (address)float_signmask_pool; } |
| static address float_signflip() { return (address)float_signflip_pool; } |
| static address double_signmask() { return (address)double_signmask_pool; } |
| static address double_signflip() { return (address)double_signflip_pool; } |
| #endif |
| static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } |
| static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } |
| static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } |
| static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } |
| static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } |
| static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } |
| static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } |
| static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } |
| static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } |
| static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } |
| static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } |
| static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } |
| |
| //============================================================================= |
| const bool Matcher::match_rule_supported(int opcode) { |
| if (!has_match_rule(opcode)) { |
| return false; // no match rule present |
| } |
| switch (opcode) { |
| case Op_AbsVL: |
| case Op_StoreVectorScatter: |
| if (UseAVX < 3) { |
| return false; |
| } |
| break; |
| case Op_PopCountI: |
| case Op_PopCountL: |
| if (!UsePopCountInstruction) { |
| return false; |
| } |
| break; |
| case Op_PopCountVI: |
| if (!UsePopCountInstruction || !VM_Version::supports_avx512_vpopcntdq()) { |
| return false; |
| } |
| break; |
| case Op_MulVI: |
| if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX |
| return false; |
| } |
| break; |
| case Op_MulVL: |
| if (UseSSE < 4) { // only with SSE4_1 or AVX |
| return false; |
| } |
| break; |
| case Op_MulReductionVL: |
| if (VM_Version::supports_avx512dq() == false) { |
| return false; |
| } |
| break; |
| case Op_AddReductionVL: |
| if (UseSSE < 2) { // requires at least SSE2 |
| return false; |
| } |
| break; |
| case Op_AbsVB: |
| case Op_AbsVS: |
| case Op_AbsVI: |
| case Op_AddReductionVI: |
| case Op_AndReductionV: |
| case Op_OrReductionV: |
| case Op_XorReductionV: |
| if (UseSSE < 3) { // requires at least SSSE3 |
| return false; |
| } |
| break; |
| case Op_VectorLoadShuffle: |
| case Op_VectorRearrange: |
| case Op_MulReductionVI: |
| if (UseSSE < 4) { // requires at least SSE4 |
| return false; |
| } |
| break; |
| case Op_SqrtVD: |
| case Op_SqrtVF: |
| case Op_VectorMaskCmp: |
| case Op_VectorCastB2X: |
| case Op_VectorCastS2X: |
| case Op_VectorCastI2X: |
| case Op_VectorCastL2X: |
| case Op_VectorCastF2X: |
| case Op_VectorCastD2X: |
| if (UseAVX < 1) { // enabled for AVX only |
| return false; |
| } |
| break; |
| case Op_CompareAndSwapL: |
| #ifdef _LP64 |
| case Op_CompareAndSwapP: |
| #endif |
| if (!VM_Version::supports_cx8()) { |
| return false; |
| } |
| break; |
| case Op_CMoveVF: |
| case Op_CMoveVD: |
| if (UseAVX < 1) { // enabled for AVX only |
| return false; |
| } |
| break; |
| case Op_StrIndexOf: |
| if (!UseSSE42Intrinsics) { |
| return false; |
| } |
| break; |
| case Op_StrIndexOfChar: |
| if (!UseSSE42Intrinsics) { |
| return false; |
| } |
| break; |
| case Op_OnSpinWait: |
| if (VM_Version::supports_on_spin_wait() == false) { |
| return false; |
| } |
| break; |
| case Op_MulVB: |
| case Op_LShiftVB: |
| case Op_RShiftVB: |
| case Op_URShiftVB: |
| case Op_VectorInsert: |
| case Op_VectorLoadMask: |
| case Op_VectorStoreMask: |
| case Op_VectorBlend: |
| if (UseSSE < 4) { |
| return false; |
| } |
| break; |
| #ifdef _LP64 |
| case Op_MaxD: |
| case Op_MaxF: |
| case Op_MinD: |
| case Op_MinF: |
| if (UseAVX < 1) { // enabled for AVX only |
| return false; |
| } |
| break; |
| #endif |
| case Op_CacheWB: |
| case Op_CacheWBPreSync: |
| case Op_CacheWBPostSync: |
| if (!VM_Version::supports_data_cache_line_flush()) { |
| return false; |
| } |
| break; |
| case Op_ExtractB: |
| case Op_ExtractL: |
| case Op_ExtractI: |
| case Op_RoundDoubleMode: |
| if (UseSSE < 4) { |
| return false; |
| } |
| break; |
| case Op_RoundDoubleModeV: |
| if (VM_Version::supports_avx() == false) { |
| return false; // 128bit vroundpd is not available |
| } |
| break; |
| case Op_LoadVectorGather: |
| if (UseAVX < 2) { |
| return false; |
| } |
| break; |
| case Op_FmaVD: |
| case Op_FmaVF: |
| if (!UseFMA) { |
| return false; |
| } |
| break; |
| case Op_MacroLogicV: |
| if (UseAVX < 3 || !UseVectorMacroLogic) { |
| return false; |
| } |
| break; |
| case Op_VectorMaskGen: |
| case Op_LoadVectorMasked: |
| case Op_StoreVectorMasked: |
| if (UseAVX < 3 || !VM_Version::supports_bmi2()) { |
| return false; |
| } |
| break; |
| #ifndef _LP64 |
| case Op_AddReductionVF: |
| case Op_AddReductionVD: |
| case Op_MulReductionVF: |
| case Op_MulReductionVD: |
| if (UseSSE < 1) { // requires at least SSE |
| return false; |
| } |
| break; |
| case Op_MulAddVS2VI: |
| case Op_RShiftVL: |
| case Op_AbsVD: |
| case Op_NegVD: |
| if (UseSSE < 2) { |
| return false; |
| } |
| break; |
| #endif // !LP64 |
| } |
| return true; // Match rules are supported by default. |
| } |
| |
| //------------------------------------------------------------------------ |
| |
// Identify extra cases for which we might want to provide match rules for vector nodes
// and other intrinsics guarded by vector length (vlen) and element type (bt).
| const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { |
| if (!match_rule_supported(opcode)) { |
| return false; |
| } |
| // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): |
| // * SSE2 supports 128bit vectors for all types; |
| // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; |
| // * AVX2 supports 256bit vectors for all types; |
| // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; |
| // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. |
| // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). |
| // And MaxVectorSize is taken into account as well. |
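  // For example, a 512-bit SHORT vector (vlen == 32) is only accepted when AVX512BW is available.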
| if (!vector_size_supported(bt, vlen)) { |
| return false; |
| } |
| // Special cases which require vector length follow: |
| // * implementation limitations |
| // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ |
| // * 128bit vroundpd instruction is present only in AVX1 |
| int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; |
| switch (opcode) { |
| case Op_AbsVF: |
| case Op_NegVF: |
| if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { |
| return false; // 512bit vandps and vxorps are not available |
| } |
| break; |
| case Op_AbsVD: |
| case Op_NegVD: |
| case Op_MulVL: |
| if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { |
| return false; // 512bit vpmullq, vandpd and vxorpd are not available |
| } |
| break; |
| case Op_CMoveVF: |
| if (vlen != 8) { |
| return false; // implementation limitation (only vcmov8F_reg is present) |
| } |
| break; |
| case Op_RotateRightV: |
| case Op_RotateLeftV: |
| case Op_MacroLogicV: |
| if (!VM_Version::supports_evex() || |
| ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { |
| return false; |
| } |
| break; |
| case Op_ClearArray: |
| case Op_VectorMaskGen: |
| case Op_LoadVectorMasked: |
| case Op_StoreVectorMasked: |
| if (!VM_Version::supports_avx512bw()) { |
| return false; |
| } |
| if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { |
| return false; |
| } |
| break; |
| case Op_CMoveVD: |
| if (vlen != 4) { |
| return false; // implementation limitation (only vcmov4D_reg is present) |
| } |
| break; |
| case Op_MaxV: |
| case Op_MinV: |
| if (UseSSE < 4 && is_integral_type(bt)) { |
| return false; |
| } |
| if ((bt == T_FLOAT || bt == T_DOUBLE)) { |
| // Float/Double intrinsics are enabled for AVX family currently. |
| if (UseAVX == 0) { |
| return false; |
| } |
| if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ |
| return false; |
| } |
| } |
| break; |
| case Op_AddReductionVI: |
| if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { |
| return false; |
| } |
| // fallthrough |
| case Op_AndReductionV: |
| case Op_OrReductionV: |
| case Op_XorReductionV: |
| if (is_subword_type(bt) && (UseSSE < 4)) { |
| return false; |
| } |
| #ifndef _LP64 |
| if (bt == T_BYTE || bt == T_LONG) { |
| return false; |
| } |
| #endif |
| break; |
| #ifndef _LP64 |
| case Op_VectorInsert: |
| if (bt == T_LONG || bt == T_DOUBLE) { |
| return false; |
| } |
| break; |
| #endif |
| case Op_MinReductionV: |
| case Op_MaxReductionV: |
| if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { |
| return false; |
| } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { |
| return false; |
| } |
| // Float/Double intrinsics enabled for AVX family. |
| if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { |
| return false; |
| } |
| if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { |
| return false; |
| } |
| #ifndef _LP64 |
| if (bt == T_BYTE || bt == T_LONG) { |
| return false; |
| } |
| #endif |
| break; |
| case Op_VectorTest: |
| if (UseSSE < 4) { |
| return false; // Implementation limitation |
| } else if (size_in_bits < 32) { |
| return false; // Implementation limitation |
| } else if (size_in_bits == 512 && (VM_Version::supports_avx512bw() == false)) { |
| return false; // Implementation limitation |
| } |
| break; |
| case Op_VectorLoadShuffle: |
| case Op_VectorRearrange: |
      if (vlen == 2) {
| return false; // Implementation limitation due to how shuffle is loaded |
| } else if (size_in_bits == 256 && UseAVX < 2) { |
| return false; // Implementation limitation |
| } else if (bt == T_BYTE && size_in_bits > 256 && !VM_Version::supports_avx512_vbmi()) { |
| return false; // Implementation limitation |
| } else if (bt == T_SHORT && size_in_bits > 256 && !VM_Version::supports_avx512bw()) { |
| return false; // Implementation limitation |
| } |
| break; |
| case Op_VectorLoadMask: |
| if (size_in_bits == 256 && UseAVX < 2) { |
| return false; // Implementation limitation |
| } |
| // fallthrough |
| case Op_VectorStoreMask: |
| if (vlen == 2) { |
| return false; // Implementation limitation |
| } |
| break; |
| case Op_VectorCastB2X: |
| if (size_in_bits == 256 && UseAVX < 2) { |
| return false; // Implementation limitation |
| } |
| break; |
| case Op_VectorCastS2X: |
| if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { |
| return false; |
| } |
| break; |
| case Op_VectorCastI2X: |
| if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { |
| return false; |
| } |
| break; |
| case Op_VectorCastL2X: |
| if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { |
| return false; |
| } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { |
| return false; |
| } |
| break; |
| case Op_VectorCastF2X: |
| case Op_VectorCastD2X: |
| if (is_integral_type(bt)) { |
| // Casts from FP to integral types require special fixup logic not easily |
| // implementable with vectors. |
| return false; // Implementation limitation |
| } |
| case Op_MulReductionVI: |
| if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { |
| return false; |
| } |
| break; |
| case Op_StoreVectorScatter: |
      if (bt == T_BYTE || bt == T_SHORT) {
| return false; |
| } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { |
| return false; |
| } |
| // fallthrough |
| case Op_LoadVectorGather: |
      if (size_in_bits == 64) {
| return false; |
| } |
| break; |
| } |
  return true;  // By default, match rules are supported.
| } |
| |
| // x86 supports generic vector operands: vec and legVec. |
| const bool Matcher::supports_generic_vector_operands = true; |
| |
| MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { |
| assert(Matcher::is_generic_vector(generic_opnd), "not generic"); |
| bool legacy = (generic_opnd->opcode() == LEGVEC); |
| if (!VM_Version::supports_avx512vlbwdq() && // KNL |
| is_temp && !legacy && (ideal_reg == Op_VecZ)) { |
| // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. |
| return new legVecZOper(); |
| } |
| if (legacy) { |
| switch (ideal_reg) { |
| case Op_VecS: return new legVecSOper(); |
| case Op_VecD: return new legVecDOper(); |
| case Op_VecX: return new legVecXOper(); |
| case Op_VecY: return new legVecYOper(); |
| case Op_VecZ: return new legVecZOper(); |
| } |
| } else { |
| switch (ideal_reg) { |
| case Op_VecS: return new vecSOper(); |
| case Op_VecD: return new vecDOper(); |
| case Op_VecX: return new vecXOper(); |
| case Op_VecY: return new vecYOper(); |
| case Op_VecZ: return new vecZOper(); |
| } |
| } |
| ShouldNotReachHere(); |
| return NULL; |
| } |
| |
| bool Matcher::is_generic_reg2reg_move(MachNode* m) { |
| switch (m->rule()) { |
| case MoveVec2Leg_rule: |
| case MoveLeg2Vec_rule: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool Matcher::is_generic_vector(MachOper* opnd) { |
| switch (opnd->opcode()) { |
| case VEC: |
| case LEGVEC: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| //------------------------------------------------------------------------ |
| |
| bool Matcher::supports_vector_variable_shifts(void) { |
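  // Per-element (variable) vector shifts such as vpsllvd/vpsrlvd were introduced with AVX2.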
| return (UseAVX >= 2); |
| } |
| |
| bool Matcher::supports_vector_variable_rotates(void) { |
| return true; |
| } |
| |
| const bool Matcher::has_predicated_vectors(void) { |
| bool ret_value = false; |
| if (UseAVX > 2) { |
| ret_value = VM_Version::supports_avx512vl(); |
| } |
| |
| return ret_value; |
| } |
| |
| const int Matcher::float_pressure(int default_pressure_threshold) { |
| int float_pressure_threshold = default_pressure_threshold; |
| #ifdef _LP64 |
| if (UseAVX > 2) { |
| // Increase pressure threshold on machines with AVX3 which have |
| // 2x more XMM registers. |
| float_pressure_threshold = default_pressure_threshold * 2; |
| } |
| #endif |
| return float_pressure_threshold; |
| } |
| |
| // Max vector size in bytes. 0 if not supported. |
| const int Matcher::vector_width_in_bytes(BasicType bt) { |
| assert(is_java_primitive(bt), "only primitive type vectors"); |
| if (UseSSE < 2) return 0; |
| // SSE2 supports 128bit vectors for all types. |
| // AVX2 supports 256bit vectors for all types. |
  // EVEX (AVX-512) supports 512bit vectors for all types.
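  // e.g. UseAVX == 2 gives (1 << 2) * 8 = 32 bytes below; UseAVX == 3 gives 64 bytes.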
| int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; |
| // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. |
| if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) |
| size = (UseAVX > 2) ? 64 : 32; |
| if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) |
| size = (VM_Version::supports_avx512bw()) ? 64 : 32; |
| // Use flag to limit vector size. |
| size = MIN2(size,(int)MaxVectorSize); |
| // Minimum 2 values in vector (or 4 for bytes). |
| switch (bt) { |
| case T_DOUBLE: |
| case T_LONG: |
| if (size < 16) return 0; |
| break; |
| case T_FLOAT: |
| case T_INT: |
| if (size < 8) return 0; |
| break; |
| case T_BOOLEAN: |
| if (size < 4) return 0; |
| break; |
| case T_CHAR: |
| if (size < 4) return 0; |
| break; |
| case T_BYTE: |
| if (size < 4) return 0; |
| break; |
| case T_SHORT: |
| if (size < 4) return 0; |
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| return size; |
| } |
| |
| // Limits on vector size (number of elements) loaded into vector. |
| const int Matcher::max_vector_size(const BasicType bt) { |
| return vector_width_in_bytes(bt)/type2aelembytes(bt); |
| } |
| const int Matcher::min_vector_size(const BasicType bt) { |
| int max_size = max_vector_size(bt); |
| // Min size which can be loaded into vector is 4 bytes. |
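  // i.e. at least 4 elements for byte-sized types and 2 elements for wider types.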
| int size = (type2aelembytes(bt) == 1) ? 4 : 2; |
| return MIN2(size,max_size); |
| } |
| |
| const bool Matcher::supports_scalable_vector() { |
| return false; |
| } |
| |
| const int Matcher::scalable_vector_reg_size(const BasicType bt) { |
| return -1; |
| } |
| |
| // Vector ideal reg corresponding to specified size in bytes |
| const uint Matcher::vector_ideal_reg(int size) { |
| assert(MaxVectorSize >= size, ""); |
| switch(size) { |
| case 4: return Op_VecS; |
| case 8: return Op_VecD; |
| case 16: return Op_VecX; |
| case 32: return Op_VecY; |
| case 64: return Op_VecZ; |
| } |
| ShouldNotReachHere(); |
| return 0; |
| } |
| |
// x86 supports misaligned vector loads and stores.
| const bool Matcher::misaligned_vectors_ok() { |
| return true; |
| } |
| |
| |
| const bool Matcher::convi2l_type_required = true; |
| |
| // Check for shift by small constant as well |
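// (Shift amounts 0-3 correspond to the *1/*2/*4/*8 scale factors available in x86 addressing modes.)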
| static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { |
| if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && |
| shift->in(2)->get_int() <= 3 && |
| // Are there other uses besides address expressions? |
| !matcher->is_visited(shift)) { |
| address_visited.set(shift->_idx); // Flag as address_visited |
| mstack.push(shift->in(2), Matcher::Visit); |
| Node *conv = shift->in(1); |
| #ifdef _LP64 |
    // Allow the Matcher to match the rule which bypasses
    // the ConvI2L operation for an array index on LP64
    // if the index value is positive.
| if (conv->Opcode() == Op_ConvI2L && |
| conv->as_Type()->type()->is_long()->_lo >= 0 && |
| // Are there other uses besides address expressions? |
| !matcher->is_visited(conv)) { |
| address_visited.set(conv->_idx); // Flag as address_visited |
| mstack.push(conv->in(1), Matcher::Pre_Visit); |
| } else |
| #endif |
| mstack.push(conv, Matcher::Pre_Visit); |
| return true; |
| } |
| return false; |
| } |
| |
// This function identifies sub-graphs in which a 'load' node is
// an input to two different nodes, such that the sub-graph can be
// matched with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
| // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* |
| // refers to the same node. |
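// Likewise (a[i] - 1) & a[i] matches blsr and (a[i] - 1) ^ a[i] matches blsmsk.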
| // |
| // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) |
| // This is a temporary solution until we make DAGs expressible in ADL. |
| template<typename ConType> |
| class FusedPatternMatcher { |
| Node* _op1_node; |
| Node* _mop_node; |
| int _con_op; |
| |
| static int match_next(Node* n, int next_op, int next_op_idx) { |
| if (n->in(1) == NULL || n->in(2) == NULL) { |
| return -1; |
| } |
| |
| if (next_op_idx == -1) { // n is commutative, try rotations |
| if (n->in(1)->Opcode() == next_op) { |
| return 1; |
| } else if (n->in(2)->Opcode() == next_op) { |
| return 2; |
| } |
| } else { |
| assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); |
| if (n->in(next_op_idx)->Opcode() == next_op) { |
| return next_op_idx; |
| } |
| } |
| return -1; |
| } |
| |
| public: |
| FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : |
| _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } |
| |
| bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative |
| int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative |
| typename ConType::NativeType con_value) { |
| if (_op1_node->Opcode() != op1) { |
| return false; |
| } |
| if (_mop_node->outcnt() > 2) { |
| return false; |
| } |
| op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); |
| if (op1_op2_idx == -1) { |
| return false; |
| } |
| // Memory operation must be the other edge |
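    // ((idx & 1) + 1 maps edge index 1 -> 2 and 2 -> 1.)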
| int op1_mop_idx = (op1_op2_idx & 1) + 1; |
| |
| // Check that the mop node is really what we want |
| if (_op1_node->in(op1_mop_idx) == _mop_node) { |
| Node* op2_node = _op1_node->in(op1_op2_idx); |
| if (op2_node->outcnt() > 1) { |
| return false; |
| } |
| assert(op2_node->Opcode() == op2, "Should be"); |
| op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); |
| if (op2_con_idx == -1) { |
| return false; |
| } |
| // Memory operation must be the other edge |
| int op2_mop_idx = (op2_con_idx & 1) + 1; |
| // Check that the memory operation is the same node |
| if (op2_node->in(op2_mop_idx) == _mop_node) { |
| // Now check the constant |
| const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); |
| if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { |
| return true; |
| } |
| } |
| } |
| return false; |
| } |
| }; |
| |
| static bool is_bmi_pattern(Node* n, Node* m) { |
| assert(UseBMI1Instructions, "sanity"); |
| if (n != NULL && m != NULL) { |
| if (m->Opcode() == Op_LoadI) { |
| FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); |
| return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || |
| bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || |
| bmii.match(Op_XorI, -1, Op_AddI, -1, -1); |
| } else if (m->Opcode() == Op_LoadL) { |
| FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); |
| return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || |
| bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || |
| bmil.match(Op_XorL, -1, Op_AddL, -1, -1); |
| } |
| } |
| return false; |
| } |
| |
| // Should the matcher clone input 'm' of node 'n'? |
| bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { |
  // If 'n' and 'm' are part of a graph for a BMI instruction, clone the input 'm'.
| if (UseBMI1Instructions && is_bmi_pattern(n, m)) { |
| mstack.push(m, Visit); |
| return true; |
| } |
| return false; |
| } |
| |
| // Should the Matcher clone shifts on addressing modes, expecting them |
| // to be subsumed into complex addressing expressions or compute them |
| // into registers? |
| bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { |
| Node *off = m->in(AddPNode::Offset); |
| if (off->is_Con()) { |
| address_visited.test_set(m->_idx); // Flag as address_visited |
| Node *adr = m->in(AddPNode::Address); |
| |
    // Intel can handle 2 adds in an addressing mode.
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for a screwy base.
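    // e.g. a base + (idx << k) + disp subtree folds into a single [base + idx*scale + disp] operand.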
| if (adr->is_AddP() && |
| !adr->in(AddPNode::Base)->is_top() && |
| LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 |
| // Are there other uses besides address expressions? |
| !is_visited(adr)) { |
| address_visited.set(adr->_idx); // Flag as address_visited |
| Node *shift = adr->in(AddPNode::Offset); |
| if (!clone_shift(shift, this, mstack, address_visited)) { |
| mstack.push(shift, Pre_Visit); |
| } |
| mstack.push(adr->in(AddPNode::Address), Pre_Visit); |
| mstack.push(adr->in(AddPNode::Base), Pre_Visit); |
| } else { |
| mstack.push(adr, Pre_Visit); |
| } |
| |
| // Clone X+offset as it also folds into most addressing expressions |
| mstack.push(off, Visit); |
| mstack.push(m->in(AddPNode::Base), Pre_Visit); |
| return true; |
| } else if (clone_shift(off, this, mstack, address_visited)) { |
| address_visited.test_set(m->_idx); // Flag as address_visited |
| mstack.push(m->in(AddPNode::Address), Pre_Visit); |
| mstack.push(m->in(AddPNode::Base), Pre_Visit); |
| return true; |
| } |
| return false; |
| } |
| |
| void Compile::reshape_address(AddPNode* addp) { |
| } |
| |
| static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { |
| switch (bt) { |
| case BoolTest::eq: return Assembler::eq; |
| case BoolTest::ne: return Assembler::neq; |
| case BoolTest::le: return Assembler::le; |
| case BoolTest::ge: return Assembler::nlt; |
| case BoolTest::lt: return Assembler::lt; |
| case BoolTest::gt: return Assembler::nle; |
| default : ShouldNotReachHere(); return Assembler::_false; |
| } |
| } |
| |
| static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { |
| switch (bt) { |
| case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling |
| // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. |
| case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling |
| case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling |
| case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling |
| case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling |
| case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling |
| default: ShouldNotReachHere(); return Assembler::FALSE_OS; |
| } |
| } |
| |
| // Helper methods for MachSpillCopyNode::implementation(). |
| static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo, |
| int src_hi, int dst_hi, uint ireg, outputStream* st) { |
| assert(ireg == Op_VecS || // 32bit vector |
| (src_lo & 1) == 0 && (src_lo + 1) == src_hi && |
| (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, |
| "no non-adjacent vector moves" ); |
| if (cbuf) { |
| C2_MacroAssembler _masm(cbuf); |
| switch (ireg) { |
| case Op_VecS: // copy whole register |
| case Op_VecD: |
| case Op_VecX: |
| #ifndef _LP64 |
| __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); |
| #else |
| if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { |
| __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); |
| } else { |
| __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); |
| } |
| #endif |
| break; |
| case Op_VecY: |
| #ifndef _LP64 |
| __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); |
| #else |
| if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { |
| __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); |
| } else { |
| __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); |
| } |
| #endif |
| break; |
| case Op_VecZ: |
| __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); |
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| #ifndef PRODUCT |
| } else { |
| switch (ireg) { |
| case Op_VecS: |
| case Op_VecD: |
| case Op_VecX: |
| st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); |
| break; |
| case Op_VecY: |
| case Op_VecZ: |
| st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); |
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| #endif |
| } |
| } |
| |
| void vec_spill_helper(CodeBuffer *cbuf, bool is_load, |
| int stack_offset, int reg, uint ireg, outputStream* st) { |
| if (cbuf) { |
| C2_MacroAssembler _masm(cbuf); |
| if (is_load) { |
| switch (ireg) { |
| case Op_VecS: |
| __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); |
| break; |
| case Op_VecD: |
| __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); |
| break; |
| case Op_VecX: |
| #ifndef _LP64 |
| __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); |
| #else |
| if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { |
| __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); |
| } else { |
| __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); |
| __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); |
| } |
| #endif |
| break; |
| case Op_VecY: |
| #ifndef _LP64 |
| __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); |
| #else |
| if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { |
| __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); |
| } else { |
| __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); |
| __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); |
| } |
| #endif |
| break; |
| case Op_VecZ: |
| __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); |
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| } else { // store |
| switch (ireg) { |
| case Op_VecS: |
| __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); |
| break; |
| case Op_VecD: |
| __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); |
| break; |
| case Op_VecX: |
| #ifndef _LP64 |
| __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); |
| #else |
| if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { |
| __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); |
| } |
| else { |
| __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); |
| } |
| #endif |
| break; |
| case Op_VecY: |
| #ifndef _LP64 |
| __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); |
| #else |
| if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { |
| __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); |
| } |
| else { |
| __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); |
| } |
| #endif |
| break; |
| case Op_VecZ: |
| __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); |
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| } |
| #ifndef PRODUCT |
| } else { |
| if (is_load) { |
| switch (ireg) { |
| case Op_VecS: |
| st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); |
| break; |
| case Op_VecD: |
| st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); |
| break; |
| case Op_VecX: |
| st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); |
| break; |
| case Op_VecY: |
| case Op_VecZ: |
| st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); |
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| } else { // store |
| switch (ireg) { |
| case Op_VecS: |
| st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); |
| break; |
| case Op_VecD: |
| st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); |
| break; |
| case Op_VecX: |
| st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); |
| break; |
| case Op_VecY: |
| case Op_VecZ: |
| st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); |
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| } |
| #endif |
| } |
| } |
| |
| static inline jlong replicate8_imm(int con, int width) { |
| // Load a constant of "width" (in bytes) and replicate it to fill 64bit. |
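  // e.g. replicate8_imm(0x1, 2) yields 0x0001000100010001.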
| assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); |
| int bit_width = width * 8; |
| jlong val = con; |
| val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits |
| while(bit_width < 64) { |
| val |= (val << bit_width); |
| bit_width <<= 1; |
| } |
| return val; |
| } |
| |
| #ifndef PRODUCT |
| void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { |
| st->print("nop \t# %d bytes pad for loops and calls", _count); |
| } |
| #endif |
| |
| void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { |
| C2_MacroAssembler _masm(&cbuf); |
| __ nop(_count); |
| } |
| |
| uint MachNopNode::size(PhaseRegAlloc*) const { |
| return _count; |
| } |
| |
| #ifndef PRODUCT |
| void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { |
| st->print("# breakpoint"); |
| } |
| #endif |
| |
| void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { |
| C2_MacroAssembler _masm(&cbuf); |
| __ int3(); |
| } |
| |
| uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { |
| return MachNode::size(ra_); |
| } |
| |
| %} |
| |
| encode %{ |
| |
| enc_class call_epilog %{ |
| if (VerifyStackAtCalls) { |
| // Check that stack depth is unchanged: find majik cookie on stack |
| int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); |
| C2_MacroAssembler _masm(&cbuf); |
| Label L; |
| __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); |
| __ jccb(Assembler::equal, L); |
| // Die if stack mismatch |
| __ int3(); |
| __ bind(L); |
| } |
| %} |
| |
| %} |
| |
// Operands for bound floating point register arguments
| operand rxmm0() %{ |
| constraint(ALLOC_IN_RC(xmm0_reg)); |
| match(VecX); |
| format%{%} |
| interface(REG_INTER); |
| %} |
| |
| //----------OPERANDS----------------------------------------------------------- |
| // Operand definitions must precede instruction definitions for correct parsing |
| // in the ADLC because operands constitute user defined types which are used in |
| // instruction definitions. |
| |
| // Vectors |
| |
| // Dummy generic vector class. Should be used for all vector operands. |
| // Replaced with vec[SDXYZ] during post-selection pass. |
| operand vec() %{ |
| constraint(ALLOC_IN_RC(dynamic)); |
| match(VecX); |
| match(VecY); |
| match(VecZ); |
| match(VecS); |
| match(VecD); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Dummy generic legacy vector class. Should be used for all legacy vector operands. |
| // Replaced with legVec[SDXYZ] during post-selection cleanup. |
| // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) |
| // runtime code generation via reg_class_dynamic. |
| operand legVec() %{ |
| constraint(ALLOC_IN_RC(dynamic)); |
| match(VecX); |
| match(VecY); |
| match(VecZ); |
| match(VecS); |
| match(VecD); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Replaces vec during post-selection cleanup. See above. |
| operand vecS() %{ |
| constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); |
| match(VecS); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Replaces legVec during post-selection cleanup. See above. |
| operand legVecS() %{ |
| constraint(ALLOC_IN_RC(vectors_reg_legacy)); |
| match(VecS); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Replaces vec during post-selection cleanup. See above. |
| operand vecD() %{ |
| constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); |
| match(VecD); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Replaces legVec during post-selection cleanup. See above. |
| operand legVecD() %{ |
| constraint(ALLOC_IN_RC(vectord_reg_legacy)); |
| match(VecD); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Replaces vec during post-selection cleanup. See above. |
| operand vecX() %{ |
| constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); |
| match(VecX); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Replaces legVec during post-selection cleanup. See above. |
| operand legVecX() %{ |
| constraint(ALLOC_IN_RC(vectorx_reg_legacy)); |
| match(VecX); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Replaces vec during post-selection cleanup. See above. |
| operand vecY() %{ |
| constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); |
| match(VecY); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Replaces legVec during post-selection cleanup. See above. |
| operand legVecY() %{ |
| constraint(ALLOC_IN_RC(vectory_reg_legacy)); |
| match(VecY); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Replaces vec during post-selection cleanup. See above. |
| operand vecZ() %{ |
| constraint(ALLOC_IN_RC(vectorz_reg)); |
| match(VecZ); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Replaces legVec during post-selection cleanup. See above. |
| operand legVecZ() %{ |
| constraint(ALLOC_IN_RC(vectorz_reg_legacy)); |
| match(VecZ); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Comparison Code for FP conditional move |
| operand cmpOp_vcmppd() %{ |
| match(Bool); |
| |
| predicate(n->as_Bool()->_test._test != BoolTest::overflow && |
| n->as_Bool()->_test._test != BoolTest::no_overflow); |
| format %{ "" %} |
| interface(COND_INTER) %{ |
| equal (0x0, "eq"); |
| less (0x1, "lt"); |
| less_equal (0x2, "le"); |
| not_equal (0xC, "ne"); |
| greater_equal(0xD, "ge"); |
| greater (0xE, "gt"); |
    // TODO: adlc fails to compile without the next two lines, giving the error:
| // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ |
| // equal' for overflow. |
| overflow (0x20, "o"); // not really supported by the instruction |
| no_overflow (0x21, "no"); // not really supported by the instruction |
| %} |
| %} |
| |
| |
| // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) |
| |
| // ============================================================================ |
| |
| instruct ShouldNotReachHere() %{ |
| match(Halt); |
| format %{ "stop\t# ShouldNotReachHere" %} |
| ins_encode %{ |
| if (is_reachable()) { |
| __ stop(_halt_reason); |
| } |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| // =================================EVEX special=============================== |
| |
| instruct setMask(rRegI dst, rRegI src) %{ |
| predicate(Matcher::has_predicated_vectors()); |
| match(Set dst (SetVectMaskI src)); |
| effect(TEMP dst); |
| format %{ "setvectmask $dst, $src" %} |
| ins_encode %{ |
| __ setvectmask($dst$$Register, $src$$Register); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| // ============================================================================ |
| |
| instruct addF_reg(regF dst, regF src) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (AddF dst src)); |
| |
| format %{ "addss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ addss($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addF_mem(regF dst, memory src) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (AddF dst (LoadF src))); |
| |
| format %{ "addss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ addss($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addF_imm(regF dst, immF con) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (AddF dst con)); |
| format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ addss($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddF src1 src2)); |
| |
| format %{ "vaddss $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddF src1 (LoadF src2))); |
| |
| format %{ "vaddss $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addF_reg_imm(regF dst, regF src, immF con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddF src con)); |
| |
| format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addD_reg(regD dst, regD src) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (AddD dst src)); |
| |
| format %{ "addsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ addsd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addD_mem(regD dst, memory src) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (AddD dst (LoadD src))); |
| |
| format %{ "addsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ addsd($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addD_imm(regD dst, immD con) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (AddD dst con)); |
| format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ addsd($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddD src1 src2)); |
| |
| format %{ "vaddsd $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddD src1 (LoadD src2))); |
| |
| format %{ "vaddsd $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addD_reg_imm(regD dst, regD src, immD con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddD src con)); |
| |
| format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subF_reg(regF dst, regF src) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (SubF dst src)); |
| |
| format %{ "subss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ subss($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subF_mem(regF dst, memory src) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (SubF dst (LoadF src))); |
| |
| format %{ "subss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ subss($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subF_imm(regF dst, immF con) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (SubF dst con)); |
| format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ subss($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubF src1 src2)); |
| |
| format %{ "vsubss $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubF src1 (LoadF src2))); |
| |
| format %{ "vsubss $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subF_reg_imm(regF dst, regF src, immF con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubF src con)); |
| |
| format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subD_reg(regD dst, regD src) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (SubD dst src)); |
| |
| format %{ "subsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ subsd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subD_mem(regD dst, memory src) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (SubD dst (LoadD src))); |
| |
| format %{ "subsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ subsd($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subD_imm(regD dst, immD con) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (SubD dst con)); |
| format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ subsd($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubD src1 src2)); |
| |
| format %{ "vsubsd $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubD src1 (LoadD src2))); |
| |
| format %{ "vsubsd $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subD_reg_imm(regD dst, regD src, immD con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubD src con)); |
| |
| format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulF_reg(regF dst, regF src) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (MulF dst src)); |
| |
| format %{ "mulss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ mulss($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulF_mem(regF dst, memory src) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (MulF dst (LoadF src))); |
| |
| format %{ "mulss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ mulss($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulF_imm(regF dst, immF con) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (MulF dst con)); |
| format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ mulss($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulF src1 src2)); |
| |
| format %{ "vmulss $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulF src1 (LoadF src2))); |
| |
| format %{ "vmulss $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulF_reg_imm(regF dst, regF src, immF con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulF src con)); |
| |
| format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulD_reg(regD dst, regD src) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (MulD dst src)); |
| |
| format %{ "mulsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ mulsd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulD_mem(regD dst, memory src) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (MulD dst (LoadD src))); |
| |
| format %{ "mulsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ mulsd($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulD_imm(regD dst, immD con) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (MulD dst con)); |
| format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ mulsd($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulD src1 src2)); |
| |
| format %{ "vmulsd $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulD src1 (LoadD src2))); |
| |
| format %{ "vmulsd $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulD_reg_imm(regD dst, regD src, immD con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulD src con)); |
| |
| format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divF_reg(regF dst, regF src) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (DivF dst src)); |
| |
| format %{ "divss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ divss($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divF_mem(regF dst, memory src) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (DivF dst (LoadF src))); |
| |
| format %{ "divss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ divss($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divF_imm(regF dst, immF con) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (DivF dst con)); |
| format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ divss($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (DivF src1 src2)); |
| |
| format %{ "vdivss $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (DivF src1 (LoadF src2))); |
| |
| format %{ "vdivss $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divF_reg_imm(regF dst, regF src, immF con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (DivF src con)); |
| |
| format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divD_reg(regD dst, regD src) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (DivD dst src)); |
| |
| format %{ "divsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ divsd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divD_mem(regD dst, memory src) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (DivD dst (LoadD src))); |
| |
| format %{ "divsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ divsd($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divD_imm(regD dst, immD con) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (DivD dst con)); |
| format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ divsd($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (DivD src1 src2)); |
| |
| format %{ "vdivsd $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (DivD src1 (LoadD src2))); |
| |
| format %{ "vdivsd $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divD_reg_imm(regD dst, regD src, immD con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (DivD src con)); |
| |
| format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct absF_reg(regF dst) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (AbsF dst)); |
| ins_cost(150); |
| format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} |
| ins_encode %{ |
| __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AbsF src)); |
| ins_cost(150); |
| format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} |
| ins_encode %{ |
| int vlen_enc = Assembler::AVX_128bit; |
| __ vandps($dst$$XMMRegister, $src$$XMMRegister, |
| ExternalAddress(float_signmask()), vlen_enc); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct absD_reg(regD dst) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (AbsD dst)); |
| ins_cost(150); |
| format %{ "andpd $dst, [0x7fffffffffffffff]\t" |
| "# abs double by sign masking" %} |
| ins_encode %{ |
| __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AbsD src)); |
| ins_cost(150); |
| format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" |
| "# abs double by sign masking" %} |
| ins_encode %{ |
| int vlen_enc = Assembler::AVX_128bit; |
| __ vandpd($dst$$XMMRegister, $src$$XMMRegister, |
| ExternalAddress(double_signmask()), vlen_enc); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct negF_reg(regF dst) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (NegF dst)); |
| ins_cost(150); |
| format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} |
| ins_encode %{ |
| __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (NegF src)); |
| ins_cost(150); |
| format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} |
| ins_encode %{ |
| __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, |
| ExternalAddress(float_signflip())); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct negD_reg(regD dst) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (NegD dst)); |
| ins_cost(150); |
| format %{ "xorpd $dst, [0x8000000000000000]\t" |
| "# neg double by sign flipping" %} |
| ins_encode %{ |
| __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (NegD src)); |
| ins_cost(150); |
| format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" |
| "# neg double by sign flipping" %} |
| ins_encode %{ |
| __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, |
| ExternalAddress(double_signflip())); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| // The sqrtss instruction needs its destination register to be pre-initialized for best performance. |
| // Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below. |
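| // (sqrtss/sqrtsd only write the low element and leave the upper bits of dst unchanged, so a dst that |
| // does not already hold the input would pick up a false dependency on its previous contents.) |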
| instruct sqrtF_reg(regF dst) %{ |
| predicate(UseSSE>=1); |
| match(Set dst (SqrtF dst)); |
| format %{ "sqrtss $dst, $dst" %} |
| ins_encode %{ |
| __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| // The sqrtsd instruction needs its destination register to be pre-initialized for best performance. |
| // Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below. |
| instruct sqrtD_reg(regD dst) %{ |
| predicate(UseSSE>=2); |
| match(Set dst (SqrtD dst)); |
| format %{ "sqrtsd $dst, $dst" %} |
| ins_encode %{ |
| __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| // ---------------------------------------- VectorReinterpret ------------------------------------ |
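| // A reinterpret between equal-sized vectors is a no-op. When the destination is wider than the source, |
| // the rules below zero the upper bytes by masking with a 32- or 64-bit mask from the constant area |
| // (pand/vpand); when it is narrower, only the low bytes of the source are copied. |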
| |
| instruct reinterpret(vec dst) %{ |
| predicate(vector_length_in_bytes(n) == vector_length_in_bytes(n->in(1))); // dst == src |
| match(Set dst (VectorReinterpret dst)); |
| ins_cost(125); |
| format %{ "vector_reinterpret $dst\t!" %} |
| ins_encode %{ |
| // empty |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct reinterpret_expand(vec dst, vec src, rRegP scratch) %{ |
| predicate(UseAVX == 0 && |
| (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst |
| match(Set dst (VectorReinterpret src)); |
| ins_cost(125); |
| effect(TEMP dst, TEMP scratch); |
| format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %} |
| ins_encode %{ |
| assert(vector_length_in_bytes(this) <= 16, "required"); |
| assert(vector_length_in_bytes(this, $src) <= 8, "required"); |
| |
| int src_vlen_in_bytes = vector_length_in_bytes(this, $src); |
| if (src_vlen_in_bytes == 4) { |
| __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register); |
| } else { |
| assert(src_vlen_in_bytes == 8, ""); |
| __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), $scratch$$Register); |
| } |
| __ pand($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vreinterpret_expand4(legVec dst, vec src, rRegP scratch) %{ |
| predicate(UseAVX > 0 && |
| (vector_length_in_bytes(n->in(1)) == 4) && // src |
| (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst |
| match(Set dst (VectorReinterpret src)); |
| ins_cost(125); |
| effect(TEMP scratch); |
| format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %} |
| ins_encode %{ |
| __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, $scratch$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| |
| instruct vreinterpret_expand(legVec dst, vec src) %{ |
| predicate(UseAVX > 0 && |
| (vector_length_in_bytes(n->in(1)) > 4) && // src |
| (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst |
| match(Set dst (VectorReinterpret src)); |
| ins_cost(125); |
| format %{ "vector_reinterpret_expand $dst,$src\t!" %} |
| ins_encode %{ |
| switch (vector_length_in_bytes(this, $src)) { |
| case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; |
| case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; |
| case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; |
| default: ShouldNotReachHere(); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct reinterpret_shrink(vec dst, legVec src) %{ |
| predicate(vector_length_in_bytes(n->in(1)) > vector_length_in_bytes(n)); // src > dst |
| match(Set dst (VectorReinterpret src)); |
| ins_cost(125); |
| format %{ "vector_reinterpret_shrink $dst,$src\t!" %} |
| ins_encode %{ |
| switch (vector_length_in_bytes(this)) { |
| case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; |
| case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; |
| case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; |
| case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; |
| default: ShouldNotReachHere(); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ---------------------------------------------------------------------------------------------------- |
| |
| #ifdef _LP64 |
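| // Scalar and vector rounding: $rmode is the rounding-control immediate passed straight through to |
| // roundsd/vroundpd/vrndscalepd (it selects round-to-nearest, floor, ceil, etc. for the Math.rint, |
| // Math.floor and Math.ceil intrinsics). |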
| instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ |
| match(Set dst (RoundDoubleMode src rmode)); |
| format %{ "roundsd $dst,$src" %} |
| ins_cost(150); |
| ins_encode %{ |
| assert(UseSSE >= 4, "required"); |
| __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{ |
| match(Set dst (RoundDoubleMode (LoadD src) rmode)); |
| format %{ "roundsd $dst,$src" %} |
| ins_cost(150); |
| ins_encode %{ |
| assert(UseSSE >= 4, "required"); |
| __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{ |
| match(Set dst (RoundDoubleMode con rmode)); |
| effect(TEMP scratch_reg); |
| format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| assert(UseSSE >= 4, "required"); |
| __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, $scratch_reg$$Register); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ |
| predicate(vector_length(n) < 8); |
| match(Set dst (RoundDoubleModeV src rmode)); |
| format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "required"); |
| int vlen_enc = vector_length_encoding(this); |
| __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ |
| predicate(vector_length(n) == 8); |
| match(Set dst (RoundDoubleModeV src rmode)); |
| format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} |
| ins_encode %{ |
| assert(UseAVX > 2, "required"); |
| __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ |
| predicate(vector_length(n) < 8); |
| match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); |
| format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "required"); |
| int vlen_enc = vector_length_encoding(this); |
| __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ |
| predicate(vector_length(n) == 8); |
| match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); |
| format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} |
| ins_encode %{ |
| assert(UseAVX > 2, "required"); |
| __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| #endif // _LP64 |
| |
| instruct onspinwait() %{ |
| match(OnSpinWait); |
| ins_cost(200); |
| |
| format %{ |
| $$template |
| $$emit$$"pause\t! membar_onspinwait" |
| %} |
| ins_encode %{ |
| __ pause(); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| // a * b + c |
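| // UseFMA guards these rules; the fmad/fmaf macro-assembler helpers emit a fused multiply-add, so |
| // $c = $a * $b + $c is computed with a single rounding step. |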
| instruct fmaD_reg(regD a, regD b, regD c) %{ |
| predicate(UseFMA); |
| match(Set c (FmaD c (Binary a b))); |
| format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // a * b + c |
| instruct fmaF_reg(regF a, regF b, regF c) %{ |
| predicate(UseFMA); |
| match(Set c (FmaF c (Binary a b))); |
| format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ====================VECTOR INSTRUCTIONS===================================== |
| |
| // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. |
| instruct MoveVec2Leg(legVec dst, vec src) %{ |
| match(Set dst src); |
| format %{ "" %} |
| ins_encode %{ |
| ShouldNotReachHere(); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct MoveLeg2Vec(vec dst, legVec src) %{ |
| match(Set dst src); |
| format %{ "" %} |
| ins_encode %{ |
| ShouldNotReachHere(); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // ============================================================================ |
| |
| // Load vector: generic operand pattern, dispatched on the vector size in bytes. |
| instruct loadV(vec dst, memory mem) %{ |
| match(Set dst (LoadVector mem)); |
| ins_cost(125); |
| format %{ "load_vector $dst,$mem" %} |
| ins_encode %{ |
| switch (vector_length_in_bytes(this)) { |
| case 4: __ movdl ($dst$$XMMRegister, $mem$$Address); break; |
| case 8: __ movq ($dst$$XMMRegister, $mem$$Address); break; |
| case 16: __ movdqu ($dst$$XMMRegister, $mem$$Address); break; |
| case 32: __ vmovdqu ($dst$$XMMRegister, $mem$$Address); break; |
| case 64: __ evmovdqul($dst$$XMMRegister, $mem$$Address, Assembler::AVX_512bit); break; |
| default: ShouldNotReachHere(); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Store vector: generic operand pattern, dispatched on the vector size in bytes. |
| instruct storeV(memory mem, vec src) %{ |
| match(Set mem (StoreVector mem src)); |
| ins_cost(145); |
| format %{ "store_vector $mem,$src\n\t" %} |
| ins_encode %{ |
| switch (vector_length_in_bytes(this, $src)) { |
| case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; |
| case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; |
| case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; |
| case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; |
| case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; |
| default: ShouldNotReachHere(); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ---------------------------------------- Gather ------------------------------------ |
| |
| // Gather INT, LONG, FLOAT, DOUBLE |
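| // The AVX2 rule (vectors up to 32 bytes) needs an all-ones vector mask, which the gather instruction |
| // also clobbers, while the 64-byte rule loads an all-ones AVX-512 opmask into k2. Both first materialize |
| // the base address in a temporary GPR. |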
| |
| instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ |
| predicate(vector_length_in_bytes(n) <= 32); |
| match(Set dst (LoadVectorGather mem idx)); |
| effect(TEMP dst, TEMP tmp, TEMP mask); |
| format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX >= 2, "sanity"); |
| |
| int vlen_enc = vector_length_encoding(this); |
| BasicType elem_bt = vector_element_basic_type(this); |
| |
| assert(vector_length_in_bytes(this) >= 16, "sanity"); |
| assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE |
| |
| if (vlen_enc == Assembler::AVX_128bit) { |
| __ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set())); |
| } else { |
| __ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set())); |
| } |
| __ lea($tmp$$Register, $mem$$Address); |
| __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct evgather(vec dst, memory mem, vec idx, rRegP tmp) %{ |
| predicate(vector_length_in_bytes(n) == 64); |
| match(Set dst (LoadVectorGather mem idx)); |
| effect(TEMP dst, TEMP tmp); |
| format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and k2 as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 2, "sanity"); |
| |
| int vlen_enc = vector_length_encoding(this); |
| BasicType elem_bt = vector_element_basic_type(this); |
| |
| assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE |
| |
| KRegister ktmp = k2; |
| __ kmovwl(k2, ExternalAddress(vector_all_bits_set()), $tmp$$Register); |
| __ lea($tmp$$Register, $mem$$Address); |
| __ evgather(elem_bt, $dst$$XMMRegister, ktmp, $tmp$$Register, $idx$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ====================Scatter======================================= |
| |
| // Scatter INT, LONG, FLOAT, DOUBLE |
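| // Scatter is AVX-512 only (evscatter); like the 64-byte gather it uses an all-ones opmask in k2 and a |
| // temporary GPR holding the base address. |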
| |
| instruct scatter(memory mem, vec src, vec idx, rRegP tmp) %{ |
| match(Set mem (StoreVectorScatter mem (Binary src idx))); |
| effect(TEMP tmp); |
| format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 2, "sanity"); |
| |
| int vlen_enc = vector_length_encoding(this, $src); |
| BasicType elem_bt = vector_element_basic_type(this, $src); |
| |
| assert(vector_length_in_bytes(this, $src) >= 16, "sanity"); |
| assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE |
| |
| KRegister ktmp = k2; |
| __ kmovwl(k2, ExternalAddress(vector_all_bits_set()), $tmp$$Register); |
| __ lea($tmp$$Register, $mem$$Address); |
| __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, ktmp, $src$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ====================REPLICATE======================================= |
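| // These rules broadcast a scalar (register, memory or immediate) into every element of a vector. Where |
| // available they use the EVEX GPR broadcasts (evpbroadcast*) or AVX2 vpbroadcast*, and otherwise fall |
| // back to movd/movq plus shuffle/unpack sequences. Zero and -1 have dedicated rules (pxor/vpxor and |
| // vallones) that avoid a constant-table load. |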
| |
| // Replicate byte scalar to be vector |
| instruct ReplB_reg(vec dst, rRegI src) %{ |
| match(Set dst (ReplicateB src)); |
| format %{ "replicateB $dst,$src" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands |
| assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW |
| int vlen_enc = vector_length_encoding(this); |
| __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); |
| } else if (VM_Version::supports_avx2()) { |
| int vlen_enc = vector_length_encoding(this); |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| } else { |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); |
| __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| if (vlen >= 16) { |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| if (vlen >= 32) { |
| assert(vlen == 32, "sanity"); |
| __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); |
| } |
| } |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct ReplB_mem(vec dst, memory mem) %{ |
| predicate(VM_Version::supports_avx2()); |
| match(Set dst (ReplicateB (LoadB mem))); |
| format %{ "replicateB $dst,$mem" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct ReplB_imm(vec dst, immI con) %{ |
| match(Set dst (ReplicateB con)); |
| format %{ "replicateB $dst,$con" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 1)); |
| if (vlen == 4) { |
| __ movdl($dst$$XMMRegister, const_addr); |
| } else { |
| __ movq($dst$$XMMRegister, const_addr); |
| if (vlen >= 16) { |
| if (VM_Version::supports_avx2()) { |
| int vlen_enc = vector_length_encoding(this); |
| __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| } else { |
| assert(vlen == 16, "sanity"); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| } |
| } |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Replicate byte scalar zero to be vector |
| instruct ReplB_zero(vec dst, immI_0 zero) %{ |
| match(Set dst (ReplicateB zero)); |
| format %{ "replicateB $dst,$zero" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen <= 16) { |
| __ pxor($dst$$XMMRegister, $dst$$XMMRegister); |
| } else { |
| // Use vpxor since AVX512F does not have 512bit vxorpd (requires AVX512DQ). |
| int vlen_enc = vector_length_encoding(this); |
| __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| } |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // ====================ReplicateS======================================= |
| |
| instruct ReplS_reg(vec dst, rRegI src) %{ |
| match(Set dst (ReplicateS src)); |
| format %{ "replicateS $dst,$src" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands |
| assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW |
| int vlen_enc = vector_length_encoding(this); |
| __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); |
| } else if (VM_Version::supports_avx2()) { |
| int vlen_enc = vector_length_encoding(this); |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| } else { |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| if (vlen >= 8) { |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| if (vlen >= 16) { |
| assert(vlen == 16, "sanity"); |
| __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); |
| } |
| } |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct ReplS_mem(vec dst, memory mem) %{ |
| predicate(VM_Version::supports_avx2()); |
| match(Set dst (ReplicateS (LoadS mem))); |
| format %{ "replicateS $dst,$mem" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct ReplS_imm(vec dst, immI con) %{ |
| match(Set dst (ReplicateS con)); |
| format %{ "replicateS $dst,$con" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 2)); |
| if (vlen == 2) { |
| __ movdl($dst$$XMMRegister, const_addr); |
| } else { |
| __ movq($dst$$XMMRegister, const_addr); |
| if (vlen >= 8) { |
| if (VM_Version::supports_avx2()) { |
| int vlen_enc = vector_length_encoding(this); |
| __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| } else { |
| assert(vlen == 8, "sanity"); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| } |
| } |
| } |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct ReplS_zero(vec dst, immI_0 zero) %{ |
| match(Set dst (ReplicateS zero)); |
| format %{ "replicateS $dst,$zero" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen <= 8) { |
| __ pxor($dst$$XMMRegister, $dst$$XMMRegister); |
| } else { |
| int vlen_enc = vector_length_encoding(this); |
| __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| } |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // ====================ReplicateI======================================= |
| |
| instruct ReplI_reg(vec dst, rRegI src) %{ |
| match(Set dst (ReplicateI src)); |
| format %{ "replicateI $dst,$src" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands |
| int vlen_enc = vector_length_encoding(this); |
| __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); |
| } else if (VM_Version::supports_avx2()) { |
| int vlen_enc = vector_length_encoding(this); |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| } else { |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| if (vlen >= 8) { |
| assert(vlen == 8, "sanity"); |
| __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); |
| } |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct ReplI_mem(vec dst, memory mem) %{ |
| match(Set dst (ReplicateI (LoadI mem))); |
| format %{ "replicateI $dst,$mem" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen <= 4) { |
| __ movdl($dst$$XMMRegister, $mem$$Address); |
| __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| } else { |
| assert(VM_Version::supports_avx2(), "sanity"); |
| int vlen_enc = vector_length_encoding(this); |
| __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct ReplI_imm(vec dst, immI con) %{ |
| match(Set dst (ReplicateI con)); |
| format %{ "replicateI $dst,$con" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 4)); |
| if (vlen <= 4) { |
| __ movq($dst$$XMMRegister, const_addr); |
| if (vlen == 4) { |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| } |
| } else { |
| assert(VM_Version::supports_avx2(), "sanity"); |
| int vlen_enc = vector_length_encoding(this); |
| __ movq($dst$$XMMRegister, const_addr); |
| __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Replicate integer (4 byte) scalar zero to be vector |
| instruct ReplI_zero(vec dst, immI_0 zero) %{ |
| match(Set dst (ReplicateI zero)); |
| format %{ "replicateI $dst,$zero" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen <= 4) { |
| __ pxor($dst$$XMMRegister, $dst$$XMMRegister); |
| } else { |
| int vlen_enc = vector_length_encoding(this); |
| __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| } |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
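| // Replicate the immediate -1 (all bits set): vallones materializes an all-ones vector directly instead |
| // of loading a constant. |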
| instruct ReplI_M1(vec dst, immI_M1 con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (ReplicateB con)); |
| match(Set dst (ReplicateS con)); |
| match(Set dst (ReplicateI con)); |
| effect(TEMP dst); |
| format %{ "vallones $dst" %} |
| ins_encode %{ |
| int vector_len = vector_length_encoding(this); |
| __ vallones($dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ====================ReplicateL======================================= |
| |
| #ifdef _LP64 |
| // Replicate long (8 byte) scalar to be vector |
| instruct ReplL_reg(vec dst, rRegL src) %{ |
| match(Set dst (ReplicateL src)); |
| format %{ "replicateL $dst,$src" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen == 2) { |
| __ movdq($dst$$XMMRegister, $src$$Register); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| } else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands |
| int vlen_enc = vector_length_encoding(this); |
| __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); |
| } else if (VM_Version::supports_avx2()) { |
| assert(vlen == 4, "sanity"); |
| int vlen_enc = vector_length_encoding(this); |
| __ movdq($dst$$XMMRegister, $src$$Register); |
| __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| } else { |
| assert(vlen == 4, "sanity"); |
| __ movdq($dst$$XMMRegister, $src$$Register); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| #else // _LP64 |
| // Replicate long (8 byte) scalar to be vector |
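| // On 32-bit x86 a long occupies a register pair, so the low and high halves (HIGH_FROM_LOW) are moved |
| // into the XMM register separately and merged with punpckldq before being broadcast. |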
| instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ |
| predicate(vector_length(n) <= 4); |
| match(Set dst (ReplicateL src)); |
| effect(TEMP dst, USE src, TEMP tmp); |
| format %{ "replicateL $dst,$src" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen == 2) { |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); |
| __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands |
| int vlen_enc = Assembler::AVX_256bit; |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); |
| __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); |
| __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| } else { |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); |
| __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ |
| predicate(vector_length(n) == 8); |
| match(Set dst (ReplicateL src)); |
| effect(TEMP dst, USE src, TEMP tmp); |
| format %{ "replicateL $dst,$src" %} |
| ins_encode %{ |
| if (VM_Version::supports_avx512vl()) { |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); |
| __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); |
| __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); |
| } else { |
| int vlen_enc = Assembler::AVX_512bit; |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); |
| __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); |
| __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| #endif // _LP64 |
| |
| instruct ReplL_mem(vec dst, memory mem) %{ |
| match(Set dst (ReplicateL (LoadL mem))); |
| format %{ "replicateL $dst,$mem" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen == 2) { |
| __ movq($dst$$XMMRegister, $mem$$Address); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| } else { |
| assert(VM_Version::supports_avx2(), "sanity"); |
| int vlen_enc = vector_length_encoding(this); |
| __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Replicate long (8 byte) scalar immediate to be vector by loading from const table. |
| instruct ReplL_imm(vec dst, immL con) %{ |
| match(Set dst (ReplicateL con)); |
| format %{ "replicateL $dst,$con" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| InternalAddress const_addr = $constantaddress($con); |
| if (vlen == 2) { |
| __ movq($dst$$XMMRegister, const_addr); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| } else { |
| assert(VM_Version::supports_avx2(), "sanity"); |
| int vlen_enc = vector_length_encoding(this); |
| __ movq($dst$$XMMRegister, const_addr); |
| __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct ReplL_zero(vec dst, immL0 zero) %{ |
| match(Set dst (ReplicateL zero)); |
| format %{ "replicateL $dst,$zero" %} |
| ins_encode %{ |
| int vlen = vector_length(this); |
| if (vlen == 2) { |
| __ pxor($dst$$XMMRegister, $dst$$XMMRegister); |
| } else { |
| int vlen_enc = vector_length_encoding(this); |
| __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| } |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct ReplL_M1(vec dst, immL_M1 con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (ReplicateL con)); |
| effect(TEMP dst); |
| format %{ "vallones $dst" %} |
| ins_encode %{ |
| int vector_len = vector_length_encoding(this); |
| __ vallones($dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ====================ReplicateF======================================= |
| |
| instruct ReplF_reg(vec dst, vlRegF src) %{ |
| match(Set dst (ReplicateF src)); |
| format %{ "replicateF $dst,$src" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen <= 4) { |
| __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); |
| } else if (VM_Version::supports_avx2()) { |
| int vlen_enc = vector_length_encoding(this); |
| __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 |
| } else { |
| assert(vlen == 8, "sanity"); |
| __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); |
| __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct ReplF_mem(vec dst, memory mem) %{ |
| match(Set dst (ReplicateF (LoadF mem))); |
| format %{ "replicateF $dst,$mem" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen <= 4) { |
| __ movdl($dst$$XMMRegister, $mem$$Address); |
| __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| } else { |
| assert(VM_Version::supports_avx(), "sanity"); |
| int vlen_enc = vector_length_encoding(this); |
| __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct ReplF_zero(vec dst, immF0 zero) %{ |
| match(Set dst (ReplicateF zero)); |
| format %{ "replicateF $dst,$zero" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen <= 4) { |
| __ xorps($dst$$XMMRegister, $dst$$XMMRegister); |
| } else { |
| int vlen_enc = vector_length_encoding(this); |
| __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ |
| } |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // ====================ReplicateD======================================= |
| |
| // Replicate double (8 bytes) scalar to be vector |
| instruct ReplD_reg(vec dst, vlRegD src) %{ |
| match(Set dst (ReplicateD src)); |
| format %{ "replicateD $dst,$src" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen == 2) { |
| __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); |
| } else if (VM_Version::supports_avx2()) { |
| int vlen_enc = vector_length_encoding(this); |
| __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 |
| } else { |
| assert(vlen == 4, "sanity"); |
| __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); |
| __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct ReplD_mem(vec dst, memory mem) %{ |
| match(Set dst (ReplicateD (LoadD mem))); |
| format %{ "replicateD $dst,$mem" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen == 2) { |
| __ movq($dst$$XMMRegister, $mem$$Address); |
| __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44); |
| } else { |
| assert(VM_Version::supports_avx(), "sanity"); |
| int vlen_enc = vector_length_encoding(this); |
| __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct ReplD_zero(vec dst, immD0 zero) %{ |
| match(Set dst (ReplicateD zero)); |
| format %{ "replicateD $dst,$zero" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen == 2) { |
| __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); |
| } else { |
| int vlen_enc = vector_length_encoding(this); |
| __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ |
| } |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // ====================VECTOR INSERT======================================= |
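| // Insert a scalar into one element of a vector. For vectors wider than 128 bits the rules extract the |
| // 128-bit lane holding the element (y_idx), insert into that lane with pinsr*/insertps (x_idx), and |
| // write the lane back into the destination. |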
| |
| instruct insert(vec dst, rRegI val, immU8 idx) %{ |
| predicate(vector_length_in_bytes(n) < 32); |
| match(Set dst (VectorInsert (Binary dst val) idx)); |
| format %{ "vector_insert $dst,$val,$idx" %} |
| ins_encode %{ |
| assert(UseSSE >= 4, "required"); |
| assert(vector_length_in_bytes(this) >= 8, "required"); |
| |
| BasicType elem_bt = vector_element_basic_type(this); |
| |
| assert(is_integral_type(elem_bt), ""); |
| assert($idx$$constant < (int)vector_length(this), "out of bounds"); |
| |
| __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ |
| predicate(vector_length_in_bytes(n) == 32); |
| match(Set dst (VectorInsert (Binary src val) idx)); |
| effect(TEMP vtmp); |
| format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} |
| ins_encode %{ |
| int vlen_enc = Assembler::AVX_256bit; |
| BasicType elem_bt = vector_element_basic_type(this); |
| int elem_per_lane = 16/type2aelembytes(elem_bt); |
| int log2epr = log2(elem_per_lane); |
| |
| assert(is_integral_type(elem_bt), "sanity"); |
| assert($idx$$constant < (int)vector_length(this), "out of bounds"); |
| |
| uint x_idx = $idx$$constant & right_n_bits(log2epr); |
| uint y_idx = ($idx$$constant >> log2epr) & 1; |
| __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); |
| __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); |
| __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ |
| predicate(vector_length_in_bytes(n) == 64); |
| match(Set dst (VectorInsert (Binary src val) idx)); |
| effect(TEMP vtmp); |
| format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 2, "sanity"); |
| |
| BasicType elem_bt = vector_element_basic_type(this); |
| int elem_per_lane = 16/type2aelembytes(elem_bt); |
| int log2epr = log2(elem_per_lane); |
| |
| assert(is_integral_type(elem_bt), ""); |
| assert($idx$$constant < (int)vector_length(this), "out of bounds"); |
| |
| uint x_idx = $idx$$constant & right_n_bits(log2epr); |
| uint y_idx = ($idx$$constant >> log2epr) & 3; |
| __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); |
| __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); |
| __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| #ifdef _LP64 |
| instruct insert2L(vec dst, rRegL val, immU8 idx) %{ |
| predicate(vector_length(n) == 2); |
| match(Set dst (VectorInsert (Binary dst val) idx)); |
| format %{ "vector_insert $dst,$val,$idx" %} |
| ins_encode %{ |
| assert(UseSSE >= 4, "required"); |
| assert(vector_element_basic_type(this) == T_LONG, ""); |
| assert($idx$$constant < (int)vector_length(this), "out of bounds"); |
| |
| __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ |
| predicate(vector_length(n) == 4); |
| match(Set dst (VectorInsert (Binary src val) idx)); |
| effect(TEMP vtmp); |
| format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} |
| ins_encode %{ |
| assert(vector_element_basic_type(this) == T_LONG, ""); |
| assert($idx$$constant < (int)vector_length(this), "out of bounds"); |
| |
| uint x_idx = $idx$$constant & right_n_bits(1); |
| uint y_idx = ($idx$$constant >> 1) & 1; |
| int vlen_enc = Assembler::AVX_256bit; |
| __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); |
| __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); |
| __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ |
| predicate(vector_length(n) == 8); |
| match(Set dst (VectorInsert (Binary src val) idx)); |
| effect(TEMP vtmp); |
| format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} |
| ins_encode %{ |
| assert(vector_element_basic_type(this) == T_LONG, "sanity"); |
| assert($idx$$constant < (int)vector_length(this), "out of bounds"); |
| |
| uint x_idx = $idx$$constant & right_n_bits(1); |
| uint y_idx = ($idx$$constant >> 1) & 3; |
| __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); |
| __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); |
| __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| #endif |
| |
| instruct insertF(vec dst, regF val, immU8 idx) %{ |
| predicate(vector_length(n) < 8); |
| match(Set dst (VectorInsert (Binary dst val) idx)); |
| format %{ "vector_insert $dst,$val,$idx" %} |
| ins_encode %{ |
| assert(UseSSE >= 4, "sanity"); |
| |
| assert(vector_element_basic_type(this) == T_FLOAT, "sanity"); |
| assert($idx$$constant < (int)vector_length(this), "out of bounds"); |
| |
| __ insertps($dst$$XMMRegister, $val$$XMMRegister, $idx$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ |
| predicate(vector_length(n) >= 8); |
| match(Set dst (VectorInsert (Binary src val) idx)); |
| effect(TEMP vtmp); |
| format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} |
| ins_encode %{ |
| assert(vector_element_basic_type(this) == T_FLOAT, "sanity"); |
| assert($idx$$constant < (int)vector_length(this), "out of bounds"); |
| |
| int vlen = vector_length(this); |
| uint x_idx = $idx$$constant & right_n_bits(2); |
| if (vlen == 8) { |
| uint y_idx = ($idx$$constant >> 2) & 1; |
| int vlen_enc = Assembler::AVX_256bit; |
| __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); |
| __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx); |
| __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); |
| } else { |
| assert(vlen == 16, "sanity"); |
| uint y_idx = ($idx$$constant >> 2) & 3; |
| __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); |
| __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx); |
| __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| #ifdef _LP64 |
| instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ |
| predicate(vector_length(n) == 2); |
| match(Set dst (VectorInsert (Binary dst val) idx)); |
| effect(TEMP tmp); |
| format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} |
| ins_encode %{ |
| assert(UseSSE >= 4, "sanity"); |
| assert(vector_element_basic_type(this) == T_DOUBLE, "sanity"); |
| assert($idx$$constant < (int)vector_length(this), "out of bounds"); |
| |
| __ movq($tmp$$Register, $val$$XMMRegister); |
| __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ |
| predicate(vector_length(n) == 4); |
| match(Set dst (VectorInsert (Binary src val) idx)); |
| effect(TEMP vtmp, TEMP tmp); |
| format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} |
| ins_encode %{ |
| assert(vector_element_basic_type(this) == T_DOUBLE, "sanity"); |
| assert($idx$$constant < (int)vector_length(this), "out of bounds"); |
| |
| uint x_idx = $idx$$constant & right_n_bits(1); |
| uint y_idx = ($idx$$constant >> 1) & 1; |
| int vlen_enc = Assembler::AVX_256bit; |
| __ movq($tmp$$Register, $val$$XMMRegister); |
| __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); |
| __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); |
| __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ |
| predicate(vector_length(n) == 8); |
| match(Set dst (VectorInsert (Binary src val) idx)); |
| effect(TEMP tmp, TEMP vtmp); |
| format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} |
| ins_encode %{ |
| assert(vector_element_basic_type(this) == T_DOUBLE, "sanity"); |
| assert($idx$$constant < (int)vector_length(this), "out of bounds"); |
| |
| uint x_idx = $idx$$constant & right_n_bits(1); |
| uint y_idx = ($idx$$constant >> 1) & 3; |
| __ movq($tmp$$Register, $val$$XMMRegister); |
| __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); |
| __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); |
| __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| #endif |
| |
| // ====================REDUCTION ARITHMETIC======================================= |
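| // Reduction rules combine the scalar input (src1, or dst for the float/double forms) with every lane of |
| // the vector src2; the reduce* macro-assembler helpers use the vtmp registers for the intermediate lane |
| // shuffles. |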
| |
| // =======================Int Reduction========================================== |
| |
| instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ |
| predicate(vector_element_basic_type(n->in(2)) == T_INT); // src2 |
| match(Set dst (AddReductionVI src1 src2)); |
| match(Set dst (MulReductionVI src1 src2)); |
| match(Set dst (AndReductionV src1 src2)); |
| match(Set dst ( OrReductionV src1 src2)); |
| match(Set dst (XorReductionV src1 src2)); |
| match(Set dst (MinReductionV src1 src2)); |
| match(Set dst (MaxReductionV src1 src2)); |
| effect(TEMP vtmp1, TEMP vtmp2); |
| format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src2); |
| __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // =======================Long Reduction========================================== |
| |
| #ifdef _LP64 |
| instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ |
| predicate(vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); |
| match(Set dst (AddReductionVL src1 src2)); |
| match(Set dst (MulReductionVL src1 src2)); |
| match(Set dst (AndReductionV src1 src2)); |
| match(Set dst ( OrReductionV src1 src2)); |
| match(Set dst (XorReductionV src1 src2)); |
| match(Set dst (MinReductionV src1 src2)); |
| match(Set dst (MaxReductionV src1 src2)); |
| effect(TEMP vtmp1, TEMP vtmp2); |
| format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src2); |
| __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ |
| predicate(vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); |
| match(Set dst (AddReductionVL src1 src2)); |
| match(Set dst (MulReductionVL src1 src2)); |
| match(Set dst (AndReductionV src1 src2)); |
| match(Set dst ( OrReductionV src1 src2)); |
| match(Set dst (XorReductionV src1 src2)); |
| match(Set dst (MinReductionV src1 src2)); |
| match(Set dst (MaxReductionV src1 src2)); |
| effect(TEMP vtmp1, TEMP vtmp2); |
| format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src2); |
| __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| #endif // _LP64 |
| |
| // =======================Float Reduction========================================== |
| |
| instruct reductionF128(regF dst, vec src, vec vtmp) %{ |
| predicate(vector_length(n->in(2)) <= 4); // src |
| match(Set dst (AddReductionVF dst src)); |
| match(Set dst (MulReductionVF dst src)); |
| effect(TEMP dst, TEMP vtmp); |
| format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src); |
| __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ |
| predicate(vector_length(n->in(2)) == 8); // src |
| match(Set dst (AddReductionVF dst src)); |
| match(Set dst (MulReductionVF dst src)); |
| effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); |
| format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src); |
| __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ |
| predicate(vector_length(n->in(2)) == 16); // src |
| match(Set dst (AddReductionVF dst src)); |
| match(Set dst (MulReductionVF dst src)); |
| effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); |
| format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src); |
| __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // =======================Double Reduction========================================== |
| |
| instruct reduction2D(regD dst, vec src, vec vtmp) %{ |
| predicate(vector_length(n->in(2)) == 2); // src |
| match(Set dst (AddReductionVD dst src)); |
| match(Set dst (MulReductionVD dst src)); |
| effect(TEMP dst, TEMP vtmp); |
| format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src); |
| __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ |
| predicate(vector_length(n->in(2)) == 4); // src |
| match(Set dst (AddReductionVD dst src)); |
| match(Set dst (MulReductionVD dst src)); |
| effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); |
| format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src); |
| __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ |
| predicate(vector_length(n->in(2)) == 8); // src |
| match(Set dst (AddReductionVD dst src)); |
| match(Set dst (MulReductionVD dst src)); |
| effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); |
| format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src); |
| __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // =======================Byte Reduction========================================== |
| |
| #ifdef _LP64 |
| instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ |
| predicate(vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); |
| match(Set dst (AddReductionVI src1 src2)); |
| match(Set dst (AndReductionV src1 src2)); |
| match(Set dst ( OrReductionV src1 src2)); |
| match(Set dst (XorReductionV src1 src2)); |
| match(Set dst (MinReductionV src1 src2)); |
| match(Set dst (MaxReductionV src1 src2)); |
| effect(TEMP vtmp1, TEMP vtmp2); |
| format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src2); |
| __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ |
| predicate(vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); |
| match(Set dst (AddReductionVI src1 src2)); |
| match(Set dst (AndReductionV src1 src2)); |
| match(Set dst ( OrReductionV src1 src2)); |
| match(Set dst (XorReductionV src1 src2)); |
| match(Set dst (MinReductionV src1 src2)); |
| match(Set dst (MaxReductionV src1 src2)); |
| effect(TEMP vtmp1, TEMP vtmp2); |
| format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src2); |
| __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
#endif // _LP64
| |
| // =======================Short Reduction========================================== |
| |
| instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ |
| predicate(vector_element_basic_type(n->in(2)) == T_SHORT); // src2 |
| match(Set dst (AddReductionVI src1 src2)); |
| match(Set dst (MulReductionVI src1 src2)); |
| match(Set dst (AndReductionV src1 src2)); |
| match(Set dst ( OrReductionV src1 src2)); |
| match(Set dst (XorReductionV src1 src2)); |
| match(Set dst (MinReductionV src1 src2)); |
| match(Set dst (MaxReductionV src1 src2)); |
| effect(TEMP vtmp1, TEMP vtmp2); |
| format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src2); |
| __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
// =======================Byte Mul Reduction=====================================
| |
| instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ |
| predicate(vector_element_basic_type(n->in(2)) == T_BYTE && |
| vector_length(n->in(2)) <= 32); // src2 |
| match(Set dst (MulReductionVI src1 src2)); |
| effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); |
| format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src2); |
| __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ |
| predicate(vector_element_basic_type(n->in(2)) == T_BYTE && |
| vector_length(n->in(2)) == 64); // src2 |
| match(Set dst (MulReductionVI src1 src2)); |
| effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); |
| format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src2); |
| __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| //--------------------Min/Max Float Reduction -------------------- |
// Float Min/Max Reduction
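// Two flavors are provided. The forms taking an immF/immD $src1 only match when
// the initial value is the reduction identity (+Inf for min, -Inf for max, see
// the predicates), so $src1 need not be consumed; the *_av forms below take the
// running accumulator in $dst instead. The double reductions follow the same
// pattern.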
| instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, |
| legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ |
| predicate(vector_element_basic_type(n->in(2)) == T_FLOAT && |
| ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || |
| (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && |
| vector_length(n->in(2)) == 2); |
| match(Set dst (MinReductionV src1 src2)); |
| match(Set dst (MaxReductionV src1 src2)); |
| effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); |
| format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "sanity"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src2); |
| __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, |
| $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, |
| legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ |
| predicate(vector_element_basic_type(n->in(2)) == T_FLOAT && |
| ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || |
| (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && |
| vector_length(n->in(2)) >= 4); |
| match(Set dst (MinReductionV src1 src2)); |
| match(Set dst (MaxReductionV src1 src2)); |
| effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); |
| format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "sanity"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src2); |
| __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, |
| $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, |
| legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ |
| predicate(vector_element_basic_type(n->in(2)) == T_FLOAT && |
| vector_length(n->in(2)) == 2); |
| match(Set dst (MinReductionV dst src)); |
| match(Set dst (MaxReductionV dst src)); |
| effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); |
| format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "sanity"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src); |
| __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, |
| $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| |
| instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, |
| legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ |
| predicate(vector_element_basic_type(n->in(2)) == T_FLOAT && |
| vector_length(n->in(2)) >= 4); |
| match(Set dst (MinReductionV dst src)); |
| match(Set dst (MaxReductionV dst src)); |
| effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); |
| format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "sanity"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src); |
| __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, |
| $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| |
//--------------------Min/Max Double Reduction --------------------
| instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, |
| legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs |
| rFlagsReg cr) %{ |
| predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE && |
| ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || |
| (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && |
| vector_length(n->in(2)) == 2); |
| match(Set dst (MinReductionV src1 src2)); |
| match(Set dst (MaxReductionV src1 src2)); |
| effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); |
| format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "sanity"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src2); |
| __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, |
| $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, |
| legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs |
| rFlagsReg cr) %{ |
| predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE && |
| ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || |
| (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && |
| vector_length(n->in(2)) >= 4); |
| match(Set dst (MinReductionV src1 src2)); |
| match(Set dst (MaxReductionV src1 src2)); |
| effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); |
| format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "sanity"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src2); |
| __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, |
| $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| |
| instruct minmax_reduction2D_av(legRegD dst, legVec src, |
| legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs |
| rFlagsReg cr) %{ |
| predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE && |
| vector_length(n->in(2)) == 2); |
| match(Set dst (MinReductionV dst src)); |
| match(Set dst (MaxReductionV dst src)); |
| effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); |
| format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "sanity"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src); |
| __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, |
| $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct minmax_reductionD_av(legRegD dst, legVec src, |
| legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs |
| rFlagsReg cr) %{ |
| predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE && |
| vector_length(n->in(2)) >= 4); |
| match(Set dst (MinReductionV dst src)); |
| match(Set dst (MaxReductionV dst src)); |
| effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); |
| format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "sanity"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this, $src); |
| __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, |
| $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ====================VECTOR ARITHMETIC======================================= |
| |
| // --------------------------------- ADD -------------------------------------- |
| |
| // Bytes vector add |
| instruct vaddB(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (AddVB dst src)); |
| format %{ "paddb $dst,$src\t! add packedB" %} |
| ins_encode %{ |
| __ paddb($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vaddB_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddVB src1 src2)); |
| format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vaddB_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddVB src (LoadVector mem))); |
| format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Shorts/Chars vector add |
| instruct vaddS(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (AddVS dst src)); |
| format %{ "paddw $dst,$src\t! add packedS" %} |
| ins_encode %{ |
| __ paddw($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vaddS_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddVS src1 src2)); |
| format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vaddS_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddVS src (LoadVector mem))); |
| format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Integers vector add |
| instruct vaddI(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (AddVI dst src)); |
| format %{ "paddd $dst,$src\t! add packedI" %} |
| ins_encode %{ |
| __ paddd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vaddI_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddVI src1 src2)); |
| format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| |
| instruct vaddI_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddVI src (LoadVector mem))); |
| format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Longs vector add |
| instruct vaddL(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (AddVL dst src)); |
| format %{ "paddq $dst,$src\t! add packedL" %} |
| ins_encode %{ |
| __ paddq($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vaddL_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddVL src1 src2)); |
| format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vaddL_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddVL src (LoadVector mem))); |
| format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Floats vector add |
| instruct vaddF(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (AddVF dst src)); |
| format %{ "addps $dst,$src\t! add packedF" %} |
| ins_encode %{ |
| __ addps($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vaddF_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddVF src1 src2)); |
| format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vaddF_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddVF src (LoadVector mem))); |
| format %{ "vaddps $dst,$src,$mem\t! add packedF" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Doubles vector add |
| instruct vaddD(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (AddVD dst src)); |
| format %{ "addpd $dst,$src\t! add packedD" %} |
| ins_encode %{ |
| __ addpd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vaddD_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddVD src1 src2)); |
| format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vaddD_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddVD src (LoadVector mem))); |
| format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- SUB -------------------------------------- |
| |
| // Bytes vector sub |
| instruct vsubB(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (SubVB dst src)); |
| format %{ "psubb $dst,$src\t! sub packedB" %} |
| ins_encode %{ |
| __ psubb($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsubB_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubVB src1 src2)); |
| format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsubB_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubVB src (LoadVector mem))); |
| format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Shorts/Chars vector sub |
| instruct vsubS(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (SubVS dst src)); |
| format %{ "psubw $dst,$src\t! sub packedS" %} |
| ins_encode %{ |
| __ psubw($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| |
| instruct vsubS_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubVS src1 src2)); |
| format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsubS_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubVS src (LoadVector mem))); |
| format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Integers vector sub |
| instruct vsubI(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (SubVI dst src)); |
| format %{ "psubd $dst,$src\t! sub packedI" %} |
| ins_encode %{ |
| __ psubd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsubI_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubVI src1 src2)); |
| format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsubI_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubVI src (LoadVector mem))); |
| format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Longs vector sub |
| instruct vsubL(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (SubVL dst src)); |
| format %{ "psubq $dst,$src\t! sub packedL" %} |
| ins_encode %{ |
| __ psubq($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsubL_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubVL src1 src2)); |
| format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| |
| instruct vsubL_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubVL src (LoadVector mem))); |
| format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Floats vector sub |
| instruct vsubF(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (SubVF dst src)); |
| format %{ "subps $dst,$src\t! sub packedF" %} |
| ins_encode %{ |
| __ subps($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsubF_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubVF src1 src2)); |
| format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsubF_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubVF src (LoadVector mem))); |
| format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Doubles vector sub |
| instruct vsubD(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (SubVD dst src)); |
| format %{ "subpd $dst,$src\t! sub packedD" %} |
| ins_encode %{ |
| __ subpd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsubD_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubVD src1 src2)); |
| format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsubD_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubVD src (LoadVector mem))); |
| format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- MUL -------------------------------------- |
| |
| // Byte vector mul |
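// SSE/AVX provide no packed byte multiply, so the bytes are sign-extended to
// 16-bit lanes, multiplied with pmullw/vpmullw, masked back to the low byte of
// each word and re-packed with packuswb/vpackuswb.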
| instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ |
| predicate(vector_length(n) == 4 || |
| vector_length(n) == 8); |
| match(Set dst (MulVB src1 src2)); |
| effect(TEMP dst, TEMP tmp, TEMP scratch); |
| format %{"vector_mulB $dst,$src1,$src2" %} |
| ins_encode %{ |
| assert(UseSSE > 3, "required"); |
| __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); |
| __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); |
| __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); |
| __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); |
| __ pand($dst$$XMMRegister, $tmp$$XMMRegister); |
| __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ |
| predicate(vector_length(n) == 16 && UseAVX <= 1); |
| match(Set dst (MulVB src1 src2)); |
| effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); |
| format %{"vector_mulB $dst,$src1,$src2" %} |
| ins_encode %{ |
| assert(UseSSE > 3, "required"); |
| __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister); |
| __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); |
| __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister); |
| __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE); |
| __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE); |
| __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); |
| __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); |
| __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister); |
| __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); |
| __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); |
| __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); |
| __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ |
| predicate(vector_length(n) == 16 && UseAVX > 1); |
| match(Set dst (MulVB src1 src2)); |
| effect(TEMP dst, TEMP tmp, TEMP scratch); |
| format %{"vector_mulB $dst,$src1,$src2" %} |
| ins_encode %{ |
| int vlen_enc = Assembler::AVX_256bit; |
| __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); |
| __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); |
| __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); |
| __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister); |
| __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ |
| predicate(vector_length(n) == 32); |
| match(Set dst (MulVB src1 src2)); |
| effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); |
| format %{"vector_mulB $dst,$src1,$src2" %} |
| ins_encode %{ |
| assert(UseAVX > 1, "required"); |
| int vlen_enc = Assembler::AVX_256bit; |
| __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); |
| __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister); |
| __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); |
| __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc); |
| __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); |
| __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); |
| __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); |
| __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ |
| predicate(vector_length(n) == 64); |
| match(Set dst (MulVB src1 src2)); |
| effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); |
format %{"vector_mulB $dst,$src1,$src2" %}
| ins_encode %{ |
| assert(UseAVX > 2, "required"); |
| int vlen_enc = Assembler::AVX_512bit; |
| __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); |
| __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister); |
| __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); |
| __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc); |
| __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); |
| __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); |
| __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register); |
| __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Shorts/Chars vector mul |
| instruct vmulS(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (MulVS dst src)); |
| format %{ "pmullw $dst,$src\t! mul packedS" %} |
| ins_encode %{ |
| __ pmullw($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmulS_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulVS src1 src2)); |
| format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmulS_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulVS src (LoadVector mem))); |
| format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Integers vector mul |
| instruct vmulI(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (MulVI dst src)); |
| format %{ "pmulld $dst,$src\t! mul packedI" %} |
| ins_encode %{ |
| assert(UseSSE > 3, "required"); |
| __ pmulld($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmulI_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulVI src1 src2)); |
| format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmulI_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulVI src (LoadVector mem))); |
| format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Longs vector mul |
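// vpmullq is an AVX-512DQ instruction; when it is unavailable the 64x64-bit
// multiply is synthesized from 32-bit multiplies (see mul2L_reg and
// vmul4L_reg_avx below).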
| instruct vmulL_reg(vec dst, vec src1, vec src2) %{ |
| predicate(VM_Version::supports_avx512dq()); |
| match(Set dst (MulVL src1 src2)); |
| format %{ "vpmullq $dst,$src1,$src2\t! mul packedL" %} |
| ins_encode %{ |
| assert(UseAVX > 2, "required"); |
| int vlen_enc = vector_length_encoding(this); |
| __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmulL_mem(vec dst, vec src, memory mem) %{ |
| predicate(VM_Version::supports_avx512dq()); |
| match(Set dst (MulVL src (LoadVector mem))); |
| format %{ "vpmullq $dst,$src,$mem\t! mul packedL" %} |
| ins_encode %{ |
| assert(UseAVX > 2, "required"); |
| int vlen_enc = vector_length_encoding(this); |
| __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
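// 64x64-bit multiply without AVX-512DQ, per 64-bit lane:
//   a*b = aL*bL + ((aL*bH + aH*bL) << 32)   (mod 2^64)
// pshufd swaps the 32-bit halves of b, pmulld forms the two cross products,
// phaddd sums them, and the sum is shifted up by 32 and added to the unsigned
// low product produced by pmuludq.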
| instruct mul2L_reg(vec dst, vec src2, legVec tmp) %{ |
| predicate(vector_length(n) == 2 && !VM_Version::supports_avx512dq()); |
| match(Set dst (MulVL dst src2)); |
| effect(TEMP dst, TEMP tmp); |
| format %{ "pshufd $tmp,$src2, 177\n\t" |
| "pmulld $tmp,$dst\n\t" |
| "phaddd $tmp,$tmp\n\t" |
| "pmovzxdq $tmp,$tmp\n\t" |
| "psllq $tmp, 32\n\t" |
| "pmuludq $dst,$src2\n\t" |
| "paddq $dst,$tmp\n\t! mul packed2L" %} |
| |
| ins_encode %{ |
| assert(VM_Version::supports_sse4_1(), "required"); |
| int vlen_enc = Assembler::AVX_128bit; |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177); |
| __ pmulld($tmp$$XMMRegister, $dst$$XMMRegister); |
| __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); |
| __ pmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister); |
| __ psllq($tmp$$XMMRegister, 32); |
| __ pmuludq($dst$$XMMRegister, $src2$$XMMRegister); |
| __ paddq($dst$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul4L_reg_avx(vec dst, vec src1, vec src2, legVec tmp, legVec tmp1) %{ |
| predicate(vector_length(n) == 4 && !VM_Version::supports_avx512dq()); |
| match(Set dst (MulVL src1 src2)); |
| effect(TEMP tmp1, TEMP tmp); |
format %{ "vpshufd $tmp,$src2,177\n\t"
"vpmulld $tmp,$src1,$tmp\n\t"
"vextracti128_high $tmp1,$tmp\n\t"
"vphaddd $tmp,$tmp,$tmp1\n\t"
"vpmovzxdq $tmp,$tmp\n\t"
"vpsllq $tmp,$tmp,32\n\t"
"vpmuludq $tmp1,$src1,$src2\n\t"
"vpaddq $dst,$tmp,$tmp1\t! mul packed4L" %}
| ins_encode %{ |
| int vlen_enc = Assembler::AVX_256bit; |
| __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vlen_enc); |
| __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vlen_enc); |
| __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister); |
| __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); |
| __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); |
| __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vlen_enc); |
| __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Floats vector mul |
| instruct vmulF(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (MulVF dst src)); |
| format %{ "mulps $dst,$src\t! mul packedF" %} |
| ins_encode %{ |
| __ mulps($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmulF_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulVF src1 src2)); |
| format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmulF_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulVF src (LoadVector mem))); |
| format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Doubles vector mul |
| instruct vmulD(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (MulVD dst src)); |
| format %{ "mulpd $dst,$src\t! mul packedD" %} |
| ins_encode %{ |
| __ mulpd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmulD_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulVD src1 src2)); |
| format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmulD_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulVD src (LoadVector mem))); |
| format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
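// Vector conditional move: a vcmpps/vcmppd compare writes a per-lane mask into
// $dst, which vblendvps/vblendvpd then uses to select between the lanes of
// $src1 and $src2.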
| instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ |
| predicate(vector_length(n) == 8); |
| match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); |
| effect(TEMP dst, USE src1, USE src2); |
format %{ "vcmpps.$copnd $dst,$src1,$src2 ! vcmovevf, cond=$cop\n\t"
"vblendvps $dst,$src1,$src2,$dst ! vcmovevf"
%}
| ins_encode %{ |
| assert(UseAVX > 0, "required"); |
| |
| int vlen_enc = Assembler::AVX_256bit; |
| int cond = (Assembler::Condition)($copnd$$cmpcode); |
| __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); |
| __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ |
| predicate(vector_length(n) == 4); |
| match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); |
| effect(TEMP dst, USE src1, USE src2); |
format %{ "vcmppd.$copnd $dst,$src1,$src2 ! vcmovevd, cond=$cop\n\t"
"vblendvpd $dst,$src1,$src2,$dst ! vcmovevd"
%}
| ins_encode %{ |
| assert(UseAVX > 0, "required"); |
| |
| int vlen_enc = Assembler::AVX_256bit; |
| int cond = (Assembler::Condition)($copnd$$cmpcode); |
| __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); |
| __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- DIV -------------------------------------- |
| |
| // Floats vector div |
| instruct vdivF(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (DivVF dst src)); |
| format %{ "divps $dst,$src\t! div packedF" %} |
| ins_encode %{ |
| __ divps($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdivF_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (DivVF src1 src2)); |
| format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdivF_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (DivVF src (LoadVector mem))); |
| format %{ "vdivps $dst,$src,$mem\t! div packedF" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Doubles vector div |
| instruct vdivD(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (DivVD dst src)); |
| format %{ "divpd $dst,$src\t! div packedD" %} |
| ins_encode %{ |
| __ divpd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdivD_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (DivVD src1 src2)); |
| format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdivD_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (DivVD src (LoadVector mem))); |
| format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ------------------------------ MinMax --------------------------------------- |
| |
| // Byte, Short, Int vector Min/Max |
| instruct minmax_reg_sse(vec dst, vec src) %{ |
| predicate(is_integral_type(vector_element_basic_type(n)) && vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT |
| UseAVX == 0); |
| match(Set dst (MinV dst src)); |
| match(Set dst (MaxV dst src)); |
| format %{ "vector_minmax $dst,$src\t! " %} |
| ins_encode %{ |
| assert(UseSSE >= 4, "required"); |
| |
| int opcode = this->ideal_Opcode(); |
| BasicType elem_bt = vector_element_basic_type(this); |
| __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vminmax_reg(vec dst, vec src1, vec src2) %{ |
| predicate(is_integral_type(vector_element_basic_type(n)) && vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT |
| UseAVX > 0); |
| match(Set dst (MinV src1 src2)); |
| match(Set dst (MaxV src1 src2)); |
| format %{ "vector_minmax $dst,$src1,$src2\t! " %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen_enc = vector_length_encoding(this); |
| BasicType elem_bt = vector_element_basic_type(this); |
| |
| __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Long vector Min/Max |
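// Packed 64-bit signed min/max (vpminsq/vpmaxsq) exist only in AVX-512, so the
// SSE and AVX flavors below synthesize the result with compare-and-blend inside
// the pminmax/vpminmax helpers. The SSE form reserves xmm0, which the legacy
// (non-VEX) variable blend instructions use as an implicit mask operand.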
| instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ |
| predicate(vector_length_in_bytes(n) == 16 && vector_element_basic_type(n) == T_LONG && |
| UseAVX == 0); |
| match(Set dst (MinV dst src)); |
| match(Set dst (MaxV src dst)); |
| effect(TEMP dst, TEMP tmp); |
| format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} |
| ins_encode %{ |
| assert(UseSSE >= 4, "required"); |
| |
| int opcode = this->ideal_Opcode(); |
| BasicType elem_bt = vector_element_basic_type(this); |
| assert(elem_bt == T_LONG, "sanity"); |
| |
| __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ |
| predicate(vector_length_in_bytes(n) <= 32 && vector_element_basic_type(n) == T_LONG && |
| UseAVX > 0 && !VM_Version::supports_avx512vl()); |
| match(Set dst (MinV src1 src2)); |
| match(Set dst (MaxV src1 src2)); |
| effect(TEMP dst); |
| format %{ "vector_minmaxL $dst,$src1,$src2\t! " %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| int opcode = this->ideal_Opcode(); |
| BasicType elem_bt = vector_element_basic_type(this); |
| assert(elem_bt == T_LONG, "sanity"); |
| |
| __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ |
| predicate((vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && |
| vector_element_basic_type(n) == T_LONG); |
| match(Set dst (MinV src1 src2)); |
| match(Set dst (MaxV src1 src2)); |
format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
| ins_encode %{ |
| assert(UseAVX > 2, "required"); |
| |
| int vlen_enc = vector_length_encoding(this); |
| int opcode = this->ideal_Opcode(); |
| BasicType elem_bt = vector_element_basic_type(this); |
| assert(elem_bt == T_LONG, "sanity"); |
| |
| __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Float/Double vector Min/Max |
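// Java Math.min/max semantics (NaN propagation, -0.0 ordered below +0.0) differ
// from what vminps/vmaxps return, so the helpers below combine the min/max with
// compare-and-blend fixups; the AVX-512 flavor uses the k1 opmask register as a
// temporary.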
| instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ |
| predicate(vector_length_in_bytes(n) <= 32 && |
| is_floating_point_type(vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE |
| UseAVX > 0); |
| match(Set dst (MinV a b)); |
| match(Set dst (MaxV a b)); |
| effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); |
| format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "required"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen_enc = vector_length_encoding(this); |
| BasicType elem_bt = vector_element_basic_type(this); |
| |
| __ vminmax_fp(opcode, elem_bt, |
| $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, |
| $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp) %{ |
| predicate(vector_length_in_bytes(n) == 64 && |
| is_floating_point_type(vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE |
| match(Set dst (MinV a b)); |
| match(Set dst (MaxV a b)); |
| effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp); |
| format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 2, "required"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen_enc = vector_length_encoding(this); |
| BasicType elem_bt = vector_element_basic_type(this); |
| |
| KRegister ktmp = k1; |
| __ evminmax_fp(opcode, elem_bt, |
| $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, |
| ktmp, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- Sqrt -------------------------------------- |
| |
| instruct vsqrtF_reg(vec dst, vec src) %{ |
| match(Set dst (SqrtVF src)); |
| format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "required"); |
| int vlen_enc = vector_length_encoding(this); |
| __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsqrtF_mem(vec dst, memory mem) %{ |
| match(Set dst (SqrtVF (LoadVector mem))); |
| format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "required"); |
| int vlen_enc = vector_length_encoding(this); |
| __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
// Doubles vector sqrt
| instruct vsqrtD_reg(vec dst, vec src) %{ |
| match(Set dst (SqrtVD src)); |
| format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "required"); |
| int vlen_enc = vector_length_encoding(this); |
| __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsqrtD_mem(vec dst, memory mem) %{ |
| match(Set dst (SqrtVD (LoadVector mem))); |
| format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "required"); |
| int vlen_enc = vector_length_encoding(this); |
| __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ------------------------------ Shift --------------------------------------- |
| |
| // Left and right shift count vectors are the same on x86 |
| // (only lowest bits of xmm reg are used for count). |
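// (The variable shift instructions, e.g. psllw/pslld/psllq, take the count from
// the low 64 bits of the xmm operand, so one count register serves every element
// size and both shift directions.)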
| instruct vshiftcnt(vec dst, rRegI cnt) %{ |
| match(Set dst (LShiftCntV cnt)); |
| match(Set dst (RShiftCntV cnt)); |
| format %{ "movdl $dst,$cnt\t! load shift count" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $cnt$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Byte vector shift |
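// x86 has no packed byte shift instructions, so the bytes are sign- or
// zero-extended to 16-bit lanes (vextendbw), shifted as words (vshiftw), masked
// back to the low byte of each word and re-packed with packuswb/vpackuswb.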
| instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ |
| predicate(vector_length(n) <= 8 && VectorNode::is_vshift_cnt(n->in(2))); |
| match(Set dst ( LShiftVB src shift)); |
| match(Set dst ( RShiftVB src shift)); |
| match(Set dst (URShiftVB src shift)); |
| effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch); |
| format %{"vector_byte_shift $dst,$src,$shift" %} |
| ins_encode %{ |
| assert(UseSSE > 3, "required"); |
| int opcode = this->ideal_Opcode(); |
| bool sign = (opcode != Op_URShiftVB); |
| __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); |
| __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); |
| __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); |
| __ pand($dst$$XMMRegister, $tmp$$XMMRegister); |
| __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ |
| predicate(vector_length(n) == 16 && VectorNode::is_vshift_cnt(n->in(2)) && |
| UseAVX <= 1); |
| match(Set dst ( LShiftVB src shift)); |
| match(Set dst ( RShiftVB src shift)); |
| match(Set dst (URShiftVB src shift)); |
| effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch); |
| format %{"vector_byte_shift $dst,$src,$shift" %} |
| ins_encode %{ |
| assert(UseSSE > 3, "required"); |
| int opcode = this->ideal_Opcode(); |
| bool sign = (opcode != Op_URShiftVB); |
| __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); |
| __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); |
| __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); |
| __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); |
| __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); |
| __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); |
| __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); |
| __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); |
| __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ |
| predicate(vector_length(n) == 16 && VectorNode::is_vshift_cnt(n->in(2)) && |
| UseAVX > 1); |
| match(Set dst ( LShiftVB src shift)); |
| match(Set dst ( RShiftVB src shift)); |
| match(Set dst (URShiftVB src shift)); |
| effect(TEMP dst, TEMP tmp, TEMP scratch); |
| format %{"vector_byte_shift $dst,$src,$shift" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| bool sign = (opcode != Op_URShiftVB); |
| int vlen_enc = Assembler::AVX_256bit; |
| __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); |
| __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); |
| __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); |
| __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ |
| predicate(vector_length(n) == 32 && VectorNode::is_vshift_cnt(n->in(2))); |
| match(Set dst ( LShiftVB src shift)); |
| match(Set dst ( RShiftVB src shift)); |
| match(Set dst (URShiftVB src shift)); |
| effect(TEMP dst, TEMP tmp, TEMP scratch); |
| format %{"vector_byte_shift $dst,$src,$shift" %} |
| ins_encode %{ |
| assert(UseAVX > 1, "required"); |
| int opcode = this->ideal_Opcode(); |
| bool sign = (opcode != Op_URShiftVB); |
| int vlen_enc = Assembler::AVX_256bit; |
| __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); |
| __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); |
| __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); |
| __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); |
| __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); |
| __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); |
| __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); |
| __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ |
| predicate(vector_length(n) == 64 && VectorNode::is_vshift_cnt(n->in(2))); |
| match(Set dst ( LShiftVB src shift)); |
match(Set dst ( RShiftVB src shift));
| match(Set dst (URShiftVB src shift)); |
| effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); |
| format %{"vector_byte_shift $dst,$src,$shift" %} |
| ins_encode %{ |
| assert(UseAVX > 2, "required"); |
| int opcode = this->ideal_Opcode(); |
| bool sign = (opcode != Op_URShiftVB); |
| int vlen_enc = Assembler::AVX_512bit; |
| __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); |
| __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); |
| __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); |
| __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); |
| __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); |
| __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); |
| __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register); |
| __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
// A vector logical right shift of shorts produces an incorrect Java result for
// negative data, because Java code converts the short value to an int with
// sign extension before shifting. Char vectors are fine, since chars are
// unsigned values.
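// For example (scalar Java semantics): for short s = -8, (short)(s >>> 1) is -4
// because s is first widened to int, whereas a true 16-bit logical shift of
// 0xFFF8 by 1 yields 0x7FFC (32764). For char c = (char)0xFFF8, c >>> 1 is
// 0x7FFC, which matches the 16-bit result.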
// Shorts/Chars vector shift
| instruct vshiftS(vec dst, vec src, vec shift) %{ |
| predicate(VectorNode::is_vshift_cnt(n->in(2))); |
| match(Set dst ( LShiftVS src shift)); |
| match(Set dst ( RShiftVS src shift)); |
| match(Set dst (URShiftVS src shift)); |
| effect(TEMP dst, USE src, USE shift); |
| format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| if (UseAVX > 0) { |
| int vlen_enc = vector_length_encoding(this); |
| __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); |
| } else { |
| int vlen = vector_length(this); |
| if (vlen == 2) { |
| __ movflt($dst$$XMMRegister, $src$$XMMRegister); |
| __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); |
| } else if (vlen == 4) { |
| __ movdbl($dst$$XMMRegister, $src$$XMMRegister); |
| __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); |
| } else { |
| assert (vlen == 8, "sanity"); |
| __ movdqu($dst$$XMMRegister, $src$$XMMRegister); |
| __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); |
| } |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
// Integers vector shift
| instruct vshiftI(vec dst, vec src, vec shift) %{ |
| predicate(VectorNode::is_vshift_cnt(n->in(2))); |
| match(Set dst ( LShiftVI src shift)); |
| match(Set dst ( RShiftVI src shift)); |
| match(Set dst (URShiftVI src shift)); |
| effect(TEMP dst, USE src, USE shift); |
| format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| if (UseAVX > 0) { |
| int vlen_enc = vector_length_encoding(this); |
| __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); |
| } else { |
| int vlen = vector_length(this); |
| if (vlen == 2) { |
| __ movdbl($dst$$XMMRegister, $src$$XMMRegister); |
| __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); |
| } else { |
| assert(vlen == 4, "sanity"); |
| __ movdqu($dst$$XMMRegister, $src$$XMMRegister); |
| __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); |
| } |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
// Integers vector constant shift
| instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ |
| match(Set dst (LShiftVI src (LShiftCntV shift))); |
| match(Set dst (RShiftVI src (RShiftCntV shift))); |
| match(Set dst (URShiftVI src (RShiftCntV shift))); |
| format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| if (UseAVX > 0) { |
| int vector_len = vector_length_encoding(this); |
| __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); |
| } else { |
| int vlen = vector_length(this); |
| if (vlen == 2) { |
| __ movdbl($dst$$XMMRegister, $src$$XMMRegister); |
| __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); |
| } else { |
| assert(vlen == 4, "sanity"); |
| __ movdqu($dst$$XMMRegister, $src$$XMMRegister); |
| __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); |
| } |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Longs vector shift |
| instruct vshiftL(vec dst, vec src, vec shift) %{ |
| predicate(VectorNode::is_vshift_cnt(n->in(2))); |
| match(Set dst ( LShiftVL src shift)); |
| match(Set dst (URShiftVL src shift)); |
| effect(TEMP dst, USE src, USE shift); |
| format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| if (UseAVX > 0) { |
| int vlen_enc = vector_length_encoding(this); |
| __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); |
| } else { |
      assert(vector_length(this) == 2, "sanity");
| __ movdqu($dst$$XMMRegister, $src$$XMMRegister); |
| __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Longs vector constant shift |
| instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ |
| match(Set dst (LShiftVL src (LShiftCntV shift))); |
| match(Set dst (URShiftVL src (RShiftCntV shift))); |
| format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| if (UseAVX > 0) { |
| int vector_len = vector_length_encoding(this); |
| __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); |
| } else { |
      assert(vector_length(this) == 2, "sanity");
| __ movdqu($dst$$XMMRegister, $src$$XMMRegister); |
| __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
// ------------------- ArithmeticRightShift -----------------------------------
| // Long vector arithmetic right shift |
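// Before AVX-512 there is no packed 64-bit arithmetic right shift (vpsraq), so it
// is emulated as ((x >>> n) ^ m) - m with m = (sign_mask >>> n): the xor/subtract
// pair re-extends the sign bits that the logical shift cleared.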
| instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ |
| predicate(VectorNode::is_vshift_cnt(n->in(2)) && UseAVX <= 2); |
| match(Set dst (RShiftVL src shift)); |
| effect(TEMP dst, TEMP tmp, TEMP scratch); |
| format %{ "vshiftq $dst,$src,$shift" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen == 2) { |
| assert(UseSSE >= 2, "required"); |
| __ movdqu($dst$$XMMRegister, $src$$XMMRegister); |
| __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); |
| __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); |
| __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); |
| __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); |
| __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); |
| } else { |
| assert(vlen == 4, "sanity"); |
| assert(UseAVX > 1, "required"); |
| int vlen_enc = Assembler::AVX_256bit; |
| __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); |
| __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); |
| __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); |
| __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); |
| __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ |
| predicate(VectorNode::is_vshift_cnt(n->in(2)) && UseAVX > 2); |
| match(Set dst (RShiftVL src shift)); |
| format %{ "vshiftq $dst,$src,$shift" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ------------------- Variable Shift ----------------------------- |
| // Byte variable shift |
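// There is no variable-count byte shift. Without AVX512BW the bytes are widened,
// shifted with the variable 32-bit shift, and the word-sized intermediate results
// are packed back down to bytes.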
| instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ |
| predicate(vector_length(n) <= 8 && |
| !VectorNode::is_vshift_cnt(n->in(2)) && |
| !VM_Version::supports_avx512bw()); |
| match(Set dst ( LShiftVB src shift)); |
| match(Set dst ( RShiftVB src shift)); |
| match(Set dst (URShiftVB src shift)); |
| effect(TEMP dst, TEMP vtmp, TEMP scratch); |
| format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp, $scratch as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX >= 2, "required"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen_enc = Assembler::AVX_128bit; |
| __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register); |
| __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ |
| predicate(vector_length(n) == 16 && |
| !VectorNode::is_vshift_cnt(n->in(2)) && |
| !VM_Version::supports_avx512bw()); |
| match(Set dst ( LShiftVB src shift)); |
| match(Set dst ( RShiftVB src shift)); |
| match(Set dst (URShiftVB src shift)); |
| effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); |
| format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX >= 2, "required"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen_enc = Assembler::AVX_128bit; |
| // Shift lower half and get word result in dst |
| __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); |
| |
| // Shift upper half and get word result in vtmp1 |
| __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); |
| __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); |
| __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); |
| |
| // Merge and down convert the two word results to byte in dst |
| __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4, rRegP scratch) %{ |
| predicate(vector_length(n) == 32 && |
| !VectorNode::is_vshift_cnt(n->in(2)) && |
| !VM_Version::supports_avx512bw()); |
| match(Set dst ( LShiftVB src shift)); |
| match(Set dst ( RShiftVB src shift)); |
| match(Set dst (URShiftVB src shift)); |
| effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP scratch); |
| format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 and $scratch as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX >= 2, "required"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen_enc = Assembler::AVX_128bit; |
| // Process lower 128 bits and get result in dst |
| __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); |
| __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); |
| __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); |
| __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); |
| __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); |
| |
| // Process higher 128 bits and get result in vtmp3 |
| __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); |
| __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); |
| __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister, $scratch$$Register); |
| __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); |
| __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); |
| __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); |
| __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); |
| |
| // Merge the two results in dst |
| __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ |
| predicate(vector_length(n) <= 32 && |
| !VectorNode::is_vshift_cnt(n->in(2)) && |
| VM_Version::supports_avx512bw()); |
| match(Set dst ( LShiftVB src shift)); |
| match(Set dst ( RShiftVB src shift)); |
| match(Set dst (URShiftVB src shift)); |
| effect(TEMP dst, TEMP vtmp, TEMP scratch); |
| format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp, $scratch as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 2, "required"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen_enc = vector_length_encoding(this); |
| __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ |
| predicate(vector_length(n) == 64 && |
| !VectorNode::is_vshift_cnt(n->in(2)) && |
| VM_Version::supports_avx512bw()); |
| match(Set dst ( LShiftVB src shift)); |
| match(Set dst ( RShiftVB src shift)); |
| match(Set dst (URShiftVB src shift)); |
| effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); |
| format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 2, "required"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen_enc = Assembler::AVX_256bit; |
| __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); |
| __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); |
| __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); |
| __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); |
| __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Short variable shift |
| instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ |
| predicate(vector_length(n) <= 8 && |
| !VectorNode::is_vshift_cnt(n->in(2)) && |
| !VM_Version::supports_avx512bw()); |
| match(Set dst ( LShiftVS src shift)); |
| match(Set dst ( RShiftVS src shift)); |
| match(Set dst (URShiftVS src shift)); |
| effect(TEMP dst, TEMP vtmp, TEMP scratch); |
  format %{ "vector_varshift_short $dst, $src, $shift" %}
| ins_encode %{ |
| assert(UseAVX >= 2, "required"); |
| |
| int opcode = this->ideal_Opcode(); |
| bool sign = (opcode != Op_URShiftVS); |
| int vlen_enc = Assembler::AVX_256bit; |
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
| __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); |
| __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); |
| __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); |
| __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ |
| predicate(vector_length(n) == 16 && |
| !VectorNode::is_vshift_cnt(n->in(2)) && |
| !VM_Version::supports_avx512bw()); |
| match(Set dst ( LShiftVS src shift)); |
| match(Set dst ( RShiftVS src shift)); |
| match(Set dst (URShiftVS src shift)); |
| effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); |
  format %{ "vector_varshift_short $dst, $src, $shift" %}
| ins_encode %{ |
| assert(UseAVX >= 2, "required"); |
| |
| int opcode = this->ideal_Opcode(); |
| bool sign = (opcode != Op_URShiftVS); |
| int vlen_enc = Assembler::AVX_256bit; |
    // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
| __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); |
| __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); |
| __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); |
| |
    // Shift upper half, with result in dst using vtmp1 as TEMP
| __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); |
| __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); |
| __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); |
| __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); |
| __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); |
| |
| // Merge lower and upper half result into dst |
| __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ |
| predicate(!VectorNode::is_vshift_cnt(n->in(2)) && |
| VM_Version::supports_avx512bw()); |
| match(Set dst ( LShiftVS src shift)); |
| match(Set dst ( RShiftVS src shift)); |
| match(Set dst (URShiftVS src shift)); |
| format %{ "vector_varshift_short $dst,$src,$shift\t!" %} |
| ins_encode %{ |
| assert(UseAVX > 2, "required"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen_enc = vector_length_encoding(this); |
| if (!VM_Version::supports_avx512vl()) { |
| vlen_enc = Assembler::AVX_512bit; |
| } |
| __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
// Integer variable shift
| instruct vshiftI_var(vec dst, vec src, vec shift) %{ |
| predicate(!VectorNode::is_vshift_cnt(n->in(2))); |
| match(Set dst ( LShiftVI src shift)); |
| match(Set dst ( RShiftVI src shift)); |
| match(Set dst (URShiftVI src shift)); |
| format %{ "vector_varshift_int $dst,$src,$shift\t!" %} |
| ins_encode %{ |
| assert(UseAVX >= 2, "required"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen_enc = vector_length_encoding(this); |
| __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
// Long variable shift
| instruct vshiftL_var(vec dst, vec src, vec shift) %{ |
| predicate(!VectorNode::is_vshift_cnt(n->in(2))); |
| match(Set dst ( LShiftVL src shift)); |
| match(Set dst (URShiftVL src shift)); |
| format %{ "vector_varshift_long $dst,$src,$shift\t!" %} |
| ins_encode %{ |
| assert(UseAVX >= 2, "required"); |
| |
| int opcode = this->ideal_Opcode(); |
| int vlen_enc = vector_length_encoding(this); |
| __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
// Long variable arithmetic right shift
| instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ |
| predicate(vector_length(n) <= 4 && |
| !VectorNode::is_vshift_cnt(n->in(2)) && |
| UseAVX == 2); |
| match(Set dst (RShiftVL src shift)); |
| effect(TEMP dst, TEMP vtmp); |
| format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen_enc = vector_length_encoding(this); |
| __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, |
| $vtmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ |
| predicate(!VectorNode::is_vshift_cnt(n->in(2)) && |
| UseAVX > 2); |
| match(Set dst (RShiftVL src shift)); |
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen_enc = vector_length_encoding(this); |
| __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- AND -------------------------------------- |
| |
| instruct vand(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (AndV dst src)); |
| format %{ "pand $dst,$src\t! and vectors" %} |
| ins_encode %{ |
| __ pand($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vand_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AndV src1 src2)); |
| format %{ "vpand $dst,$src1,$src2\t! and vectors" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vand_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AndV src (LoadVector mem))); |
| format %{ "vpand $dst,$src,$mem\t! and vectors" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- OR --------------------------------------- |
| |
| instruct vor(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (OrV dst src)); |
| format %{ "por $dst,$src\t! or vectors" %} |
| ins_encode %{ |
| __ por($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vor_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (OrV src1 src2)); |
| format %{ "vpor $dst,$src1,$src2\t! or vectors" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vor_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (OrV src (LoadVector mem))); |
| format %{ "vpor $dst,$src,$mem\t! or vectors" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- XOR -------------------------------------- |
| |
| instruct vxor(vec dst, vec src) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (XorV dst src)); |
| format %{ "pxor $dst,$src\t! xor vectors" %} |
| ins_encode %{ |
| __ pxor($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vxor_reg(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (XorV src1 src2)); |
| format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vxor_mem(vec dst, vec src, memory mem) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (XorV src (LoadVector mem))); |
| format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- VectorCast -------------------------------------- |
| |
| instruct vcastBtoX(vec dst, vec src) %{ |
| match(Set dst (VectorCastB2X src)); |
| format %{ "vector_cast_b2x $dst,$src\t!" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "required"); |
| |
| BasicType to_elem_bt = vector_element_basic_type(this); |
| int vlen_enc = vector_length_encoding(this); |
| switch (to_elem_bt) { |
| case T_SHORT: |
| __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| break; |
| case T_INT: |
| __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| break; |
| case T_FLOAT: |
| __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| break; |
| case T_LONG: |
| __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| break; |
| case T_DOUBLE: |
| __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| break; |
| |
| default: assert(false, "%s", type2name(to_elem_bt)); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct castStoX(vec dst, vec src, rRegP scratch) %{ |
| predicate(UseAVX <= 2 && |
| vector_length(n->in(1)) <= 8 && // src |
| vector_element_basic_type(n) == T_BYTE); |
| effect(TEMP scratch); |
| match(Set dst (VectorCastS2X src)); |
| format %{ "vector_cast_s2x $dst,$src\t! using $scratch as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "required"); |
| |
| __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, $scratch$$Register); |
| __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vcastStoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ |
| predicate(UseAVX <= 2 && |
| vector_length(n->in(1)) == 16 && // src |
| vector_element_basic_type(n) == T_BYTE); |
| effect(TEMP dst, TEMP vtmp, TEMP scratch); |
| match(Set dst (VectorCastS2X src)); |
| format %{ "vector_cast_s2x $dst,$src\t! using $vtmp, $scratch as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "required"); |
| |
| int vlen_enc = vector_length_encoding(vector_length_in_bytes(this, $src)); |
| __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); |
| __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); |
| __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vcastStoX_evex(vec dst, vec src) %{ |
| predicate(UseAVX > 2 || |
| (vector_length_in_bytes(n) >= vector_length_in_bytes(n->in(1)))); // dst >= src |
| match(Set dst (VectorCastS2X src)); |
| format %{ "vector_cast_s2x $dst,$src\t!" %} |
| ins_encode %{ |
| BasicType to_elem_bt = vector_element_basic_type(this); |
| int src_vlen_enc = vector_length_encoding(this, $src); |
| int vlen_enc = vector_length_encoding(this); |
| switch (to_elem_bt) { |
| case T_BYTE: |
| if (!VM_Version::supports_avx512vl()) { |
          src_vlen_enc = Assembler::AVX_512bit;
| } |
| __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); |
| break; |
| case T_INT: |
| __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| break; |
| case T_FLOAT: |
| __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| break; |
| case T_LONG: |
| __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| break; |
| case T_DOUBLE: |
| __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct castItoX(vec dst, vec src, rRegP scratch) %{ |
| predicate(UseAVX <= 2 && |
| (vector_length_in_bytes(n->in(1)) <= 16) && |
| (vector_length_in_bytes(n) < vector_length_in_bytes(n->in(1)))); // dst < src |
| match(Set dst (VectorCastI2X src)); |
| format %{ "vector_cast_i2x $dst,$src\t! using $scratch as TEMP" %} |
| effect(TEMP scratch); |
| ins_encode %{ |
| assert(UseAVX > 0, "required"); |
| |
| BasicType to_elem_bt = vector_element_basic_type(this); |
| int vlen_enc = vector_length_encoding(this, $src); |
| |
| if (to_elem_bt == T_BYTE) { |
| __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register); |
| __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| } else { |
| assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); |
| __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); |
| __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vcastItoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ |
| predicate(UseAVX <= 2 && |
| (vector_length_in_bytes(n->in(1)) == 32) && |
| (vector_length_in_bytes(n) < vector_length_in_bytes(n->in(1)))); // dst < src |
| match(Set dst (VectorCastI2X src)); |
| format %{ "vector_cast_i2x $dst,$src\t! using $vtmp and $scratch as TEMP" %} |
| effect(TEMP dst, TEMP vtmp, TEMP scratch); |
| ins_encode %{ |
| assert(UseAVX > 0, "required"); |
| |
| BasicType to_elem_bt = vector_element_basic_type(this); |
| int vlen_enc = vector_length_encoding(this, $src); |
| |
| if (to_elem_bt == T_BYTE) { |
| __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register); |
| __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); |
| __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); |
| } else { |
| assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); |
| __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); |
| __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); |
| __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vcastItoX_evex(vec dst, vec src) %{ |
| predicate(UseAVX > 2 || |
| (vector_length_in_bytes(n) >= vector_length_in_bytes(n->in(1)))); // dst >= src |
| match(Set dst (VectorCastI2X src)); |
| format %{ "vector_cast_i2x $dst,$src\t!" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "required"); |
| |
| BasicType dst_elem_bt = vector_element_basic_type(this); |
| int src_vlen_enc = vector_length_encoding(this, $src); |
| int dst_vlen_enc = vector_length_encoding(this); |
| switch (dst_elem_bt) { |
| case T_BYTE: |
| if (!VM_Version::supports_avx512vl()) { |
| src_vlen_enc = Assembler::AVX_512bit; |
| } |
| __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); |
| break; |
| case T_SHORT: |
| if (!VM_Version::supports_avx512vl()) { |
| src_vlen_enc = Assembler::AVX_512bit; |
| } |
| __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); |
| break; |
| case T_FLOAT: |
        __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
| break; |
| case T_LONG: |
| __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); |
| break; |
| case T_DOUBLE: |
        __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vcastLtoBS(vec dst, vec src, rRegP scratch) %{ |
| predicate((vector_element_basic_type(n) == T_BYTE || vector_element_basic_type(n) == T_SHORT) && |
| UseAVX <= 2); |
| match(Set dst (VectorCastL2X src)); |
| effect(TEMP scratch); |
| format %{ "vector_cast_l2x $dst,$src\t! using $scratch as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 0, "required"); |
| |
| int vlen = vector_length_in_bytes(this, $src); |
| BasicType to_elem_bt = vector_element_basic_type(this); |
| AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask()) |
| : ExternalAddress(vector_int_to_short_mask()); |
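    // Gather the low 32 bits of each long into the lower half of the register,
    // then mask to the target element width and pack down.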
| if (vlen <= 16) { |
| __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); |
| __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register); |
| __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); |
| } else { |
| assert(vlen <= 32, "required"); |
| __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); |
| __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); |
| __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register); |
| __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); |
| } |
| if (to_elem_bt == T_BYTE) { |
| __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vcastLtoX_evex(vec dst, vec src) %{ |
| predicate(UseAVX > 2 || |
| (vector_element_basic_type(n) == T_INT || |
| vector_element_basic_type(n) == T_FLOAT || |
| vector_element_basic_type(n) == T_DOUBLE)); |
| match(Set dst (VectorCastL2X src)); |
| format %{ "vector_cast_l2x $dst,$src\t!" %} |
| ins_encode %{ |
| BasicType to_elem_bt = vector_element_basic_type(this); |
| int vlen = vector_length_in_bytes(this, $src); |
| int vlen_enc = vector_length_encoding(this, $src); |
| switch (to_elem_bt) { |
| case T_BYTE: |
| if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { |
| vlen_enc = Assembler::AVX_512bit; |
| } |
| __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| break; |
| case T_SHORT: |
| if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { |
| vlen_enc = Assembler::AVX_512bit; |
| } |
| __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| break; |
| case T_INT: |
| if (vlen == 8) { |
| if ($dst$$XMMRegister != $src$$XMMRegister) { |
| __ movflt($dst$$XMMRegister, $src$$XMMRegister); |
| } |
| } else if (vlen == 16) { |
| __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); |
| } else if (vlen == 32) { |
| if (UseAVX > 2) { |
| if (!VM_Version::supports_avx512vl()) { |
| vlen_enc = Assembler::AVX_512bit; |
| } |
| __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| } else { |
| __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); |
| __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); |
| } |
| } else { // vlen == 64 |
| __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| } |
| break; |
| case T_FLOAT: |
| assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); |
| __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| break; |
| case T_DOUBLE: |
| assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); |
| __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| break; |
| |
| default: assert(false, "%s", type2name(to_elem_bt)); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vcastFtoD_reg(vec dst, vec src) %{ |
| predicate(vector_element_basic_type(n) == T_DOUBLE); |
| match(Set dst (VectorCastF2X src)); |
| format %{ "vector_cast_f2x $dst,$src\t!" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vcastDtoF_reg(vec dst, vec src) %{ |
| predicate(vector_element_basic_type(n) == T_FLOAT); |
| match(Set dst (VectorCastD2X src)); |
| format %{ "vector_cast_d2x $dst,$src\t!" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this, $src); |
| __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- VectorMaskCmp -------------------------------------- |
| |
| instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ |
| predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 |
| vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 |
| is_floating_point_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE |
| match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); |
| format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this, $src1); |
| Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); |
| if (vector_element_basic_type(this, $src1) == T_FLOAT) { |
| __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); |
| } else { |
| __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct evcmpFD(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch) %{ |
| predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 |
| is_floating_point_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE |
| match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); |
| effect(TEMP scratch); |
| format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} |
| ins_encode %{ |
| int vlen_enc = Assembler::AVX_512bit; |
| Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); |
| KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. |
| KRegister mask = k0; // The comparison itself is not being masked. |
| if (vector_element_basic_type(this, $src1) == T_FLOAT) { |
| __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); |
| __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); |
| } else { |
| __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); |
| __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) %{ |
| predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 |
| vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 |
| is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 |
| match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); |
| effect(TEMP scratch); |
| format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this, $src1); |
| Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); |
| Assembler::Width ww = widthForType(vector_element_basic_type(this, $src1)); |
| __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, ww, vlen_enc, $scratch$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct evcmp(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch) %{ |
| predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 |
| is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 |
| match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); |
| effect(TEMP scratch); |
| format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX > 2, "required"); |
| |
| int vlen_enc = Assembler::AVX_512bit; |
| Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); |
| KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. |
| KRegister mask = k0; // The comparison itself is not being masked. |
| bool merge = false; |
| BasicType src1_elem_bt = vector_element_basic_type(this, $src1); |
| |
| switch (src1_elem_bt) { |
| case T_BYTE: { |
| __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); |
| __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); |
| break; |
| } |
| case T_SHORT: { |
| __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); |
| __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); |
| break; |
| } |
| case T_INT: { |
| __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); |
| __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); |
| break; |
| } |
| case T_LONG: { |
| __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); |
| __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); |
| break; |
| } |
| |
| default: assert(false, "%s", type2name(src1_elem_bt)); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Extract |
| |
| instruct extractI(rRegI dst, legVec src, immU8 idx) %{ |
| predicate(vector_length_in_bytes(n->in(1)) <= 16); // src |
| match(Set dst (ExtractI src idx)); |
| match(Set dst (ExtractS src idx)); |
| #ifdef _LP64 |
| match(Set dst (ExtractB src idx)); |
| #endif |
| format %{ "extractI $dst,$src,$idx\t!" %} |
| ins_encode %{ |
| assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); |
| |
| BasicType elem_bt = vector_element_basic_type(this, $src); |
| __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ |
| predicate(vector_length_in_bytes(n->in(1)) == 32 || // src |
| vector_length_in_bytes(n->in(1)) == 64); // src |
| match(Set dst (ExtractI src idx)); |
| match(Set dst (ExtractS src idx)); |
| #ifdef _LP64 |
| match(Set dst (ExtractB src idx)); |
| #endif |
| effect(TEMP vtmp); |
| format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %} |
| ins_encode %{ |
| assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); |
| |
| BasicType elem_bt = vector_element_basic_type(this, $src); |
| XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); |
| __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| #ifdef _LP64 |
| instruct extractL(rRegL dst, legVec src, immU8 idx) %{ |
| predicate(vector_length(n->in(1)) <= 2); // src |
| match(Set dst (ExtractL src idx)); |
| format %{ "extractL $dst,$src,$idx\t!" %} |
| ins_encode %{ |
| assert(UseSSE >= 4, "required"); |
| assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); |
| |
| __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ |
| predicate(vector_length(n->in(1)) == 4 || // src |
| vector_length(n->in(1)) == 8); // src |
| match(Set dst (ExtractL src idx)); |
| effect(TEMP vtmp); |
| format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %} |
| ins_encode %{ |
| assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); |
| |
| XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); |
| __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| #endif |
| |
| instruct extractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ |
| predicate(vector_length(n->in(1)) <= 4); |
| match(Set dst (ExtractF src idx)); |
| effect(TEMP dst, TEMP tmp, TEMP vtmp); |
| format %{ "extractF $dst,$src,$idx\t! using $tmp, $vtmp as TEMP" %} |
| ins_encode %{ |
| assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); |
| |
| __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $tmp$$Register, $vtmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vextractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ |
| predicate(vector_length(n->in(1)/*src*/) == 8 || |
| vector_length(n->in(1)/*src*/) == 16); |
| match(Set dst (ExtractF src idx)); |
| effect(TEMP tmp, TEMP vtmp); |
| format %{ "vextractF $dst,$src,$idx\t! using $tmp, $vtmp as TEMP" %} |
| ins_encode %{ |
| assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); |
| |
| XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); |
| __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant, $tmp$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct extractD(legRegD dst, legVec src, immU8 idx) %{ |
| predicate(vector_length(n->in(1)) == 2); // src |
| match(Set dst (ExtractD src idx)); |
| format %{ "extractD $dst,$src,$idx\t!" %} |
| ins_encode %{ |
| assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); |
| |
| __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ |
| predicate(vector_length(n->in(1)) == 4 || // src |
| vector_length(n->in(1)) == 8); // src |
| match(Set dst (ExtractD src idx)); |
| effect(TEMP vtmp); |
| format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} |
| ins_encode %{ |
| assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); |
| |
| XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); |
| __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- Vector Blend -------------------------------------- |
| |
| instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (VectorBlend (Binary dst src) mask)); |
| format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %} |
| effect(TEMP tmp); |
| ins_encode %{ |
| assert(UseSSE >= 4, "required"); |
| |
| if ($mask$$XMMRegister != $tmp$$XMMRegister) { |
| __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); |
| } |
| __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ |
| predicate(UseAVX > 0 && |
| vector_length_in_bytes(n) <= 32 && |
| is_integral_type(vector_element_basic_type(n))); |
| match(Set dst (VectorBlend (Binary src1 src2) mask)); |
| format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ |
| predicate(UseAVX > 0 && |
| vector_length_in_bytes(n) <= 32 && |
| !is_integral_type(vector_element_basic_type(n))); |
| match(Set dst (VectorBlend (Binary src1 src2) mask)); |
| format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch) %{ |
| predicate(vector_length_in_bytes(n) == 64); |
| match(Set dst (VectorBlend (Binary src1 src2) mask)); |
| format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $scratch and k2 as TEMP" %} |
| effect(TEMP scratch); |
| ins_encode %{ |
| int vlen_enc = Assembler::AVX_512bit; |
| BasicType elem_bt = vector_element_basic_type(this); |
| KRegister ktmp = k2; |
| __ evpcmp(elem_bt, ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, $scratch$$Register); |
| __ evpblend(elem_bt, $dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- ABS -------------------------------------- |
| // a = |a| |
| instruct vabsB_reg(vec dst, vec src) %{ |
| match(Set dst (AbsVB src)); |
| format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen <= 16) { |
| __ pabsb($dst$$XMMRegister, $src$$XMMRegister); |
| } else { |
| int vlen_enc = vector_length_encoding(this); |
| __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vabsS_reg(vec dst, vec src) %{ |
| match(Set dst (AbsVS src)); |
| format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen <= 8) { |
| __ pabsw($dst$$XMMRegister, $src$$XMMRegister); |
| } else { |
| int vlen_enc = vector_length_encoding(this); |
| __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vabsI_reg(vec dst, vec src) %{ |
| match(Set dst (AbsVI src)); |
| format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} |
| ins_encode %{ |
| uint vlen = vector_length(this); |
| if (vlen <= 4) { |
| __ pabsd($dst$$XMMRegister, $src$$XMMRegister); |
| } else { |
| int vlen_enc = vector_length_encoding(this); |
| __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vabsL_reg(vec dst, vec src) %{ |
| match(Set dst (AbsVL src)); |
| format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} |
| ins_encode %{ |
| assert(UseAVX > 2, "required"); |
| int vlen_enc = vector_length_encoding(this); |
| if (!VM_Version::supports_avx512vl()) { |
| vlen_enc = Assembler::AVX_512bit; |
| } |
| __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- ABSNEG -------------------------------------- |
| |
| instruct vabsnegF(vec dst, vec src, rRegI scratch) %{ |
| predicate(vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F |
| match(Set dst (AbsVF src)); |
| match(Set dst (NegVF src)); |
| effect(TEMP scratch); |
| format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} |
| ins_cost(150); |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vlen = vector_length(this); |
| if (vlen == 2) { |
| __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); |
| } else { |
| assert(vlen == 8 || vlen == 16, "required"); |
| int vlen_enc = vector_length_encoding(this); |
| __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vabsneg4F(vec dst, rRegI scratch) %{ |
| predicate(vector_length(n) == 4); |
| match(Set dst (AbsVF dst)); |
| match(Set dst (NegVF dst)); |
| effect(TEMP scratch); |
| format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} |
| ins_cost(150); |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $scratch$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vabsnegD(vec dst, vec src, rRegI scratch) %{ |
| match(Set dst (AbsVD src)); |
| match(Set dst (NegVD src)); |
| effect(TEMP scratch); |
| format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| uint vlen = vector_length(this); |
| if (vlen == 2) { |
| assert(UseSSE >= 2, "required"); |
| __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); |
| } else { |
| int vlen_enc = vector_length_encoding(this); |
| __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| //------------------------------------- VectorTest -------------------------------------------- |
| |
| #ifdef _LP64 |
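// These rules reduce a vector test to flags via (v)ptest: the all-true variants
// read the carry flag, the any-true variants read the zero flag (clear when
// src1 AND src2 is non-zero).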
| instruct vptest_alltrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp1, legVec vtmp2, rFlagsReg cr) %{ |
| predicate(vector_length_in_bytes(n->in(1)) >= 4 && |
| vector_length_in_bytes(n->in(1)) < 16 && |
| static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); |
| match(Set dst (VectorTest src1 src2 )); |
| effect(TEMP vtmp1, TEMP vtmp2, KILL cr); |
| format %{ "vector_test $dst,$src1, $src2\t! using $vtmp1, $vtmp2 and $cr as TEMP" %} |
| ins_encode %{ |
| int vlen = vector_length_in_bytes(this, $src1); |
| __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); |
| __ setb(Assembler::carrySet, $dst$$Register); |
| __ movzbl($dst$$Register, $dst$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vptest_alltrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ |
| predicate(vector_length_in_bytes(n->in(1)) >= 16 && |
| static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); |
| match(Set dst (VectorTest src1 src2 )); |
| effect(KILL cr); |
| format %{ "vector_test $dst,$src1, $src2\t! using $cr as TEMP" %} |
| ins_encode %{ |
| int vlen = vector_length_in_bytes(this, $src1); |
| __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister); |
| __ setb(Assembler::carrySet, $dst$$Register); |
| __ movzbl($dst$$Register, $dst$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vptest_anytrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp, rFlagsReg cr) %{ |
| predicate(vector_length_in_bytes(n->in(1)) >= 4 && |
| vector_length_in_bytes(n->in(1)) < 16 && |
| static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); |
| match(Set dst (VectorTest src1 src2 )); |
| effect(TEMP vtmp, KILL cr); |
| format %{ "vector_test_any_true $dst,$src1,$src2\t! using $vtmp, $cr as TEMP" %} |
| ins_encode %{ |
| int vlen = vector_length_in_bytes(this, $src1); |
| __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); |
| __ setb(Assembler::notZero, $dst$$Register); |
| __ movzbl($dst$$Register, $dst$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vptest_anytrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ |
| predicate(vector_length_in_bytes(n->in(1)) >= 16 && |
| static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); |
| match(Set dst (VectorTest src1 src2 )); |
| effect(KILL cr); |
| format %{ "vector_test_any_true $dst,$src1,$src2\t! using $cr as TEMP" %} |
| ins_encode %{ |
| int vlen = vector_length_in_bytes(this, $src1); |
| __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister); |
| __ setb(Assembler::notZero, $dst$$Register); |
| __ movzbl($dst$$Register, $dst$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct cmpvptest_anytrue_lt16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero, legVec vtmp) %{ |
| predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 4 && |
| vector_length_in_bytes(n->in(1)->in(1)) < 16 && |
| static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); |
| match(Set cr (CmpI (VectorTest src1 src2) zero)); |
| effect(TEMP vtmp); |
| format %{ "cmp_vector_test_any_true $src1,$src2\t! using $vtmp as TEMP" %} |
| ins_encode %{ |
| int vlen = vector_length_in_bytes(this, $src1); |
| __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct cmpvptest_anytrue(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero) %{ |
| predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 16 && |
| static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); |
| match(Set cr (CmpI (VectorTest src1 src2) zero)); |
| format %{ "cmp_vector_test_any_true $src1,$src2\t!" %} |
| ins_encode %{ |
| int vlen = vector_length_in_bytes(this, $src1); |
| __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| #endif |
| |
| //------------------------------------- LoadMask -------------------------------------------- |
| |
| instruct loadMask(vec dst, vec src) %{ |
| match(Set dst (VectorLoadMask src)); |
| effect(TEMP dst); |
| format %{ "vector_loadmask_byte $dst,$src\n\t" %} |
| ins_encode %{ |
| int vlen_in_bytes = vector_length_in_bytes(this); |
| BasicType elem_bt = vector_element_basic_type(this); |
| |
| __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| //------------------------------------- StoreMask -------------------------------------------- |
| |
| instruct storeMask1B(vec dst, vec src, immI_1 size) %{ |
| predicate(vector_length(n) < 64 || VM_Version::supports_avx512vlbw()); |
| match(Set dst (VectorStoreMask src size)); |
| format %{ "vector_store_mask $dst,$src\t!" %} |
| ins_encode %{ |
| assert(UseSSE >= 3, "required"); |
| if (vector_length_in_bytes(this) <= 16) { |
| __ pabsb($dst$$XMMRegister, $src$$XMMRegister); |
| } else { |
| assert(UseAVX >= 2, "required"); |
| int src_vlen_enc = vector_length_encoding(this, $src); |
| __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct storeMask2B(vec dst, vec src, immI_2 size) %{ |
| predicate(vector_length(n) <= 8); |
| match(Set dst (VectorStoreMask src size)); |
| format %{ "vector_store_mask $dst,$src\n\t" %} |
| ins_encode %{ |
| assert(UseSSE >= 3, "required"); |
| __ pabsw($dst$$XMMRegister, $src$$XMMRegister); |
| __ packsswb($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vstoreMask2B(vec dst, vec src, immI_2 size) %{ |
| predicate(vector_length(n) == 16 && !VM_Version::supports_avx512bw()); |
| match(Set dst (VectorStoreMask src size)); |
| effect(TEMP dst); |
| format %{ "vector_store_mask $dst,$src\t!" %} |
| ins_encode %{ |
| int vlen_enc = Assembler::AVX_128bit; |
| __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); |
| __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vstoreMask2B_evex(vec dst, vec src, immI_2 size) %{ |
| predicate(VM_Version::supports_avx512bw()); |
| match(Set dst (VectorStoreMask src size)); |
| format %{ "vector_store_mask $dst,$src\t!" %} |
| ins_encode %{ |
| int src_vlen_enc = vector_length_encoding(this, $src); |
| int dst_vlen_enc = vector_length_encoding(this); |
| __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); |
| __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct storeMask4B(vec dst, vec src, immI_4 size) %{ |
| predicate(vector_length(n) <= 4 && UseAVX <= 2); |
| match(Set dst (VectorStoreMask src size)); |
| format %{ "vector_store_mask $dst,$src\t!" %} |
| ins_encode %{ |
| assert(UseSSE >= 3, "required"); |
| __ pabsd($dst$$XMMRegister, $src$$XMMRegister); |
| __ packssdw($dst$$XMMRegister, $dst$$XMMRegister); |
| __ packsswb($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vstoreMask4B(vec dst, vec src, immI_4 size) %{ |
| predicate(vector_length(n) == 8 && UseAVX <= 2); |
| match(Set dst (VectorStoreMask src size)); |
| format %{ "vector_store_mask $dst,$src\t!" %} |
| effect(TEMP dst); |
| ins_encode %{ |
| int vlen_enc = Assembler::AVX_128bit; |
| __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); |
| __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vstoreMask4B_evex(vec dst, vec src, immI_4 size) %{ |
| predicate(UseAVX > 2); |
| match(Set dst (VectorStoreMask src size)); |
| format %{ "vector_store_mask $dst,$src\t!" %} |
| ins_encode %{ |
| int src_vlen_enc = vector_length_encoding(this, $src); |
| int dst_vlen_enc = vector_length_encoding(this); |
| if (!VM_Version::supports_avx512vl()) { |
| src_vlen_enc = Assembler::AVX_512bit; |
| } |
| __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); |
| __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct storeMask8B(vec dst, vec src, immI_8 size) %{ |
| predicate(vector_length(n) == 2 && UseAVX <= 2); |
| match(Set dst (VectorStoreMask src size)); |
| format %{ "vector_store_mask $dst,$src\t!" %} |
| ins_encode %{ |
| assert(UseSSE >= 3, "required"); |
| __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); |
| __ packssdw($dst$$XMMRegister, $dst$$XMMRegister); |
| __ packsswb($dst$$XMMRegister, $dst$$XMMRegister); |
| __ pabsb($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct storeMask8B_avx(vec dst, vec src, immI_8 size, legVec vtmp) %{ |
| predicate(vector_length(n) == 4 && UseAVX <= 2); |
| match(Set dst (VectorStoreMask src size)); |
| format %{ "vector_store_mask $dst,$src\t! using $vtmp as TEMP" %} |
| effect(TEMP dst, TEMP vtmp); |
| ins_encode %{ |
| int vlen_enc = Assembler::AVX_128bit; |
| __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); |
| __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); |
| __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); |
| __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vstoreMask8B_evex(vec dst, vec src, immI_8 size) %{ |
| predicate(UseAVX > 2); |
| match(Set dst (VectorStoreMask src size)); |
| format %{ "vector_store_mask $dst,$src\t!" %} |
| ins_encode %{ |
| int src_vlen_enc = vector_length_encoding(this, $src); |
| int dst_vlen_enc = vector_length_encoding(this); |
| if (!VM_Version::supports_avx512vl()) { |
| src_vlen_enc = Assembler::AVX_512bit; |
| } |
| __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); |
| __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| //-------------------------------- Load Iota Indices ---------------------------------- |
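| // VectorLoadConst with a zero immediate materializes the "iota" index vector |
| // 0, 1, 2, ..., n-1 (one byte per element), loaded from a constant table through |
| // the scratch register.  It serves as the identity permutation for the |
| // shuffle/rearrange rules below. |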
| |
| instruct loadIotaIndices(vec dst, immI_0 src, rRegP scratch) %{ |
| predicate(vector_element_basic_type(n) == T_BYTE); |
| match(Set dst (VectorLoadConst src)); |
| effect(TEMP scratch); |
| format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} |
| ins_encode %{ |
| int vlen_in_bytes = vector_length_in_bytes(this); |
| __ load_iota_indices($dst$$XMMRegister, $scratch$$Register, vlen_in_bytes); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| //-------------------------------- Rearrange ---------------------------------- |
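| // VectorRearrange permutes the lanes of src according to the indices held in |
| // shuffle, and VectorLoadShuffle prepares those indices for the instruction that |
| // will consume them.  The indices arrive as one byte per lane, so element sizes |
| // wider than a byte first have the byte indices widened (and scaled) to the |
| // granularity the hardware shuffle expects. |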
| |
| // LoadShuffle/Rearrange for Byte |
| |
| instruct loadShuffleB(vec dst) %{ |
| predicate(vector_element_basic_type(n) == T_BYTE); |
| match(Set dst (VectorLoadShuffle dst)); |
| format %{ "vector_load_shuffle $dst, $dst" %} |
| ins_encode %{ |
| // empty: no conversion needed, pshufb/vpermb consume byte indices directly |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rearrangeB(vec dst, vec shuffle) %{ |
| predicate(vector_element_basic_type(n) == T_BYTE && |
| vector_length(n) < 32); |
| match(Set dst (VectorRearrange dst shuffle)); |
| format %{ "vector_rearrange $dst, $shuffle, $dst" %} |
| ins_encode %{ |
| assert(UseSSE >= 4, "required"); |
| __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
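| // vpshufb shuffles bytes only within each 128-bit lane.  Lacking AVX512-VBMI, a |
| // 256-bit byte rearrange therefore shuffles both the original and the |
| // lane-swapped source and blends the two results, selecting per byte according |
| // to whether the index targets the same or the other 128-bit lane.  The same |
| // approach is used for 16 shorts in rearrangeS_avx below. |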
| instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2, rRegP scratch) %{ |
| predicate(vector_element_basic_type(n) == T_BYTE && |
| vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); |
| match(Set dst (VectorRearrange src shuffle)); |
| effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); |
| format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2, $scratch as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX >= 2, "required"); |
| // Swap src into vtmp1 |
| __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); |
| // Shuffle swapped src to get entries from other 128 bit lane |
| __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); |
| // Shuffle original src to get entries from self 128 bit lane |
| __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); |
| // Create a blend mask by setting high bits for entries coming from other lane in shuffle |
| __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, $scratch$$Register); |
| // Perform the blend |
| __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rearrangeB_evex(vec dst, vec src, vec shuffle) %{ |
| predicate(vector_element_basic_type(n) == T_BYTE && |
| vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); |
| match(Set dst (VectorRearrange src shuffle)); |
| format %{ "vector_rearrange $dst, $shuffle, $src" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // LoadShuffle/Rearrange for Short |
| |
| instruct loadShuffleS(vec dst, vec src, vec vtmp, rRegP scratch) %{ |
| predicate(vector_element_basic_type(n) == T_SHORT && |
| vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS |
| match(Set dst (VectorLoadShuffle src)); |
| effect(TEMP dst, TEMP vtmp, TEMP scratch); |
| format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %} |
| ins_encode %{ |
| // Create a byte shuffle mask from the short shuffle mask; |
| // only a byte shuffle instruction is available on these platforms. |
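| // Each short index i is expanded into the byte pair (2*i, 2*i+1); for example, |
| // the short shuffle [2,0,3,1] becomes the byte shuffle [4,5, 0,1, 6,7, 2,3]. |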
| int vlen_in_bytes = vector_length_in_bytes(this); |
| if (UseAVX == 0) { |
| assert(vlen_in_bytes <= 16, "required"); |
| // Multiply each shuffle by two to get byte index |
| __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister); |
| __ psllw($vtmp$$XMMRegister, 1); |
| |
| // Duplicate to create 2 copies of byte index |
| __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); |
| __ psllw($dst$$XMMRegister, 8); |
| __ por($dst$$XMMRegister, $vtmp$$XMMRegister); |
| |
| // Add one to get alternate byte index |
| __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), $scratch$$Register); |
| __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); |
| } else { |
| assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); |
| int vlen_enc = vector_length_encoding(this); |
| // Multiply each shuffle by two to get byte index |
| __ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| __ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); |
| |
| // Duplicate to create 2 copies of byte index |
| __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); |
| __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); |
| |
| // Add one to get alternate byte index |
| __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, $scratch$$Register); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rearrangeS(vec dst, vec shuffle) %{ |
| predicate(vector_element_basic_type(n) == T_SHORT && |
| vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); |
| match(Set dst (VectorRearrange dst shuffle)); |
| format %{ "vector_rearrange $dst, $shuffle, $dst" %} |
| ins_encode %{ |
| assert(UseSSE >= 4, "required"); |
| __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2, rRegP scratch) %{ |
| predicate(vector_element_basic_type(n) == T_SHORT && |
| vector_length(n) == 16 && !VM_Version::supports_avx512bw()); |
| match(Set dst (VectorRearrange src shuffle)); |
| effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); |
| format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2, $scratch as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX >= 2, "required"); |
| // Swap src into vtmp1 |
| __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); |
| // Shuffle swapped src to get entries from other 128 bit lane |
| __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); |
| // Shuffle original src to get entries from self 128 bit lane |
| __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); |
| // Create a blend mask by setting high bits for entries coming from other lane in shuffle |
| __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, $scratch$$Register); |
| // Perform the blend |
| __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct loadShuffleS_evex(vec dst, vec src) %{ |
| predicate(vector_element_basic_type(n) == T_SHORT && |
| VM_Version::supports_avx512bw()); |
| match(Set dst (VectorLoadShuffle src)); |
| format %{ "vector_load_shuffle $dst, $src" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| if (!VM_Version::supports_avx512vl()) { |
| vlen_enc = Assembler::AVX_512bit; |
| } |
| __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ |
| predicate(vector_element_basic_type(n) == T_SHORT && |
| VM_Version::supports_avx512bw()); |
| match(Set dst (VectorRearrange src shuffle)); |
| format %{ "vector_rearrange $dst, $shuffle, $src" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| if (!VM_Version::supports_avx512vl()) { |
| vlen_enc = Assembler::AVX_512bit; |
| } |
| __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // LoadShuffle/Rearrange for Integer and Float |
| |
| instruct loadShuffleI(vec dst, vec src, vec vtmp, rRegP scratch) %{ |
| predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) && |
| vector_length(n) == 4 && UseAVX < 2); |
| match(Set dst (VectorLoadShuffle src)); |
| effect(TEMP dst, TEMP vtmp, TEMP scratch); |
| format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %} |
| ins_encode %{ |
| assert(UseSSE >= 4, "required"); |
| |
| // Create a byte shuffle mask from the int shuffle mask; |
| // only a byte shuffle instruction is available on these platforms. |
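| // Each int index i is expanded into the four byte indices 4*i .. 4*i+3; for |
| // example, the int shuffle [1,0] becomes the byte shuffle [4,5,6,7, 0,1,2,3]. |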
| |
| // Duplicate and multiply each shuffle by 4 |
| __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister); |
| __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); |
| __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); |
| __ psllw($vtmp$$XMMRegister, 2); |
| |
| // Duplicate again to create 4 copies of byte index |
| __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); |
| __ psllw($dst$$XMMRegister, 8); |
| __ por($vtmp$$XMMRegister, $dst$$XMMRegister); |
| |
| // Add 3,2,1,0 to get alternate byte index |
| __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), $scratch$$Register); |
| __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rearrangeI(vec dst, vec shuffle) %{ |
| predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) && |
| vector_length(n) == 4 && UseAVX < 2); |
| match(Set dst (VectorRearrange dst shuffle)); |
| format %{ "vector_rearrange $dst, $shuffle, $dst" %} |
| ins_encode %{ |
| assert(UseSSE >= 4, "required"); |
| __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct loadShuffleI_avx(vec dst, vec src) %{ |
| predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) && |
| UseAVX >= 2); |
| match(Set dst (VectorLoadShuffle src)); |
| format %{ "vector_load_shuffle $dst, $src" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ |
| predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) && |
| UseAVX >= 2); |
| match(Set dst (VectorRearrange src shuffle)); |
| format %{ "vector_rearrange $dst, $shuffle, $src" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| if (vlen_enc == Assembler::AVX_128bit) { |
| vlen_enc = Assembler::AVX_256bit; |
| } |
| __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // LoadShuffle/Rearrange for Long and Double |
| |
| instruct loadShuffleL(vec dst, vec src, vec vtmp, rRegP scratch) %{ |
| predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE |
| vector_length(n) < 8 && !VM_Version::supports_avx512vl()); |
| match(Set dst (VectorLoadShuffle src)); |
| effect(TEMP dst, TEMP vtmp, TEMP scratch); |
| format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %} |
| ins_encode %{ |
| assert(UseAVX >= 2, "required"); |
| |
| int vlen_enc = vector_length_encoding(this); |
| // Create a double word shuffle mask from the long shuffle mask; |
| // only a double word shuffle instruction is available on these platforms. |
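| // Each long index i is expanded into the double word pair (2*i, 2*i+1), which is |
| // what vpermd expects; e.g. the long shuffle [1,0] becomes the double word |
| // shuffle [2,3, 0,1]. |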
| |
| // Multiply each shuffle by two to get double word index |
| __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); |
| |
| // Duplicate each double word shuffle |
| __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); |
| __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); |
| |
| // Add one to get alternate double word index |
| __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, $scratch$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rearrangeL(vec dst, vec src, vec shuffle) %{ |
| predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE |
| vector_length(n) < 8 && !VM_Version::supports_avx512vl()); |
| match(Set dst (VectorRearrange src shuffle)); |
| format %{ "vector_rearrange $dst, $shuffle, $src" %} |
| ins_encode %{ |
| assert(UseAVX >= 2, "required"); |
| |
| int vlen_enc = vector_length_encoding(this); |
| __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct loadShuffleL_evex(vec dst, vec src) %{ |
| predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE |
| (vector_length(n) == 8 || VM_Version::supports_avx512vl())); |
| match(Set dst (VectorLoadShuffle src)); |
| format %{ "vector_load_shuffle $dst, $src" %} |
| ins_encode %{ |
| assert(UseAVX > 2, "required"); |
| |
| int vlen_enc = vector_length_encoding(this); |
| __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ |
| predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE |
| (vector_length(n) == 8 || VM_Version::supports_avx512vl())); |
| match(Set dst (VectorRearrange src shuffle)); |
| format %{ "vector_rearrange $dst, $shuffle, $src" %} |
| ins_encode %{ |
| assert(UseAVX > 2, "required"); |
| |
| int vlen_enc = vector_length_encoding(this); |
| if (vlen_enc == Assembler::AVX_128bit) { |
| vlen_enc = Assembler::AVX_256bit; |
| } |
| __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- FMA -------------------------------------- |
| // a * b + c |
| |
| instruct vfmaF_reg(vec a, vec b, vec c) %{ |
| match(Set c (FmaVF c (Binary a b))); |
| format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} |
| ins_cost(150); |
| ins_encode %{ |
| assert(UseFMA, "not enabled"); |
| int vlen_enc = vector_length_encoding(this); |
| __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vfmaF_mem(vec a, memory b, vec c) %{ |
| match(Set c (FmaVF c (Binary a (LoadVector b)))); |
| format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} |
| ins_cost(150); |
| ins_encode %{ |
| assert(UseFMA, "not enabled"); |
| int vlen_enc = vector_length_encoding(this); |
| __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vfmaD_reg(vec a, vec b, vec c) %{ |
| match(Set c (FmaVD c (Binary a b))); |
| format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} |
| ins_cost(150); |
| ins_encode %{ |
| assert(UseFMA, "not enabled"); |
| int vlen_enc = vector_length_encoding(this); |
| __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vfmaD_mem(vec a, memory b, vec c) %{ |
| match(Set c (FmaVD c (Binary a (LoadVector b)))); |
| format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} |
| ins_cost(150); |
| ins_encode %{ |
| assert(UseFMA, "not enabled"); |
| int vlen_enc = vector_length_encoding(this); |
| __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- Vector Multiply Add -------------------------------------- |
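| // MulAddVS2VI multiplies adjacent pairs of signed 16-bit elements and sums each |
| // pair into a 32-bit lane, i.e. |
| //   dst_int[i] = src1_short[2*i] * src2_short[2*i] |
| //              + src1_short[2*i+1] * src2_short[2*i+1], |
| // which maps directly onto pmaddwd/vpmaddwd. |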
| |
| instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ |
| predicate(UseAVX == 0); |
| match(Set dst (MulAddVS2VI dst src1)); |
| format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} |
| ins_encode %{ |
| __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulAddVS2VI src1 src2)); |
| format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} |
| ins_encode %{ |
| int vlen_enc = vector_length_encoding(this); |
| __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- Vector Multiply Add Add ---------------------------------- |
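| // With AVX512-VNNI the multiply-add above fuses with the following vector add |
| // into a single evpdpwssd that accumulates into dst: |
| //   dst_int[i] += src1_short[2*i] * src2_short[2*i] |
| //               + src1_short[2*i+1] * src2_short[2*i+1]. |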
| |
| instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ |
| predicate(VM_Version::supports_avx512_vnni()); |
| match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); |
| format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %} |
| ins_encode %{ |
| assert(UseAVX > 2, "required"); |
| int vlen_enc = vector_length_encoding(this); |
| __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| ins_cost(10); |
| %} |
| |
| // --------------------------------- PopCount -------------------------------------- |
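| // PopCountVI counts the set bits in each 32-bit lane.  vpopcntd requires |
| // AVX512-VPOPCNTDQ; that availability check is expected to be done by the |
| // matcher before this rule is selected, since the rule itself has no predicate. |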
| |
| instruct vpopcountI(vec dst, vec src) %{ |
| match(Set dst (PopCountVI src)); |
| format %{ "vpopcntd $dst,$src\t! vector popcount packedI" %} |
| ins_encode %{ |
| assert(UsePopCountInstruction, "not enabled"); |
| |
| int vlen_enc = vector_length_encoding(this); |
| __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- Bitwise Ternary Logic ---------------------------------- |
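| // MacroLogicV evaluates an arbitrary three-input boolean function bitwise: at |
| // each bit position the three input bits index into the 8-bit truth table $func. |
| // For example, func == 0x96 yields dst = dst ^ src2 ^ src3 and func == 0xE8 |
| // yields the bitwise majority of the three inputs. |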
| |
| instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ |
| match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); |
| effect(TEMP dst); |
| format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} |
| ins_encode %{ |
| int vector_len = vector_length_encoding(this); |
| __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ |
| match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); |
| effect(TEMP dst); |
| format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} |
| ins_encode %{ |
| int vector_len = vector_length_encoding(this); |
| __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- Rotation Operations ---------------------------------- |
| instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ |
| match(Set dst (RotateLeftV src shift)); |
| match(Set dst (RotateRightV src shift)); |
| format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vector_len = vector_length_encoding(this); |
| BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); |
| __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vprorate(vec dst, vec src, vec shift) %{ |
| match(Set dst (RotateLeftV src shift)); |
| match(Set dst (RotateRightV src shift)); |
| format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} |
| ins_encode %{ |
| int opcode = this->ideal_Opcode(); |
| int vector_len = vector_length_encoding(this); |
| BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); |
| __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| #ifdef _LP64 |
| // ---------------------------------- Masked Block Copy ------------------------------------ |
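| // These rules support partial (tail) vector copies: VectorMaskGen produces a |
| // scalar bit mask with the low $len bits set (e.g. len == 5 gives 0x1f), which is |
| // moved into the k2 opmask register and predicates the masked vector load/store, |
| // one mask bit per element. |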
| |
| instruct vmasked_load64(vec dst, memory mem, rRegL mask) %{ |
| match(Set dst (LoadVectorMasked mem mask)); |
| format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} |
| ins_encode %{ |
| BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); |
| int vector_len = vector_length_encoding(this); |
| __ kmovql(k2, $mask$$Register); |
| __ evmovdqu(elmType, k2, $dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmask_gen(rRegL dst, rRegL len, rRegL tempLen) %{ |
| match(Set dst (VectorMaskGen len)); |
| effect(TEMP_DEF dst, TEMP tempLen); |
| format %{ "vector_mask_gen $len \t! vector mask generator" %} |
| ins_encode %{ |
| __ genmask($dst$$Register, $len$$Register, $tempLen$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmask_gen_imm(rRegL dst, immL len) %{ |
| match(Set dst (VectorMaskGen len)); |
| format %{ "vector_mask_gen $len \t! vector mask generator" %} |
| ins_encode %{ |
| __ mov64($dst$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant))); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmasked_store64(memory mem, vec src, rRegL mask) %{ |
| match(Set mem (StoreVectorMasked mem (Binary src mask))); |
| format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} |
| ins_encode %{ |
| const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); |
| BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); |
| int vector_len = vector_length_encoding(src_node); |
| __ kmovql(k2, $mask$$Register); |
| __ evmovdqu(elmType, k2, $mem$$Address, $src$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| #endif // _LP64 |