| // |
| // Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved. |
| // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| // |
| // This code is free software; you can redistribute it and/or modify it |
| // under the terms of the GNU General Public License version 2 only, as |
| // published by the Free Software Foundation. |
| // |
| // This code is distributed in the hope that it will be useful, but WITHOUT |
| // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| // version 2 for more details (a copy is included in the LICENSE file that |
| // accompanied this code). |
| // |
| // You should have received a copy of the GNU General Public License version |
| // 2 along with this work; if not, write to the Free Software Foundation, |
| // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| // |
| // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| // or visit www.oracle.com if you need additional information or have any |
| // questions. |
| // |
| // |
| |
| // X86 Common Architecture Description File |
| |
| //----------REGISTER DEFINITION BLOCK------------------------------------------ |
| // This information is used by the matcher and the register allocator to |
| // describe individual registers and classes of registers within the target |
// architecture.
| |
| register %{ |
| //----------Architecture Description Register Definitions---------------------- |
| // General Registers |
| // "reg_def" name ( register save type, C convention save type, |
| // ideal register type, encoding ); |
| // Register Save Types: |
| // |
| // NS = No-Save: The register allocator assumes that these registers |
| // can be used without saving upon entry to the method, & |
| // that they do not need to be saved at call sites. |
| // |
| // SOC = Save-On-Call: The register allocator assumes that these registers |
| // can be used without saving upon entry to the method, |
| // but that they must be saved at call sites. |
| // |
| // SOE = Save-On-Entry: The register allocator assumes that these registers |
| // must be saved before using them upon entry to the |
| // method, but they do not need to be saved at call |
| // sites. |
| // |
| // AS = Always-Save: The register allocator assumes that these registers |
| // must be saved before using them upon entry to the |
| // method, & that they must be saved at call sites. |
| // |
| // Ideal Register Type is used to determine how to save & restore a |
| // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get |
| // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. |
| // |
| // The encoding number is the actual bit-pattern placed into the opcodes. |
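//
// As an illustration only (EXAMPLE is a hypothetical register, not one
// defined by this file), a 32-bit integer register that may be used
// freely on entry but must be saved at call sites would be declared:
//
//   reg_def EXAMPLE( SOC, SOC, Op_RegI, 0, example->as_VMReg());
//
// Op_RegI selects LoadI/StoreI spills, 0 is the bit-pattern used in
// opcodes, and the final argument names the concrete VMReg.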
| |
// XMM registers. 512-bit registers, i.e. 16 words each, labeled (a)-p.
| // Word a in each register holds a Float, words ab hold a Double. |
| // The whole registers are used in SSE4.2 version intrinsics, |
| // array copy stubs and superword operations (see UseSSE42Intrinsics, |
| // UseXMMForArrayCopy and UseSuperword flags). |
// For pre-EVEX architectures:
//     XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// For EVEX-enabled architectures:
//     XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
| // |
| // Linux ABI: No register preserved across function calls |
| // XMM0-XMM7 might hold parameters |
| // Windows ABI: XMM6-XMM31 preserved across function calls |
| // XMM0-XMM3 might hold parameters |
| |
| reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); |
| reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); |
| reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); |
| reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); |
| reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); |
| reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); |
| reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); |
| reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); |
| reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); |
| reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); |
| reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); |
| reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); |
| reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); |
| reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); |
| reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); |
| reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); |
| |
| reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); |
| reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); |
| reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); |
| reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); |
| reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); |
| reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); |
| reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); |
| reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); |
| reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); |
| reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); |
| reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); |
| reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); |
| reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); |
| reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); |
| reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); |
| reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); |
| |
| reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); |
| reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); |
| reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); |
| reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); |
| reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); |
| reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); |
| reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); |
| reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); |
| reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); |
| reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); |
| reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); |
| reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); |
| reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); |
| reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); |
| reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); |
| reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); |
| |
| reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); |
| reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); |
| reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); |
| reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); |
| reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); |
| reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); |
| reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); |
| reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); |
| reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); |
| reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); |
| reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); |
| reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); |
| reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); |
| reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); |
| reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); |
| reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); |
| |
| reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); |
| reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); |
| reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); |
| reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); |
| reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); |
| reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); |
| reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); |
| reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); |
| reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); |
| reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); |
| reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); |
| reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); |
| reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); |
| reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); |
| reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); |
| reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); |
| |
| reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); |
| reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); |
| reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); |
| reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); |
| reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); |
| reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); |
| reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); |
| reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); |
| reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); |
| reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); |
| reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); |
| reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); |
| reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); |
| reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); |
| reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); |
| reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); |
| |
| #ifdef _WIN64 |
| |
| reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()); |
| reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1)); |
| reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2)); |
| reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3)); |
| reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4)); |
| reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5)); |
| reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6)); |
| reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7)); |
| reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8)); |
| reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9)); |
| reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10)); |
| reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11)); |
| reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12)); |
| reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13)); |
| reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14)); |
| reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15)); |
| |
| reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()); |
| reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1)); |
| reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2)); |
| reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3)); |
| reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4)); |
| reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5)); |
| reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6)); |
| reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7)); |
| reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8)); |
| reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9)); |
| reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10)); |
| reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11)); |
| reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12)); |
| reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13)); |
| reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14)); |
| reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15)); |
| |
| reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()); |
| reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1)); |
| reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2)); |
| reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3)); |
| reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4)); |
| reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5)); |
| reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6)); |
| reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7)); |
| reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8)); |
| reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9)); |
| reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10)); |
| reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11)); |
| reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12)); |
| reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13)); |
| reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14)); |
| reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15)); |
| |
| reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()); |
| reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1)); |
| reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2)); |
| reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3)); |
| reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4)); |
| reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5)); |
| reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6)); |
| reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7)); |
| reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8)); |
| reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9)); |
| reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10)); |
| reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11)); |
| reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12)); |
| reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13)); |
| reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14)); |
| reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15)); |
| |
| reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()); |
| reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1)); |
| reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2)); |
| reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3)); |
| reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4)); |
| reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5)); |
| reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6)); |
| reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7)); |
| reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8)); |
| reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9)); |
| reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10)); |
| reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11)); |
| reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12)); |
| reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13)); |
| reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14)); |
| reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15)); |
| |
| reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()); |
| reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1)); |
| reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2)); |
| reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3)); |
| reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4)); |
| reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5)); |
| reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6)); |
| reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7)); |
| reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8)); |
| reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9)); |
| reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10)); |
| reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11)); |
| reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12)); |
| reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13)); |
| reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14)); |
| reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15)); |
| |
| reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()); |
| reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1)); |
| reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2)); |
| reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3)); |
| reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4)); |
| reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5)); |
| reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6)); |
| reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7)); |
| reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8)); |
| reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9)); |
| reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10)); |
| reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11)); |
| reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12)); |
| reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13)); |
| reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14)); |
| reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15)); |
| |
| reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()); |
| reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1)); |
| reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2)); |
| reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3)); |
| reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4)); |
| reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5)); |
| reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6)); |
| reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7)); |
| reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8)); |
| reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9)); |
| reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10)); |
| reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11)); |
| reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12)); |
| reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13)); |
| reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14)); |
| reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15)); |
| |
| reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()); |
| reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1)); |
| reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2)); |
| reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3)); |
| reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4)); |
| reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5)); |
| reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6)); |
| reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7)); |
| reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8)); |
| reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9)); |
| reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10)); |
| reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11)); |
| reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12)); |
| reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13)); |
| reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14)); |
| reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15)); |
| |
| reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()); |
| reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1)); |
| reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2)); |
| reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3)); |
| reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4)); |
| reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5)); |
| reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6)); |
| reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7)); |
| reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8)); |
| reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9)); |
| reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10)); |
| reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11)); |
| reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12)); |
| reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13)); |
| reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14)); |
| reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15)); |
| |
| reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()); |
| reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1)); |
| reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2)); |
| reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3)); |
| reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4)); |
| reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5)); |
| reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6)); |
| reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7)); |
reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
| reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9)); |
| reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10)); |
| reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11)); |
| reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12)); |
| reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13)); |
| reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14)); |
| reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15)); |
| |
| reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()); |
| reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1)); |
| reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2)); |
| reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3)); |
| reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4)); |
| reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5)); |
| reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6)); |
| reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7)); |
| reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8)); |
| reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9)); |
| reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10)); |
| reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11)); |
| reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12)); |
| reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13)); |
| reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14)); |
| reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15)); |
| |
| reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()); |
| reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1)); |
| reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2)); |
| reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3)); |
| reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4)); |
| reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5)); |
| reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6)); |
| reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7)); |
| reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8)); |
| reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9)); |
| reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10)); |
| reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11)); |
| reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12)); |
| reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13)); |
| reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14)); |
| reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15)); |
| |
| reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()); |
| reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1)); |
| reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2)); |
| reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3)); |
| reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4)); |
| reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5)); |
| reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6)); |
| reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7)); |
| reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8)); |
| reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9)); |
| reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10)); |
| reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11)); |
| reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12)); |
| reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13)); |
| reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14)); |
| reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15)); |
| |
| reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()); |
| reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1)); |
| reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2)); |
| reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3)); |
| reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4)); |
| reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5)); |
| reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6)); |
| reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7)); |
| reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8)); |
| reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9)); |
| reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10)); |
| reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11)); |
| reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12)); |
| reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13)); |
| reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14)); |
| reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15)); |
| |
| reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()); |
| reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1)); |
| reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2)); |
| reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3)); |
| reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4)); |
| reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5)); |
| reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6)); |
| reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7)); |
| reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8)); |
| reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9)); |
| reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10)); |
| reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11)); |
| reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12)); |
| reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13)); |
| reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14)); |
| reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15)); |
| |
| reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()); |
| reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1)); |
| reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2)); |
| reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3)); |
| reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4)); |
| reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5)); |
| reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6)); |
| reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7)); |
| reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8)); |
| reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9)); |
| reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10)); |
| reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11)); |
| reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12)); |
| reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13)); |
| reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14)); |
| reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15)); |
| |
| reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()); |
| reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1)); |
| reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2)); |
| reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3)); |
| reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4)); |
| reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5)); |
| reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6)); |
| reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7)); |
| reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8)); |
| reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9)); |
| reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10)); |
| reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11)); |
| reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12)); |
| reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13)); |
| reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14)); |
| reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15)); |
| |
| reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()); |
| reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1)); |
| reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2)); |
| reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3)); |
| reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4)); |
| reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5)); |
| reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6)); |
| reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7)); |
| reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8)); |
| reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9)); |
| reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10)); |
| reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11)); |
| reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12)); |
| reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13)); |
| reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14)); |
| reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15)); |
| |
| reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()); |
| reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1)); |
| reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2)); |
| reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3)); |
| reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4)); |
| reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5)); |
| reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6)); |
| reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7)); |
| reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8)); |
| reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9)); |
| reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10)); |
| reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11)); |
| reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12)); |
| reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13)); |
| reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14)); |
| reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15)); |
| |
| reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()); |
| reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1)); |
| reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2)); |
| reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3)); |
| reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4)); |
| reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5)); |
| reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6)); |
| reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7)); |
| reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8)); |
| reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9)); |
| reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10)); |
| reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11)); |
| reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12)); |
| reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13)); |
| reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14)); |
| reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15)); |
| |
reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
| reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2)); |
| reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3)); |
| reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4)); |
| reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5)); |
| reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6)); |
| reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7)); |
| reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8)); |
| reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9)); |
| reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10)); |
| reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11)); |
| reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12)); |
| reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13)); |
| reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14)); |
| reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15)); |
| |
| reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()); |
| reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1)); |
| reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2)); |
| reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3)); |
| reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4)); |
| reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5)); |
| reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6)); |
| reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7)); |
| reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8)); |
| reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9)); |
| reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10)); |
| reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11)); |
| reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12)); |
| reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13)); |
| reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14)); |
| reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15)); |
| |
| reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()); |
| reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1)); |
| reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2)); |
| reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3)); |
| reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4)); |
| reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5)); |
| reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6)); |
| reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7)); |
| reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8)); |
| reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9)); |
| reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10)); |
| reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11)); |
| reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12)); |
| reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13)); |
| reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14)); |
| reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15)); |
| |
| reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()); |
| reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1)); |
| reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2)); |
| reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3)); |
| reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4)); |
| reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5)); |
| reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6)); |
| reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7)); |
| reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8)); |
| reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9)); |
| reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10)); |
| reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11)); |
| reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12)); |
| reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13)); |
| reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14)); |
| reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15)); |
| |
| reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()); |
| reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1)); |
| reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2)); |
| reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3)); |
| reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4)); |
| reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5)); |
| reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6)); |
reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
| reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8)); |
| reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9)); |
| reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10)); |
| reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11)); |
| reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12)); |
| reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13)); |
| reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14)); |
| reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15)); |
| |
| #else // _WIN64 |
| |
| reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); |
| reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); |
| reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); |
| reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); |
| reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); |
| reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); |
| reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); |
| reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); |
| reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); |
| reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); |
| reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); |
| reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); |
| reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); |
| reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); |
| reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); |
| reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); |
| |
| reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); |
| reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); |
| reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); |
| reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); |
| reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); |
| reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); |
| reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); |
| reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); |
| reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); |
| reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); |
| reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); |
| reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); |
| reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); |
| reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); |
| reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); |
| reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); |
| |
| #ifdef _LP64 |
| |
| reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); |
| reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); |
| reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); |
| reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); |
| reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); |
| reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); |
| reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); |
| reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); |
| reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); |
| reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); |
| reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); |
| reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); |
| reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); |
| reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); |
| reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); |
| reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); |
| |
| reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); |
| reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); |
| reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); |
| reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); |
| reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); |
| reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); |
| reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); |
| reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); |
| reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); |
| reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); |
| reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); |
| reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); |
| reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); |
| reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); |
| reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); |
| reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); |
| |
| reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); |
| reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); |
| reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); |
| reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); |
| reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); |
| reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); |
| reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); |
| reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); |
| reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); |
| reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); |
| reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); |
| reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); |
| reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); |
| reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); |
| reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); |
| reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); |
| |
| reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); |
| reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); |
| reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); |
| reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); |
| reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); |
| reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); |
| reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); |
| reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); |
| reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); |
| reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); |
| reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); |
| reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); |
| reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); |
| reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); |
| reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); |
| reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); |
| |
| reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); |
| reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); |
| reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); |
| reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); |
| reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); |
| reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); |
| reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); |
| reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); |
| reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); |
| reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); |
| reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); |
| reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); |
| reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); |
| reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); |
| reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); |
| reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); |
| |
| reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); |
| reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); |
| reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); |
| reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); |
| reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); |
| reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); |
| reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); |
| reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); |
| reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); |
| reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); |
| reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); |
| reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); |
| reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); |
| reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); |
| reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); |
| reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); |
| |
| reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); |
| reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); |
| reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); |
| reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); |
| reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); |
| reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); |
| reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); |
| reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); |
| reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); |
| reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); |
| reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); |
| reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); |
| reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); |
| reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); |
| reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); |
| reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); |
| |
| reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); |
| reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); |
| reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); |
| reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); |
| reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); |
| reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); |
| reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); |
| reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); |
| reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); |
| reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); |
| reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); |
| reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); |
| reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); |
| reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); |
| reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); |
| reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); |
| |
| reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); |
| reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); |
| reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); |
| reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); |
| reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); |
| reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); |
| reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); |
| reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); |
| reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); |
| reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); |
| reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); |
| reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); |
| reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); |
| reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); |
| reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); |
| reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); |
| |
| reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); |
| reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); |
| reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); |
| reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); |
| reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); |
| reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); |
| reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); |
| reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); |
| reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); |
| reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); |
| reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); |
| reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); |
| reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); |
| reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); |
| reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); |
| reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); |
| |
| reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); |
| reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); |
| reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); |
| reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); |
| reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); |
| reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); |
| reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); |
| reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); |
| reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); |
| reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); |
| reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); |
| reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11)); |
| reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); |
| reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); |
| reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); |
| reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); |
| |
| reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); |
| reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); |
| reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); |
| reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); |
| reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); |
| reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); |
| reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); |
| reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); |
| reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); |
| reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); |
| reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); |
| reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); |
| reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); |
| reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); |
| reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); |
| reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); |
| |
| reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); |
| reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); |
| reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); |
| reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); |
| reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); |
| reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); |
| reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); |
| reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); |
| reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); |
| reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); |
| reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); |
| reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); |
| reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); |
| reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); |
| reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); |
| reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); |
| |
| reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); |
| reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); |
| reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); |
| reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); |
| reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); |
| reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); |
| reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); |
| reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); |
| reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); |
| reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); |
| reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); |
| reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); |
| reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12)); |
| reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); |
| reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); |
| reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); |
| |
| reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); |
| reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); |
| reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); |
| reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); |
| reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); |
| reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); |
| reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); |
| reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); |
| reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); |
| reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); |
| reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); |
| reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); |
| reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); |
| reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); |
| reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); |
| reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); |
| |
| reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); |
| reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); |
| reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); |
| reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); |
| reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); |
| reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); |
| reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); |
| reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); |
| reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); |
| reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); |
| reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); |
| reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); |
| reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); |
| reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); |
| reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); |
| reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); |
| |
| reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); |
| reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); |
| reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); |
| reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); |
| reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); |
| reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); |
| reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); |
| reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); |
| reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); |
| reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); |
| reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); |
| reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); |
| reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); |
| reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); |
| reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); |
| reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); |
| |
| reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); |
| reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); |
| reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); |
| reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); |
| reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); |
| reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); |
| reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); |
| reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); |
| reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); |
| reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); |
| reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); |
| reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); |
| reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); |
| reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); |
| reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); |
| reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); |
| |
| reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); |
| reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); |
| reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); |
| reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); |
| reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); |
| reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); |
| reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); |
| reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); |
| reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); |
| reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); |
| reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); |
| reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); |
| reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); |
| reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); |
| reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); |
| reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); |
| |
| reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); |
| reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); |
| reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); |
| reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); |
| reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); |
| reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); |
| reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); |
| reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); |
| reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); |
| reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); |
| reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); |
| reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); |
| reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); |
| reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); |
| reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); |
| reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); |
| |
| reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); |
| reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); |
| reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); |
| reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); |
| reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); |
| reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); |
| reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); |
| reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); |
| reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); |
| reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); |
| reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); |
| reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); |
| reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); |
| reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); |
| reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); |
| reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); |
| |
| reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); |
| reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); |
| reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); |
| reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); |
| reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); |
| reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); |
| reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); |
| reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); |
| reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); |
| reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); |
| reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); |
| reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); |
| reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); |
| reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); |
| reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); |
| reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); |
| |
| reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); |
| reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); |
| reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); |
| reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); |
| reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); |
| reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); |
| reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); |
| reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); |
| reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); |
| reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); |
| reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); |
| reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); |
| reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); |
| reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); |
| reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); |
| reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); |
| |
| reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); |
| reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); |
| reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); |
| reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); |
| reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); |
| reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); |
| reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); |
| reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); |
| reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); |
| reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); |
| reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); |
| reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); |
| reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); |
| reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); |
| reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); |
| reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); |
| |
| #endif // _LP64 |
| |
| #endif // _WIN64 |
| |
| #ifdef _LP64 |
| reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); |
| #else |
| reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); |
| #endif // _LP64 |
| |
| alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, |
| XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, |
| XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, |
| XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, |
| XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, |
| XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, |
| XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, |
| XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p |
| #ifdef _LP64 |
| ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, |
| XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, |
| XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, |
| XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, |
| XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, |
| XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, |
| XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, |
| XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p |
| ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, |
| XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, |
| XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, |
| XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, |
| XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, |
| XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, |
| XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, |
| XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, |
| XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, |
| XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, |
| XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, |
| XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, |
| XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, |
| XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, |
| XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, |
| XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p |
| #endif |
| ); |
| |
// Flags allocation class should be last.
| alloc_class chunk2(RFLAGS); |
| |
| // Singleton class for condition codes |
| reg_class int_flags(RFLAGS); |
| |
// Class for pre-EVEX float registers
| reg_class float_reg_legacy(XMM0, |
| XMM1, |
| XMM2, |
| XMM3, |
| XMM4, |
| XMM5, |
| XMM6, |
| XMM7 |
| #ifdef _LP64 |
| ,XMM8, |
| XMM9, |
| XMM10, |
| XMM11, |
| XMM12, |
| XMM13, |
| XMM14, |
| XMM15 |
| #endif |
| ); |
| |
// Class for EVEX float registers
| reg_class float_reg_evex(XMM0, |
| XMM1, |
| XMM2, |
| XMM3, |
| XMM4, |
| XMM5, |
| XMM6, |
| XMM7 |
| #ifdef _LP64 |
| ,XMM8, |
| XMM9, |
| XMM10, |
| XMM11, |
| XMM12, |
| XMM13, |
| XMM14, |
| XMM15, |
| XMM16, |
| XMM17, |
| XMM18, |
| XMM19, |
| XMM20, |
| XMM21, |
| XMM22, |
| XMM23, |
| XMM24, |
| XMM25, |
| XMM26, |
| XMM27, |
| XMM28, |
| XMM29, |
| XMM30, |
| XMM31 |
| #endif |
| ); |
| |
| reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); |
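// reg_class_dynamic selects between the two classes above when the matcher
// runs: the first (EVEX) class when the predicate holds, otherwise the
// second (legacy) class. Conceptually (a sketch only; the mask names below
// are hypothetical, the real ones are generated by the ADLC):
//   const RegMask& mask = VM_Version::supports_evex() ? FLOAT_REG_EVEX_mask
//                                                     : FLOAT_REG_LEGACY_mask;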
| |
// Class for pre-EVEX double registers
| reg_class double_reg_legacy(XMM0, XMM0b, |
| XMM1, XMM1b, |
| XMM2, XMM2b, |
| XMM3, XMM3b, |
| XMM4, XMM4b, |
| XMM5, XMM5b, |
| XMM6, XMM6b, |
| XMM7, XMM7b |
| #ifdef _LP64 |
| ,XMM8, XMM8b, |
| XMM9, XMM9b, |
| XMM10, XMM10b, |
| XMM11, XMM11b, |
| XMM12, XMM12b, |
| XMM13, XMM13b, |
| XMM14, XMM14b, |
| XMM15, XMM15b |
| #endif |
| ); |
| |
// Class for EVEX double registers
| reg_class double_reg_evex(XMM0, XMM0b, |
| XMM1, XMM1b, |
| XMM2, XMM2b, |
| XMM3, XMM3b, |
| XMM4, XMM4b, |
| XMM5, XMM5b, |
| XMM6, XMM6b, |
| XMM7, XMM7b |
| #ifdef _LP64 |
| ,XMM8, XMM8b, |
| XMM9, XMM9b, |
| XMM10, XMM10b, |
| XMM11, XMM11b, |
| XMM12, XMM12b, |
| XMM13, XMM13b, |
| XMM14, XMM14b, |
| XMM15, XMM15b, |
| XMM16, XMM16b, |
| XMM17, XMM17b, |
| XMM18, XMM18b, |
| XMM19, XMM19b, |
| XMM20, XMM20b, |
| XMM21, XMM21b, |
| XMM22, XMM22b, |
| XMM23, XMM23b, |
| XMM24, XMM24b, |
| XMM25, XMM25b, |
| XMM26, XMM26b, |
| XMM27, XMM27b, |
| XMM28, XMM28b, |
| XMM29, XMM29b, |
| XMM30, XMM30b, |
| XMM31, XMM31b |
| #endif |
| ); |
| |
| reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); |
| |
// Class for pre-EVEX 32bit vector registers
| reg_class vectors_reg_legacy(XMM0, |
| XMM1, |
| XMM2, |
| XMM3, |
| XMM4, |
| XMM5, |
| XMM6, |
| XMM7 |
| #ifdef _LP64 |
| ,XMM8, |
| XMM9, |
| XMM10, |
| XMM11, |
| XMM12, |
| XMM13, |
| XMM14, |
| XMM15 |
| #endif |
| ); |
| |
// Class for EVEX 32bit vector registers
| reg_class vectors_reg_evex(XMM0, |
| XMM1, |
| XMM2, |
| XMM3, |
| XMM4, |
| XMM5, |
| XMM6, |
| XMM7 |
| #ifdef _LP64 |
| ,XMM8, |
| XMM9, |
| XMM10, |
| XMM11, |
| XMM12, |
| XMM13, |
| XMM14, |
| XMM15, |
| XMM16, |
| XMM17, |
| XMM18, |
| XMM19, |
| XMM20, |
| XMM21, |
| XMM22, |
| XMM23, |
| XMM24, |
| XMM25, |
| XMM26, |
| XMM27, |
| XMM28, |
| XMM29, |
| XMM30, |
| XMM31 |
| #endif |
| ); |
| |
| reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); |
| |
// Class for pre-EVEX 64bit vector registers
| reg_class vectord_reg_legacy(XMM0, XMM0b, |
| XMM1, XMM1b, |
| XMM2, XMM2b, |
| XMM3, XMM3b, |
| XMM4, XMM4b, |
| XMM5, XMM5b, |
| XMM6, XMM6b, |
| XMM7, XMM7b |
| #ifdef _LP64 |
| ,XMM8, XMM8b, |
| XMM9, XMM9b, |
| XMM10, XMM10b, |
| XMM11, XMM11b, |
| XMM12, XMM12b, |
| XMM13, XMM13b, |
| XMM14, XMM14b, |
| XMM15, XMM15b |
| #endif |
| ); |
| |
// Class for EVEX 64bit vector registers
| reg_class vectord_reg_evex(XMM0, XMM0b, |
| XMM1, XMM1b, |
| XMM2, XMM2b, |
| XMM3, XMM3b, |
| XMM4, XMM4b, |
| XMM5, XMM5b, |
| XMM6, XMM6b, |
| XMM7, XMM7b |
| #ifdef _LP64 |
| ,XMM8, XMM8b, |
| XMM9, XMM9b, |
| XMM10, XMM10b, |
| XMM11, XMM11b, |
| XMM12, XMM12b, |
| XMM13, XMM13b, |
| XMM14, XMM14b, |
| XMM15, XMM15b, |
| XMM16, XMM16b, |
| XMM17, XMM17b, |
| XMM18, XMM18b, |
| XMM19, XMM19b, |
| XMM20, XMM20b, |
| XMM21, XMM21b, |
| XMM22, XMM22b, |
| XMM23, XMM23b, |
| XMM24, XMM24b, |
| XMM25, XMM25b, |
| XMM26, XMM26b, |
| XMM27, XMM27b, |
| XMM28, XMM28b, |
| XMM29, XMM29b, |
| XMM30, XMM30b, |
| XMM31, XMM31b |
| #endif |
| ); |
| |
| reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); |
| |
// Class for pre-EVEX 128bit vector registers
| reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, |
| XMM1, XMM1b, XMM1c, XMM1d, |
| XMM2, XMM2b, XMM2c, XMM2d, |
| XMM3, XMM3b, XMM3c, XMM3d, |
| XMM4, XMM4b, XMM4c, XMM4d, |
| XMM5, XMM5b, XMM5c, XMM5d, |
| XMM6, XMM6b, XMM6c, XMM6d, |
| XMM7, XMM7b, XMM7c, XMM7d |
| #ifdef _LP64 |
| ,XMM8, XMM8b, XMM8c, XMM8d, |
| XMM9, XMM9b, XMM9c, XMM9d, |
| XMM10, XMM10b, XMM10c, XMM10d, |
| XMM11, XMM11b, XMM11c, XMM11d, |
| XMM12, XMM12b, XMM12c, XMM12d, |
| XMM13, XMM13b, XMM13c, XMM13d, |
| XMM14, XMM14b, XMM14c, XMM14d, |
| XMM15, XMM15b, XMM15c, XMM15d |
| #endif |
| ); |
| |
// Class for EVEX 128bit vector registers
| reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, |
| XMM1, XMM1b, XMM1c, XMM1d, |
| XMM2, XMM2b, XMM2c, XMM2d, |
| XMM3, XMM3b, XMM3c, XMM3d, |
| XMM4, XMM4b, XMM4c, XMM4d, |
| XMM5, XMM5b, XMM5c, XMM5d, |
| XMM6, XMM6b, XMM6c, XMM6d, |
| XMM7, XMM7b, XMM7c, XMM7d |
| #ifdef _LP64 |
| ,XMM8, XMM8b, XMM8c, XMM8d, |
| XMM9, XMM9b, XMM9c, XMM9d, |
| XMM10, XMM10b, XMM10c, XMM10d, |
| XMM11, XMM11b, XMM11c, XMM11d, |
| XMM12, XMM12b, XMM12c, XMM12d, |
| XMM13, XMM13b, XMM13c, XMM13d, |
| XMM14, XMM14b, XMM14c, XMM14d, |
| XMM15, XMM15b, XMM15c, XMM15d, |
| XMM16, XMM16b, XMM16c, XMM16d, |
| XMM17, XMM17b, XMM17c, XMM17d, |
| XMM18, XMM18b, XMM18c, XMM18d, |
| XMM19, XMM19b, XMM19c, XMM19d, |
| XMM20, XMM20b, XMM20c, XMM20d, |
| XMM21, XMM21b, XMM21c, XMM21d, |
| XMM22, XMM22b, XMM22c, XMM22d, |
| XMM23, XMM23b, XMM23c, XMM23d, |
| XMM24, XMM24b, XMM24c, XMM24d, |
| XMM25, XMM25b, XMM25c, XMM25d, |
| XMM26, XMM26b, XMM26c, XMM26d, |
| XMM27, XMM27b, XMM27c, XMM27d, |
| XMM28, XMM28b, XMM28c, XMM28d, |
| XMM29, XMM29b, XMM29c, XMM29d, |
| XMM30, XMM30b, XMM30c, XMM30d, |
| XMM31, XMM31b, XMM31c, XMM31d |
| #endif |
| ); |
| |
| reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); |
| |
// Class for pre-EVEX 256bit vector registers
| reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, |
| XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, |
| XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, |
| XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, |
| XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, |
| XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, |
| XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, |
| XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h |
| #ifdef _LP64 |
| ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, |
| XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, |
| XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, |
| XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, |
| XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, |
| XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, |
| XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, |
| XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h |
| #endif |
| ); |
| |
// Class for EVEX 256bit vector registers
| reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, |
| XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, |
| XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, |
| XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, |
| XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, |
| XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, |
| XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, |
| XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h |
| #ifdef _LP64 |
| ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, |
| XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, |
| XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, |
| XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, |
| XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, |
| XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, |
| XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, |
| XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, |
| XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, |
| XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, |
| XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, |
| XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, |
| XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, |
| XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, |
| XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, |
| XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, |
| XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, |
| XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, |
| XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, |
| XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, |
| XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, |
| XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, |
| XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, |
| XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h |
| #endif |
| ); |
| |
| reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); |
| |
// Class for all 512bit vector registers (EVEX targets only)
| reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, |
| XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, |
| XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, |
| XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, |
| XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, |
| XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, |
| XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, |
| XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p |
| #ifdef _LP64 |
| ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, |
| XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, |
| XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, |
| XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, |
| XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, |
| XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, |
| XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, |
| XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p |
| ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, |
| XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, |
| XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, |
| XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, |
| XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, |
| XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, |
| XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, |
| XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, |
| XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, |
| XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, |
| XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, |
| XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, |
| XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, |
| XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, |
| XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, |
| XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p |
| #endif |
| ); |
| |
| %} |
| |
| |
| //----------SOURCE BLOCK------------------------------------------------------- |
| // This is a block of C++ code which provides values, functions, and |
| // definitions necessary in the rest of the architecture description |
| |
| source_hpp %{ |
| // Header information of the source block. |
| // Method declarations/definitions which are used outside |
| // the ad-scope can conveniently be defined here. |
| // |
| // To keep related declarations/definitions/uses close together, |
| // we switch between source %{ }% and source_hpp %{ }% freely as needed. |
| |
| class NativeJump; |
| |
| class CallStubImpl { |
| |
| //-------------------------------------------------------------- |
| //---< Used for optimization in Compile::shorten_branches >--- |
| //-------------------------------------------------------------- |
| |
| public: |
| // Size of call trampoline stub. |
| static uint size_call_trampoline() { |
| return 0; // no call trampolines on this platform |
| } |
| |
  // Number of relocations needed by a call trampoline stub.
| static uint reloc_call_trampoline() { |
| return 0; // no call trampolines on this platform |
| } |
| }; |
| |
| class HandlerImpl { |
| |
| public: |
| |
| static int emit_exception_handler(CodeBuffer &cbuf); |
| static int emit_deopt_handler(CodeBuffer& cbuf); |
| |
| static uint size_exception_handler() { |
| // NativeCall instruction size is the same as NativeJump. |
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization (4932387).
| // Note that this value is also credited (in output.cpp) to |
| // the size of the code section. |
| return NativeJump::instruction_size; |
| } |
| |
| #ifdef _LP64 |
| static uint size_deopt_handler() { |
| // three 5 byte instructions |
| return 15; |
| } |
| #else |
| static uint size_deopt_handler() { |
| // NativeCall instruction size is the same as NativeJump. |
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization (4932387).
| // Note that this value is also credited (in output.cpp) to |
| // the size of the code section. |
| return 5 + NativeJump::instruction_size; // pushl(); jmp; |
| } |
| #endif |
| }; |
| |
| %} // end source_hpp |
| |
| source %{ |
| |
| // Emit exception handler code. |
| // Stuff framesize into a register and call a VM stub routine. |
| int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { |
| |
| // Note that the code buffer's insts_mark is always relative to insts. |
| // That's why we must use the macroassembler to generate a handler. |
| MacroAssembler _masm(&cbuf); |
| address base = __ start_a_stub(size_exception_handler()); |
| if (base == NULL) { |
| ciEnv::current()->record_failure("CodeCache is full"); |
| return 0; // CodeBuffer::expand failed |
| } |
| int offset = __ offset(); |
| __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); |
| assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); |
| __ end_a_stub(); |
| return offset; |
| } |
| |
| // Emit deopt handler code. |
| int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { |
| |
| // Note that the code buffer's insts_mark is always relative to insts. |
| // That's why we must use the macroassembler to generate a handler. |
| MacroAssembler _masm(&cbuf); |
| address base = __ start_a_stub(size_deopt_handler()); |
| if (base == NULL) { |
| ciEnv::current()->record_failure("CodeCache is full"); |
| return 0; // CodeBuffer::expand failed |
| } |
| int offset = __ offset(); |
| |
| #ifdef _LP64 |
| address the_pc = (address) __ pc(); |
| Label next; |
| // push a "the_pc" on the stack without destroying any registers |
| // as they all may be live. |
| |
| // push address of "next" |
| __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 |
| __ bind(next); |
| // adjust it so it matches "the_pc" |
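  // (the call pushed the address of "next"; subtracting the number of bytes
  // emitted since "the_pc" rewinds the saved slot to "the_pc" itself)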
| __ subptr(Address(rsp, 0), __ offset() - offset); |
| #else |
| InternalAddress here(__ pc()); |
| __ pushptr(here.addr()); |
| #endif |
| |
| __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); |
| assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); |
| __ end_a_stub(); |
| return offset; |
| } |
| |
| |
| //============================================================================= |
| |
| // Float masks come from different places depending on platform. |
| #ifdef _LP64 |
| static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } |
| static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } |
| static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } |
| static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } |
| #else |
| static address float_signmask() { return (address)float_signmask_pool; } |
| static address float_signflip() { return (address)float_signflip_pool; } |
| static address double_signmask() { return (address)double_signmask_pool; } |
| static address double_signflip() { return (address)double_signflip_pool; } |
| #endif |
| |
| |
| const bool Matcher::match_rule_supported(int opcode) { |
| if (!has_match_rule(opcode)) |
| return false; |
| |
| bool ret_value = true; |
| switch (opcode) { |
| case Op_PopCountI: |
| case Op_PopCountL: |
| if (!UsePopCountInstruction) |
| ret_value = false; |
| break; |
| case Op_MulVI: |
| if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX |
| ret_value = false; |
| break; |
| case Op_MulVL: |
| case Op_MulReductionVL: |
      if (!VM_Version::supports_avx512dq())
| ret_value = false; |
| break; |
| case Op_AddReductionVL: |
      if (UseAVX < 3) // EVEX only: vector connectivity becomes an issue here
| ret_value = false; |
| break; |
| case Op_AddReductionVI: |
| if (UseSSE < 3) // requires at least SSE3 |
| ret_value = false; |
| break; |
| case Op_MulReductionVI: |
| if (UseSSE < 4) // requires at least SSE4 |
| ret_value = false; |
| break; |
| case Op_AddReductionVF: |
| case Op_AddReductionVD: |
| case Op_MulReductionVF: |
| case Op_MulReductionVD: |
| if (UseSSE < 1) // requires at least SSE |
| ret_value = false; |
| break; |
| case Op_SqrtVD: |
| if (UseAVX < 1) // enabled for AVX only |
| ret_value = false; |
| break; |
| case Op_CompareAndSwapL: |
| #ifdef _LP64 |
| case Op_CompareAndSwapP: |
| #endif |
| if (!VM_Version::supports_cx8()) |
| ret_value = false; |
| break; |
| } |
| |
  return ret_value;  // By default, match rules are supported.
| } |
| |
| const int Matcher::float_pressure(int default_pressure_threshold) { |
| int float_pressure_threshold = default_pressure_threshold; |
| #ifdef _LP64 |
| if (UseAVX > 2) { |
| // Increase pressure threshold on machines with AVX3 which have |
| // 2x more XMM registers. |
| float_pressure_threshold = default_pressure_threshold * 2; |
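    // e.g. a default threshold of 14 would become 28 here (14 is an
    // illustrative value; the actual default is supplied by the caller).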
| } |
| #endif |
| return float_pressure_threshold; |
| } |
| |
| // Max vector size in bytes. 0 if not supported. |
| const int Matcher::vector_width_in_bytes(BasicType bt) { |
| assert(is_java_primitive(bt), "only primitive type vectors"); |
| if (UseSSE < 2) return 0; |
| // SSE2 supports 128bit vectors for all types. |
| // AVX2 supports 256bit vectors for all types. |
  // EVEX (AVX-512) supports 512bit vectors for all types.
| int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; |
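  // e.g. UseAVX == 2 gives (1 << 2) * 8 = 32 bytes, UseAVX == 3 gives 64 bytes.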
| // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. |
| if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) |
| size = (UseAVX > 2) ? 64 : 32; |
| // Use flag to limit vector size. |
| size = MIN2(size,(int)MaxVectorSize); |
| // Minimum 2 values in vector (or 4 for bytes). |
| switch (bt) { |
| case T_DOUBLE: |
| case T_LONG: |
| if (size < 16) return 0; |
| break; |
| case T_FLOAT: |
| case T_INT: |
| if (size < 8) return 0; |
| break; |
| case T_BOOLEAN: |
| if (size < 4) return 0; |
| break; |
| case T_CHAR: |
| if (size < 4) return 0; |
| break; |
| case T_BYTE: |
| if (size < 4) return 0; |
| if ((size > 32) && !VM_Version::supports_avx512bw()) return 0; |
| break; |
| case T_SHORT: |
| if (size < 4) return 0; |
| if ((size > 16) && !VM_Version::supports_avx512bw()) return 0; |
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| return size; |
| } |
| |
| // Limits on vector size (number of elements) loaded into vector. |
| const int Matcher::max_vector_size(const BasicType bt) { |
| return vector_width_in_bytes(bt)/type2aelembytes(bt); |
| } |
| const int Matcher::min_vector_size(const BasicType bt) { |
| int max_size = max_vector_size(bt); |
  // The minimum size that can be loaded into a vector is 4 bytes.
| int size = (type2aelembytes(bt) == 1) ? 4 : 2; |
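  // e.g. T_BYTE needs at least 4 elements (4 bytes); T_INT at least 2 elements.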
| return MIN2(size,max_size); |
| } |
| |
// Vector ideal reg corresponding to specified size in bytes
| const int Matcher::vector_ideal_reg(int size) { |
| assert(MaxVectorSize >= size, ""); |
| switch(size) { |
| case 4: return Op_VecS; |
| case 8: return Op_VecD; |
| case 16: return Op_VecX; |
| case 32: return Op_VecY; |
| case 64: return Op_VecZ; |
| } |
| ShouldNotReachHere(); |
| return 0; |
| } |
| |
| // Only lowest bits of xmm reg are used for vector shift count. |
| const int Matcher::vector_shift_count_ideal_reg(int size) { |
| return Op_VecS; |
| } |
| |
| // x86 supports misaligned vectors store/load. |
| const bool Matcher::misaligned_vectors_ok() { |
| return !AlignVector; // can be changed by flag |
| } |
| |
| // x86 AES instructions are compatible with SunJCE expanded |
| // keys, hence we do not need to pass the original key to stubs |
| const bool Matcher::pass_original_key_for_aes() { |
| return false; |
| } |
| |
| // Helper methods for MachSpillCopyNode::implementation(). |
| static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, |
| int src_hi, int dst_hi, uint ireg, outputStream* st) { |
  // In the 64-bit VM the size calculation is very complex, so instructions
  // are emitted into a scratch buffer to measure their actual size.
| LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) |
| assert(ireg == Op_VecS || // 32bit vector |
| (src_lo & 1) == 0 && (src_lo + 1) == src_hi && |
| (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, |
| "no non-adjacent vector moves" ); |
| if (cbuf) { |
| MacroAssembler _masm(cbuf); |
| int offset = __ offset(); |
| switch (ireg) { |
| case Op_VecS: // copy whole register |
| case Op_VecD: |
| case Op_VecX: |
| __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); |
| break; |
| case Op_VecY: |
| __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); |
| break; |
| case Op_VecZ: |
| __ evmovdqul(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); |
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| int size = __ offset() - offset; |
| #ifdef ASSERT |
| // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. |
    assert(!do_size || size == 4, "incorrect size calculation");
| #endif |
| return size; |
| #ifndef PRODUCT |
| } else if (!do_size) { |
| switch (ireg) { |
| case Op_VecS: |
| case Op_VecD: |
| case Op_VecX: |
| st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); |
| break; |
| case Op_VecY: |
| case Op_VecZ: |
| st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); |
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| #endif |
| } |
| // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. |
| return (UseAVX > 2) ? 6 : 4; |
| } |
| |
| static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, |
| int stack_offset, int reg, uint ireg, outputStream* st) { |
  // In the 64-bit VM the size calculation is very complex, so instructions
  // are emitted into a scratch buffer to measure their actual size.
| LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) |
| if (cbuf) { |
| MacroAssembler _masm(cbuf); |
| int offset = __ offset(); |
| if (is_load) { |
| switch (ireg) { |
| case Op_VecS: |
| __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); |
| break; |
| case Op_VecD: |
| __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); |
| break; |
| case Op_VecX: |
| __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); |
| break; |
| case Op_VecY: |
| __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); |
| break; |
| case Op_VecZ: |
| __ evmovdqul(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); |
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| } else { // store |
| switch (ireg) { |
| case Op_VecS: |
| __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); |
| break; |
| case Op_VecD: |
| __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); |
| break; |
| case Op_VecX: |
| __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); |
| break; |
| case Op_VecY: |
| __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); |
| break; |
| case Op_VecZ: |
| __ evmovdqul(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); |
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| } |
| int size = __ offset() - offset; |
| #ifdef ASSERT |
| int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); |
| // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. |
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
| #endif |
| return size; |
| #ifndef PRODUCT |
| } else if (!do_size) { |
| if (is_load) { |
| switch (ireg) { |
| case Op_VecS: |
| st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); |
| break; |
| case Op_VecD: |
| st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); |
| break; |
| case Op_VecX: |
| st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); |
| break; |
| case Op_VecY: |
| case Op_VecZ: |
| st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); |
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| } else { // store |
| switch (ireg) { |
| case Op_VecS: |
| st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); |
| break; |
| case Op_VecD: |
| st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); |
| break; |
| case Op_VecX: |
| st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); |
| break; |
| case Op_VecY: |
| case Op_VecZ: |
| st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); |
| break; |
| default: |
| ShouldNotReachHere(); |
| } |
| } |
| #endif |
| } |
| bool is_single_byte = false; |
| int vec_len = 0; |
| if ((UseAVX > 2) && (stack_offset != 0)) { |
| switch (ireg) { |
| case Op_VecS: |
| case Op_VecD: |
| case Op_VecX: |
| break; |
| case Op_VecY: |
| vec_len = 1; |
| break; |
| case Op_VecZ: |
| vec_len = 2; |
| break; |
| } |
| is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, Assembler::EVEX_FVM, Assembler::EVEX_32bit, 0); |
| } |
| int offset_size = 0; |
| int size = 5; |
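  // On EVEX targets the displacement may compress to a single byte (disp8*N),
  // and the EVEX prefix costs two bytes more than the VEX/SIMD prefix assumed
  // in the base size above.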
  if (UseAVX > 2) {
    if (!VM_Version::supports_avx512vl() && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if (!VM_Version::supports_avx512vl() && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
| } else { |
| offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); |
| } |
| // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. |
| return size+offset_size; |
| } |
| |
| static inline jfloat replicate4_imm(int con, int width) { |
| // Load a constant of "width" (in bytes) and replicate it to fill 32bit. |
| assert(width == 1 || width == 2, "only byte or short types here"); |
| int bit_width = width * 8; |
| jint val = con; |
| val &= (1 << bit_width) - 1; // mask off sign bits |
| while(bit_width < 32) { |
| val |= (val << bit_width); |
| bit_width <<= 1; |
| } |
| jfloat fval = *((jfloat*) &val); // coerce to float type |
| return fval; |
| } |
| |
| static inline jdouble replicate8_imm(int con, int width) { |
| // Load a constant of "width" (in bytes) and replicate it to fill 64bit. |
| assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); |
| int bit_width = width * 8; |
| jlong val = con; |
| val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits |
| while(bit_width < 64) { |
| val |= (val << bit_width); |
| bit_width <<= 1; |
| } |
| jdouble dval = *((jdouble*) &val); // coerce to double type |
| return dval; |
| } |
| |
| #ifndef PRODUCT |
| void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { |
| st->print("nop \t# %d bytes pad for loops and calls", _count); |
| } |
| #endif |
| |
| void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { |
| MacroAssembler _masm(&cbuf); |
| __ nop(_count); |
| } |
| |
| uint MachNopNode::size(PhaseRegAlloc*) const { |
| return _count; |
| } |
| |
| #ifndef PRODUCT |
| void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { |
| st->print("# breakpoint"); |
| } |
| #endif |
| |
| void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { |
| MacroAssembler _masm(&cbuf); |
| __ int3(); |
| } |
| |
| uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { |
| return MachNode::size(ra_); |
| } |
| |
| %} |
| |
| encode %{ |
| |
| enc_class call_epilog %{ |
| if (VerifyStackAtCalls) { |
      // Check that stack depth is unchanged: find the magic cookie on the stack
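      // (0xbadb100d, stored into the frame by the method prolog when
      // VerifyStackAtCalls is set).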
| int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); |
| MacroAssembler _masm(&cbuf); |
| Label L; |
| __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); |
| __ jccb(Assembler::equal, L); |
| // Die if stack mismatch |
| __ int3(); |
| __ bind(L); |
| } |
| %} |
| |
| %} |
| |
| |
| //----------OPERANDS----------------------------------------------------------- |
| // Operand definitions must precede instruction definitions for correct parsing |
| // in the ADLC because operands constitute user defined types which are used in |
| // instruction definitions. |
| |
// This operand applies only to EVEX targets, so only one version is needed.
| operand vecZ() %{ |
| constraint(ALLOC_IN_RC(vectorz_reg)); |
| match(VecZ); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) |
| |
| // ============================================================================ |
| |
| instruct ShouldNotReachHere() %{ |
| match(Halt); |
| format %{ "int3\t# ShouldNotReachHere" %} |
| ins_encode %{ |
| __ int3(); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| // ============================================================================ |
| |
| instruct addF_reg(regF dst, regF src) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (AddF dst src)); |
| |
| format %{ "addss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ addss($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addF_mem(regF dst, memory src) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (AddF dst (LoadF src))); |
| |
| format %{ "addss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ addss($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addF_imm(regF dst, immF con) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (AddF dst con)); |
| format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ addss($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddF src1 src2)); |
| |
| format %{ "vaddss $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddF src1 (LoadF src2))); |
| |
| format %{ "vaddss $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addF_reg_imm(regF dst, regF src, immF con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddF src con)); |
| |
| format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addD_reg(regD dst, regD src) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (AddD dst src)); |
| |
| format %{ "addsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ addsd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addD_mem(regD dst, memory src) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (AddD dst (LoadD src))); |
| |
| format %{ "addsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ addsd($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addD_imm(regD dst, immD con) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (AddD dst con)); |
| format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ addsd($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddD src1 src2)); |
| |
| format %{ "vaddsd $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddD src1 (LoadD src2))); |
| |
| format %{ "vaddsd $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct addD_reg_imm(regD dst, regD src, immD con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddD src con)); |
| |
| format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subF_reg(regF dst, regF src) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (SubF dst src)); |
| |
| format %{ "subss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ subss($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subF_mem(regF dst, memory src) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (SubF dst (LoadF src))); |
| |
| format %{ "subss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ subss($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subF_imm(regF dst, immF con) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (SubF dst con)); |
| format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ subss($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubF src1 src2)); |
| |
| format %{ "vsubss $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubF src1 (LoadF src2))); |
| |
| format %{ "vsubss $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subF_reg_imm(regF dst, regF src, immF con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubF src con)); |
| |
| format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subD_reg(regD dst, regD src) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (SubD dst src)); |
| |
| format %{ "subsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ subsd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subD_mem(regD dst, memory src) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (SubD dst (LoadD src))); |
| |
| format %{ "subsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ subsd($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subD_imm(regD dst, immD con) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (SubD dst con)); |
| format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ subsd($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubD src1 src2)); |
| |
| format %{ "vsubsd $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubD src1 (LoadD src2))); |
| |
| format %{ "vsubsd $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct subD_reg_imm(regD dst, regD src, immD con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (SubD src con)); |
| |
| format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulF_reg(regF dst, regF src) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (MulF dst src)); |
| |
| format %{ "mulss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ mulss($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulF_mem(regF dst, memory src) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (MulF dst (LoadF src))); |
| |
| format %{ "mulss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ mulss($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulF_imm(regF dst, immF con) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (MulF dst con)); |
| format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ mulss($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulF src1 src2)); |
| |
| format %{ "vmulss $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulF src1 (LoadF src2))); |
| |
| format %{ "vmulss $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulF_reg_imm(regF dst, regF src, immF con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulF src con)); |
| |
| format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulD_reg(regD dst, regD src) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (MulD dst src)); |
| |
| format %{ "mulsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ mulsd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulD_mem(regD dst, memory src) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (MulD dst (LoadD src))); |
| |
| format %{ "mulsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ mulsd($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulD_imm(regD dst, immD con) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (MulD dst con)); |
| format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ mulsd($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulD src1 src2)); |
| |
| format %{ "vmulsd $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulD src1 (LoadD src2))); |
| |
| format %{ "vmulsd $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct mulD_reg_imm(regD dst, regD src, immD con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulD src con)); |
| |
| format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divF_reg(regF dst, regF src) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (DivF dst src)); |
| |
| format %{ "divss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ divss($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divF_mem(regF dst, memory src) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (DivF dst (LoadF src))); |
| |
| format %{ "divss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ divss($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divF_imm(regF dst, immF con) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (DivF dst con)); |
| format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ divss($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (DivF src1 src2)); |
| |
| format %{ "vdivss $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (DivF src1 (LoadF src2))); |
| |
| format %{ "vdivss $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divF_reg_imm(regF dst, regF src, immF con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (DivF src con)); |
| |
| format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divD_reg(regD dst, regD src) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (DivD dst src)); |
| |
| format %{ "divsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ divsd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divD_mem(regD dst, memory src) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (DivD dst (LoadD src))); |
| |
| format %{ "divsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ divsd($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divD_imm(regD dst, immD con) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (DivD dst con)); |
| format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ divsd($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (DivD src1 src2)); |
| |
| format %{ "vdivsd $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (DivD src1 (LoadD src2))); |
| |
| format %{ "vdivsd $dst, $src1, $src2" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct divD_reg_imm(regD dst, regD src, immD con) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (DivD src con)); |
| |
| format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
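| // Absolute value and negation below work by bit-twiddling the IEEE-754 |
| // sign bit: abs ANDs it away (float: x & 0x7fffffff), neg XORs it |
| // (float: x ^ 0x80000000). For example, -1.5f is 0xBFC00000; masking with |
| // 0x7fffffff and flipping with 0x80000000 both yield 0x3FC00000 (1.5f). |
| // The masks live in memory and are referenced via ExternalAddress. |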
| instruct absF_reg(regF dst) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (AbsF dst)); |
| ins_cost(150); |
| format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} |
| ins_encode %{ |
| __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct absF_reg_reg(regF dst, regF src) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AbsF src)); |
| ins_cost(150); |
| format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vandps($dst$$XMMRegister, $src$$XMMRegister, |
| ExternalAddress(float_signmask()), vector_len); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct absD_reg(regD dst) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (AbsD dst)); |
| ins_cost(150); |
| format %{ "andpd $dst, [0x7fffffffffffffff]\t" |
| "# abs double by sign masking" %} |
| ins_encode %{ |
| __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct absD_reg_reg(regD dst, regD src) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AbsD src)); |
| ins_cost(150); |
| format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" |
| "# abs double by sign masking" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vandpd($dst$$XMMRegister, $src$$XMMRegister, |
| ExternalAddress(double_signmask()), vector_len); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct negF_reg(regF dst) %{ |
| predicate((UseSSE>=1) && (UseAVX == 0)); |
| match(Set dst (NegF dst)); |
| ins_cost(150); |
| format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} |
| ins_encode %{ |
| __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct negF_reg_reg(regF dst, regF src) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (NegF src)); |
| ins_cost(150); |
| format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} |
| ins_encode %{ |
| __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, |
| ExternalAddress(float_signflip())); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct negD_reg(regD dst) %{ |
| predicate((UseSSE>=2) && (UseAVX == 0)); |
| match(Set dst (NegD dst)); |
| ins_cost(150); |
| format %{ "xorpd $dst, [0x8000000000000000]\t" |
| "# neg double by sign flipping" %} |
| ins_encode %{ |
| __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct negD_reg_reg(regD dst, regD src) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (NegD src)); |
| ins_cost(150); |
| format %{ "vnegatess $dst, $src, [0x8000000000000000]\t" |
| "# neg double by sign flipping" %} |
| ins_encode %{ |
| __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, |
| ExternalAddress(double_signflip())); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
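| // Float sqrt reaches the matcher as ConvD2F(SqrtD(ConvF2D x)) because the |
| // ideal graph only has a double SqrtD node; collapsing the whole round-trip |
| // to a single sqrtss is safe since sqrtss already returns the correctly |
| // rounded single-precision result. |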
| instruct sqrtF_reg(regF dst, regF src) %{ |
| predicate(UseSSE>=1); |
| match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); |
| |
| format %{ "sqrtss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct sqrtF_mem(regF dst, memory src) %{ |
| predicate(UseSSE>=1); |
| match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); |
| |
| format %{ "sqrtss $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ sqrtss($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct sqrtF_imm(regF dst, immF con) %{ |
| predicate(UseSSE>=1); |
| match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); |
| format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ sqrtss($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct sqrtD_reg(regD dst, regD src) %{ |
| predicate(UseSSE>=2); |
| match(Set dst (SqrtD src)); |
| |
| format %{ "sqrtsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct sqrtD_mem(regD dst, memory src) %{ |
| predicate(UseSSE>=2); |
| match(Set dst (SqrtD (LoadD src))); |
| |
| format %{ "sqrtsd $dst, $src" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ sqrtsd($dst$$XMMRegister, $src$$Address); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| instruct sqrtD_imm(regD dst, immD con) %{ |
| predicate(UseSSE>=2); |
| match(Set dst (SqrtD con)); |
| format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} |
| ins_cost(150); |
| ins_encode %{ |
| __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| // ====================VECTOR INSTRUCTIONS===================================== |
| |
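| // Vector operand classes encode the vector width: vecS = 4 bytes, |
| // vecD = 8 bytes, vecX = 16 bytes (XMM), vecY = 32 bytes (YMM), |
| // vecZ = 64 bytes (ZMM). |
| |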
| // Load vectors (4 bytes long) |
| instruct loadV4(vecS dst, memory mem) %{ |
| predicate(n->as_LoadVector()->memory_size() == 4); |
| match(Set dst (LoadVector mem)); |
| ins_cost(125); |
| format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $mem$$Address); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Load vectors (8 bytes long) |
| instruct loadV8(vecD dst, memory mem) %{ |
| predicate(n->as_LoadVector()->memory_size() == 8); |
| match(Set dst (LoadVector mem)); |
| ins_cost(125); |
| format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} |
| ins_encode %{ |
| __ movq($dst$$XMMRegister, $mem$$Address); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Load vectors (16 bytes long) |
| instruct loadV16(vecX dst, memory mem) %{ |
| predicate(n->as_LoadVector()->memory_size() == 16); |
| match(Set dst (LoadVector mem)); |
| ins_cost(125); |
| format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} |
| ins_encode %{ |
| __ movdqu($dst$$XMMRegister, $mem$$Address); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Load vectors (32 bytes long) |
| instruct loadV32(vecY dst, memory mem) %{ |
| predicate(n->as_LoadVector()->memory_size() == 32); |
| match(Set dst (LoadVector mem)); |
| ins_cost(125); |
| format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} |
| ins_encode %{ |
| __ vmovdqu($dst$$XMMRegister, $mem$$Address); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Load vectors (64 bytes long) |
| instruct loadV64(vecZ dst, memory mem) %{ |
| predicate(n->as_LoadVector()->memory_size() == 64); |
| match(Set dst (LoadVector mem)); |
| ins_cost(125); |
| format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
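| |
| // The vector_len argument passed to the EVEX-aware assembler routines |
| // selects the encoded vector length: 0 = 128-bit, 1 = 256-bit, |
| // 2 = 512-bit (Assembler::AVX_128bit/AVX_256bit/AVX_512bit). |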
| |
| // Store vectors |
| instruct storeV4(memory mem, vecS src) %{ |
| predicate(n->as_StoreVector()->memory_size() == 4); |
| match(Set mem (StoreVector mem src)); |
| ins_cost(145); |
| format %{ "movd $mem,$src\t! store vector (4 bytes)" %} |
| ins_encode %{ |
| __ movdl($mem$$Address, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct storeV8(memory mem, vecD src) %{ |
| predicate(n->as_StoreVector()->memory_size() == 8); |
| match(Set mem (StoreVector mem src)); |
| ins_cost(145); |
| format %{ "movq $mem,$src\t! store vector (8 bytes)" %} |
| ins_encode %{ |
| __ movq($mem$$Address, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct storeV16(memory mem, vecX src) %{ |
| predicate(n->as_StoreVector()->memory_size() == 16); |
| match(Set mem (StoreVector mem src)); |
| ins_cost(145); |
| format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} |
| ins_encode %{ |
| __ movdqu($mem$$Address, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct storeV32(memory mem, vecY src) %{ |
| predicate(n->as_StoreVector()->memory_size() == 32); |
| match(Set mem (StoreVector mem src)); |
| ins_cost(145); |
| format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} |
| ins_encode %{ |
| __ vmovdqu($mem$$Address, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct storeV64(memory mem, vecZ src) %{ |
| predicate(n->as_StoreVector()->memory_size() == 64); |
| match(Set mem (StoreVector mem src)); |
| ins_cost(145); |
| format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ====================LEGACY REPLICATE======================================= |
| |
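| // The legacy (pre-broadcast) replicate idiom widens a scalar in stages. |
| // For a byte value 0xAB the 16-byte sequence works as follows: |
| //   movd       dst, src       zero-extends the scalar (low byte = AB) |
| //   punpcklbw  dst, dst       interleaves dst with itself (low word = ABAB) |
| //   pshuflw    dst, dst, 0x00 copies word 0 across the low four words |
| //   punpcklqdq dst, dst       copies the low qword into the high qword |
| // leaving all 16 bytes equal to 0xAB; the 32-byte forms append |
| // vinserti128h to mirror the low 128 bits into the upper YMM half. |
| |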
| instruct Repl4B_mem(vecS dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateB (LoadB mem))); |
| format %{ "punpcklbw $dst,$mem\n\t" |
| "pshuflw $dst,$dst,0x00\t! replicate4B" %} |
| ins_encode %{ |
| __ punpcklbw($dst$$XMMRegister, $mem$$Address); |
| __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8B_mem(vecD dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateB (LoadB mem))); |
| format %{ "punpcklbw $dst,$mem\n\t" |
| "pshuflw $dst,$dst,0x00\t! replicate8B" %} |
| ins_encode %{ |
| __ punpcklbw($dst$$XMMRegister, $mem$$Address); |
| __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16B(vecX dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateB src)); |
| format %{ "movd $dst,$src\n\t" |
| "punpcklbw $dst,$dst\n\t" |
| "pshuflw $dst,$dst,0x00\n\t" |
| "punpcklqdq $dst,$dst\t! replicate16B" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); |
| __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16B_mem(vecX dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateB (LoadB mem))); |
| format %{ "punpcklbw $dst,$mem\n\t" |
| "pshuflw $dst,$dst,0x00\n\t" |
| "punpcklqdq $dst,$dst\t! replicate16B" %} |
| ins_encode %{ |
| __ punpcklbw($dst$$XMMRegister, $mem$$Address); |
| __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl32B(vecY dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateB src)); |
| format %{ "movd $dst,$src\n\t" |
| "punpcklbw $dst,$dst\n\t" |
| "pshuflw $dst,$dst,0x00\n\t" |
| "punpcklqdq $dst,$dst\n\t" |
| "vinserti128h $dst,$dst,$dst\t! replicate32B" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); |
| __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl32B_mem(vecY dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateB (LoadB mem))); |
| format %{ "punpcklbw $dst,$mem\n\t" |
| "pshuflw $dst,$dst,0x00\n\t" |
| "punpcklqdq $dst,$dst\n\t" |
| "vinserti128h $dst,$dst,$dst\t! replicate32B" %} |
| ins_encode %{ |
| __ punpcklbw($dst$$XMMRegister, $mem$$Address); |
| __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16B_imm(vecX dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateB con)); |
| format %{ "movq $dst,[$constantaddress]\n\t" |
| "punpcklqdq $dst,$dst\t! replicate16B($con)" %} |
| ins_encode %{ |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
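| |
| // replicate8_imm (and replicate4_imm, used for the narrower forms) are |
| // helpers defined in the source block of this file; they tile an immediate |
| // of the given element width into an 8- or 4-byte pattern for the constant |
| // table. For example, replicate8_imm(0x41, 1) produces 0x4141414141414141, |
| // so a single movq plus punpcklqdq fills the whole vector. |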
| |
| instruct Repl32B_imm(vecY dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateB con)); |
| format %{ "movq $dst,[$constantaddress]\n\t" |
| "punpcklqdq $dst,$dst\n\t" |
| "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %} |
| ins_encode %{ |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl4S(vecD dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateS src)); |
| format %{ "movd $dst,$src\n\t" |
| "pshuflw $dst,$dst,0x00\t! replicate4S" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl4S_mem(vecD dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateS (LoadS mem))); |
| format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} |
| ins_encode %{ |
| __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8S(vecX dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateS src)); |
| format %{ "movd $dst,$src\n\t" |
| "pshuflw $dst,$dst,0x00\n\t" |
| "punpcklqdq $dst,$dst\t! replicate8S" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8S_mem(vecX dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateS (LoadS mem))); |
| format %{ "pshuflw $dst,$mem,0x00\n\t" |
| "punpcklqdq $dst,$dst\t! replicate8S" %} |
| ins_encode %{ |
| __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8S_imm(vecX dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateS con)); |
| format %{ "movq $dst,[$constantaddress]\n\t" |
| "punpcklqdq $dst,$dst\t! replicate8S($con)" %} |
| ins_encode %{ |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16S(vecY dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateS src)); |
| format %{ "movd $dst,$src\n\t" |
| "pshuflw $dst,$dst,0x00\n\t" |
| "punpcklqdq $dst,$dst\n\t" |
| "vinserti128h $dst,$dst,$dst\t! replicate16S" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16S_mem(vecY dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateS (LoadS mem))); |
| format %{ "pshuflw $dst,$mem,0x00\n\t" |
| "punpcklqdq $dst,$dst\n\t" |
| "vinserti128h $dst,$dst,$dst\t! replicate16S" %} |
| ins_encode %{ |
| __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16S_imm(vecY dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateS con)); |
| format %{ "movq $dst,[$constantaddress]\n\t" |
| "punpcklqdq $dst,$dst\n\t" |
| "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %} |
| ins_encode %{ |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl4I(vecX dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateI src)); |
| format %{ "movd $dst,$src\n\t" |
| "pshufd $dst,$dst,0x00\t! replicate4I" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl4I_mem(vecX dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateI (LoadI mem))); |
| format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} |
| ins_encode %{ |
| __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8I(vecY dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateI src)); |
| format %{ "movd $dst,$src\n\t" |
| "pshufd $dst,$dst,0x00\n\t" |
| "vinserti128h $dst,$dst,$dst\t! replicate8I" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8I_mem(vecY dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateI (LoadI mem))); |
| format %{ "pshufd $dst,$mem,0x00\n\t" |
| "vinserti128h $dst,$dst,$dst\t! replicate8I" %} |
| ins_encode %{ |
| __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); |
| __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl4I_imm(vecX dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateI con)); |
| format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" |
| "punpcklqdq $dst,$dst" %} |
| ins_encode %{ |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8I_imm(vecY dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateI con)); |
| format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" |
| "punpcklqdq $dst,$dst\n\t" |
| "vinserti128h $dst,$dst,$dst" %} |
| ins_encode %{ |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // A long can be loaded into an XMM register directly from memory. |
| instruct Repl2L_mem(vecX dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateL (LoadL mem))); |
| format %{ "movq $dst,$mem\n\t" |
| "punpcklqdq $dst,$dst\t! replicate2L" %} |
| ins_encode %{ |
| __ movq($dst$$XMMRegister, $mem$$Address); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Replicate a long (8-byte) scalar into a vector |
| #ifdef _LP64 |
| instruct Repl4L(vecY dst, rRegL src) %{ |
| predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateL src)); |
| format %{ "movdq $dst,$src\n\t" |
| "punpcklqdq $dst,$dst\n\t" |
| "vinserti128h $dst,$dst,$dst\t! replicate4L" %} |
| ins_encode %{ |
| __ movdq($dst$$XMMRegister, $src$$Register); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| #else // _LP64 |
| instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ |
| predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateL src)); |
| effect(TEMP dst, USE src, TEMP tmp); |
| format %{ "movdl $dst,$src.lo\n\t" |
| "movdl $tmp,$src.hi\n\t" |
| "punpckldq $dst,$tmp\n\t" |
| "punpcklqdq $dst,$dst\n\t" |
| "vinserti128h $dst,$dst,$dst\t! replicate4L" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); |
| __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| #endif // _LP64 |
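| |
| // On 32-bit VMs a long occupies a register pair, so the replicate first |
| // assembles the two 32-bit halves in an XMM register (movdl the low half, |
| // movdl the high half into a temp, punpckldq to fuse them into one 64-bit |
| // lane) and then widens as in the 64-bit case. |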
| |
| instruct Repl4L_imm(vecY dst, immL con) %{ |
| predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateL con)); |
| format %{ "movq $dst,[$constantaddress]\n\t" |
| "punpcklqdq $dst,$dst\n\t" |
| "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} |
| ins_encode %{ |
| __ movq($dst$$XMMRegister, $constantaddress($con)); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl4L_mem(vecY dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateL (LoadL mem))); |
| format %{ "movq $dst,$mem\n\t" |
| "punpcklqdq $dst,$dst\n\t" |
| "vinserti128h $dst,$dst,$dst\t! replicate4L" %} |
| ins_encode %{ |
| __ movq($dst$$XMMRegister, $mem$$Address); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl2F_mem(vecD dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateF (LoadF mem))); |
| format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} |
| ins_encode %{ |
| __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl4F_mem(vecX dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateF (LoadF mem))); |
| format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %} |
| ins_encode %{ |
| __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8F(vecY dst, regF src) %{ |
| predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateF src)); |
| format %{ "pshufd $dst,$src,0x00\n\t" |
| "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} |
| ins_encode %{ |
| __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); |
| __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8F_mem(vecY dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateF (LoadF mem))); |
| format %{ "pshufd $dst,$mem,0x00\n\t" |
| "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} |
| ins_encode %{ |
| __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); |
| __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl2F_zero(vecD dst, immF0 zero) %{ |
| predicate(n->as_Vector()->length() == 2 && UseAVX < 3); |
| match(Set dst (ReplicateF zero)); |
| format %{ "xorps $dst,$dst\t! replicate2F zero" %} |
| ins_encode %{ |
| __ xorps($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
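| |
| // The zero replicates use the xor-with-self idiom: xorps/xorpd/pxor of a |
| // register with itself is a recognized zeroing idiom that also breaks any |
| // dependency on the previous register contents, so no constant-table load |
| // is needed for zero. |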
| |
| instruct Repl4F_zero(vecX dst, immF0 zero) %{ |
| predicate(n->as_Vector()->length() == 4 && UseAVX < 3); |
| match(Set dst (ReplicateF zero)); |
| format %{ "xorps $dst,$dst\t! replicate4F zero" %} |
| ins_encode %{ |
| __ xorps($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl8F_zero(vecY dst, immF0 zero) %{ |
| predicate(n->as_Vector()->length() == 8 && UseAVX < 3); |
| match(Set dst (ReplicateF zero)); |
| format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl2D_mem(vecX dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateD (LoadD mem))); |
| format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} |
| ins_encode %{ |
| __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
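| |
| // The pshufd immediate packs four 2-bit dword selectors, lowest first. |
| // 0x44 is binary 01 00 01 00, i.e. dwords {0,1,0,1}: it duplicates the |
| // low 64-bit (double) lane into the upper half of the XMM register. |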
| |
| instruct Repl4D(vecY dst, regD src) %{ |
| predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateD src)); |
| format %{ "pshufd $dst,$src,0x44\n\t" |
| "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} |
| ins_encode %{ |
| __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); |
| __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl4D_mem(vecY dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateD (LoadD mem))); |
| format %{ "pshufd $dst,$mem,0x44\n\t" |
| "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} |
| ins_encode %{ |
| __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); |
| __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Replicate a double (8-byte) scalar zero into a vector |
| instruct Repl2D_zero(vecX dst, immD0 zero) %{ |
| predicate(n->as_Vector()->length() == 2 && UseAVX < 3); |
| match(Set dst (ReplicateD zero)); |
| format %{ "xorpd $dst,$dst\t! replicate2D zero" %} |
| ins_encode %{ |
| __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl4D_zero(vecY dst, immD0 zero) %{ |
| predicate(n->as_Vector()->length() == 4 && UseAVX < 3); |
| match(Set dst (ReplicateD zero)); |
| format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // ====================GENERIC REPLICATE========================================== |
| |
| // Replicate a byte scalar into a vector |
| instruct Repl4B(vecS dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (ReplicateB src)); |
| format %{ "movd $dst,$src\n\t" |
| "punpcklbw $dst,$dst\n\t" |
| "pshuflw $dst,$dst,0x00\t! replicate4B" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); |
| __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8B(vecD dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 8); |
| match(Set dst (ReplicateB src)); |
| format %{ "movd $dst,$src\n\t" |
| "punpcklbw $dst,$dst\n\t" |
| "pshuflw $dst,$dst,0x00\t! replicate8B" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); |
| __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Replicate a byte scalar immediate into a vector by loading it from the constant table. |
| instruct Repl4B_imm(vecS dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (ReplicateB con)); |
| format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8B_imm(vecD dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 8); |
| match(Set dst (ReplicateB con)); |
| format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} |
| ins_encode %{ |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Replicate a byte scalar zero into a vector |
| instruct Repl4B_zero(vecS dst, immI0 zero) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (ReplicateB zero)); |
| format %{ "pxor $dst,$dst\t! replicate4B zero" %} |
| ins_encode %{ |
| __ pxor($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl8B_zero(vecD dst, immI0 zero) %{ |
| predicate(n->as_Vector()->length() == 8); |
| match(Set dst (ReplicateB zero)); |
| format %{ "pxor $dst,$dst\t! replicate8B zero" %} |
| ins_encode %{ |
| __ pxor($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl16B_zero(vecX dst, immI0 zero) %{ |
| predicate(n->as_Vector()->length() == 16); |
| match(Set dst (ReplicateB zero)); |
| format %{ "pxor $dst,$dst\t! replicate16B zero" %} |
| ins_encode %{ |
| __ pxor($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl32B_zero(vecY dst, immI0 zero) %{ |
| predicate(n->as_Vector()->length() == 32); |
| match(Set dst (ReplicateB zero)); |
| format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} |
| ins_encode %{ |
| // 256-bit vpxor requires AVX2; MacroAssembler::vpxor falls back to vxorpd on plain AVX. |
| int vector_len = 1; |
| __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // Replicate a char/short (2-byte) scalar into a vector |
| instruct Repl2S(vecS dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (ReplicateS src)); |
| format %{ "movd $dst,$src\n\t" |
| "pshuflw $dst,$dst,0x00\t! replicate2S" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // Replicate a char/short (2-byte) scalar immediate into a vector by loading it from the constant table. |
| instruct Repl2S_imm(vecS dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (ReplicateS con)); |
| format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl4S_imm(vecD dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (ReplicateS con)); |
| format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} |
| ins_encode %{ |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // Replicate a char/short (2-byte) scalar zero into a vector |
| instruct Repl2S_zero(vecS dst, immI0 zero) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (ReplicateS zero)); |
| format %{ "pxor $dst,$dst\t! replicate2S zero" %} |
| ins_encode %{ |
| __ pxor($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl4S_zero(vecD dst, immI0 zero) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (ReplicateS zero)); |
| format %{ "pxor $dst,$dst\t! replicate4S zero" %} |
| ins_encode %{ |
| __ pxor($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl8S_zero(vecX dst, immI0 zero) %{ |
| predicate(n->as_Vector()->length() == 8); |
| match(Set dst (ReplicateS zero)); |
| format %{ "pxor $dst,$dst\t! replicate8S zero" %} |
| ins_encode %{ |
| __ pxor($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl16S_zero(vecY dst, immI0 zero) %{ |
| predicate(n->as_Vector()->length() == 16); |
| match(Set dst (ReplicateS zero)); |
| format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} |
| ins_encode %{ |
| // 256-bit vpxor requires AVX2; MacroAssembler::vpxor falls back to vxorpd on plain AVX. |
| int vector_len = 1; |
| __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // Replicate an integer (4-byte) scalar into a vector |
| instruct Repl2I(vecD dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (ReplicateI src)); |
| format %{ "movd $dst,$src\n\t" |
| "pshufd $dst,$dst,0x00\t! replicate2I" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // An integer can be loaded into an XMM register directly from memory. |
| instruct Repl2I_mem(vecD dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (ReplicateI (LoadI mem))); |
| format %{ "movd $dst,$mem\n\t" |
| "pshufd $dst,$dst,0x00\t! replicate2I" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $mem$$Address); |
| __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // Replicate an integer (4-byte) scalar immediate into a vector by loading it from the constant table. |
| instruct Repl2I_imm(vecD dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (ReplicateI con)); |
| format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} |
| ins_encode %{ |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // Replicate an integer (4-byte) scalar zero into a vector |
| instruct Repl2I_zero(vecD dst, immI0 zero) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (ReplicateI zero)); |
| format %{ "pxor $dst,$dst\t! replicate2I" %} |
| ins_encode %{ |
| __ pxor($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl4I_zero(vecX dst, immI0 zero) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (ReplicateI zero)); |
| format %{ "pxor $dst,$dst\t! replicate4I zero)" %} |
| ins_encode %{ |
| __ pxor($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl8I_zero(vecY dst, immI0 zero) %{ |
| predicate(n->as_Vector()->length() == 8); |
| match(Set dst (ReplicateI zero)); |
| format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} |
| ins_encode %{ |
| // 256-bit vpxor requires AVX2; MacroAssembler::vpxor falls back to vxorpd on plain AVX. |
| int vector_len = 1; |
| __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // Replicate a long (8-byte) scalar into a vector |
| #ifdef _LP64 |
| instruct Repl2L(vecX dst, rRegL src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (ReplicateL src)); |
| format %{ "movdq $dst,$src\n\t" |
| "punpcklqdq $dst,$dst\t! replicate2L" %} |
| ins_encode %{ |
| __ movdq($dst$$XMMRegister, $src$$Register); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| #else // _LP64 |
| instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (ReplicateL src)); |
| effect(TEMP dst, USE src, TEMP tmp); |
| format %{ "movdl $dst,$src.lo\n\t" |
| "movdl $tmp,$src.hi\n\t" |
| "punpckldq $dst,$tmp\n\t" |
| "punpcklqdq $dst,$dst\t! replicate2L"%} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); |
| __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| #endif // _LP64 |
| |
| // Replicate a long (8-byte) scalar immediate into a vector by loading it from the constant table. |
| instruct Repl2L_imm(vecX dst, immL con) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (ReplicateL con)); |
| format %{ "movq $dst,[$constantaddress]\n\t" |
| "punpcklqdq $dst,$dst\t! replicate2L($con)" %} |
| ins_encode %{ |
| __ movq($dst$$XMMRegister, $constantaddress($con)); |
| __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Replicate a long (8-byte) scalar zero into a vector |
| instruct Repl2L_zero(vecX dst, immL0 zero) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (ReplicateL zero)); |
| format %{ "pxor $dst,$dst\t! replicate2L zero" %} |
| ins_encode %{ |
| __ pxor($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl4L_zero(vecY dst, immL0 zero) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (ReplicateL zero)); |
| format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} |
| ins_encode %{ |
| // 256-bit vpxor requires AVX2; MacroAssembler::vpxor falls back to vxorpd on plain AVX. |
| int vector_len = 1; |
| __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // Replicate a float (4-byte) scalar into a vector |
| instruct Repl2F(vecD dst, regF src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (ReplicateF src)); |
| format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} |
| ins_encode %{ |
| __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl4F(vecX dst, regF src) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (ReplicateF src)); |
| format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} |
| ins_encode %{ |
| __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Replicate a double (8-byte) scalar into a vector |
| instruct Repl2D(vecX dst, regD src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (ReplicateD src)); |
| format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} |
| ins_encode %{ |
| __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ====================EVEX REPLICATE============================================= |
| |
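| // EVEX replicates broadcast directly with vpbroadcast*. The predicates |
| // pair two CPU features: avx512bw enables the byte/word broadcasts, and |
| // avx512vl additionally permits EVEX encodings at 128/256-bit vector |
| // lengths. Hence the sub-512-bit byte/short rules require |
| // supports_avx512vlbw(), while the 512-bit rules need only |
| // supports_avx512bw(). |
| |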
| instruct Repl4B_mem_evex(vecS dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateB (LoadB mem))); |
| format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8B_mem_evex(vecD dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateB (LoadB mem))); |
| format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16B_evex(vecX dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateB src)); |
| format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16B_mem_evex(vecX dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateB (LoadB mem))); |
| format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl32B_evex(vecY dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateB src)); |
| format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl32B_mem_evex(vecY dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateB (LoadB mem))); |
| format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl64B_evex(vecZ dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); |
| match(Set dst (ReplicateB src)); |
| format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); |
| match(Set dst (ReplicateB (LoadB mem))); |
| format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16B_imm_evex(vecX dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateB con)); |
| format %{ "movq $dst,[$constantaddress]\n\t" |
| "vpbroadcastb $dst,$dst\t! replicate16B" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); |
| __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl32B_imm_evex(vecY dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateB con)); |
| format %{ "movq $dst,[$constantaddress]\n\t" |
| "vpbroadcastb $dst,$dst\t! replicate32B" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); |
| __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl64B_imm_evex(vecZ dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); |
| match(Set dst (ReplicateB con)); |
| format %{ "movq $dst,[$constantaddress]\n\t" |
| "vpbroadcastb $dst,$dst\t! upper replicate64B" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); |
| __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ |
| predicate(n->as_Vector()->length() == 64 && UseAVX > 2); |
| match(Set dst (ReplicateB zero)); |
| format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} |
| ins_encode %{ |
| // 512-bit vpxor requires an EVEX (AVX-512) encoding; this rule is gated on UseAVX > 2, so it is available. |
| int vector_len = 2; |
| __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl4S_evex(vecD dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateS src)); |
| format %{ "vpbroadcastw $dst,$src\t! replicate4S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl4S_mem_evex(vecD dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateS (LoadS mem))); |
| format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8S_evex(vecX dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateS src)); |
| format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8S_mem_evex(vecX dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateS (LoadS mem))); |
| format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16S_evex(vecY dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateS src)); |
| format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16S_mem_evex(vecY dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateS (LoadS mem))); |
| format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl32S_evex(vecZ dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); |
| match(Set dst (ReplicateS src)); |
| format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); |
| match(Set dst (ReplicateS (LoadS mem))); |
| format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
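| |
| // Byte and short broadcasts need AVX512BW; their 128- and 256-bit EVEX |
| // forms additionally need AVX512VL, hence supports_avx512vlbw() on the |
| // vecD/vecX/vecY rules above versus supports_avx512bw() on the vecZ rules. |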
| |
| instruct Repl8S_imm_evex(vecX dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateS con)); |
| format %{ "movq $dst,[$constantaddress]\n\t" |
| "vpbroadcastw $dst,$dst\t! replicate8S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); |
| __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16S_imm_evex(vecY dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); |
| match(Set dst (ReplicateS con)); |
| format %{ "movq $dst,[$constantaddress]\n\t" |
| "vpbroadcastw $dst,$dst\t! replicate16S" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); |
| __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl32S_imm_evex(vecZ dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); |
| match(Set dst (ReplicateS con)); |
| format %{ "movq $dst,[$constantaddress]\n\t" |
| "vpbroadcastw $dst,$dst\t! replicate32S" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); |
| __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ |
| predicate(n->as_Vector()->length() == 32 && UseAVX > 2); |
| match(Set dst (ReplicateS zero)); |
| format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} |
| ins_encode %{ |
|     // vpxor with vector_len == 2 uses the EVEX encoding; plain AVX has no 512-bit vpxor. |
| int vector_len = 2; |
| __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl4I_evex(vecX dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateI src)); |
| format %{ "vpbroadcastd $dst,$src\t! replicate4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl4I_mem_evex(vecX dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateI (LoadI mem))); |
| format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8I_evex(vecY dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateI src)); |
| format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8I_mem_evex(vecY dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateI (LoadI mem))); |
| format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16I_evex(vecZ dst, rRegI src) %{ |
| predicate(n->as_Vector()->length() == 16 && UseAVX > 2); |
| match(Set dst (ReplicateI src)); |
| format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 16 && UseAVX > 2); |
| match(Set dst (ReplicateI (LoadI mem))); |
| format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl4I_imm_evex(vecX dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateI con)); |
| format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" |
| "vpbroadcastd $dst,$dst\t! replicate4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); |
| __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8I_imm_evex(vecY dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateI con)); |
| format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" |
| "vpbroadcastd $dst,$dst\t! replicate8I" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); |
| __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16I_imm_evex(vecZ dst, immI con) %{ |
| predicate(n->as_Vector()->length() == 16 && UseAVX > 2); |
| match(Set dst (ReplicateI con)); |
| format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" |
| "vpbroadcastd $dst,$dst\t! replicate16I" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); |
| __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ |
| predicate(n->as_Vector()->length() == 16 && UseAVX > 2); |
| match(Set dst (ReplicateI zero)); |
| format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} |
| ins_encode %{ |
|     // vpxor with vector_len == 2 uses the EVEX encoding; plain AVX has no 512-bit vpxor. |
| int vector_len = 2; |
| __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // Replicate long (8 byte) scalar to be vector |
| #ifdef _LP64 |
| instruct Repl4L_evex(vecY dst, rRegL src) %{ |
| predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateL src)); |
| format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8L_evex(vecZ dst, rRegL src) %{ |
| predicate(n->as_Vector()->length() == 8 && UseAVX > 2); |
| match(Set dst (ReplicateL src)); |
| format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| #else // _LP64 |
| instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ |
| predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateL src)); |
| effect(TEMP dst, USE src, TEMP tmp); |
| format %{ "movdl $dst,$src.lo\n\t" |
| "movdl $tmp,$src.hi\n\t" |
| "punpckldq $dst,$tmp\n\t" |
| "vpbroadcastq $dst,$dst\t! replicate4L" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); |
| __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); |
| __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ |
| predicate(n->as_Vector()->length() == 8 && UseAVX > 2); |
| match(Set dst (ReplicateL src)); |
| effect(TEMP dst, USE src, TEMP tmp); |
| format %{ "movdl $dst,$src.lo\n\t" |
| "movdl $tmp,$src.hi\n\t" |
| "punpckldq $dst,$tmp\n\t" |
| "vpbroadcastq $dst,$dst\t! replicate8L" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); |
| __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); |
| __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| #endif // _LP64 |
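| |
| // On 32-bit VMs the long value arrives in a GPR pair, so the rules above |
| // first assemble it in an XMM register before broadcasting. Illustrative |
| // sequence (hypothetical edx:eax pair): |
| //   movdl     xmm0, eax       // low 32 bits |
| //   movdl     xmm1, edx       // high 32 bits |
| //   punpckldq xmm0, xmm1      // xmm0[63:0] = edx:eax |
| //   evpbroadcastq ...         // then broadcast as on 64-bit |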
| |
| instruct Repl4L_imm_evex(vecY dst, immL con) %{ |
| predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateL con)); |
| format %{ "movq $dst,[$constantaddress]\n\t" |
| "vpbroadcastq $dst,$dst\t! replicate4L" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ movq($dst$$XMMRegister, $constantaddress($con)); |
| __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8L_imm_evex(vecZ dst, immL con) %{ |
| predicate(n->as_Vector()->length() == 8 && UseAVX > 2); |
| match(Set dst (ReplicateL con)); |
| format %{ "movq $dst,[$constantaddress]\n\t" |
| "vpbroadcastq $dst,$dst\t! replicate8L" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ movq($dst$$XMMRegister, $constantaddress($con)); |
| __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl2L_mem_evex(vecX dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateL (LoadL mem))); |
| format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl4L_mem_evex(vecY dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateL (LoadL mem))); |
| format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 8 && UseAVX > 2); |
| match(Set dst (ReplicateL (LoadL mem))); |
| format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ |
| predicate(n->as_Vector()->length() == 8 && UseAVX > 2); |
| match(Set dst (ReplicateL zero)); |
| format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} |
| ins_encode %{ |
|     // vpxor with vector_len == 2 uses the EVEX encoding; plain AVX has no 512-bit vpxor. |
| int vector_len = 2; |
| __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl8F_evex(vecY dst, regF src) %{ |
| predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateF src)); |
| format %{ "vbroadcastss $dst,$src\t! replicate8F" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8F_mem_evex(vecY dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateF (LoadF mem))); |
| format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16F_evex(vecZ dst, regF src) %{ |
| predicate(n->as_Vector()->length() == 16 && UseAVX > 2); |
| match(Set dst (ReplicateF src)); |
| format %{ "vbroadcastss $dst,$src\t! replicate16F" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 16 && UseAVX > 2); |
| match(Set dst (ReplicateF (LoadF mem))); |
| format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ |
| predicate(n->as_Vector()->length() == 2 && UseAVX > 2); |
| match(Set dst (ReplicateF zero)); |
| format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %} |
| ins_encode %{ |
|     // Use vpxor in place of vxorps: the EVEX form of vxorps requires AVX512DQ, and this is a 512-bit operation. |
| int vector_len = 2; |
| __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ |
| predicate(n->as_Vector()->length() == 4 && UseAVX > 2); |
| match(Set dst (ReplicateF zero)); |
| format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} |
| ins_encode %{ |
|     // Use vpxor in place of vxorps: the EVEX form of vxorps requires AVX512DQ, and this is a 512-bit operation. |
| int vector_len = 2; |
| __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ |
| predicate(n->as_Vector()->length() == 8 && UseAVX > 2); |
| match(Set dst (ReplicateF zero)); |
| format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %} |
| ins_encode %{ |
|     // Use vpxor in place of vxorps: the EVEX form of vxorps requires AVX512DQ, and this is a 512-bit operation. |
| int vector_len = 2; |
| __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ |
| predicate(n->as_Vector()->length() == 16 && UseAVX > 2); |
| match(Set dst (ReplicateF zero)); |
| format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} |
| ins_encode %{ |
|     // Use vpxor in place of vxorps: the EVEX form of vxorps requires AVX512DQ, and this is a 512-bit operation. |
| int vector_len = 2; |
| __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
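| |
| // The ReplicateF zero rules above (and the ReplicateD ones below) pass |
| // vector_len == 2 regardless of the nominal vector size, clearing the |
| // whole 512-bit register; zeroing beyond the vector's width is harmless. |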
| |
| instruct Repl4D_evex(vecY dst, regD src) %{ |
| predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateD src)); |
| format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl4D_mem_evex(vecY dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); |
| match(Set dst (ReplicateD (LoadD mem))); |
| format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8D_evex(vecZ dst, regD src) %{ |
| predicate(n->as_Vector()->length() == 8 && UseAVX > 2); |
| match(Set dst (ReplicateD src)); |
| format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ |
| predicate(n->as_Vector()->length() == 8 && UseAVX > 2); |
| match(Set dst (ReplicateD (LoadD mem))); |
| format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ |
| predicate(n->as_Vector()->length() == 2 && UseAVX > 2); |
| match(Set dst (ReplicateD zero)); |
| format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} |
| ins_encode %{ |
|     // Use vpxor in place of vxorpd: the EVEX form of vxorpd requires AVX512DQ, and this is a 512-bit operation. |
| int vector_len = 2; |
| __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ |
| predicate(n->as_Vector()->length() == 4 && UseAVX > 2); |
| match(Set dst (ReplicateD zero)); |
| format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} |
| ins_encode %{ |
|     // Use vpxor in place of vxorpd: the EVEX form of vxorpd requires AVX512DQ, and this is a 512-bit operation. |
| int vector_len = 2; |
| __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ |
| predicate(n->as_Vector()->length() == 8 && UseAVX > 2); |
| match(Set dst (ReplicateD zero)); |
| format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} |
| ins_encode %{ |
|     // Use vpxor in place of vxorpd: the EVEX form of vxorpd requires AVX512DQ, and this is a 512-bit operation. |
| int vector_len = 2; |
| __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); |
| %} |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // ====================REDUCTION ARITHMETIC======================================= |
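| |
| // The rules below reduce vector src2 to a single lane and then combine it |
| // with scalar src1. Integer reductions halve the active width per step: |
| // extract the upper half, add/multiply it into the lower half, then fold |
| // within 128 bits via pshufd (imm 0xE brings dword elements 3:2 down, imm |
| // 0x1 brings element 1 down). FP reductions instead accumulate lane by |
| // lane with scalar addss/addsd (mulss/mulsd), since FP add and multiply |
| // cannot be reassociated without potentially changing the result. |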
| |
| instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ |
| predicate(UseSSE > 2 && UseAVX == 0); |
| match(Set dst (AddReductionVI src1 src2)); |
| effect(TEMP tmp2, TEMP tmp); |
| format %{ "movdqu $tmp2,$src2\n\t" |
| "phaddd $tmp2,$tmp2\n\t" |
| "movd $tmp,$src1\n\t" |
| "paddd $tmp,$tmp2\n\t" |
| "movd $dst,$tmp\t! add reduction2I" %} |
| ins_encode %{ |
| __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); |
| __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); |
| __ movdl($tmp$$XMMRegister, $src1$$Register); |
| __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); |
| __ movdl($dst$$Register, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 0 && UseAVX < 3); |
| match(Set dst (AddReductionVI src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "vphaddd $tmp,$src2,$src2\n\t" |
| "movd $tmp2,$src1\n\t" |
| "vpaddd $tmp2,$tmp2,$tmp\n\t" |
| "movd $dst,$tmp2\t! add reduction2I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); |
| __ movdl($tmp2$$XMMRegister, $src1$$Register); |
| __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); |
| __ movdl($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 2); |
| match(Set dst (AddReductionVI src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "pshufd $tmp2,$src2,0x1\n\t" |
| "vpaddd $tmp,$src2,$tmp2\n\t" |
| "movd $tmp2,$src1\n\t" |
| "vpaddd $tmp2,$tmp,$tmp2\n\t" |
| "movd $dst,$tmp2\t! add reduction2I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); |
| __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ movdl($tmp2$$XMMRegister, $src1$$Register); |
| __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ movdl($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ |
| predicate(UseSSE > 2 && UseAVX == 0); |
| match(Set dst (AddReductionVI src1 src2)); |
| effect(TEMP tmp2, TEMP tmp); |
| format %{ "movdqu $tmp2,$src2\n\t" |
| "phaddd $tmp2,$tmp2\n\t" |
| "phaddd $tmp2,$tmp2\n\t" |
| "movd $tmp,$src1\n\t" |
| "paddd $tmp,$tmp2\n\t" |
| "movd $dst,$tmp\t! add reduction4I" %} |
| ins_encode %{ |
| __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); |
| __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); |
| __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); |
| __ movdl($tmp$$XMMRegister, $src1$$Register); |
| __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); |
| __ movdl($dst$$Register, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 0 && UseAVX < 3); |
| match(Set dst (AddReductionVI src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "vphaddd $tmp,$src2,$src2\n\t" |
| "vphaddd $tmp,$tmp,$tmp2\n\t" |
| "movd $tmp2,$src1\n\t" |
| "vpaddd $tmp2,$tmp2,$tmp\n\t" |
| "movd $dst,$tmp2\t! add reduction4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); |
| __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ movdl($tmp2$$XMMRegister, $src1$$Register); |
| __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); |
| __ movdl($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 2); |
| match(Set dst (AddReductionVI src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "pshufd $tmp2,$src2,0xE\n\t" |
| "vpaddd $tmp,$src2,$tmp2\n\t" |
| "pshufd $tmp2,$tmp,0x1\n\t" |
| "vpaddd $tmp,$tmp,$tmp2\n\t" |
| "movd $tmp2,$src1\n\t" |
| "vpaddd $tmp2,$tmp,$tmp2\n\t" |
| "movd $dst,$tmp2\t! add reduction4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); |
| __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); |
| __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ movdl($tmp2$$XMMRegister, $src1$$Register); |
| __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ movdl($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 0 && UseAVX < 3); |
| match(Set dst (AddReductionVI src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "vphaddd $tmp,$src2,$src2\n\t" |
| "vphaddd $tmp,$tmp,$tmp2\n\t" |
| "vextracti128 $tmp2,$tmp\n\t" |
| "vpaddd $tmp,$tmp,$tmp2\n\t" |
| "movd $tmp2,$src1\n\t" |
| "vpaddd $tmp2,$tmp2,$tmp\n\t" |
| "movd $dst,$tmp2\t! add reduction8I" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); |
| __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); |
| __ movdl($tmp2$$XMMRegister, $src1$$Register); |
| __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); |
| __ movdl($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 2); |
| match(Set dst (AddReductionVI src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "vextracti128 $tmp,$src2\n\t" |
| "vpaddd $tmp,$tmp,$src2\n\t" |
| "pshufd $tmp2,$tmp,0xE\n\t" |
| "vpaddd $tmp,$tmp,$tmp2\n\t" |
| "pshufd $tmp2,$tmp,0x1\n\t" |
| "vpaddd $tmp,$tmp,$tmp2\n\t" |
| "movd $tmp2,$src1\n\t" |
| "vpaddd $tmp2,$tmp,$tmp2\n\t" |
| "movd $dst,$tmp2\t! add reduction8I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); |
| __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); |
| __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); |
| __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); |
| __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ movdl($tmp2$$XMMRegister, $src1$$Register); |
| __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ movdl($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ |
| predicate(UseAVX > 2); |
| match(Set dst (AddReductionVI src1 src2)); |
| effect(TEMP tmp, TEMP tmp2, TEMP tmp3); |
| format %{ "vextracti64x4 $tmp3,$src2\n\t" |
| "vpaddd $tmp3,$tmp3,$src2\n\t" |
| "vextracti128 $tmp,$tmp3\n\t" |
| "vpaddd $tmp,$tmp,$tmp3\n\t" |
| "pshufd $tmp2,$tmp,0xE\n\t" |
| "vpaddd $tmp,$tmp,$tmp2\n\t" |
| "pshufd $tmp2,$tmp,0x1\n\t" |
| "vpaddd $tmp,$tmp,$tmp2\n\t" |
| "movd $tmp2,$src1\n\t" |
| "vpaddd $tmp2,$tmp,$tmp2\n\t" |
|             "movd    $dst,$tmp2\t! add reduction16I" %} |
| ins_encode %{ |
| __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); |
| __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); |
| __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); |
| __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); |
| __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); |
| __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); |
| __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); |
| __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); |
| __ movdl($tmp2$$XMMRegister, $src1$$Register); |
| __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); |
| __ movdl($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| #ifdef _LP64 |
| instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 2); |
| match(Set dst (AddReductionVL src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "pshufd $tmp2,$src2,0xE\n\t" |
| "vpaddq $tmp,$src2,$tmp2\n\t" |
| "movdq $tmp2,$src1\n\t" |
| "vpaddq $tmp2,$tmp,$tmp2\n\t" |
| "movdq $dst,$tmp2\t! add reduction2L" %} |
| ins_encode %{ |
| __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); |
| __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); |
| __ movdq($tmp2$$XMMRegister, $src1$$Register); |
| __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); |
| __ movdq($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 2); |
| match(Set dst (AddReductionVL src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" |
| "vpaddq $tmp2,$tmp,$src2\n\t" |
| "pshufd $tmp,$tmp2,0xE\n\t" |
| "vpaddq $tmp2,$tmp2,$tmp\n\t" |
| "movdq $tmp,$src1\n\t" |
| "vpaddq $tmp2,$tmp2,$tmp\n\t" |
| "movdq $dst,$tmp2\t! add reduction4L" %} |
| ins_encode %{ |
| __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); |
| __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); |
| __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); |
| __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); |
| __ movdq($tmp$$XMMRegister, $src1$$Register); |
| __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); |
| __ movdq($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 2); |
| match(Set dst (AddReductionVL src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "vextracti64x4 $tmp2,$src2\n\t" |
| "vpaddq $tmp2,$tmp2,$src2\n\t" |
| "vextracti128 $tmp,$tmp2\n\t" |
| "vpaddq $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp2,0xE\n\t" |
| "vpaddq $tmp2,$tmp2,$tmp\n\t" |
| "movdq $tmp,$src1\n\t" |
| "vpaddq $tmp2,$tmp2,$tmp\n\t" |
| "movdq $dst,$tmp2\t! add reduction8L" %} |
| ins_encode %{ |
| __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); |
| __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); |
| __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); |
| __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); |
| __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); |
| __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); |
| __ movdq($tmp$$XMMRegister, $src1$$Register); |
| __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); |
| __ movdq($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| #endif |
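| |
| // movdq moves 64 bits between a GPR and an XMM register and requires a |
| // REX.W encoding, so the long add reductions above exist only on LP64. |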
| |
| instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ |
| predicate(UseSSE >= 1 && UseAVX == 0); |
| match(Set dst (AddReductionVF src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "movdqu $tmp,$src1\n\t" |
| "addss $tmp,$src2\n\t" |
| "pshufd $tmp2,$src2,0x01\n\t" |
| "addss $tmp,$tmp2\n\t" |
| "movdqu $dst,$tmp\t! add reduction2F" %} |
| ins_encode %{ |
| __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); |
| __ addss($tmp$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); |
| __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); |
| __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddReductionVF src1 src2)); |
| effect(TEMP tmp2, TEMP tmp); |
| format %{ "vaddss $tmp2,$src1,$src2\n\t" |
| "pshufd $tmp,$src2,0x01\n\t" |
| "vaddss $dst,$tmp2,$tmp\t! add reduction2F" %} |
| ins_encode %{ |
| __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); |
| __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ |
| predicate(UseSSE >= 1 && UseAVX == 0); |
| match(Set dst (AddReductionVF src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "movdqu $tmp,$src1\n\t" |
| "addss $tmp,$src2\n\t" |
| "pshufd $tmp2,$src2,0x01\n\t" |
| "addss $tmp,$tmp2\n\t" |
| "pshufd $tmp2,$src2,0x02\n\t" |
| "addss $tmp,$tmp2\n\t" |
| "pshufd $tmp2,$src2,0x03\n\t" |
| "addss $tmp,$tmp2\n\t" |
| "movdqu $dst,$tmp\t! add reduction4F" %} |
| ins_encode %{ |
| __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); |
| __ addss($tmp$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); |
| __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); |
| __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); |
| __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); |
| __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); |
| __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); |
| __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddReductionVF src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "vaddss $tmp2,$src1,$src2\n\t" |
| "pshufd $tmp,$src2,0x01\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$src2,0x02\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$src2,0x03\n\t" |
| "vaddss $dst,$tmp2,$tmp\t! add reduction4F" %} |
| ins_encode %{ |
| __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); |
| __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddReductionVF src1 src2)); |
| effect(TEMP tmp, TEMP tmp2, TEMP tmp3); |
| format %{ "vaddss $tmp2,$src1,$src2\n\t" |
| "pshufd $tmp,$src2,0x01\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$src2,0x02\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$src2,0x03\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
| "vextractf128 $tmp3,$src2\n\t" |
| "vaddss $tmp2,$tmp2,$tmp3\n\t" |
| "pshufd $tmp,$tmp3,0x01\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp3,0x02\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp3,0x03\n\t" |
| "vaddss $dst,$tmp2,$tmp\t! add reduction8F" %} |
| ins_encode %{ |
| __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); |
| __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ |
| predicate(UseAVX > 2); |
| match(Set dst (AddReductionVF src1 src2)); |
| effect(TEMP tmp, TEMP tmp2, TEMP tmp3); |
| format %{ "vaddss $tmp2,$src1,$src2\n\t" |
| "pshufd $tmp,$src2,0x01\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$src2,0x02\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$src2,0x03\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
|             "vextractf32x4 $tmp3,$src2, 0x1\n\t" |
| "vaddss $tmp2,$tmp2,$tmp3\n\t" |
| "pshufd $tmp,$tmp3,0x01\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp3,0x02\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp3,0x03\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
|             "vextractf32x4 $tmp3,$src2, 0x2\n\t" |
| "vaddss $tmp2,$tmp2,$tmp3\n\t" |
| "pshufd $tmp,$tmp3,0x01\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp3,0x02\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp3,0x03\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
|             "vextractf32x4 $tmp3,$src2, 0x3\n\t" |
| "vaddss $tmp2,$tmp2,$tmp3\n\t" |
| "pshufd $tmp,$tmp3,0x01\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp3,0x02\n\t" |
| "vaddss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp3,0x03\n\t" |
| "vaddss $dst,$tmp2,$tmp\t! add reduction16F" %} |
| ins_encode %{ |
| __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); |
| __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); |
| __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ |
| predicate(UseSSE >= 1 && UseAVX == 0); |
| match(Set dst (AddReductionVD src1 src2)); |
| effect(TEMP tmp, TEMP dst); |
| format %{ "movdqu $tmp,$src1\n\t" |
| "addsd $tmp,$src2\n\t" |
| "pshufd $dst,$src2,0xE\n\t" |
| "addsd $dst,$tmp\t! add reduction2D" %} |
| ins_encode %{ |
| __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); |
| __ addsd($tmp$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); |
| __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddReductionVD src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "vaddsd $tmp2,$src1,$src2\n\t" |
| "pshufd $tmp,$src2,0xE\n\t" |
| "vaddsd $dst,$tmp2,$tmp\t! add reduction2D" %} |
| ins_encode %{ |
| __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); |
| __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (AddReductionVD src1 src2)); |
| effect(TEMP tmp, TEMP tmp2, TEMP tmp3); |
| format %{ "vaddsd $tmp2,$src1,$src2\n\t" |
| "pshufd $tmp,$src2,0xE\n\t" |
| "vaddsd $tmp2,$tmp2,$tmp\n\t" |
| "vextractf128 $tmp3,$src2\n\t" |
| "vaddsd $tmp2,$tmp2,$tmp3\n\t" |
| "pshufd $tmp,$tmp3,0xE\n\t" |
| "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %} |
| ins_encode %{ |
| __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); |
| __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); |
| __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); |
| __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{ |
| predicate(UseAVX > 2); |
| match(Set dst (AddReductionVD src1 src2)); |
| effect(TEMP tmp, TEMP tmp2, TEMP tmp3); |
| format %{ "vaddsd $tmp2,$src1,$src2\n\t" |
| "pshufd $tmp,$src2,0xE\n\t" |
| "vaddsd $tmp2,$tmp2,$tmp\n\t" |
| "vextractf64x2 $tmp3,$src2, 0x1\n\t" |
| "vaddsd $tmp2,$tmp2,$tmp3\n\t" |
| "pshufd $tmp,$tmp3,0xE\n\t" |
| "vaddsd $tmp2,$tmp2,$tmp\n\t" |
| "vextractf64x2 $tmp3,$src2, 0x2\n\t" |
| "vaddsd $tmp2,$tmp2,$tmp3\n\t" |
| "pshufd $tmp,$tmp3,0xE\n\t" |
| "vaddsd $tmp2,$tmp2,$tmp\n\t" |
| "vextractf64x2 $tmp3,$src2, 0x3\n\t" |
| "vaddsd $tmp2,$tmp2,$tmp3\n\t" |
| "pshufd $tmp,$tmp3,0xE\n\t" |
| "vaddsd $dst,$tmp2,$tmp\t! add reduction8D" %} |
| ins_encode %{ |
| __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); |
| __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); |
| __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); |
| __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); |
| __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); |
| __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); |
| __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); |
| __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
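| |
| // For the 512-bit FP reductions each 128-bit quarter is extracted in turn |
| // and its lanes folded in index order, so the result matches a sequential |
| // scalar loop over the elements. |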
| |
| instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ |
| predicate(UseSSE > 3 && UseAVX == 0); |
| match(Set dst (MulReductionVI src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "pshufd $tmp2,$src2,0x1\n\t" |
| "pmulld $tmp2,$src2\n\t" |
| "movd $tmp,$src1\n\t" |
| "pmulld $tmp2,$tmp\n\t" |
| "movd $dst,$tmp2\t! mul reduction2I" %} |
| ins_encode %{ |
| __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); |
| __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); |
| __ movdl($tmp$$XMMRegister, $src1$$Register); |
| __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ movdl($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulReductionVI src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "pshufd $tmp2,$src2,0x1\n\t" |
| "vpmulld $tmp,$src2,$tmp2\n\t" |
| "movd $tmp2,$src1\n\t" |
| "vpmulld $tmp2,$tmp,$tmp2\n\t" |
| "movd $dst,$tmp2\t! mul reduction2I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); |
| __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ movdl($tmp2$$XMMRegister, $src1$$Register); |
| __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ movdl($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ |
| predicate(UseSSE > 3 && UseAVX == 0); |
| match(Set dst (MulReductionVI src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "pshufd $tmp2,$src2,0xE\n\t" |
| "pmulld $tmp2,$src2\n\t" |
| "pshufd $tmp,$tmp2,0x1\n\t" |
| "pmulld $tmp2,$tmp\n\t" |
| "movd $tmp,$src1\n\t" |
| "pmulld $tmp2,$tmp\n\t" |
| "movd $dst,$tmp2\t! mul reduction4I" %} |
| ins_encode %{ |
| __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); |
| __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); |
| __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ movdl($tmp$$XMMRegister, $src1$$Register); |
| __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ movdl($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulReductionVI src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "pshufd $tmp2,$src2,0xE\n\t" |
| "vpmulld $tmp,$src2,$tmp2\n\t" |
| "pshufd $tmp2,$tmp,0x1\n\t" |
| "vpmulld $tmp,$tmp,$tmp2\n\t" |
| "movd $tmp2,$src1\n\t" |
| "vpmulld $tmp2,$tmp,$tmp2\n\t" |
| "movd $dst,$tmp2\t! mul reduction4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); |
| __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); |
| __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ movdl($tmp2$$XMMRegister, $src1$$Register); |
| __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ movdl($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulReductionVI src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "vextracti128 $tmp,$src2\n\t" |
| "vpmulld $tmp,$tmp,$src2\n\t" |
| "pshufd $tmp2,$tmp,0xE\n\t" |
| "vpmulld $tmp,$tmp,$tmp2\n\t" |
| "pshufd $tmp2,$tmp,0x1\n\t" |
| "vpmulld $tmp,$tmp,$tmp2\n\t" |
| "movd $tmp2,$src1\n\t" |
| "vpmulld $tmp2,$tmp,$tmp2\n\t" |
| "movd $dst,$tmp2\t! mul reduction8I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); |
| __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); |
| __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); |
| __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); |
| __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ movdl($tmp2$$XMMRegister, $src1$$Register); |
| __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); |
| __ movdl($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ |
| predicate(UseAVX > 2); |
| match(Set dst (MulReductionVI src1 src2)); |
| effect(TEMP tmp, TEMP tmp2, TEMP tmp3); |
| format %{ "vextracti64x4 $tmp3,$src2\n\t" |
| "vpmulld $tmp3,$tmp3,$src2\n\t" |
| "vextracti128 $tmp,$tmp3\n\t" |
|             "vpmulld $tmp,$tmp,$tmp3\n\t" |
| "pshufd $tmp2,$tmp,0xE\n\t" |
| "vpmulld $tmp,$tmp,$tmp2\n\t" |
| "pshufd $tmp2,$tmp,0x1\n\t" |
| "vpmulld $tmp,$tmp,$tmp2\n\t" |
| "movd $tmp2,$src1\n\t" |
| "vpmulld $tmp2,$tmp,$tmp2\n\t" |
| "movd $dst,$tmp2\t! mul reduction16I" %} |
| ins_encode %{ |
| __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); |
| __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); |
| __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); |
| __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); |
| __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); |
| __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); |
| __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); |
| __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); |
| __ movdl($tmp2$$XMMRegister, $src1$$Register); |
| __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); |
| __ movdl($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| #ifdef _LP64 |
| instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); |
| match(Set dst (MulReductionVL src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "pshufd $tmp2,$src2,0xE\n\t" |
| "vpmullq $tmp,$src2,$tmp2\n\t" |
| "movdq $tmp2,$src1\n\t" |
| "vpmullq $tmp2,$tmp,$tmp2\n\t" |
| "movdq $dst,$tmp2\t! mul reduction2L" %} |
| ins_encode %{ |
| __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); |
| __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); |
| __ movdq($tmp2$$XMMRegister, $src1$$Register); |
| __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); |
| __ movdq($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); |
| match(Set dst (MulReductionVL src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" |
| "vpmullq $tmp2,$tmp,$src2\n\t" |
| "pshufd $tmp,$tmp2,0xE\n\t" |
| "vpmullq $tmp2,$tmp2,$tmp\n\t" |
| "movdq $tmp,$src1\n\t" |
| "vpmullq $tmp2,$tmp2,$tmp\n\t" |
| "movdq $dst,$tmp2\t! mul reduction4L" %} |
| ins_encode %{ |
| __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); |
| __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); |
| __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); |
| __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); |
| __ movdq($tmp$$XMMRegister, $src1$$Register); |
| __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); |
| __ movdq($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); |
| match(Set dst (MulReductionVL src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "vextracti64x4 $tmp2,$src2\n\t" |
| "vpmullq $tmp2,$tmp2,$src2\n\t" |
| "vextracti128 $tmp,$tmp2\n\t" |
| "vpmullq $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp2,0xE\n\t" |
| "vpmullq $tmp2,$tmp2,$tmp\n\t" |
| "movdq $tmp,$src1\n\t" |
| "vpmullq $tmp2,$tmp2,$tmp\n\t" |
| "movdq $dst,$tmp2\t! mul reduction8L" %} |
| ins_encode %{ |
| __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); |
| __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); |
| __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); |
| __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); |
| __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); |
| __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); |
| __ movdq($tmp$$XMMRegister, $src1$$Register); |
| __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); |
| __ movdq($dst$$Register, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| #endif |
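| |
| // vpmullq (packed 64 x 64 -> 64 multiply) is an AVX512DQ instruction, |
| // hence the supports_avx512dq() guard on the long multiply rules above. |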
| |
| instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ |
| predicate(UseSSE >= 1 && UseAVX == 0); |
| match(Set dst (MulReductionVF src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "movdqu $tmp,$src1\n\t" |
| "mulss $tmp,$src2\n\t" |
| "pshufd $tmp2,$src2,0x01\n\t" |
| "mulss $tmp,$tmp2\n\t" |
| "movdqu $dst,$tmp\t! mul reduction2F" %} |
| ins_encode %{ |
| __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); |
| __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); |
| __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); |
| __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulReductionVF src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "vmulss $tmp2,$src1,$src2\n\t" |
| "pshufd $tmp,$src2,0x01\n\t" |
| "vmulss $dst,$tmp2,$tmp\t! mul reduction2F" %} |
| ins_encode %{ |
| __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); |
| __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ |
| predicate(UseSSE >= 1 && UseAVX == 0); |
| match(Set dst (MulReductionVF src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "movdqu $tmp,$src1\n\t" |
| "mulss $tmp,$src2\n\t" |
| "pshufd $tmp2,$src2,0x01\n\t" |
| "mulss $tmp,$tmp2\n\t" |
| "pshufd $tmp2,$src2,0x02\n\t" |
| "mulss $tmp,$tmp2\n\t" |
| "pshufd $tmp2,$src2,0x03\n\t" |
| "mulss $tmp,$tmp2\n\t" |
| "movdqu $dst,$tmp\t! mul reduction4F" %} |
| ins_encode %{ |
| __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); |
| __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); |
| __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); |
| __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); |
| __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); |
| __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); |
| __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); |
| __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulReductionVF src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "vmulss $tmp2,$src1,$src2\n\t" |
| "pshufd $tmp,$src2,0x01\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$src2,0x02\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$src2,0x03\n\t" |
| "vmulss $dst,$tmp2,$tmp\t! mul reduction4F" %} |
| ins_encode %{ |
| __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); |
| __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulReductionVF src1 src2)); |
| effect(TEMP tmp, TEMP tmp2, TEMP tmp3); |
| format %{ "vmulss $tmp2,$src1,$src2\n\t" |
| "pshufd $tmp,$src2,0x01\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$src2,0x02\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$src2,0x03\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "vextractf128 $tmp3,$src2\n\t" |
| "vmulss $tmp2,$tmp2,$tmp3\n\t" |
| "pshufd $tmp,$tmp3,0x01\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp3,0x02\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp3,0x03\n\t" |
| "vmulss $dst,$tmp2,$tmp\t! mul reduction8F" %} |
| ins_encode %{ |
| __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); |
| __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ |
| predicate(UseAVX > 2); |
| match(Set dst (MulReductionVF src1 src2)); |
| effect(TEMP tmp, TEMP tmp2, TEMP tmp3); |
| format %{ "vmulss $tmp2,$src1,$src2\n\t" |
| "pshufd $tmp,$src2,0x01\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$src2,0x02\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$src2,0x03\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "vextractf32x4 $tmp3,$src2, 0x1\n\t" |
| "vmulss $tmp2,$tmp2,$tmp3\n\t" |
| "pshufd $tmp,$tmp3,0x01\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp3,0x02\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp3,0x03\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "vextractf32x4 $tmp3,$src2, 0x2\n\t" |
| "vmulss $tmp2,$tmp2,$tmp3\n\t" |
| "pshufd $tmp,$tmp3,0x01\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp3,0x02\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp3,0x03\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "vextractf32x4 $tmp3,$src2, 0x3\n\t" |
| "vmulss $tmp2,$tmp2,$tmp3\n\t" |
| "pshufd $tmp,$tmp3,0x01\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp3,0x02\n\t" |
| "vmulss $tmp2,$tmp2,$tmp\n\t" |
| "pshufd $tmp,$tmp3,0x03\n\t" |
| "vmulss $dst,$tmp2,$tmp\t! mul reduction16F" %} |
| ins_encode %{ |
| __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); |
| __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); |
| __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
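| // In the double-precision reductions below, pshufd with immediate 0xE |
| // (dword selector 2,3,0,0) copies the upper 64-bit lane of its source into |
| // the low lane so the scalar mulsd/vmulsd can reach it. |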
| instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ |
| predicate(UseSSE >= 2 && UseAVX == 0); |
| match(Set dst (MulReductionVD src1 src2)); |
| effect(TEMP tmp, TEMP dst); |
| format %{ "movdqu $tmp,$src1\n\t" |
| "mulsd $tmp,$src2\n\t" |
| "pshufd $dst,$src2,0xE\n\t" |
| "mulsd $dst,$tmp\t! mul reduction2D" %} |
| ins_encode %{ |
| __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); |
| __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); |
| __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulReductionVD src1 src2)); |
| effect(TEMP tmp, TEMP tmp2); |
| format %{ "vmulsd $tmp2,$src1,$src2\n\t" |
| "pshufd $tmp,$src2,0xE\n\t" |
| "vmulsd $dst,$tmp2,$tmp\t! mul reduction2D" %} |
| ins_encode %{ |
| __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); |
| __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ |
| predicate(UseAVX > 0); |
| match(Set dst (MulReductionVD src1 src2)); |
| effect(TEMP tmp, TEMP tmp2, TEMP tmp3); |
| format %{ "vmulsd $tmp2,$src1,$src2\n\t" |
| "pshufd $tmp,$src2,0xE\n\t" |
| "vmulsd $tmp2,$tmp2,$tmp\n\t" |
| "vextractf128 $tmp3,$src2\n\t" |
| "vmulsd $tmp2,$tmp2,$tmp3\n\t" |
| "pshufd $tmp,$tmp3,0xE\n\t" |
| "vmulsd $dst,$tmp2,$tmp\t! mul reduction4D" %} |
| ins_encode %{ |
| __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); |
| __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); |
| __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); |
| __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{ |
| predicate(UseAVX > 2); |
| match(Set dst (MulReductionVD src1 src2)); |
| effect(TEMP tmp, TEMP tmp2, TEMP tmp3); |
| format %{ "vmulsd $tmp2,$src1,$src2\n\t" |
| "pshufd $tmp,$src2,0xE\n\t" |
| "vmulsd $tmp2,$tmp2,$tmp\n\t" |
| "vextractf64x2 $tmp3,$src2, 0x1\n\t" |
| "vmulsd $tmp2,$tmp2,$tmp3\n\t" |
| "pshufd $tmp,$src2,0xE\n\t" |
| "vmulsd $tmp2,$tmp2,$tmp\n\t" |
| "vextractf64x2 $tmp3,$src2, 0x2\n\t" |
| "vmulsd $tmp2,$tmp2,$tmp3\n\t" |
| "pshufd $tmp,$tmp3,0xE\n\t" |
| "vmulsd $tmp2,$tmp2,$tmp\n\t" |
| "vextractf64x2 $tmp3,$src2, 0x3\n\t" |
| "vmulsd $tmp2,$tmp2,$tmp3\n\t" |
| "pshufd $tmp,$tmp3,0xE\n\t" |
| "vmulsd $dst,$tmp2,$tmp\t! mul reduction8D" %} |
| ins_encode %{ |
| __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); |
| __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); |
| __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); |
| __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); |
| __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); |
| __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); |
| __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); |
| __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); |
| __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
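| |
| // The multiply-reduction instructs above all follow the same shape: the |
| // accumulator starts as src1 * src2[0], pshufd rotates each remaining lane |
| // of src2 into lane 0 (immediates 0x01/0x02/0x03 select float lanes 1-3), |
| // and vextractf* brings the upper 128-bit pieces of YMM/ZMM sources down. |
| // A minimal scalar sketch of the MulReductionVF semantics being matched |
| // (plain C, for illustration only): |
| // |
| //   float mul_reduction_vf(float src1, const float *src2, int n) { |
| //     float acc = src1;             // incoming accumulator |
| //     for (int i = 0; i < n; i++) { // n = 4, 8 or 16 lanes |
| //       acc *= src2[i];             // one vmulss per lane above |
| //     } |
| //     return acc; |
| //   } |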
| |
| // ====================VECTOR ARITHMETIC======================================= |
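| |
| // Conventions for the instructs in this section: most operations come in |
| // three flavors - a two-operand SSE form (dst op= src), a three-operand AVX |
| // register form (dst = src1 op src2) and an AVX register-memory form. The |
| // vector_len argument passed to the assembler selects the encoded width: |
| // 0 = 128-bit (XMM), 1 = 256-bit (YMM), 2 = 512-bit (ZMM). Note that the |
| // 256-bit integer forms require AVX2 (UseAVX > 1) while the 256-bit FP |
| // forms only need AVX (UseAVX > 0); 512-bit forms require AVX-512 |
| // (UseAVX > 2). Packed integer add/sub/mul wrap modulo the lane width, |
| // matching Java's two's-complement arithmetic. |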
| |
| // --------------------------------- ADD -------------------------------------- |
| |
| // Bytes vector add |
| instruct vadd4B(vecS dst, vecS src) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (AddVB dst src)); |
| format %{ "paddb $dst,$src\t! add packed4B" %} |
| ins_encode %{ |
| __ paddb($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (AddVB src1 src2)); |
| format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (AddVB src (LoadVector mem))); |
| format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd8B(vecD dst, vecD src) %{ |
| predicate(n->as_Vector()->length() == 8); |
| match(Set dst (AddVB dst src)); |
| format %{ "paddb $dst,$src\t! add packed8B" %} |
| ins_encode %{ |
| __ paddb($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (AddVB src1 src2)); |
| format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (AddVB src (LoadVector mem))); |
| format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd16B(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length() == 16); |
| match(Set dst (AddVB dst src)); |
| format %{ "paddb $dst,$src\t! add packed16B" %} |
| ins_encode %{ |
| __ paddb($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 16); |
| match(Set dst (AddVB src1 src2)); |
| format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 16); |
| match(Set dst (AddVB src (LoadVector mem))); |
| format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 32); |
| match(Set dst (AddVB src1 src2)); |
| format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 32); |
| match(Set dst (AddVB src (LoadVector mem))); |
| format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 64); |
| match(Set dst (AddVB src1 src2)); |
| format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 64); |
| match(Set dst (AddVB src (LoadVector mem))); |
| format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Shorts/Chars vector add |
| instruct vadd2S(vecS dst, vecS src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (AddVS dst src)); |
| format %{ "paddw $dst,$src\t! add packed2S" %} |
| ins_encode %{ |
| __ paddw($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (AddVS src1 src2)); |
| format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (AddVS src (LoadVector mem))); |
| format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd4S(vecD dst, vecD src) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (AddVS dst src)); |
| format %{ "paddw $dst,$src\t! add packed4S" %} |
| ins_encode %{ |
| __ paddw($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (AddVS src1 src2)); |
| format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (AddVS src (LoadVector mem))); |
| format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd8S(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length() == 8); |
| match(Set dst (AddVS dst src)); |
| format %{ "paddw $dst,$src\t! add packed8S" %} |
| ins_encode %{ |
| __ paddw($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (AddVS src1 src2)); |
| format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (AddVS src (LoadVector mem))); |
| format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
| match(Set dst (AddVS src1 src2)); |
| format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
| match(Set dst (AddVS src (LoadVector mem))); |
| format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
| match(Set dst (AddVS src1 src2)); |
| format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
| match(Set dst (AddVS src (LoadVector mem))); |
| format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Integers vector add |
| instruct vadd2I(vecD dst, vecD src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (AddVI dst src)); |
| format %{ "paddd $dst,$src\t! add packed2I" %} |
| ins_encode %{ |
| __ paddd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (AddVI src1 src2)); |
| format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (AddVI src (LoadVector mem))); |
| format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd4I(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (AddVI dst src)); |
| format %{ "paddd $dst,$src\t! add packed4I" %} |
| ins_encode %{ |
| __ paddd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (AddVI src1 src2)); |
| format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (AddVI src (LoadVector mem))); |
| format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 8); |
| match(Set dst (AddVI src1 src2)); |
| format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 8); |
| match(Set dst (AddVI src (LoadVector mem))); |
| format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (AddVI src1 src2)); |
| format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (AddVI src (LoadVector mem))); |
| format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Longs vector add |
| instruct vadd2L(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (AddVL dst src)); |
| format %{ "paddq $dst,$src\t! add packed2L" %} |
| ins_encode %{ |
| __ paddq($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (AddVL src1 src2)); |
| format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (AddVL src (LoadVector mem))); |
| format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 4); |
| match(Set dst (AddVL src1 src2)); |
| format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 4); |
| match(Set dst (AddVL src (LoadVector mem))); |
| format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (AddVL src1 src2)); |
| format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (AddVL src (LoadVector mem))); |
| format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Floats vector add |
| instruct vadd2F(vecD dst, vecD src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (AddVF dst src)); |
| format %{ "addps $dst,$src\t! add packed2F" %} |
| ins_encode %{ |
| __ addps($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (AddVF src1 src2)); |
| format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (AddVF src (LoadVector mem))); |
| format %{ "vaddps $dst,$src,$mem\t! add packed2F" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd4F(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (AddVF dst src)); |
| format %{ "addps $dst,$src\t! add packed4F" %} |
| ins_encode %{ |
| __ addps($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (AddVF src1 src2)); |
| format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (AddVF src (LoadVector mem))); |
| format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (AddVF src1 src2)); |
| format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (AddVF src (LoadVector mem))); |
| format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (AddVF src1 src2)); |
| format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (AddVF src (LoadVector mem))); |
| format %{ "vaddps $dst,$src,$mem\t! add packed16F" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Doubles vector add |
| instruct vadd2D(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (AddVD dst src)); |
| format %{ "addpd $dst,$src\t! add packed2D" %} |
| ins_encode %{ |
| __ addpd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (AddVD src1 src2)); |
| format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (AddVD src (LoadVector mem))); |
| format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (AddVD src1 src2)); |
| format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (AddVD src (LoadVector mem))); |
| format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (AddVD src1 src2)); |
| format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (AddVD src (LoadVector mem))); |
| format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- SUB -------------------------------------- |
| |
| // Bytes vector sub |
| instruct vsub4B(vecS dst, vecS src) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (SubVB dst src)); |
| format %{ "psubb $dst,$src\t! sub packed4B" %} |
| ins_encode %{ |
| __ psubb($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (SubVB src1 src2)); |
| format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (SubVB src (LoadVector mem))); |
| format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub8B(vecD dst, vecD src) %{ |
| predicate(n->as_Vector()->length() == 8); |
| match(Set dst (SubVB dst src)); |
| format %{ "psubb $dst,$src\t! sub packed8B" %} |
| ins_encode %{ |
| __ psubb($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (SubVB src1 src2)); |
| format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (SubVB src (LoadVector mem))); |
| format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub16B(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length() == 16); |
| match(Set dst (SubVB dst src)); |
| format %{ "psubb $dst,$src\t! sub packed16B" %} |
| ins_encode %{ |
| __ psubb($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 16); |
| match(Set dst (SubVB src1 src2)); |
| format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 16); |
| match(Set dst (SubVB src (LoadVector mem))); |
| format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 32); |
| match(Set dst (SubVB src1 src2)); |
| format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 32); |
| match(Set dst (SubVB src (LoadVector mem))); |
| format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 64); |
| match(Set dst (SubVB src1 src2)); |
| format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 64); |
| match(Set dst (SubVB src (LoadVector mem))); |
| format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Shorts/Chars vector sub |
| instruct vsub2S(vecS dst, vecS src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (SubVS dst src)); |
| format %{ "psubw $dst,$src\t! sub packed2S" %} |
| ins_encode %{ |
| __ psubw($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (SubVS src1 src2)); |
| format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (SubVS src (LoadVector mem))); |
| format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub4S(vecD dst, vecD src) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (SubVS dst src)); |
| format %{ "psubw $dst,$src\t! sub packed4S" %} |
| ins_encode %{ |
| __ psubw($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (SubVS src1 src2)); |
| format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (SubVS src (LoadVector mem))); |
| format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub8S(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length() == 8); |
| match(Set dst (SubVS dst src)); |
| format %{ "psubw $dst,$src\t! sub packed8S" %} |
| ins_encode %{ |
| __ psubw($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (SubVS src1 src2)); |
| format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (SubVS src (LoadVector mem))); |
| format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
| match(Set dst (SubVS src1 src2)); |
| format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
| match(Set dst (SubVS src (LoadVector mem))); |
| format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
| match(Set dst (SubVS src1 src2)); |
| format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
| match(Set dst (SubVS src (LoadVector mem))); |
| format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Integers vector sub |
| instruct vsub2I(vecD dst, vecD src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (SubVI dst src)); |
| format %{ "psubd $dst,$src\t! sub packed2I" %} |
| ins_encode %{ |
| __ psubd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (SubVI src1 src2)); |
| format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (SubVI src (LoadVector mem))); |
| format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub4I(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (SubVI dst src)); |
| format %{ "psubd $dst,$src\t! sub packed4I" %} |
| ins_encode %{ |
| __ psubd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (SubVI src1 src2)); |
| format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (SubVI src (LoadVector mem))); |
| format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 8); |
| match(Set dst (SubVI src1 src2)); |
| format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 8); |
| match(Set dst (SubVI src (LoadVector mem))); |
| format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (SubVI src1 src2)); |
| format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (SubVI src (LoadVector mem))); |
| format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Longs vector sub |
| instruct vsub2L(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (SubVL dst src)); |
| format %{ "psubq $dst,$src\t! sub packed2L" %} |
| ins_encode %{ |
| __ psubq($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (SubVL src1 src2)); |
| format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (SubVL src (LoadVector mem))); |
| format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 4); |
| match(Set dst (SubVL src1 src2)); |
| format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 4); |
| match(Set dst (SubVL src (LoadVector mem))); |
| format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (SubVL src1 src2)); |
| format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (SubVL src (LoadVector mem))); |
| format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Floats vector sub |
| instruct vsub2F(vecD dst, vecD src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (SubVF dst src)); |
| format %{ "subps $dst,$src\t! sub packed2F" %} |
| ins_encode %{ |
| __ subps($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (SubVF src1 src2)); |
| format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (SubVF src (LoadVector mem))); |
| format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub4F(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (SubVF dst src)); |
| format %{ "subps $dst,$src\t! sub packed4F" %} |
| ins_encode %{ |
| __ subps($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (SubVF src1 src2)); |
| format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (SubVF src (LoadVector mem))); |
| format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (SubVF src1 src2)); |
| format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (SubVF src (LoadVector mem))); |
| format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (SubVF src1 src2)); |
| format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (SubVF src (LoadVector mem))); |
| format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Doubles vector sub |
| instruct vsub2D(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (SubVD dst src)); |
| format %{ "subpd $dst,$src\t! sub packed2D" %} |
| ins_encode %{ |
| __ subpd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (SubVD src1 src2)); |
| format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (SubVD src (LoadVector mem))); |
| format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (SubVD src1 src2)); |
| format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (SubVD src (LoadVector mem))); |
| format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (SubVD src1 src2)); |
| format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (SubVD src (LoadVector mem))); |
| format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- MUL -------------------------------------- |
| |
| // Shorts/Chars vector mul |
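| // pmullw/vpmullw keep only the low 16 bits of each 16x16-bit product, which |
| // matches Java short/char multiplication after truncation back to 16 bits. |
| // A one-lane sketch (plain C, illustration only): |
| // |
| //   short mul_lane(short a, short b) { |
| //     return (short)(a * b);   // low 16 bits of the widened product |
| //   } |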
| instruct vmul2S(vecS dst, vecS src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (MulVS dst src)); |
| format %{ "pmullw $dst,$src\t! mul packed2S" %} |
| ins_encode %{ |
| __ pmullw($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (MulVS src1 src2)); |
| format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (MulVS src (LoadVector mem))); |
| format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul4S(vecD dst, vecD src) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (MulVS dst src)); |
| format %{ "pmullw $dst,$src\t! mul packed4S" %} |
| ins_encode %{ |
| __ pmullw($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (MulVS src1 src2)); |
| format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (MulVS src (LoadVector mem))); |
| format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul8S(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length() == 8); |
| match(Set dst (MulVS dst src)); |
| format %{ "pmullw $dst,$src\t! mul packed8S" %} |
| ins_encode %{ |
| __ pmullw($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (MulVS src1 src2)); |
| format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (MulVS src (LoadVector mem))); |
| format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
| match(Set dst (MulVS src1 src2)); |
| format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
| match(Set dst (MulVS src (LoadVector mem))); |
| format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
| match(Set dst (MulVS src1 src2)); |
| format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
| match(Set dst (MulVS src (LoadVector mem))); |
| format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
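| // The MulVS rules above depend on pmullw/vpmullw keeping only the low
| // 16 bits of each 32-bit lane product, which matches Java's short
| // multiply after its implicit narrowing cast. A worked lane as a
| // sketch (the values are illustrative, not taken from this file):
| //
| //   short a = 0x4000, b = 4;
| //   (short)(a * b)     // int product 0x00010000, narrowed to 0x0000
| //   pmullw lane        // 0x4000 * 0x0004 = 0x00010000, low 16 bits
| //                      // kept = 0x0000, same result
|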
| // Integers vector mul (pmulld requires SSE4.1, hence UseSSE > 3)
| instruct vmul2I(vecD dst, vecD src) %{ |
| predicate(UseSSE > 3 && n->as_Vector()->length() == 2); |
| match(Set dst (MulVI dst src)); |
| format %{ "pmulld $dst,$src\t! mul packed2I" %} |
| ins_encode %{ |
| __ pmulld($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (MulVI src1 src2)); |
| format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (MulVI src (LoadVector mem))); |
| format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul4I(vecX dst, vecX src) %{ |
| predicate(UseSSE > 3 && n->as_Vector()->length() == 4); |
| match(Set dst (MulVI dst src)); |
| format %{ "pmulld $dst,$src\t! mul packed4I" %} |
| ins_encode %{ |
| __ pmulld($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (MulVI src1 src2)); |
| format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (MulVI src (LoadVector mem))); |
| format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); |
| match(Set dst (MulVL src1 src2)); |
| format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); |
| match(Set dst (MulVL src (LoadVector mem))); |
| format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); |
| match(Set dst (MulVL src1 src2)); |
| format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); |
| match(Set dst (MulVL src (LoadVector mem))); |
| format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); |
| match(Set dst (MulVL src1 src2)); |
| format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); |
| match(Set dst (MulVL src (LoadVector mem))); |
| format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
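|
| // vpmullq is an AVX-512DQ instruction, which is why every MulVL rule
| // above also tests VM_Version::supports_avx512dq(). Without the DQ
| // extension there is no packed 64-bit low multiply, so long multiplies
| // are presumably left to the scalar rules, one element at a time, e.g.
| // (hypothetical registers, as a sketch):
| //
| //   imul rax, rbx      // one 64-bit product per instruction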
| |
| instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 8); |
| match(Set dst (MulVI src1 src2)); |
| format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 8); |
| match(Set dst (MulVI src (LoadVector mem))); |
| format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (MulVI src1 src2)); |
| format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (MulVI src (LoadVector mem))); |
| format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Floats vector mul |
| instruct vmul2F(vecD dst, vecD src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (MulVF dst src)); |
| format %{ "mulps $dst,$src\t! mul packed2F" %} |
| ins_encode %{ |
| __ mulps($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (MulVF src1 src2)); |
| format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (MulVF src (LoadVector mem))); |
| format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul4F(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (MulVF dst src)); |
| format %{ "mulps $dst,$src\t! mul packed4F" %} |
| ins_encode %{ |
| __ mulps($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (MulVF src1 src2)); |
| format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (MulVF src (LoadVector mem))); |
| format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (MulVF src1 src2)); |
| format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (MulVF src (LoadVector mem))); |
| format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (MulVF src1 src2)); |
| format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (MulVF src (LoadVector mem))); |
| format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Doubles vector mul |
| instruct vmul2D(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (MulVD dst src)); |
| format %{ "mulpd $dst,$src\t! mul packed2D" %} |
| ins_encode %{ |
| __ mulpd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (MulVD src1 src2)); |
| format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (MulVD src (LoadVector mem))); |
| format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (MulVD src1 src2)); |
| format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (MulVD src (LoadVector mem))); |
| format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (MulVD src1 src2)); |
| format %{ "vmulpd $dst k0,$src1,$src2\t! mul packed8D" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (MulVD src (LoadVector mem))); |
| format %{ "vmulpd $dst k0,$src,$mem\t! mul packed8D" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- DIV -------------------------------------- |
| |
| // Floats vector div |
| instruct vdiv2F(vecD dst, vecD src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (DivVF dst src)); |
| format %{ "divps $dst,$src\t! div packed2F" %} |
| ins_encode %{ |
| __ divps($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (DivVF src1 src2)); |
| format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (DivVF src (LoadVector mem))); |
| format %{ "vdivps $dst,$src,$mem\t! div packed2F" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdiv4F(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (DivVF dst src)); |
| format %{ "divps $dst,$src\t! div packed4F" %} |
| ins_encode %{ |
| __ divps($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (DivVF src1 src2)); |
| format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (DivVF src (LoadVector mem))); |
| format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (DivVF src1 src2)); |
| format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (DivVF src (LoadVector mem))); |
| format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
| match(Set dst (DivVF src1 src2)); |
| format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
| match(Set dst (DivVF src (LoadVector mem))); |
| format %{ "vdivps $dst,$src,$mem\t! div packed16F" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Doubles vector div |
| instruct vdiv2D(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (DivVD dst src)); |
| format %{ "divpd $dst,$src\t! div packed2D" %} |
| ins_encode %{ |
| __ divpd($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (DivVD src1 src2)); |
| format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (DivVD src (LoadVector mem))); |
| format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (DivVD src1 src2)); |
| format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (DivVD src (LoadVector mem))); |
| format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (DivVD src1 src2)); |
| format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (DivVD src (LoadVector mem))); |
| format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ------------------------------ Shift --------------------------------------- |
| |
| // Left and right shift count vectors are the same on x86: the packed shift
| // instructions read the count from the low 64 bits of the xmm register, so
| // a single rule serves both LShiftCntV and RShiftCntV.
| instruct vshiftcnt(vecS dst, rRegI cnt) %{ |
| match(Set dst (LShiftCntV cnt)); |
| match(Set dst (RShiftCntV cnt)); |
| format %{ "movd $dst,$cnt\t! load shift count" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $cnt$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
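|
| // As a sketch of how the loaded count is consumed (register names here
| // are hypothetical, not taken from this file):
| //
| //   movd  xmm1, ecx    // vshiftcnt: scalar count into low bits of xmm1
| //   pslld xmm0, xmm1   // left shift: each int lane by the count in the
| //                      // low 64 bits of xmm1
| //   psrad xmm0, xmm1   // arithmetic right shift can reuse the same xmm1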
| |
| // --------------------------------- Sqrt -------------------------------------- |
| |
| // Floating point vector sqrt - double precision only |
| instruct vsqrt2D_reg(vecX dst, vecX src) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (SqrtVD src)); |
| format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsqrt2D_mem(vecX dst, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (SqrtVD (LoadVector mem))); |
| format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsqrt4D_reg(vecY dst, vecY src) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (SqrtVD src)); |
| format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsqrt4D_mem(vecY dst, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (SqrtVD (LoadVector mem))); |
| format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (SqrtVD src)); |
| format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsqrt8D_mem(vecZ dst, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (SqrtVD (LoadVector mem))); |
| format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ------------------------------ LeftShift ----------------------------------- |
| |
| // Shorts/Chars vector left shift |
| instruct vsll2S(vecS dst, vecS shift) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (LShiftVS dst shift)); |
| format %{ "psllw $dst,$shift\t! left shift packed2S" %} |
| ins_encode %{ |
| __ psllw($dst$$XMMRegister, $shift$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll2S_imm(vecS dst, immI8 shift) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (LShiftVS dst shift)); |
| format %{ "psllw $dst,$shift\t! left shift packed2S" %} |
| ins_encode %{ |
| __ psllw($dst$$XMMRegister, (int)$shift$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (LShiftVS src shift)); |
| format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (LShiftVS src shift)); |
| format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll4S(vecD dst, vecS shift) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (LShiftVS dst shift)); |
| format %{ "psllw $dst,$shift\t! left shift packed4S" %} |
| ins_encode %{ |
| __ psllw($dst$$XMMRegister, $shift$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll4S_imm(vecD dst, immI8 shift) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (LShiftVS dst shift)); |
| format %{ "psllw $dst,$shift\t! left shift packed4S" %} |
| ins_encode %{ |
| __ psllw($dst$$XMMRegister, (int)$shift$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (LShiftVS src shift)); |
| format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (LShiftVS src shift)); |
| format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll8S(vecX dst, vecS shift) %{ |
| predicate(n->as_Vector()->length() == 8); |
| match(Set dst (LShiftVS dst shift)); |
| format %{ "psllw $dst,$shift\t! left shift packed8S" %} |
| ins_encode %{ |
| __ psllw($dst$$XMMRegister, $shift$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll8S_imm(vecX dst, immI8 shift) %{ |
| predicate(n->as_Vector()->length() == 8); |
| match(Set dst (LShiftVS dst shift)); |
| format %{ "psllw $dst,$shift\t! left shift packed8S" %} |
| ins_encode %{ |
| __ psllw($dst$$XMMRegister, (int)$shift$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (LShiftVS src shift)); |
| format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (LShiftVS src shift)); |
| format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
| match(Set dst (LShiftVS src shift)); |
| format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
| match(Set dst (LShiftVS src shift)); |
| format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
| match(Set dst (LShiftVS src shift)); |
| format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
| match(Set dst (LShiftVS src shift)); |
| format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Integers vector left shift |
| instruct vsll2I(vecD dst, vecS shift) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (LShiftVI dst shift)); |
| format %{ "pslld $dst,$shift\t! left shift packed2I" %} |
| ins_encode %{ |
| __ pslld($dst$$XMMRegister, $shift$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll2I_imm(vecD dst, immI8 shift) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (LShiftVI dst shift)); |
| format %{ "pslld $dst,$shift\t! left shift packed2I" %} |
| ins_encode %{ |
| __ pslld($dst$$XMMRegister, (int)$shift$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (LShiftVI src shift)); |
| format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (LShiftVI src shift)); |
| format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll4I(vecX dst, vecS shift) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (LShiftVI dst shift)); |
| format %{ "pslld $dst,$shift\t! left shift packed4I" %} |
| ins_encode %{ |
| __ pslld($dst$$XMMRegister, $shift$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll4I_imm(vecX dst, immI8 shift) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (LShiftVI dst shift)); |
| format %{ "pslld $dst,$shift\t! left shift packed4I" %} |
| ins_encode %{ |
| __ pslld($dst$$XMMRegister, (int)$shift$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (LShiftVI src shift)); |
| format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (LShiftVI src shift)); |
| format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 8); |
| match(Set dst (LShiftVI src shift)); |
| format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 8); |
| match(Set dst (LShiftVI src shift)); |
| format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (LShiftVI src shift)); |
| format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (LShiftVI src shift)); |
| format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Longs vector left shift |
| instruct vsll2L(vecX dst, vecS shift) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (LShiftVL dst shift)); |
| format %{ "psllq $dst,$shift\t! left shift packed2L" %} |
| ins_encode %{ |
| __ psllq($dst$$XMMRegister, $shift$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll2L_imm(vecX dst, immI8 shift) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (LShiftVL dst shift)); |
| format %{ "psllq $dst,$shift\t! left shift packed2L" %} |
| ins_encode %{ |
| __ psllq($dst$$XMMRegister, (int)$shift$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (LShiftVL src shift)); |
| format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (LShiftVL src shift)); |
| format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 4); |
| match(Set dst (LShiftVL src shift)); |
| format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 4); |
| match(Set dst (LShiftVL src shift)); |
| format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (LShiftVL src shift)); |
| format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (LShiftVL src shift)); |
| format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ----------------------- LogicalRightShift ----------------------------------- |
| |
| // A logical right shift of a short vector would produce an incorrect Java
| // result for negative values, because Java code converts a short to an int
| // with sign extension before shifting. Char vectors are fine, since chars
| // are unsigned values.
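| //
| // A minimal Java illustration of the mismatch (values are illustrative):
| //
| //   short s = -1;            // bits 0xFFFF
| //   (short)(s >>> 2)         // s widens to 0xFFFFFFFF, shifts to
| //                            // 0x3FFFFFFF, narrows back to 0xFFFF = -1
| //   psrlw on the lane        // 0xFFFF >> 2 = 0x3FFF = 16383, differs
| //
| //   char c = '\uFFFF';
| //   (char)(c >>> 2)          // widens to 0x0000FFFF, shifts to 0x3FFF
| //   psrlw on the lane        // 0x3FFF as well, so chars match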
| |
| instruct vsrl2S(vecS dst, vecS shift) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (URShiftVS dst shift)); |
| format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} |
| ins_encode %{ |
| __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl2S_imm(vecS dst, immI8 shift) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (URShiftVS dst shift)); |
| format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} |
| ins_encode %{ |
| __ psrlw($dst$$XMMRegister, (int)$shift$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (URShiftVS src shift)); |
| format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (URShiftVS src shift)); |
| format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl4S(vecD dst, vecS shift) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (URShiftVS dst shift)); |
| format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} |
| ins_encode %{ |
| __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl4S_imm(vecD dst, immI8 shift) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (URShiftVS dst shift)); |
| format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} |
| ins_encode %{ |
| __ psrlw($dst$$XMMRegister, (int)$shift$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (URShiftVS src shift)); |
| format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (URShiftVS src shift)); |
| format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl8S(vecX dst, vecS shift) %{ |
| predicate(n->as_Vector()->length() == 8); |
| match(Set dst (URShiftVS dst shift)); |
| format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} |
| ins_encode %{ |
| __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl8S_imm(vecX dst, immI8 shift) %{ |
| predicate(n->as_Vector()->length() == 8); |
| match(Set dst (URShiftVS dst shift)); |
| format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} |
| ins_encode %{ |
| __ psrlw($dst$$XMMRegister, (int)$shift$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (URShiftVS src shift)); |
| format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (URShiftVS src shift)); |
| format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
| match(Set dst (URShiftVS src shift)); |
| format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
| match(Set dst (URShiftVS src shift)); |
| format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
| match(Set dst (URShiftVS src shift)); |
| format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
| match(Set dst (URShiftVS src shift)); |
| format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Integers vector logical right shift |
| instruct vsrl2I(vecD dst, vecS shift) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (URShiftVI dst shift)); |
| format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} |
| ins_encode %{ |
| __ psrld($dst$$XMMRegister, $shift$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl2I_imm(vecD dst, immI8 shift) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (URShiftVI dst shift)); |
| format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} |
| ins_encode %{ |
| __ psrld($dst$$XMMRegister, (int)$shift$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (URShiftVI src shift)); |
| format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (URShiftVI src shift)); |
| format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl4I(vecX dst, vecS shift) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (URShiftVI dst shift)); |
| format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} |
| ins_encode %{ |
| __ psrld($dst$$XMMRegister, $shift$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl4I_imm(vecX dst, immI8 shift) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (URShiftVI dst shift)); |
| format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} |
| ins_encode %{ |
| __ psrld($dst$$XMMRegister, (int)$shift$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (URShiftVI src shift)); |
| format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (URShiftVI src shift)); |
| format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 8); |
| match(Set dst (URShiftVI src shift)); |
| format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 8); |
| match(Set dst (URShiftVI src shift)); |
| format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (URShiftVI src shift)); |
| format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (URShiftVI src shift)); |
| format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Longs vector logical right shift |
| instruct vsrl2L(vecX dst, vecS shift) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (URShiftVL dst shift)); |
| format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} |
| ins_encode %{ |
| __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl2L_imm(vecX dst, immI8 shift) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (URShiftVL dst shift)); |
| format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} |
| ins_encode %{ |
| __ psrlq($dst$$XMMRegister, (int)$shift$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (URShiftVL src shift)); |
| format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (URShiftVL src shift)); |
| format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 4); |
| match(Set dst (URShiftVL src shift)); |
| format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 4); |
| match(Set dst (URShiftVL src shift)); |
| format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (URShiftVL src shift)); |
| format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 8); |
| match(Set dst (URShiftVL src shift)); |
| format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ------------------- ArithmeticRightShift ----------------------------------- |
| |
| // Shorts/Chars vector arithmetic right shift |
| instruct vsra2S(vecS dst, vecS shift) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (RShiftVS dst shift)); |
| format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} |
| ins_encode %{ |
| __ psraw($dst$$XMMRegister, $shift$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra2S_imm(vecS dst, immI8 shift) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (RShiftVS dst shift)); |
| format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} |
| ins_encode %{ |
| __ psraw($dst$$XMMRegister, (int)$shift$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (RShiftVS src shift)); |
| format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (RShiftVS src shift)); |
| format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra4S(vecD dst, vecS shift) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (RShiftVS dst shift)); |
| format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} |
| ins_encode %{ |
| __ psraw($dst$$XMMRegister, $shift$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra4S_imm(vecD dst, immI8 shift) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (RShiftVS dst shift)); |
| format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} |
| ins_encode %{ |
| __ psraw($dst$$XMMRegister, (int)$shift$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (RShiftVS src shift)); |
| format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (RShiftVS src shift)); |
| format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra8S(vecX dst, vecS shift) %{ |
| predicate(n->as_Vector()->length() == 8); |
| match(Set dst (RShiftVS dst shift)); |
| format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} |
| ins_encode %{ |
| __ psraw($dst$$XMMRegister, $shift$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra8S_imm(vecX dst, immI8 shift) %{ |
| predicate(n->as_Vector()->length() == 8); |
| match(Set dst (RShiftVS dst shift)); |
| format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} |
| ins_encode %{ |
| __ psraw($dst$$XMMRegister, (int)$shift$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (RShiftVS src shift)); |
| format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
| match(Set dst (RShiftVS src shift)); |
| format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
| match(Set dst (RShiftVS src shift)); |
| format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
| match(Set dst (RShiftVS src shift)); |
| format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
| match(Set dst (RShiftVS src shift)); |
| format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
| match(Set dst (RShiftVS src shift)); |
| format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Integers vector arithmetic right shift |
| instruct vsra2I(vecD dst, vecS shift) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (RShiftVI dst shift)); |
| format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} |
| ins_encode %{ |
| __ psrad($dst$$XMMRegister, $shift$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra2I_imm(vecD dst, immI8 shift) %{ |
| predicate(n->as_Vector()->length() == 2); |
| match(Set dst (RShiftVI dst shift)); |
| format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} |
| ins_encode %{ |
| __ psrad($dst$$XMMRegister, (int)$shift$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (RShiftVI src shift)); |
| format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
| match(Set dst (RShiftVI src shift)); |
| format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra4I(vecX dst, vecS shift) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (RShiftVI dst shift)); |
| format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} |
| ins_encode %{ |
| __ psrad($dst$$XMMRegister, $shift$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra4I_imm(vecX dst, immI8 shift) %{ |
| predicate(n->as_Vector()->length() == 4); |
| match(Set dst (RShiftVI dst shift)); |
| format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} |
| ins_encode %{ |
| __ psrad($dst$$XMMRegister, (int)$shift$$constant); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (RShiftVI src shift)); |
| format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
| match(Set dst (RShiftVI src shift)); |
| format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 8); |
| match(Set dst (RShiftVI src shift)); |
| format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length() == 8); |
| match(Set dst (RShiftVI src shift)); |
| format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (RShiftVI src shift)); |
| format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length() == 16); |
| match(Set dst (RShiftVI src shift)); |
| format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // There are no vector arithmetic right shift instructions for longs in |
| // SSE/AVX2, so no RShiftVL rules are defined here. (AVX-512 adds vpsraq, |
| // but it is not matched in this file.) |
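| // |
| // If a 64-bit arithmetic right shift had to be synthesized, it could be |
| // built from the logical shift with the usual sign-correction identity. |
| // A scalar C++ sketch (hypothetical helper, not part of this file): |
| // |
| //   static inline int64_t sra64(int64_t x, int s) {  // assumes 0 <= s < 64 |
| //     uint64_t u = (uint64_t)x >> s;                 // logical shift |
| //     uint64_t m = 1ULL << (63 - s);                 // shifted sign-bit slot |
| //     return (int64_t)((u ^ m) - m);                 // sign-extend bit 63-s |
| //   } |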
| |
| |
| // --------------------------------- AND -------------------------------------- |
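| // Bitwise logical operations ignore element boundaries, so these rules (and |
| // the OR/XOR rules below) key on the total vector width (length_in_bytes) |
| // rather than on the element count used by the shift rules above. |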
| |
| instruct vand4B(vecS dst, vecS src) %{ |
| predicate(n->as_Vector()->length_in_bytes() == 4); |
| match(Set dst (AndV dst src)); |
| format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} |
| ins_encode %{ |
| __ pand($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); |
| match(Set dst (AndV src1 src2)); |
| format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vand4B_mem(vecS dst, vecS src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); |
| match(Set dst (AndV src (LoadVector mem))); |
| format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vand8B(vecD dst, vecD src) %{ |
| predicate(n->as_Vector()->length_in_bytes() == 8); |
| match(Set dst (AndV dst src)); |
| format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} |
| ins_encode %{ |
| __ pand($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); |
| match(Set dst (AndV src1 src2)); |
| format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vand8B_mem(vecD dst, vecD src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); |
| match(Set dst (AndV src (LoadVector mem))); |
| format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vand16B(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length_in_bytes() == 16); |
| match(Set dst (AndV dst src)); |
| format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} |
| ins_encode %{ |
| __ pand($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); |
| match(Set dst (AndV src1 src2)); |
| format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); |
| match(Set dst (AndV src (LoadVector mem))); |
| format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); |
| match(Set dst (AndV src1 src2)); |
| format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); |
| match(Set dst (AndV src (LoadVector mem))); |
| format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); |
| match(Set dst (AndV src1 src2)); |
| format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); |
| match(Set dst (AndV src (LoadVector mem))); |
| format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- OR --------------------------------------- |
| |
| instruct vor4B(vecS dst, vecS src) %{ |
| predicate(n->as_Vector()->length_in_bytes() == 4); |
| match(Set dst (OrV dst src)); |
| format %{ "por $dst,$src\t! or vectors (4 bytes)" %} |
| ins_encode %{ |
| __ por($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); |
| match(Set dst (OrV src1 src2)); |
| format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vor4B_mem(vecS dst, vecS src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); |
| match(Set dst (OrV src (LoadVector mem))); |
| format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vor8B(vecD dst, vecD src) %{ |
| predicate(n->as_Vector()->length_in_bytes() == 8); |
| match(Set dst (OrV dst src)); |
| format %{ "por $dst,$src\t! or vectors (8 bytes)" %} |
| ins_encode %{ |
| __ por($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); |
| match(Set dst (OrV src1 src2)); |
| format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vor8B_mem(vecD dst, vecD src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); |
| match(Set dst (OrV src (LoadVector mem))); |
| format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vor16B(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length_in_bytes() == 16); |
| match(Set dst (OrV dst src)); |
| format %{ "por $dst,$src\t! or vectors (16 bytes)" %} |
| ins_encode %{ |
| __ por($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); |
| match(Set dst (OrV src1 src2)); |
| format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vor16B_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); |
| match(Set dst (OrV src (LoadVector mem))); |
| format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); |
| match(Set dst (OrV src1 src2)); |
| format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vor32B_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); |
| match(Set dst (OrV src (LoadVector mem))); |
| format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); |
| match(Set dst (OrV src1 src2)); |
| format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); |
| match(Set dst (OrV src (LoadVector mem))); |
| format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // --------------------------------- XOR -------------------------------------- |
| |
| instruct vxor4B(vecS dst, vecS src) %{ |
| predicate(n->as_Vector()->length_in_bytes() == 4); |
| match(Set dst (XorV dst src)); |
| format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} |
| ins_encode %{ |
| __ pxor($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); |
| match(Set dst (XorV src1 src2)); |
| format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); |
| match(Set dst (XorV src (LoadVector mem))); |
| format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vxor8B(vecD dst, vecD src) %{ |
| predicate(n->as_Vector()->length_in_bytes() == 8); |
| match(Set dst (XorV dst src)); |
| format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %} |
| ins_encode %{ |
| __ pxor($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); |
| match(Set dst (XorV src1 src2)); |
| format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); |
| match(Set dst (XorV src (LoadVector mem))); |
| format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vxor16B(vecX dst, vecX src) %{ |
| predicate(n->as_Vector()->length_in_bytes() == 16); |
| match(Set dst (XorV dst src)); |
| format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} |
| ins_encode %{ |
| __ pxor($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); |
| match(Set dst (XorV src1 src2)); |
| format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{ |
| predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); |
| match(Set dst (XorV src (LoadVector mem))); |
| format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %} |
| ins_encode %{ |
| int vector_len = 0; |
| __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); |
| match(Set dst (XorV src1 src2)); |
| format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{ |
| predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); |
| match(Set dst (XorV src (LoadVector mem))); |
| format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %} |
| ins_encode %{ |
| int vector_len = 1; |
| __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); |
| match(Set dst (XorV src1 src2)); |
| format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{ |
| predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); |
| match(Set dst (XorV src (LoadVector mem))); |
| format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %} |
| ins_encode %{ |
| int vector_len = 2; |
| __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |