blob: dafb170500c3aae9042853bd480c7aa15b9d4b01 [file] [log] [blame]
#include <stdio.h>
#include <assert.h>
#include <malloc.h> // memalign
#include <string.h> // memset
#include <math.h> // isnormal
/* Scalar type aliases used throughout this file.  (The original declared
   UChar twice; a repeated typedef is only permitted from C11 onwards, so
   the duplicate has been removed.) */
typedef unsigned char          UChar;
typedef unsigned short int     UShort;
typedef unsigned int           UInt;
typedef signed int             Int;
typedef unsigned long long int ULong;
typedef signed long long int   Long;
typedef double                 Double;
typedef float                  Float;
typedef unsigned char          Bool;

/* Truth values for the Bool alias. */
#define False ((Bool)0)
#define True ((Bool)1)

/* Loop count used by the generated test functions (GEN_*_TEST macros). */
#define ITERS 1
/* Lane-type tag handed to the random-data generators and to the generated
   test functions.  Values start at 1234 and increase by one. */
typedef enum {
   TyHF   = 1234,
   TySF   = 1235,
   TyDF   = 1236,
   TyB    = 1237,
   TyH    = 1238,
   TyS    = 1239,
   TyD    = 1240,
   TyNONE = 1241
} LaneTy;
/* A 16-byte value that can be viewed as 16 bytes, 8 halfwords, 4 words,
   2 doublewords, 4 floats or 2 doubles. */
typedef union _V128 {
   UChar  u8[16];
   UShort u16[8];
   UInt   u32[4];
   ULong  u64[2];
   Float  f32[4];
   Double f64[2];
} V128;
static inline UChar randUChar ( void )
{
static UInt seed = 80021;
seed = 1103515245 * seed + 12345;
return (seed >> 17) & 0xFF;
}
/* Builds a 64-bit value from eight successive randUChar() bytes, first
   byte ending up most significant.  'ty' is accepted but not used. */
static ULong randULong ( LaneTy ty )
{
   ULong acc = 0;
   Int bytesLeft = 8;
   while (bytesLeft-- > 0) {
      acc <<= 8;
      acc |= (ULong)(randUChar() & 0xFF);
   }
   return acc;
}
/* Fills *v with random bytes, retrying until every F32x4 lane and every
   F64x2 lane is a normal FP number, so the value is reasonable to use in
   FP test cases.  'ty' is accepted but not used.  Every 256th call prints
   running call/iteration counts to stdout. */
static void randV128 ( /*OUT*/V128* v, LaneTy ty )
{
   static UInt callCount = 0, attemptCount = 0;
   Bool allNormal;
   callCount++;
   do {
      Int k;
      attemptCount++;
      for (k = 0; k < 16; k++)
         v->u8[k] = randUChar();
      allNormal = isnormal(v->f32[0]) && isnormal(v->f32[1])
                  && isnormal(v->f32[2]) && isnormal(v->f32[3])
                  && isnormal(v->f64[0]) && isnormal(v->f64[1]);
   } while (!allNormal);
   if ((callCount & 0xFF) == 0)
      printf("randV128: %u calls, %u iters\n", callCount, attemptCount);
}
/* Prints the 16 bytes of *v as 32 hex digits, byte 15 first and byte 0
   last, with no separators or newline. */
static void showV128 ( V128* v )
{
   Int lane = 16;
   while (lane-- > 0)
      printf("%02x", (Int)v->u8[lane]);
}
/* Prints 'msg' on its own line, then each of the first nBlock entries of
   'block' on a line of its own (space-prefixed, formatted by showV128). */
static void showBlock ( const char* msg, V128* block, Int nBlock )
{
   Int ix = 0;
   printf("%s\n", msg);
   while (ix < nBlock) {
      printf(" ");
      showV128(&block[ix]);
      printf("\n");
      ix++;
   }
}
/* Allocates szB bytes aligned to 16 bytes via memalign().  Asserts that
   the allocation succeeded and that the result really is 16-aligned. */
__attribute__((unused))
static void* memalign16(size_t szB)
{
   void* mem = memalign(16, szB);
   assert(mem);
   assert(((unsigned long)mem & (16-1)) == 0);
   return mem;
}
/* Replicates the low 4 bits of x into all sixteen nibbles of a 64-bit
   result, e.g. 0x5 -> 0x5555555555555555. */
static ULong dup4x16 ( UInt x )
{
   ULong nibble = x & 0xF;
   /* Multiplying a value below 16 by 0x1111...1 copies it into every
      nibble position; no carries can occur between nibbles. */
   return nibble * 0x1111111111111111ULL;
}
// Generate a random double-precision number. About 1 time in 2,
// instead return a special value (+/- 0, +/- 1, +/- Inf, +/- NaN,
// +/- denorm). This ensures that many of the groups of 4 calls here
// will return a special value.
/* Set to True once special_values[] has been filled in. */
static Bool special_values_initted = False;
/* Table of ten special FP values, filled lazily by
   ensure_special_values_initted(). */
static Double special_values[10];
/* Returns -d.  Marked noinline. */
static __attribute__((noinline))
Double negate ( Double d )
{
   Double flipped = -d;
   return flipped;
}
/* Returns x divided by y.  Marked noinline. */
static __attribute__((noinline))
Double divf64 ( Double x, Double y )
{
   Double quotient = x / y;
   return quotient;
}
/* Returns 0.0. */
static __attribute__((noinline))
Double plusZero ( void )
{
   return 0.0;
}
static __attribute__((noinline))
Double minusZero ( void ) { return negate(plusZero()); }
/* Returns 1.0. */
static __attribute__((noinline))
Double plusOne ( void )
{
   return 1.0;
}
static __attribute__((noinline))
Double minusOne ( void ) { return negate(plusOne()); }
/* Returns the result of 1.0 / 0.0 (positive infinity under IEEE 754). */
static __attribute__((noinline))
Double plusInf ( void )
{
   return 1.0 / 0.0;
}
static __attribute__((noinline))
Double minusInf ( void ) { return negate(plusInf()); }
/* Returns plusInf() divided by plusInf(), computed through divf64();
   under IEEE 754 that quotient is a NaN. */
static __attribute__((noinline))
Double plusNaN ( void )
{
   return divf64(
      plusInf(),
      plusInf()
   );
}
static __attribute__((noinline))
Double minusNaN ( void ) { return negate(plusNaN()); }
/* Returns 1.23e-315 / 1e3, i.e. roughly 1.23e-318, which is below the
   smallest normal double and therefore a denormal. */
static __attribute__((noinline))
Double plusDenorm ( void )
{
   return 1.23e-315 / 1e3;
}
static __attribute__((noinline))
Double minusDenorm ( void ) { return negate(plusDenorm()); }
static void ensure_special_values_initted ( void )
{
if (special_values_initted) return;
special_values[0] = plusZero();
special_values[1] = minusZero();
special_values[2] = plusOne();
special_values[3] = minusOne();
special_values[4] = plusInf();
special_values[5] = minusInf();
special_values[6] = plusNaN();
special_values[7] = minusNaN();
special_values[8] = plusDenorm();
special_values[9] = minusDenorm();
special_values_initted = True;
int i;
printf("\n");
for (i = 0; i < 10; i++) {
printf("special value %d = %e\n", i, special_values[i]);
}
printf("\n");
}
static Double randDouble ( void )
{
ensure_special_values_initted();
UChar c = randUChar();
if (c >= 128) {
// return a normal number most of the time.
// 0 .. 2^63-1
ULong u64 = randULong(TyDF);
// -2^62 .. 2^62-1
Long s64 = (Long)u64;
// -2^55 .. 2^55-1
s64 >>= (62-55);
// and now as a float
return (Double)s64;
}
c = randUChar() % 10;
return special_values[c];
}
static Float randFloat ( void )
{
ensure_special_values_initted();
UChar c = randUChar();
if (c >= 128) {
// return a normal number most of the time.
// 0 .. 2^63-1
ULong u64 = randULong(TyDF);
// -2^62 .. 2^62-1
Long s64 = (Long)u64;
// -2^25 .. 2^25-1
s64 >>= (62-25);
// and now as a float
return (Float)s64;
}
c = randUChar() % 10;
return special_values[c];
}
/* Overwrites both f64 lanes of each of the first nBlock entries of
   'block' with randDouble() values, lane 0 before lane 1. */
void randBlock_Doubles ( V128* block, Int nBlock )
{
   Int ix, lane;
   for (ix = 0; ix < nBlock; ix++) {
      for (lane = 0; lane < 2; lane++)
         block[ix].f64[lane] = randDouble();
   }
}
/* Overwrites all four f32 lanes of each of the first nBlock entries of
   'block' with randFloat() values, in ascending lane order. */
void randBlock_Floats ( V128* block, Int nBlock )
{
   Int ix, lane;
   for (ix = 0; ix < nBlock; ix++) {
      for (lane = 0; lane < 4; lane++)
         block[ix].f32[lane] = randFloat();
   }
}
/* ---------------------------------------------------------------- */
/* -- Parameterisable test macros -- */
/* ---------------------------------------------------------------- */
/* Executes _action fifty times in a row, as a single statement. */
#define DO50(_action) \
   do { \
      Int _qq = 0; \
      for (; _qq < 50; _qq++) { \
         _action ; \
      } \
   } while (0)
/* GEN_UNARY_TEST(INSN,SUFFIXD,SUFFIXN) defines
      static void test_<INSN>_<SUFFIXD>_<SUFFIXN> ( LaneTy ty )
   which, ITERS times:
     - fills block[0] and block[1] with randV128 values;
     - zeroes FPSR (x30 is the scratch register for this);
     - loads q7 from block[0] and q8 from block[1];
     - executes "INSN v8.SUFFIXD, v7.SUFFIXN";
     - stores q8 back into block[1] and FPSR into block[2];
     - prints the instruction text, block[0], block[1], and the stored
       FPSR word ANDed with 0xFFFFFF60.
   The destination register v8 is preloaded because it can sometimes be an
   input to the instruction too.
   NOTE(review): the 0x55 memset is overwritten by randV128 for
   block[0..1], so v8 starts out random rather than 0x55..55; only the
   FPSR slot (block[2]) keeps the 0x55 fill.
   NOTE(review): the 0xFFFFFF60 mask presumably hides the cumulative FP
   exception flag bits of FPSR -- confirm against the architecture manual. */
#define GEN_UNARY_TEST(INSN,SUFFIXD,SUFFIXN) \
__attribute__((noinline)) \
static void test_##INSN##_##SUFFIXD##_##SUFFIXN ( LaneTy ty ) { \
Int i; \
for (i = 0; i < ITERS; i++) { \
V128 block[2+1]; \
memset(block, 0x55, sizeof(block)); \
randV128(&block[0], ty); \
randV128(&block[1], ty); \
__asm__ __volatile__( \
"mov x30, #0 ; msr fpsr, x30 ; " \
"ldr q7, [%0, #0] ; " \
"ldr q8, [%0, #16] ; " \
#INSN " v8." #SUFFIXD ", v7." #SUFFIXN " ; " \
"str q8, [%0, #16] ; " \
"mrs x30, fpsr ; str x30, [%0, #32] " \
: : "r"(&block[0]) : "memory", "v7", "v8", "x30" \
); \
printf(#INSN " v8." #SUFFIXD ", v7." #SUFFIXN); \
UInt fpsr = 0xFFFFFF60 & block[2].u32[0]; \
showV128(&block[0]); printf(" "); \
showV128(&block[1]); printf(" fpsr=%08x\n", fpsr); \
} \
}
/* GEN_BINARY_TEST(INSN,SUFFIXD,SUFFIXN,SUFFIXM) defines
      static void test_<INSN>_<SUFFIXD>_<SUFFIXN>_<SUFFIXM> ( LaneTy ty )
   which, ITERS times:
     - fills block[0..2] with randV128 values;
     - zeroes FPSR (x30 is the scratch register for this);
     - loads q7, q8, q9 from block[0], block[1], block[2];
     - executes "INSN v9.SUFFIXD, v7.SUFFIXN, v8.SUFFIXM";
     - stores q9 back into block[2] and FPSR into block[3];
     - prints the instruction text, block[0..2], and the stored FPSR word
       ANDed with 0xFFFFFF60.
   The destination register v9 is preloaded because it can sometimes be an
   input to the instruction too.
   NOTE(review): the 0x55 memset is overwritten by randV128 for
   block[0..2], so v9 starts out random rather than 0x55..55; only the
   FPSR slot (block[3]) keeps the 0x55 fill. */
#define GEN_BINARY_TEST(INSN,SUFFIXD,SUFFIXN,SUFFIXM) \
__attribute__((noinline)) \
static void test_##INSN##_##SUFFIXD##_##SUFFIXN##_##SUFFIXM ( LaneTy ty ) { \
Int i; \
for (i = 0; i < ITERS; i++) { \
V128 block[3+1]; \
memset(block, 0x55, sizeof(block)); \
randV128(&block[0], ty); \
randV128(&block[1], ty); \
randV128(&block[2], ty); \
__asm__ __volatile__( \
"mov x30, #0 ; msr fpsr, x30 ; " \
"ldr q7, [%0, #0] ; " \
"ldr q8, [%0, #16] ; " \
"ldr q9, [%0, #32] ; " \
#INSN " v9." #SUFFIXD ", v7." #SUFFIXN ", v8." #SUFFIXM " ; " \
"str q9, [%0, #32] ; " \
"mrs x30, fpsr ; str x30, [%0, #48] " \
: : "r"(&block[0]) : "memory", "v7", "v8", "v9", "x30" \
); \
printf(#INSN " v9." #SUFFIXD \
", v7." #SUFFIXN ", v8." #SUFFIXM " "); \
UInt fpsr = 0xFFFFFF60 & block[3].u32[0]; \
showV128(&block[0]); printf(" "); \
showV128(&block[1]); printf(" "); \
showV128(&block[2]); printf(" fpsr=%08x\n", fpsr); \
} \
}
/* GEN_SHIFT_TEST(INSN,SUFFIXD,SUFFIXN,AMOUNT) defines
      static void test_<INSN>_<SUFFIXD>_<SUFFIXN>_<AMOUNT> ( LaneTy ty )
   which, ITERS times:
     - fills block[0] and block[1] with randV128 values;
     - zeroes FPSR (x30 is the scratch register for this);
     - loads q7 from block[0] and q8 from block[1];
     - executes "INSN v8.SUFFIXD, v7.SUFFIXN, #AMOUNT";
     - stores q8 back into block[1] and FPSR into block[2];
     - prints the instruction text, block[0], block[1], and the stored
       FPSR word ANDed with 0xFFFFFF60.
   The destination register v8 is preloaded because it can sometimes be an
   input to the instruction too.
   NOTE(review): the 0x55 memset is overwritten by randV128 for
   block[0..1], so v8 starts out random rather than 0x55..55; only the
   FPSR slot (block[2]) keeps the 0x55 fill. */
#define GEN_SHIFT_TEST(INSN,SUFFIXD,SUFFIXN,AMOUNT) \
__attribute__((noinline)) \
static void test_##INSN##_##SUFFIXD##_##SUFFIXN##_##AMOUNT ( LaneTy ty ) { \
Int i; \
for (i = 0; i < ITERS; i++) { \
V128 block[2+1]; \
memset(block, 0x55, sizeof(block)); \
randV128(&block[0], ty); \
randV128(&block[1], ty); \
__asm__ __volatile__( \
"mov x30, #0 ; msr fpsr, x30 ; " \
"ldr q7, [%0, #0] ; " \
"ldr q8, [%0, #16] ; " \
#INSN " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT " ; " \
"str q8, [%0, #16] ; " \
"mrs x30, fpsr ; str x30, [%0, #32] " \
: : "r"(&block[0]) : "memory", "v7", "v8", "x30" \
); \
printf(#INSN " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT " "); \
UInt fpsr = 0xFFFFFF60 & block[2].u32[0]; \
showV128(&block[0]); printf(" "); \
showV128(&block[1]); printf(" fpsr=%08x\n", fpsr); \
} \
}
/* Generate a test that involves one integer reg and one vector reg,
   with no bias as to which is input and which is output.
   GEN_ONEINT_ONEVEC_TEST(TESTNAME,INSN,INTREGNO,VECREGNO) defines
      static void test_<TESTNAME> ( LaneTy ty )
   INSN is a string literal holding the full instruction text.  Each of
   the ITERS iterations:
     - fills block[0..3] with randV128 values;
     - zeroes FPSR, using x30 as scratch (hence the assertion that
       INTREGNO is not 30);
     - loads q<VECREGNO> from block[0] and x<INTREGNO> from the first
       8 bytes of block[1];
     - executes INSN;
     - stores q<VECREGNO> into block[2], x<INTREGNO> into the first
       8 bytes of block[3], and FPSR into block[4];
     - prints INSN, block[0..3], and the stored FPSR word ANDed with
       0xFFFFFF60. */
#define GEN_ONEINT_ONEVEC_TEST(TESTNAME,INSN,INTREGNO,VECREGNO) \
__attribute__((noinline)) \
static void test_##TESTNAME ( LaneTy ty ) { \
Int i; \
assert(INTREGNO != 30); \
for (i = 0; i < ITERS; i++) { \
V128 block[4+1]; \
memset(block, 0x55, sizeof(block)); \
randV128(&block[0], ty); \
randV128(&block[1], ty); \
randV128(&block[2], ty); \
randV128(&block[3], ty); \
__asm__ __volatile__( \
"mov x30, #0 ; msr fpsr, x30 ; " \
"ldr q"#VECREGNO", [%0, #0] ; " \
"ldr x"#INTREGNO", [%0, #16] ; " \
INSN " ; " \
"str q"#VECREGNO", [%0, #32] ; " \
"str x"#INTREGNO", [%0, #48] ; " \
"mrs x30, fpsr ; str x30, [%0, #64] " \
: : "r"(&block[0]) : "memory", "v"#VECREGNO, "x"#INTREGNO, "x30" \
); \
printf(INSN " "); \
UInt fpsr = 0xFFFFFF60 & block[4].u32[0]; \
showV128(&block[0]); printf(" "); \
showV128(&block[1]); printf(" "); \
showV128(&block[2]); printf(" "); \
showV128(&block[3]); printf(" fpsr=%08x\n", fpsr); \
} \
}
/* Generate a test that involves two vector regs,
   with no bias as to which is input and which is output.
   It's OK for INSN to use x10 as scratch (it is in the clobber list).
   GEN_TWOVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO) defines
      static void test_<TESTNAME> ( LaneTy ty )
   INSN is a string literal holding the full instruction text.  Each of
   the ITERS iterations:
     - fills block[0..3] with randV128 values;
     - zeroes FPSR, using x30 as scratch;
     - loads q<VECREG1NO> from block[0] and q<VECREG2NO> from block[1];
     - executes INSN;
     - stores q<VECREG1NO> into block[2], q<VECREG2NO> into block[3],
       and FPSR into block[4];
     - prints INSN, block[0..3], and the stored FPSR word ANDed with
       0xFFFFFF60. */
#define GEN_TWOVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO) \
__attribute__((noinline)) \
static void test_##TESTNAME ( LaneTy ty ) { \
Int i; \
for (i = 0; i < ITERS; i++) { \
V128 block[4+1]; \
memset(block, 0x55, sizeof(block)); \
randV128(&block[0], ty); \
randV128(&block[1], ty); \
randV128(&block[2], ty); \
randV128(&block[3], ty); \
__asm__ __volatile__( \
"mov x30, #0 ; msr fpsr, x30 ; " \
"ldr q"#VECREG1NO", [%0, #0] ; " \
"ldr q"#VECREG2NO", [%0, #16] ; " \
INSN " ; " \
"str q"#VECREG1NO", [%0, #32] ; " \
"str q"#VECREG2NO", [%0, #48] ; " \
"mrs x30, fpsr ; str x30, [%0, #64] " \
: : "r"(&block[0]) \
: "memory", "v"#VECREG1NO, "v"#VECREG2NO, "x10", "x30" \
); \
printf(INSN " "); \
UInt fpsr = 0xFFFFFF60 & block[4].u32[0]; \
showV128(&block[0]); printf(" "); \
showV128(&block[1]); printf(" "); \
showV128(&block[2]); printf(" "); \
showV128(&block[3]); printf(" fpsr=%08x\n", fpsr); \
} \
}
/* Generate a test that involves three vector regs,
   with no bias as to which is input and which is output.  It's also OK
   for INSN to use v16, v17, v18 as scratch (they are in the clobber list).
   GEN_THREEVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO) defines
      static void test_<TESTNAME> ( LaneTy ty )
   INSN is a string literal holding the full instruction text.  Each of
   the ITERS iterations:
     - fills block[0..5] with randV128 values;
     - zeroes FPSR, using x30 as scratch;
     - loads the three vector registers from block[0], block[1], block[2];
     - executes INSN;
     - stores the three registers into block[3], block[4], block[5] and
       FPSR into block[6];
     - prints INSN, block[0..5], and the stored FPSR word ANDed with
       0xFFFFFF60. */
#define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO) \
__attribute__((noinline)) \
static void test_##TESTNAME ( LaneTy ty ) { \
Int i; \
for (i = 0; i < ITERS; i++) { \
V128 block[6+1]; \
memset(block, 0x55, sizeof(block)); \
randV128(&block[0], ty); \
randV128(&block[1], ty); \
randV128(&block[2], ty); \
randV128(&block[3], ty); \
randV128(&block[4], ty); \
randV128(&block[5], ty); \
__asm__ __volatile__( \
"mov x30, #0 ; msr fpsr, x30 ; " \
"ldr q"#VECREG1NO", [%0, #0] ; " \
"ldr q"#VECREG2NO", [%0, #16] ; " \
"ldr q"#VECREG3NO", [%0, #32] ; " \
INSN " ; " \
"str q"#VECREG1NO", [%0, #48] ; " \
"str q"#VECREG2NO", [%0, #64] ; " \
"str q"#VECREG3NO", [%0, #80] ; " \
"mrs x30, fpsr ; str x30, [%0, #96] " \
: : "r"(&block[0]) \
: "memory", "v"#VECREG1NO, "v"#VECREG2NO, "v"#VECREG3NO, \
"v16", "v17", "v18", "x30" \
); \
printf(INSN " "); \
UInt fpsr = 0xFFFFFF60 & block[6].u32[0]; \
showV128(&block[0]); printf(" "); \
showV128(&block[1]); printf(" "); \
showV128(&block[2]); printf(" "); \
showV128(&block[3]); printf(" "); \
showV128(&block[4]); printf(" "); \
showV128(&block[5]); printf(" fpsr=%08x\n", fpsr); \
} \
}
/* Generate a test that involves four vector regs,
   with no bias as to which is input and which is output.  It's also OK
   for INSN to use v16, v17, v18 as scratch (they are in the clobber list).
   GEN_FOURVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO,VECREG4NO)
   defines
      static void test_<TESTNAME> ( LaneTy ty )
   INSN is a string literal holding the full instruction text.  Each of
   the ITERS iterations:
     - fills block[0..7] with randV128 values;
     - zeroes FPSR, using x30 as scratch;
     - loads the four vector registers from block[0..3];
     - executes INSN;
     - stores the four registers into block[4..7] and FPSR into block[8];
     - prints INSN, block[0..7], and the stored FPSR word ANDed with
       0xFFFFFF60. */
#define GEN_FOURVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO, \
VECREG3NO,VECREG4NO) \
__attribute__((noinline)) \
static void test_##TESTNAME ( LaneTy ty ) { \
Int i; \
for (i = 0; i < ITERS; i++) { \
V128 block[8+1]; \
memset(block, 0x55, sizeof(block)); \
randV128(&block[0], ty); \
randV128(&block[1], ty); \
randV128(&block[2], ty); \
randV128(&block[3], ty); \
randV128(&block[4], ty); \
randV128(&block[5], ty); \
randV128(&block[6], ty); \
randV128(&block[7], ty); \
__asm__ __volatile__( \
"mov x30, #0 ; msr fpsr, x30 ; " \
"ldr q"#VECREG1NO", [%0, #0] ; " \
"ldr q"#VECREG2NO", [%0, #16] ; " \
"ldr q"#VECREG3NO", [%0, #32] ; " \
"ldr q"#VECREG4NO", [%0, #48] ; " \
INSN " ; " \
"str q"#VECREG1NO", [%0, #64] ; " \
"str q"#VECREG2NO", [%0, #80] ; " \
"str q"#VECREG3NO", [%0, #96] ; " \
"str q"#VECREG4NO", [%0, #112] ; " \
"mrs x30, fpsr ; str x30, [%0, #128] " \
: : "r"(&block[0]) \
: "memory", "v"#VECREG1NO, "v"#VECREG2NO, \
"v"#VECREG3NO, "v"#VECREG4NO, \
"v16", "v17", "v18", "x30" \
); \
printf(INSN " "); \
UInt fpsr = 0xFFFFFF60 & block[8].u32[0]; \
showV128(&block[0]); printf(" "); \
showV128(&block[1]); printf(" "); \
showV128(&block[2]); printf(" "); \
showV128(&block[3]); printf(" "); \
showV128(&block[4]); printf(" "); \
showV128(&block[5]); printf(" "); \
showV128(&block[6]); printf(" "); \
showV128(&block[7]); printf(" fpsr=%08x\n", fpsr); \
} \
}
/* ---------------------------------------------------------------- */
/* -- Test functions and non-parameterisable test macros -- */
/* ---------------------------------------------------------------- */
/* Runs UMINV on random inputs for the 4s, 8h, 4h, 16b and 8b arrangements,
   ten times each, printing the input (block[0]) and the full q8 result
   (block[1]) for every run.  q8 is not loaded before the instruction.
   The printed labels are program output and are kept exactly as they
   were (including "v8" for the 4s form, whose instruction writes s8). */
void test_UMINV ( void )
{
   V128 block[2];
   int iter;

   /* Ten runs of one UMINV form: LANETY is passed to randV128, INSN_ASM is
      the instruction text (with trailing " ; "), LABEL is the text printed
      in front of the two vectors. */
#define TEST_UMINV_GROUP(LANETY, INSN_ASM, LABEL) \
   for (iter = 0; iter < 10; iter++) { \
      memset(&block, 0x55, sizeof(block)); \
      randV128(&block[0], LANETY); \
      randV128(&block[1], LANETY); \
      __asm__ __volatile__( \
         "ldr q7, [%0, #0] ; " \
         INSN_ASM \
         "str q8, [%0, #16] " \
         : : "r"(&block[0]) : "memory", "v7", "v8" \
      ); \
      printf(LABEL); \
      showV128(&block[0]); printf(" "); \
      showV128(&block[1]); printf("\n"); \
   }

   TEST_UMINV_GROUP(TyS, "uminv s8, v7.4s ; ", "UMINV v8, v7.4s ")
   TEST_UMINV_GROUP(TyH, "uminv h8, v7.8h ; ", "UMINV h8, v7.8h ")
   TEST_UMINV_GROUP(TyH, "uminv h8, v7.4h ; ", "UMINV h8, v7.4h ")
   TEST_UMINV_GROUP(TyB, "uminv b8, v7.16b ; ", "UMINV b8, v7.16b ")
   TEST_UMINV_GROUP(TyB, "uminv b8, v7.8b ; ", "UMINV b8, v7.8b ")

#undef TEST_UMINV_GROUP
}
/* Runs UMAXV on random inputs for the 4s, 8h, 4h, 16b and 8b arrangements,
   ten times each, printing the input (block[0]) and the full q8 result
   (block[1]) for every run.  q8 is not loaded before the instruction.
   The printed labels are program output and are kept exactly as they
   were (including "v8" for the 4s form, whose instruction writes s8). */
void test_UMAXV ( void )
{
   V128 block[2];
   int iter;

   /* Ten runs of one UMAXV form: LANETY is passed to randV128, INSN_ASM is
      the instruction text (with trailing " ; "), LABEL is the text printed
      in front of the two vectors. */
#define TEST_UMAXV_GROUP(LANETY, INSN_ASM, LABEL) \
   for (iter = 0; iter < 10; iter++) { \
      memset(&block, 0x55, sizeof(block)); \
      randV128(&block[0], LANETY); \
      randV128(&block[1], LANETY); \
      __asm__ __volatile__( \
         "ldr q7, [%0, #0] ; " \
         INSN_ASM \
         "str q8, [%0, #16] " \
         : : "r"(&block[0]) : "memory", "v7", "v8" \
      ); \
      printf(LABEL); \
      showV128(&block[0]); printf(" "); \
      showV128(&block[1]); printf("\n"); \
   }

   TEST_UMAXV_GROUP(TyS, "umaxv s8, v7.4s ; ", "UMAXV v8, v7.4s ")
   TEST_UMAXV_GROUP(TyH, "umaxv h8, v7.8h ; ", "UMAXV h8, v7.8h ")
   TEST_UMAXV_GROUP(TyH, "umaxv h8, v7.4h ; ", "UMAXV h8, v7.4h ")
   TEST_UMAXV_GROUP(TyB, "umaxv b8, v7.16b ; ", "UMAXV b8, v7.16b ")
   TEST_UMAXV_GROUP(TyB, "umaxv b8, v7.8b ; ", "UMAXV b8, v7.8b ")

#undef TEST_UMAXV_GROUP
}
/* Exercises INS (general register to vector element) for d[0..1],
   s[0..3], h[0..7], b[0] and b[15].  For every case the vector register
   starts as the 0x55 fill, a random 64-bit value is loaded into x19, the
   element is inserted, and the original vector, the inserted value and
   the resulting vector are printed.
   The printed labels are program output and are kept exactly as they
   were (they say "x19" even where the instruction uses w19, and the
   first one says "u64[0]" for the d[0] form). */
void test_INS_general ( void )
{
   V128 block[3];

   /* One INS case: LANETY is passed to randULong, INSN_ASM is the
      instruction text (with trailing " ; "), LABEL is the text printed in
      front of the values. */
#define TEST_INS_CASE(LANETY, INSN_ASM, LABEL) \
   do { \
      memset(&block, 0x55, sizeof(block)); \
      block[1].u64[0] = randULong(LANETY); \
      __asm__ __volatile__( \
         "ldr q7, [%0, #0] ; " \
         "ldr x19, [%0, #16] ; " \
         INSN_ASM \
         "str q7, [%0, #32] " \
         : : "r"(&block[0]) : "memory", "x19", "v7" \
      ); \
      printf(LABEL); \
      showV128(&block[0]); printf(" %016llx ", block[1].u64[0]); \
      showV128(&block[2]); printf("\n"); \
   } while (0)

   /* -- D[0..1] -- */
   TEST_INS_CASE(TyD, "ins v7.d[0], x19 ; ", "INS v7.u64[0],x19 ");
   TEST_INS_CASE(TyD, "ins v7.d[1], x19 ; ", "INS v7.d[1],x19 ");

   /* -- S[0..3] -- */
   TEST_INS_CASE(TyS, "ins v7.s[0], w19 ; ", "INS v7.s[0],x19 ");
   TEST_INS_CASE(TyS, "ins v7.s[1], w19 ; ", "INS v7.s[1],x19 ");
   TEST_INS_CASE(TyS, "ins v7.s[2], w19 ; ", "INS v7.s[2],x19 ");
   TEST_INS_CASE(TyS, "ins v7.s[3], w19 ; ", "INS v7.s[3],x19 ");

   /* -- H[0..7] -- */
   TEST_INS_CASE(TyH, "ins v7.h[0], w19 ; ", "INS v7.h[0],x19 ");
   TEST_INS_CASE(TyH, "ins v7.h[1], w19 ; ", "INS v7.h[1],x19 ");
   TEST_INS_CASE(TyH, "ins v7.h[2], w19 ; ", "INS v7.h[2],x19 ");
   TEST_INS_CASE(TyH, "ins v7.h[3], w19 ; ", "INS v7.h[3],x19 ");
   TEST_INS_CASE(TyH, "ins v7.h[4], w19 ; ", "INS v7.h[4],x19 ");
   TEST_INS_CASE(TyH, "ins v7.h[5], w19 ; ", "INS v7.h[5],x19 ");
   TEST_INS_CASE(TyH, "ins v7.h[6], w19 ; ", "INS v7.h[6],x19 ");
   TEST_INS_CASE(TyH, "ins v7.h[7], w19 ; ", "INS v7.h[7],x19 ");

   /* -- B[0,15] -- */
   TEST_INS_CASE(TyB, "ins v7.b[0], w19 ; ", "INS v7.b[0],x19 ");
   TEST_INS_CASE(TyB, "ins v7.b[15], w19 ; ", "INS v7.b[15],x19 ");

#undef TEST_INS_CASE
}
/* Runs SMINV on random inputs for the 4s, 8h, 4h, 16b and 8b arrangements,
   ten times each, printing the input (block[0]) and the full q8 result
   (block[1]) for every run.  q8 is not loaded before the instruction.
   The printed labels are program output and are kept exactly as they
   were (including "v8" for the 4s form, whose instruction writes s8). */
void test_SMINV ( void )
{
   V128 block[2];
   int iter;

   /* Ten runs of one SMINV form: LANETY is passed to randV128, INSN_ASM is
      the instruction text (with trailing " ; "), LABEL is the text printed
      in front of the two vectors. */
#define TEST_SMINV_GROUP(LANETY, INSN_ASM, LABEL) \
   for (iter = 0; iter < 10; iter++) { \
      memset(&block, 0x55, sizeof(block)); \
      randV128(&block[0], LANETY); \
      randV128(&block[1], LANETY); \
      __asm__ __volatile__( \
         "ldr q7, [%0, #0] ; " \
         INSN_ASM \
         "str q8, [%0, #16] " \
         : : "r"(&block[0]) : "memory", "v7", "v8" \
      ); \
      printf(LABEL); \
      showV128(&block[0]); printf(" "); \
      showV128(&block[1]); printf("\n"); \
   }

   TEST_SMINV_GROUP(TyS, "sminv s8, v7.4s ; ", "SMINV v8, v7.4s ")
   TEST_SMINV_GROUP(TyH, "sminv h8, v7.8h ; ", "SMINV h8, v7.8h ")
   TEST_SMINV_GROUP(TyH, "sminv h8, v7.4h ; ", "SMINV h8, v7.4h ")
   TEST_SMINV_GROUP(TyB, "sminv b8, v7.16b ; ", "SMINV b8, v7.16b ")
   TEST_SMINV_GROUP(TyB, "sminv b8, v7.8b ; ", "SMINV b8, v7.8b ")

#undef TEST_SMINV_GROUP
}
/* Runs SMAXV on random inputs for the 4s, 8h, 4h, 16b and 8b arrangements,
   ten times each, printing the input (block[0]) and the full q8 result
   (block[1]) for every run.  q8 is not loaded before the instruction.
   The printed labels are program output and are kept exactly as they
   were (including "v8" for the 4s form, whose instruction writes s8). */
void test_SMAXV ( void )
{
   V128 block[2];
   int iter;

   /* Ten runs of one SMAXV form: LANETY is passed to randV128, INSN_ASM is
      the instruction text (with trailing " ; "), LABEL is the text printed
      in front of the two vectors. */
#define TEST_SMAXV_GROUP(LANETY, INSN_ASM, LABEL) \
   for (iter = 0; iter < 10; iter++) { \
      memset(&block, 0x55, sizeof(block)); \
      randV128(&block[0], LANETY); \
      randV128(&block[1], LANETY); \
      __asm__ __volatile__( \
         "ldr q7, [%0, #0] ; " \
         INSN_ASM \
         "str q8, [%0, #16] " \
         : : "r"(&block[0]) : "memory", "v7", "v8" \
      ); \
      printf(LABEL); \
      showV128(&block[0]); printf(" "); \
      showV128(&block[1]); printf("\n"); \
   }

   TEST_SMAXV_GROUP(TyS, "smaxv s8, v7.4s ; ", "SMAXV v8, v7.4s ")
   TEST_SMAXV_GROUP(TyH, "smaxv h8, v7.8h ; ", "SMAXV h8, v7.8h ")
   TEST_SMAXV_GROUP(TyH, "smaxv h8, v7.4h ; ", "SMAXV h8, v7.4h ")
   TEST_SMAXV_GROUP(TyB, "smaxv b8, v7.16b ; ", "SMAXV b8, v7.16b ")
   TEST_SMAXV_GROUP(TyB, "smaxv b8, v7.8b ; ", "SMAXV b8, v7.8b ")

#undef TEST_SMAXV_GROUP
}
//======== FCCMP_D ========//
/* Expands to the definition of test_FCCMP_D_D_0xF_EQ(), which:
     - fills block[0..2] via randBlock_Doubles and sets block[3] to the
       dup4x16(0x5) / dup4x16(0xA) patterns;
     - prints the block ("before");
     - loads the low 64 bits of block[3] into x9 and writes x9 to NZCV;
     - loads q29, q11, q9 from block[0], block[1], block[2];
     - executes "fccmp d29, d11, #0xf, eq";
     - reads NZCV back into the low 64 bits of block[3] and stores the
       three registers back to block[0..2];
     - prints the block ("after"). */
#define GEN_test_FCCMP_D_D_0xF_EQ \
__attribute__((noinline)) static void test_FCCMP_D_D_0xF_EQ ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCCMP_D_D_0xF_EQ before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fccmp d29, d11, #0xf, eq; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCCMP_D_D_0xF_EQ after", &block[0], 4); \
printf("\n"); \
}
/* Expands to the definition of test_FCCMP_D_D_0xF_NE(), which:
     - fills block[0..2] via randBlock_Doubles and sets block[3] to the
       dup4x16(0x5) / dup4x16(0xA) patterns;
     - prints the block ("before");
     - loads the low 64 bits of block[3] into x9 and writes x9 to NZCV;
     - loads q29, q11, q9 from block[0], block[1], block[2];
     - executes "fccmp d29, d11, #0xf, ne";
     - reads NZCV back into the low 64 bits of block[3] and stores the
       three registers back to block[0..2];
     - prints the block ("after"). */
#define GEN_test_FCCMP_D_D_0xF_NE \
__attribute__((noinline)) static void test_FCCMP_D_D_0xF_NE ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCCMP_D_D_0xF_NE before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fccmp d29, d11, #0xf, ne; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCCMP_D_D_0xF_NE after", &block[0], 4); \
printf("\n"); \
}
/* Expands to the definition of test_FCCMP_D_D_0x0_EQ(), which:
     - fills block[0..2] via randBlock_Doubles and sets block[3] to the
       dup4x16(0x5) / dup4x16(0xA) patterns;
     - prints the block ("before");
     - loads the low 64 bits of block[3] into x9 and writes x9 to NZCV;
     - loads q29, q11, q9 from block[0], block[1], block[2];
     - executes "fccmp d29, d11, #0x0, eq";
     - reads NZCV back into the low 64 bits of block[3] and stores the
       three registers back to block[0..2];
     - prints the block ("after"). */
#define GEN_test_FCCMP_D_D_0x0_EQ \
__attribute__((noinline)) static void test_FCCMP_D_D_0x0_EQ ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCCMP_D_D_0x0_EQ before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fccmp d29, d11, #0x0, eq; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCCMP_D_D_0x0_EQ after", &block[0], 4); \
printf("\n"); \
}
/* Expands to the definition of test_FCCMP_D_D_0x0_NE(), which:
     - fills block[0..2] via randBlock_Doubles and sets block[3] to the
       dup4x16(0x5) / dup4x16(0xA) patterns;
     - prints the block ("before");
     - loads the low 64 bits of block[3] into x9 and writes x9 to NZCV;
     - loads q29, q11, q9 from block[0], block[1], block[2];
     - executes "fccmp d29, d11, #0x0, ne";
     - reads NZCV back into the low 64 bits of block[3] and stores the
       three registers back to block[0..2];
     - prints the block ("after"). */
#define GEN_test_FCCMP_D_D_0x0_NE \
__attribute__((noinline)) static void test_FCCMP_D_D_0x0_NE ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCCMP_D_D_0x0_NE before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fccmp d29, d11, #0x0, ne; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCCMP_D_D_0x0_NE after", &block[0], 4); \
printf("\n"); \
}
//======== FCCMP_S ========//
/* Expands to the definition of test_FCCMP_S_S_0xF_EQ(), which:
     - fills block[0..2] via randBlock_Floats and sets block[3] to the
       dup4x16(0x5) / dup4x16(0xA) patterns;
     - prints the block ("before");
     - loads the low 64 bits of block[3] into x9 and writes x9 to NZCV;
     - loads q29, q11, q9 from block[0], block[1], block[2];
     - executes "fccmp s29, s11, #0xf, eq";
     - reads NZCV back into the low 64 bits of block[3] and stores the
       three registers back to block[0..2];
     - prints the block ("after"). */
#define GEN_test_FCCMP_S_S_0xF_EQ \
__attribute__((noinline)) static void test_FCCMP_S_S_0xF_EQ ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCCMP_S_S_0xF_EQ before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fccmp s29, s11, #0xf, eq; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCCMP_S_S_0xF_EQ after", &block[0], 4); \
printf("\n"); \
}
/* Expands to the definition of test_FCCMP_S_S_0xF_NE(), which:
     - fills block[0..2] via randBlock_Floats and sets block[3] to the
       dup4x16(0x5) / dup4x16(0xA) patterns;
     - prints the block ("before");
     - loads the low 64 bits of block[3] into x9 and writes x9 to NZCV;
     - loads q29, q11, q9 from block[0], block[1], block[2];
     - executes "fccmp s29, s11, #0xf, ne";
     - reads NZCV back into the low 64 bits of block[3] and stores the
       three registers back to block[0..2];
     - prints the block ("after"). */
#define GEN_test_FCCMP_S_S_0xF_NE \
__attribute__((noinline)) static void test_FCCMP_S_S_0xF_NE ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCCMP_S_S_0xF_NE before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fccmp s29, s11, #0xf, ne; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCCMP_S_S_0xF_NE after", &block[0], 4); \
printf("\n"); \
}
/* Expands to the definition of test_FCCMP_S_S_0x0_EQ(), which:
     - fills block[0..2] via randBlock_Floats and sets block[3] to the
       dup4x16(0x5) / dup4x16(0xA) patterns;
     - prints the block ("before");
     - loads the low 64 bits of block[3] into x9 and writes x9 to NZCV;
     - loads q29, q11, q9 from block[0], block[1], block[2];
     - executes "fccmp s29, s11, #0x0, eq";
     - reads NZCV back into the low 64 bits of block[3] and stores the
       three registers back to block[0..2];
     - prints the block ("after"). */
#define GEN_test_FCCMP_S_S_0x0_EQ \
__attribute__((noinline)) static void test_FCCMP_S_S_0x0_EQ ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCCMP_S_S_0x0_EQ before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fccmp s29, s11, #0x0, eq; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCCMP_S_S_0x0_EQ after", &block[0], 4); \
printf("\n"); \
}
/* Expands to the definition of test_FCCMP_S_S_0x0_NE(), which:
     - fills block[0..2] via randBlock_Floats and sets block[3] to the
       dup4x16(0x5) / dup4x16(0xA) patterns;
     - prints the block ("before");
     - loads the low 64 bits of block[3] into x9 and writes x9 to NZCV;
     - loads q29, q11, q9 from block[0], block[1], block[2];
     - executes "fccmp s29, s11, #0x0, ne";
     - reads NZCV back into the low 64 bits of block[3] and stores the
       three registers back to block[0..2];
     - prints the block ("after"). */
#define GEN_test_FCCMP_S_S_0x0_NE \
__attribute__((noinline)) static void test_FCCMP_S_S_0x0_NE ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCCMP_S_S_0x0_NE before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fccmp s29, s11, #0x0, ne; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCCMP_S_S_0x0_NE after", &block[0], 4); \
printf("\n"); \
}
//======== FCCMPE_D ========//
/* Generates test_FCCMPE_D_D_0xF_EQ(), a test of "fccmpe d29, d11, #0xf, eq"
   (the signalling form of the conditional FP compare).
   block[0..2] are filled by randBlock_Doubles and loaded into q29/q11/q9
   (q9 is not an operand of the instruction).  The low 64 bits of block[3]
   are written to NZCV first, so they decide whether the "eq" condition
   holds.  NZCV is read back into block[3], the registers are stored back,
   and the block is printed before and after. */
#define GEN_test_FCCMPE_D_D_0xF_EQ \
__attribute__((noinline)) static void test_FCCMPE_D_D_0xF_EQ ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCCMPE_D_D_0xF_EQ before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fccmpe d29, d11, #0xf, eq; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCCMPE_D_D_0xF_EQ after", &block[0], 4); \
printf("\n"); \
}
/* Generates test_FCCMPE_D_D_0xF_NE(), a test of "fccmpe d29, d11, #0xf, ne"
   (the signalling form of the conditional FP compare).
   block[0..2] are filled by randBlock_Doubles and loaded into q29/q11/q9
   (q9 is not an operand of the instruction).  The low 64 bits of block[3]
   are written to NZCV first, so they decide whether the "ne" condition
   holds.  NZCV is read back into block[3], the registers are stored back,
   and the block is printed before and after. */
#define GEN_test_FCCMPE_D_D_0xF_NE \
__attribute__((noinline)) static void test_FCCMPE_D_D_0xF_NE ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCCMPE_D_D_0xF_NE before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fccmpe d29, d11, #0xf, ne; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCCMPE_D_D_0xF_NE after", &block[0], 4); \
printf("\n"); \
}
/* Generates test_FCCMPE_D_D_0x0_EQ(), a test of "fccmpe d29, d11, #0x0, eq"
   (the signalling form of the conditional FP compare).
   block[0..2] are filled by randBlock_Doubles and loaded into q29/q11/q9
   (q9 is not an operand of the instruction).  The low 64 bits of block[3]
   are written to NZCV first, so they decide whether the "eq" condition
   holds.  NZCV is read back into block[3], the registers are stored back,
   and the block is printed before and after. */
#define GEN_test_FCCMPE_D_D_0x0_EQ \
__attribute__((noinline)) static void test_FCCMPE_D_D_0x0_EQ ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCCMPE_D_D_0x0_EQ before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fccmpe d29, d11, #0x0, eq; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCCMPE_D_D_0x0_EQ after", &block[0], 4); \
printf("\n"); \
}
/* Generates test_FCCMPE_D_D_0x0_NE(), a test of "fccmpe d29, d11, #0x0, ne"
   (the signalling form of the conditional FP compare).
   block[0..2] are filled by randBlock_Doubles and loaded into q29/q11/q9
   (q9 is not an operand of the instruction).  The low 64 bits of block[3]
   are written to NZCV first, so they decide whether the "ne" condition
   holds.  NZCV is read back into block[3], the registers are stored back,
   and the block is printed before and after. */
#define GEN_test_FCCMPE_D_D_0x0_NE \
__attribute__((noinline)) static void test_FCCMPE_D_D_0x0_NE ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCCMPE_D_D_0x0_NE before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fccmpe d29, d11, #0x0, ne; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCCMPE_D_D_0x0_NE after", &block[0], 4); \
printf("\n"); \
}
//======== FCCMPE_S ========//
/* Generates test_FCCMPE_S_S_0xF_EQ(), a test of "fccmpe s29, s11, #0xf, eq"
   (the signalling form of the conditional FP compare).
   block[0..2] are filled by randBlock_Floats and loaded into q29/q11/q9
   (q9 is not an operand of the instruction).  The low 64 bits of block[3]
   are written to NZCV first, so they decide whether the "eq" condition
   holds.  NZCV is read back into block[3], the registers are stored back,
   and the block is printed before and after.
   Both dump labels carry the FCCMPE name so the "before" and "after"
   sections of the output pair up with this test rather than with the
   FCCMP_S_S_0xF_EQ test.
   NOTE(review): if this program's output is compared against a recorded
   baseline, that baseline needs the corrected "before" label too. */
#define GEN_test_FCCMPE_S_S_0xF_EQ \
__attribute__((noinline)) static void test_FCCMPE_S_S_0xF_EQ ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCCMPE_S_S_0xF_EQ before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fccmpe s29, s11, #0xf, eq; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCCMPE_S_S_0xF_EQ after", &block[0], 4); \
printf("\n"); \
}
/* Generates test_FCCMPE_S_S_0xF_NE(), a test of "fccmpe s29, s11, #0xf, ne"
   (the signalling form of the conditional FP compare).
   block[0..2] are filled by randBlock_Floats and loaded into q29/q11/q9
   (q9 is not an operand of the instruction).  The low 64 bits of block[3]
   are written to NZCV first, so they decide whether the "ne" condition
   holds.  NZCV is read back into block[3], the registers are stored back,
   and the block is printed before and after. */
#define GEN_test_FCCMPE_S_S_0xF_NE \
__attribute__((noinline)) static void test_FCCMPE_S_S_0xF_NE ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCCMPE_S_S_0xF_NE before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fccmpe s29, s11, #0xf, ne; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCCMPE_S_S_0xF_NE after", &block[0], 4); \
printf("\n"); \
}
/* Generates test_FCCMPE_S_S_0x0_EQ(), a test of "fccmpe s29, s11, #0x0, eq"
   (the signalling form of the conditional FP compare).
   block[0..2] are filled by randBlock_Floats and loaded into q29/q11/q9
   (q9 is not an operand of the instruction).  The low 64 bits of block[3]
   are written to NZCV first, so they decide whether the "eq" condition
   holds.  NZCV is read back into block[3], the registers are stored back,
   and the block is printed before and after.
   Both dump labels carry the FCCMPE name so the "before" and "after"
   sections of the output pair up with this test rather than with the
   FCCMP_S_S_0x0_EQ test.
   NOTE(review): if this program's output is compared against a recorded
   baseline, that baseline needs the corrected "before" label too. */
#define GEN_test_FCCMPE_S_S_0x0_EQ \
__attribute__((noinline)) static void test_FCCMPE_S_S_0x0_EQ ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCCMPE_S_S_0x0_EQ before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fccmpe s29, s11, #0x0, eq; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCCMPE_S_S_0x0_EQ after", &block[0], 4); \
printf("\n"); \
}
/* Generates test_FCCMPE_S_S_0x0_NE(), a test of "fccmpe s29, s11, #0x0, ne"
   (the signalling form of the conditional FP compare).
   block[0..2] are filled by randBlock_Floats and loaded into q29/q11/q9
   (q9 is not an operand of the instruction).  The low 64 bits of block[3]
   are written to NZCV first, so they decide whether the "ne" condition
   holds.  NZCV is read back into block[3], the registers are stored back,
   and the block is printed before and after.
   Both dump labels carry the FCCMPE name so the "before" and "after"
   sections of the output pair up with this test rather than with the
   FCCMP_S_S_0x0_NE test.
   NOTE(review): if this program's output is compared against a recorded
   baseline, that baseline needs the corrected "before" label too. */
#define GEN_test_FCCMPE_S_S_0x0_NE \
__attribute__((noinline)) static void test_FCCMPE_S_S_0x0_NE ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCCMPE_S_S_0x0_NE before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fccmpe s29, s11, #0x0, ne; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCCMPE_S_S_0x0_NE after", &block[0], 4); \
printf("\n"); \
}
//======== FCMEQ_D_D ========//
/* Generates test_FCMEQ_D_D(), a test of "fcmeq d29, d11, d9".
   block[0..2] are filled by randBlock_Doubles, loaded into q29/q11/q9 and
   stored back after the instruction, so whatever it wrote to d29 shows up
   in block[0].  NZCV is seeded from the low 64 bits of block[3] and read
   back into the same slot.  The block is printed before and after. */
#define GEN_test_FCMEQ_D_D \
__attribute__((noinline)) static void test_FCMEQ_D_D ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMEQ_D_D before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmeq d29, d11, d9; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMEQ_D_D after", &block[0], 4); \
printf("\n"); \
}
//======== FCMEQ_S_S ========//
/* Generates test_FCMEQ_S_S(), a test of "fcmeq s29, s11, s9".
   block[0..2] are filled by randBlock_Floats, loaded into q29/q11/q9 and
   stored back after the instruction, so whatever it wrote to s29 shows up
   in block[0].  NZCV is seeded from the low 64 bits of block[3] and read
   back into the same slot.  The block is printed before and after. */
#define GEN_test_FCMEQ_S_S \
__attribute__((noinline)) static void test_FCMEQ_S_S ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMEQ_S_S before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmeq s29, s11, s9; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMEQ_S_S after", &block[0], 4); \
printf("\n"); \
}
//======== FCMGE_D_D ========//
/* Generates test_FCMGE_D_D(), a test of "fcmge d29, d11, d9".
   block[0..2] are filled by randBlock_Doubles, loaded into q29/q11/q9 and
   stored back after the instruction, so whatever it wrote to d29 shows up
   in block[0].  NZCV is seeded from the low 64 bits of block[3] and read
   back into the same slot.  The block is printed before and after. */
#define GEN_test_FCMGE_D_D \
__attribute__((noinline)) static void test_FCMGE_D_D ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMGE_D_D before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmge d29, d11, d9; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMGE_D_D after", &block[0], 4); \
printf("\n"); \
}
//======== FCMGE_S_S ========//
/* Generates test_FCMGE_S_S(), a test of "fcmge s29, s11, s9".
   block[0..2] are filled by randBlock_Floats, loaded into q29/q11/q9 and
   stored back after the instruction, so whatever it wrote to s29 shows up
   in block[0].  NZCV is seeded from the low 64 bits of block[3] and read
   back into the same slot.  The block is printed before and after. */
#define GEN_test_FCMGE_S_S \
__attribute__((noinline)) static void test_FCMGE_S_S ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMGE_S_S before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmge s29, s11, s9; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMGE_S_S after", &block[0], 4); \
printf("\n"); \
}
//======== FCMGT_D_D ========//
/* Generates test_FCMGT_D_D(), a test of "fcmgt d29, d11, d9".
   block[0..2] are filled by randBlock_Doubles, loaded into q29/q11/q9 and
   stored back after the instruction, so whatever it wrote to d29 shows up
   in block[0].  NZCV is seeded from the low 64 bits of block[3] and read
   back into the same slot.  The block is printed before and after. */
#define GEN_test_FCMGT_D_D \
__attribute__((noinline)) static void test_FCMGT_D_D ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMGT_D_D before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmgt d29, d11, d9; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMGT_D_D after", &block[0], 4); \
printf("\n"); \
}
//======== FCMGT_S_S ========//
/* Generates test_FCMGT_S_S(), a test of "fcmgt s29, s11, s9".
   block[0..2] are filled by randBlock_Floats, loaded into q29/q11/q9 and
   stored back after the instruction, so whatever it wrote to s29 shows up
   in block[0].  NZCV is seeded from the low 64 bits of block[3] and read
   back into the same slot.  The block is printed before and after. */
#define GEN_test_FCMGT_S_S \
__attribute__((noinline)) static void test_FCMGT_S_S ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMGT_S_S before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmgt s29, s11, s9; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMGT_S_S after", &block[0], 4); \
printf("\n"); \
}
//======== FACGT_D_D ========//
/* Generates test_FACGT_D_D(), a test of "facgt d29, d11, d9".
   block[0..2] are filled by randBlock_Doubles, loaded into q29/q11/q9 and
   stored back after the instruction, so whatever it wrote to d29 shows up
   in block[0].  NZCV is seeded from the low 64 bits of block[3] and read
   back into the same slot.  The block is printed before and after. */
#define GEN_test_FACGT_D_D \
__attribute__((noinline)) static void test_FACGT_D_D ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FACGT_D_D before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"facgt d29, d11, d9; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FACGT_D_D after", &block[0], 4); \
printf("\n"); \
}
//======== FACGT_S_S ========//
/* Generates test_FACGT_S_S(), a test of "facgt s29, s11, s9".
   block[0..2] are filled by randBlock_Floats, loaded into q29/q11/q9 and
   stored back after the instruction, so whatever it wrote to s29 shows up
   in block[0].  NZCV is seeded from the low 64 bits of block[3] and read
   back into the same slot.  The block is printed before and after. */
#define GEN_test_FACGT_S_S \
__attribute__((noinline)) static void test_FACGT_S_S ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FACGT_S_S before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"facgt s29, s11, s9; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FACGT_S_S after", &block[0], 4); \
printf("\n"); \
}
//======== FACGE_D_D ========//
/* Generates test_FACGE_D_D(), a test of "facge d29, d11, d9".
   block[0..2] are filled by randBlock_Doubles, loaded into q29/q11/q9 and
   stored back after the instruction, so whatever it wrote to d29 shows up
   in block[0].  NZCV is seeded from the low 64 bits of block[3] and read
   back into the same slot.  The block is printed before and after. */
#define GEN_test_FACGE_D_D \
__attribute__((noinline)) static void test_FACGE_D_D ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FACGE_D_D before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"facge d29, d11, d9; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FACGE_D_D after", &block[0], 4); \
printf("\n"); \
}
//======== FACGE_S_S ========//
/* Generates test_FACGE_S_S(), a test of "facge s29, s11, s9".
   block[0..2] are filled by randBlock_Floats, loaded into q29/q11/q9 and
   stored back after the instruction, so whatever it wrote to s29 shows up
   in block[0].  NZCV is seeded from the low 64 bits of block[3] and read
   back into the same slot.  The block is printed before and after. */
#define GEN_test_FACGE_S_S \
__attribute__((noinline)) static void test_FACGE_S_S ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FACGE_S_S before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"facge s29, s11, s9; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FACGE_S_S after", &block[0], 4); \
printf("\n"); \
}
//======== FCMEQ_Z_D ========//
/* Generates test_FCMEQ_Z_D(), a test of the compare-with-zero form
   "fcmeq d29, d11, #0".
   block[0..2] are filled by randBlock_Doubles, loaded into q29/q11/q9 and
   stored back after the instruction (q9 is not an operand), so whatever
   was written to d29 shows up in block[0].  NZCV is seeded from the low
   64 bits of block[3] and read back into the same slot.  The block is
   printed before and after. */
#define GEN_test_FCMEQ_Z_D \
__attribute__((noinline)) static void test_FCMEQ_Z_D ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMEQ_Z_D before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmeq d29, d11, #0; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMEQ_Z_D after", &block[0], 4); \
printf("\n"); \
}
//======== FCMEQ_Z_S ========//
/* Generates test_FCMEQ_Z_S(), a test of the compare-with-zero form
   "fcmeq s29, s11, #0".
   block[0..2] are filled by randBlock_Floats, loaded into q29/q11/q9 and
   stored back after the instruction (q9 is not an operand), so whatever
   was written to s29 shows up in block[0].  NZCV is seeded from the low
   64 bits of block[3] and read back into the same slot.  The block is
   printed before and after. */
#define GEN_test_FCMEQ_Z_S \
__attribute__((noinline)) static void test_FCMEQ_Z_S ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMEQ_Z_S before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmeq s29, s11, #0; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMEQ_Z_S after", &block[0], 4); \
printf("\n"); \
}
//======== FCMGE_Z_D ========//
/* Generates test_FCMGE_Z_D(), a test of the compare-with-zero form
   "fcmge d29, d11, #0".
   block[0..2] are filled by randBlock_Doubles, loaded into q29/q11/q9 and
   stored back after the instruction (q9 is not an operand), so whatever
   was written to d29 shows up in block[0].  NZCV is seeded from the low
   64 bits of block[3] and read back into the same slot.  The block is
   printed before and after. */
#define GEN_test_FCMGE_Z_D \
__attribute__((noinline)) static void test_FCMGE_Z_D ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMGE_Z_D before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmge d29, d11, #0; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMGE_Z_D after", &block[0], 4); \
printf("\n"); \
}
//======== FCMGE_Z_S ========//
/* Generates test_FCMGE_Z_S(), a test of the compare-with-zero form
   "fcmge s29, s11, #0".
   block[0..2] are filled by randBlock_Floats, loaded into q29/q11/q9 and
   stored back after the instruction (q9 is not an operand), so whatever
   was written to s29 shows up in block[0].  NZCV is seeded from the low
   64 bits of block[3] and read back into the same slot.  The block is
   printed before and after. */
#define GEN_test_FCMGE_Z_S \
__attribute__((noinline)) static void test_FCMGE_Z_S ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMGE_Z_S before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmge s29, s11, #0; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMGE_Z_S after", &block[0], 4); \
printf("\n"); \
}
//======== FCMGT_Z_D ========//
/* Generates test_FCMGT_Z_D(), a test of the compare-with-zero form
   "fcmgt d29, d11, #0".
   block[0..2] are filled by randBlock_Doubles, loaded into q29/q11/q9 and
   stored back after the instruction (q9 is not an operand), so whatever
   was written to d29 shows up in block[0].  NZCV is seeded from the low
   64 bits of block[3] and read back into the same slot.  The block is
   printed before and after. */
#define GEN_test_FCMGT_Z_D \
__attribute__((noinline)) static void test_FCMGT_Z_D ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMGT_Z_D before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmgt d29, d11, #0; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMGT_Z_D after", &block[0], 4); \
printf("\n"); \
}
//======== FCMGT_Z_S ========//
/* Generates test_FCMGT_Z_S(), a test of the compare-with-zero form
   "fcmgt s29, s11, #0".
   block[0..2] are filled by randBlock_Floats, loaded into q29/q11/q9 and
   stored back after the instruction (q9 is not an operand), so whatever
   was written to s29 shows up in block[0].  NZCV is seeded from the low
   64 bits of block[3] and read back into the same slot.  The block is
   printed before and after. */
#define GEN_test_FCMGT_Z_S \
__attribute__((noinline)) static void test_FCMGT_Z_S ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMGT_Z_S before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmgt s29, s11, #0; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMGT_Z_S after", &block[0], 4); \
printf("\n"); \
}
//======== FCMLE_Z_D ========//
/* Generates test_FCMLE_Z_D(), a test of the compare-with-zero form
   "fcmle d29, d11, #0".
   block[0..2] are filled by randBlock_Doubles, loaded into q29/q11/q9 and
   stored back after the instruction (q9 is not an operand), so whatever
   was written to d29 shows up in block[0].  NZCV is seeded from the low
   64 bits of block[3] and read back into the same slot.  The block is
   printed before and after. */
#define GEN_test_FCMLE_Z_D \
__attribute__((noinline)) static void test_FCMLE_Z_D ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMLE_Z_D before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmle d29, d11, #0; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMLE_Z_D after", &block[0], 4); \
printf("\n"); \
}
//======== FCMLE_Z_S ========//
/* Generates test_FCMLE_Z_S(), a test of the compare-with-zero form
   "fcmle s29, s11, #0".
   block[0..2] are filled by randBlock_Floats, loaded into q29/q11/q9 and
   stored back after the instruction (q9 is not an operand), so whatever
   was written to s29 shows up in block[0].  NZCV is seeded from the low
   64 bits of block[3] and read back into the same slot.  The block is
   printed before and after. */
#define GEN_test_FCMLE_Z_S \
__attribute__((noinline)) static void test_FCMLE_Z_S ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMLE_Z_S before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmle s29, s11, #0; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMLE_Z_S after", &block[0], 4); \
printf("\n"); \
}
//======== FCMLT_Z_D ========//
/* Generates test_FCMLT_Z_D(), a test of the compare-with-zero form
   "fcmlt d29, d11, #0".
   block[0..2] are filled by randBlock_Doubles, loaded into q29/q11/q9 and
   stored back after the instruction (q9 is not an operand), so whatever
   was written to d29 shows up in block[0].  NZCV is seeded from the low
   64 bits of block[3] and read back into the same slot.  The block is
   printed before and after. */
#define GEN_test_FCMLT_Z_D \
__attribute__((noinline)) static void test_FCMLT_Z_D ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMLT_Z_D before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmlt d29, d11, #0; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMLT_Z_D after", &block[0], 4); \
printf("\n"); \
}
//======== FCMLT_Z_S ========//
/* Generates test_FCMLT_Z_S(), a test of the compare-with-zero form
   "fcmlt s29, s11, #0".
   block[0..2] are filled by randBlock_Floats, loaded into q29/q11/q9 and
   stored back after the instruction (q9 is not an operand), so whatever
   was written to s29 shows up in block[0].  NZCV is seeded from the low
   64 bits of block[3] and read back into the same slot.  The block is
   printed before and after. */
#define GEN_test_FCMLT_Z_S \
__attribute__((noinline)) static void test_FCMLT_Z_S ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMLT_Z_S before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmlt s29, s11, #0; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMLT_Z_S after", &block[0], 4); \
printf("\n"); \
}
//======== FCMP_D_D ========//
/* Generates test_FCMP_D_D(), a test of the flag-setting compare
   "fcmp d29, d11".
   block[0..2] are filled by randBlock_Doubles and loaded into q29/q11/q9
   (q9 is not an operand).  NZCV is seeded from the low 64 bits of block[3]
   before the compare and read back into that slot afterwards, which is
   where the compare's outcome becomes visible.  The registers are stored
   back and the block is printed before and after. */
#define GEN_test_FCMP_D_D \
__attribute__((noinline)) static void test_FCMP_D_D ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMP_D_D before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmp d29, d11; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMP_D_D after", &block[0], 4); \
printf("\n"); \
}
//======== FCMP_S_S ========//
/* Generates test_FCMP_S_S(), a test of the flag-setting compare
   "fcmp s29, s11".
   block[0..2] are filled by randBlock_Floats and loaded into q29/q11/q9
   (q9 is not an operand).  NZCV is seeded from the low 64 bits of block[3]
   before the compare and read back into that slot afterwards, which is
   where the compare's outcome becomes visible.  The registers are stored
   back and the block is printed before and after. */
#define GEN_test_FCMP_S_S \
__attribute__((noinline)) static void test_FCMP_S_S ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMP_S_S before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmp s29, s11; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMP_S_S after", &block[0], 4); \
printf("\n"); \
}
//======== FCMPE_D_D ========//
/* Generates test_FCMPE_D_D(), a test of the signalling flag-setting
   compare "fcmpe d29, d11".
   block[0..2] are filled by randBlock_Doubles and loaded into q29/q11/q9
   (q9 is not an operand).  NZCV is seeded from the low 64 bits of block[3]
   before the compare and read back into that slot afterwards, which is
   where the compare's outcome becomes visible.  The registers are stored
   back and the block is printed before and after. */
#define GEN_test_FCMPE_D_D \
__attribute__((noinline)) static void test_FCMPE_D_D ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMPE_D_D before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmpe d29, d11; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMPE_D_D after", &block[0], 4); \
printf("\n"); \
}
//======== FCMPE_S_S ========//
/* Generates test_FCMPE_S_S(), a test of the signalling flag-setting
   compare "fcmpe s29, s11".
   block[0..2] are filled by randBlock_Floats and loaded into q29/q11/q9
   (q9 is not an operand).  NZCV is seeded from the low 64 bits of block[3]
   before the compare and read back into that slot afterwards, which is
   where the compare's outcome becomes visible.  The registers are stored
   back and the block is printed before and after. */
#define GEN_test_FCMPE_S_S \
__attribute__((noinline)) static void test_FCMPE_S_S ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMPE_S_S before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmpe s29, s11; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMPE_S_S after", &block[0], 4); \
printf("\n"); \
}
//======== FCMP_Z_D ========//
/* Generates test_FCMP_Z_D(), a test of the compare-with-zero form
   "fcmp d29, #0".
   block[0..2] are filled by randBlock_Doubles and loaded into q29/q11/q9
   (only d29 is an operand; q11 and q9 are just loaded and stored back).
   NZCV is seeded from the low 64 bits of block[3] before the compare and
   read back into that slot afterwards, which is where the compare's
   outcome becomes visible.  The block is printed before and after. */
#define GEN_test_FCMP_Z_D \
__attribute__((noinline)) static void test_FCMP_Z_D ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMP_Z_D before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmp d29, #0; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMP_Z_D after", &block[0], 4); \
printf("\n"); \
}
//======== FCMP_Z_S ========//
/* Generates test_FCMP_Z_S(), a test of the compare-with-zero form
   "fcmp s29, #0".
   block[0..2] are filled by randBlock_Floats and loaded into q29/q11/q9
   (only s29 is an operand; q11 and q9 are just loaded and stored back).
   NZCV is seeded from the low 64 bits of block[3] before the compare and
   read back into that slot afterwards, which is where the compare's
   outcome becomes visible.  The block is printed before and after. */
#define GEN_test_FCMP_Z_S \
__attribute__((noinline)) static void test_FCMP_Z_S ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMP_Z_S before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmp s29, #0; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMP_Z_S after", &block[0], 4); \
printf("\n"); \
}
//======== FCMPE_Z_D ========//
/* Generates test_FCMPE_Z_D(), a test of the signalling compare-with-zero
   form "fcmpe d29, #0".
   block[0..2] are filled by randBlock_Doubles and loaded into q29/q11/q9
   (only d29 is an operand; q11 and q9 are just loaded and stored back).
   NZCV is seeded from the low 64 bits of block[3] before the compare and
   read back into that slot afterwards, which is where the compare's
   outcome becomes visible.  The block is printed before and after. */
#define GEN_test_FCMPE_Z_D \
__attribute__((noinline)) static void test_FCMPE_Z_D ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMPE_Z_D before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmpe d29, #0; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMPE_Z_D after", &block[0], 4); \
printf("\n"); \
}
//======== FCMPE_Z_S ========//
/* Generates test_FCMPE_Z_S(), a test of the signalling compare-with-zero
   form "fcmpe s29, #0".
   block[0..2] are filled by randBlock_Floats and loaded into q29/q11/q9
   (only s29 is an operand; q11 and q9 are just loaded and stored back).
   NZCV is seeded from the low 64 bits of block[3] before the compare and
   read back into that slot afterwards, which is where the compare's
   outcome becomes visible.  The block is printed before and after. */
#define GEN_test_FCMPE_Z_S \
__attribute__((noinline)) static void test_FCMPE_Z_S ( void ) \
{ \
V128 block[4]; \
randBlock_Floats(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCMPE_Z_S before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcmpe s29, #0; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCMPE_Z_S after", &block[0], 4); \
printf("\n"); \
}
//======== FCSEL_D_D_D_EQ ========//
/* Generates test_FCSEL_D_D_D_EQ(), a test of the conditional select
   "fcsel d29, d11, d9, eq".
   block[0..2] are filled by randBlock_Doubles and loaded into q29/q11/q9.
   NZCV is seeded from the low 64 bits of block[3] first, so that value
   decides whether "eq" holds and hence which source the instruction picks
   (per the A64 ISA: d11 if the condition holds, otherwise d9).  NZCV is
   read back into block[3], the registers are stored back so the selected
   value shows up in block[0], and the block is printed before and after. */
#define GEN_test_FCSEL_D_D_D_EQ \
__attribute__((noinline)) static void test_FCSEL_D_D_D_EQ ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCSEL_D_D_D_EQ before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcsel d29, d11, d9, eq; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCSEL_D_D_D_EQ after", &block[0], 4); \
printf("\n"); \
}
//======== FCSEL_D_D_D_NE ========//
/* Generates test_FCSEL_D_D_D_NE(), a test of the conditional select
   "fcsel d29, d11, d9, ne".
   block[0..2] are filled by randBlock_Doubles and loaded into q29/q11/q9.
   NZCV is seeded from the low 64 bits of block[3] first, so that value
   decides whether "ne" holds and hence which source the instruction picks
   (per the A64 ISA: d11 if the condition holds, otherwise d9).  NZCV is
   read back into block[3], the registers are stored back so the selected
   value shows up in block[0], and the block is printed before and after. */
#define GEN_test_FCSEL_D_D_D_NE \
__attribute__((noinline)) static void test_FCSEL_D_D_D_NE ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCSEL_D_D_D_NE before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcsel d29, d11, d9, ne; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCSEL_D_D_D_NE after", &block[0], 4); \
printf("\n"); \
}
//======== FCSEL_S_S_S_EQ ========//
/* Generates test_FCSEL_S_S_S_EQ(), a test of the conditional select
   "fcsel s29, s11, s9, eq".
   block[0..2] are loaded into q29/q11/q9.  NZCV is seeded from the low
   64 bits of block[3] first, so that value decides whether "eq" holds and
   hence which source the instruction picks (per the A64 ISA: s11 if the
   condition holds, otherwise s9).  NZCV is read back into block[3], the
   registers are stored back so the selected value shows up in block[0],
   and the block is printed before and after.
   NOTE(review): this single-precision test fills its inputs with
   randBlock_Doubles, whereas the other S-register tests in this section
   use randBlock_Floats.  Confirm whether that is intentional before
   changing it; switching generators may alter the program's output. */
#define GEN_test_FCSEL_S_S_S_EQ \
__attribute__((noinline)) static void test_FCSEL_S_S_S_EQ ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCSEL_S_S_S_EQ before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcsel s29, s11, s9, eq; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCSEL_S_S_S_EQ after", &block[0], 4); \
printf("\n"); \
}
//======== FCSEL_S_S_S_NE ========//
/* Generates test_FCSEL_S_S_S_NE(), a test of the conditional select
   "fcsel s29, s11, s9, ne".
   block[0..2] are loaded into q29/q11/q9.  NZCV is seeded from the low
   64 bits of block[3] first, so that value decides whether "ne" holds and
   hence which source the instruction picks (per the A64 ISA: s11 if the
   condition holds, otherwise s9).  NZCV is read back into block[3], the
   registers are stored back so the selected value shows up in block[0],
   and the block is printed before and after.
   NOTE(review): this single-precision test fills its inputs with
   randBlock_Doubles, whereas the other S-register tests in this section
   use randBlock_Floats.  Confirm whether that is intentional before
   changing it; switching generators may alter the program's output. */
#define GEN_test_FCSEL_S_S_S_NE \
__attribute__((noinline)) static void test_FCSEL_S_S_S_NE ( void ) \
{ \
V128 block[4]; \
randBlock_Doubles(&block[0], 3); \
block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
showBlock("FCSEL_S_S_S_NE before", &block[0], 4); \
__asm__ __volatile__( \
"ldr x9, [%0, 48]; msr nzcv, x9; " \
"ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
"fcsel s29, s11, s9, ne; " \
"mrs x9, nzcv; str x9, [%0, 48]; " \
"str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
); \
showBlock("FCSEL_S_S_S_NE after", &block[0], 4); \
printf("\n"); \
}
/* ---------------------------------------------------------------- */
/* -- Tests, in the same order that they appear in main() -- */
/* ---------------------------------------------------------------- */
// ======================== FP ========================
/* Unary FP operations: fabs, fneg and fsqrt, each in scalar (d, s) and
   vector (2d, 4s, 2s) forms.  Each GEN_TWOVEC_TEST line gives a test
   name, the instruction text, and two register numbers; here the numbers
   (22, 23) are exactly the registers named in the instruction text.
   GEN_TWOVEC_TEST itself is defined outside this section. */
GEN_TWOVEC_TEST(fabs_d_d, "fabs d22, d23", 22, 23)
GEN_TWOVEC_TEST(fabs_s_s, "fabs s22, s23", 22, 23)
GEN_TWOVEC_TEST(fabs_2d_2d, "fabs v22.2d, v23.2d", 22, 23)
GEN_TWOVEC_TEST(fabs_4s_4s, "fabs v22.4s, v23.4s", 22, 23)
GEN_TWOVEC_TEST(fabs_2s_2s, "fabs v22.2s, v23.2s", 22, 23)
GEN_TWOVEC_TEST(fneg_d_d, "fneg d22, d23", 22, 23)
GEN_TWOVEC_TEST(fneg_s_s, "fneg s22, s23", 22, 23)
GEN_TWOVEC_TEST(fneg_2d_2d, "fneg v22.2d, v23.2d", 22, 23)
GEN_TWOVEC_TEST(fneg_4s_4s, "fneg v22.4s, v23.4s", 22, 23)
GEN_TWOVEC_TEST(fneg_2s_2s, "fneg v22.2s, v23.2s", 22, 23)
GEN_TWOVEC_TEST(fsqrt_d_d, "fsqrt d22, d23", 22, 23)
GEN_TWOVEC_TEST(fsqrt_s_s, "fsqrt s22, s23", 22, 23)
GEN_TWOVEC_TEST(fsqrt_2d_2d, "fsqrt v22.2d, v23.2d", 22, 23)
GEN_TWOVEC_TEST(fsqrt_4s_4s, "fsqrt v22.4s, v23.4s", 22, 23)
GEN_TWOVEC_TEST(fsqrt_2s_2s, "fsqrt v22.2s, v23.2s", 22, 23)
/* fadd, fsub, fabd and faddp.  The scalar forms are spelled out through
   GEN_THREEVEC_TEST (test name, instruction text, three register
   numbers matching the text).  The 2d/4s/2s vector forms of fadd, fsub
   and fabd go through GEN_BINARY_TEST instead, which is given only the
   mnemonic and three arrangement suffixes; the registers it uses are
   chosen inside that macro, which is defined outside this section.
   faddp is covered in both its scalar-result form (one vector source)
   and its three-vector form. */
GEN_THREEVEC_TEST(fadd_d_d_d, "fadd d2, d11, d29", 2, 11, 29)
GEN_THREEVEC_TEST(fadd_s_s_s, "fadd s2, s11, s29", 2, 11, 29)
GEN_THREEVEC_TEST(fsub_d_d_d, "fsub d2, d11, d29", 2, 11, 29)
GEN_THREEVEC_TEST(fsub_s_s_s, "fsub s2, s11, s29", 2, 11, 29)
GEN_BINARY_TEST(fadd, 2d, 2d, 2d)
GEN_BINARY_TEST(fadd, 4s, 4s, 4s)
GEN_BINARY_TEST(fadd, 2s, 2s, 2s)
GEN_BINARY_TEST(fsub, 2d, 2d, 2d)
GEN_BINARY_TEST(fsub, 4s, 4s, 4s)
GEN_BINARY_TEST(fsub, 2s, 2s, 2s)
GEN_THREEVEC_TEST(fabd_d_d_d, "fabd d2, d11, d29", 2, 11, 29)
GEN_THREEVEC_TEST(fabd_s_s_s, "fabd s2, s11, s29", 2, 11, 29)
GEN_BINARY_TEST(fabd, 2d, 2d, 2d)
GEN_BINARY_TEST(fabd, 4s, 4s, 4s)
GEN_BINARY_TEST(fabd, 2s, 2s, 2s)
GEN_TWOVEC_TEST(faddp_d_2d, "faddp d2, v23.2d", 2, 23)
GEN_TWOVEC_TEST(faddp_s_2s, "faddp s2, v23.2s", 2, 23)
GEN_THREEVEC_TEST(faddp_2d_2d_2d, "faddp v2.2d, v23.2d, v11.2d", 2, 23, 11)
GEN_THREEVEC_TEST(faddp_4s_4s_4s, "faddp v2.4s, v23.4s, v11.4s", 2, 23, 11)
GEN_THREEVEC_TEST(faddp_2s_2s_2s, "faddp v2.2s, v23.2s, v11.2s", 2, 23, 11)
/* Instantiate the hand-written FCCMP / FCCMPE tests.  These macros take
   no arguments; their definitions are outside this section.  Judging by
   the names, the suffixes encode operand size (D or S), an immediate
   (0xF or 0x0) and the condition (EQ or NE) -- confirm against the
   macro definitions. */
GEN_test_FCCMP_D_D_0xF_EQ
GEN_test_FCCMP_D_D_0xF_NE
GEN_test_FCCMP_D_D_0x0_EQ
GEN_test_FCCMP_D_D_0x0_NE
GEN_test_FCCMP_S_S_0xF_EQ
GEN_test_FCCMP_S_S_0xF_NE
GEN_test_FCCMP_S_S_0x0_EQ
GEN_test_FCCMP_S_S_0x0_NE
GEN_test_FCCMPE_D_D_0xF_EQ
GEN_test_FCCMPE_D_D_0xF_NE
GEN_test_FCCMPE_D_D_0x0_EQ
GEN_test_FCCMPE_D_D_0x0_NE
GEN_test_FCCMPE_S_S_0xF_EQ
GEN_test_FCCMPE_S_S_0xF_NE
GEN_test_FCCMPE_S_S_0x0_EQ
GEN_test_FCCMPE_S_S_0x0_NE
/* Register-register FP compares: fcmeq, fcmge, fcmgt, facgt, facge.
   The scalar D/S variants come from argument-less GEN_test_* macros
   defined outside this section; the 2d/4s/2s vector variants are
   written out through GEN_THREEVEC_TEST with v2 as destination and
   v23, v11 as sources. */
GEN_test_FCMEQ_D_D
GEN_test_FCMEQ_S_S
GEN_test_FCMGE_D_D
GEN_test_FCMGE_S_S
GEN_test_FCMGT_D_D
GEN_test_FCMGT_S_S
GEN_test_FACGT_D_D
GEN_test_FACGT_S_S
GEN_test_FACGE_D_D
GEN_test_FACGE_S_S
GEN_THREEVEC_TEST(fcmeq_2d_2d_2d, "fcmeq v2.2d, v23.2d, v11.2d", 2, 23, 11)
GEN_THREEVEC_TEST(fcmeq_4s_4s_4s, "fcmeq v2.4s, v23.4s, v11.4s", 2, 23, 11)
GEN_THREEVEC_TEST(fcmeq_2s_2s_2s, "fcmeq v2.2s, v23.2s, v11.2s", 2, 23, 11)
GEN_THREEVEC_TEST(fcmge_2d_2d_2d, "fcmge v2.2d, v23.2d, v11.2d", 2, 23, 11)
GEN_THREEVEC_TEST(fcmge_4s_4s_4s, "fcmge v2.4s, v23.4s, v11.4s", 2, 23, 11)
GEN_THREEVEC_TEST(fcmge_2s_2s_2s, "fcmge v2.2s, v23.2s, v11.2s", 2, 23, 11)
GEN_THREEVEC_TEST(fcmgt_2d_2d_2d, "fcmgt v2.2d, v23.2d, v11.2d", 2, 23, 11)
GEN_THREEVEC_TEST(fcmgt_4s_4s_4s, "fcmgt v2.4s, v23.4s, v11.4s", 2, 23, 11)
GEN_THREEVEC_TEST(fcmgt_2s_2s_2s, "fcmgt v2.2s, v23.2s, v11.2s", 2, 23, 11)
GEN_THREEVEC_TEST(facge_2d_2d_2d, "facge v2.2d, v23.2d, v11.2d", 2, 23, 11)
GEN_THREEVEC_TEST(facge_4s_4s_4s, "facge v2.4s, v23.4s, v11.4s", 2, 23, 11)
GEN_THREEVEC_TEST(facge_2s_2s_2s, "facge v2.2s, v23.2s, v11.2s", 2, 23, 11)
GEN_THREEVEC_TEST(facgt_2d_2d_2d, "facgt v2.2d, v23.2d, v11.2d", 2, 23, 11)
GEN_THREEVEC_TEST(facgt_4s_4s_4s, "facgt v2.4s, v23.4s, v11.4s", 2, 23, 11)
GEN_THREEVEC_TEST(facgt_2s_2s_2s, "facgt v2.2s, v23.2s, v11.2s", 2, 23, 11)
/* FP compares against zero: fcmeq, fcmge, fcmgt, fcmle, fcmlt with a
   "#0" operand.  The scalar D/S variants come from argument-less
   GEN_test_*_Z_* macros defined outside this section; the 2d/4s/2s
   vector variants are written out through GEN_TWOVEC_TEST with v2 as
   destination and v23 as the single source. */
GEN_test_FCMEQ_Z_D
GEN_test_FCMEQ_Z_S
GEN_test_FCMGE_Z_D
GEN_test_FCMGE_Z_S
GEN_test_FCMGT_Z_D
GEN_test_FCMGT_Z_S
GEN_test_FCMLE_Z_D
GEN_test_FCMLE_Z_S
GEN_test_FCMLT_Z_D
GEN_test_FCMLT_Z_S
GEN_TWOVEC_TEST(fcmeq_z_2d_2d, "fcmeq v2.2d, v23.2d, #0", 2, 23)
GEN_TWOVEC_TEST(fcmeq_z_4s_4s, "fcmeq v2.4s, v23.4s, #0", 2, 23)
GEN_TWOVEC_TEST(fcmeq_z_2s_2s, "fcmeq v2.2s, v23.2s, #0", 2, 23)
GEN_TWOVEC_TEST(fcmge_z_2d_2d, "fcmge v2.2d, v23.2d, #0", 2, 23)
GEN_TWOVEC_TEST(fcmge_z_4s_4s, "fcmge v2.4s, v23.4s, #0", 2, 23)
GEN_TWOVEC_TEST(fcmge_z_2s_2s, "fcmge v2.2s, v23.2s, #0", 2, 23)
GEN_TWOVEC_TEST(fcmgt_z_2d_2d, "fcmgt v2.2d, v23.2d, #0", 2, 23)
GEN_TWOVEC_TEST(fcmgt_z_4s_4s, "fcmgt v2.4s, v23.4s, #0", 2, 23)
GEN_TWOVEC_TEST(fcmgt_z_2s_2s, "fcmgt v2.2s, v23.2s, #0", 2, 23)
GEN_TWOVEC_TEST(fcmle_z_2d_2d, "fcmle v2.2d, v23.2d, #0", 2, 23)
GEN_TWOVEC_TEST(fcmle_z_4s_4s, "fcmle v2.4s, v23.4s, #0", 2, 23)
GEN_TWOVEC_TEST(fcmle_z_2s_2s, "fcmle v2.2s, v23.2s, #0", 2, 23)
GEN_TWOVEC_TEST(fcmlt_z_2d_2d, "fcmlt v2.2d, v23.2d, #0", 2, 23)
GEN_TWOVEC_TEST(fcmlt_z_4s_4s, "fcmlt v2.4s, v23.4s, #0", 2, 23)
GEN_TWOVEC_TEST(fcmlt_z_2s_2s, "fcmlt v2.2s, v23.2s, #0", 2, 23)
/* Instantiate the hand-written FCMP, FCMPE and FCSEL tests.  Each of
   these argument-less macros expands to one static test function; the
   FCSEL_S_S_S_EQ / FCSEL_S_S_S_NE definitions just above show the shape
   (a noinline static test_<NAME>() that prints a V128 block before and
   after running the instruction).  The other definitions are outside
   this section. */
GEN_test_FCMP_Z_D
GEN_test_FCMP_Z_S
GEN_test_FCMPE_Z_D
GEN_test_FCMPE_Z_S
GEN_test_FCMP_D_D
GEN_test_FCMP_S_S
GEN_test_FCMPE_D_D
GEN_test_FCMPE_S_S
GEN_test_FCSEL_D_D_D_EQ
GEN_test_FCSEL_D_D_D_NE
GEN_test_FCSEL_S_S_S_EQ
GEN_test_FCSEL_S_S_S_NE
/* fdiv (scalar via GEN_THREEVEC_TEST, vector via GEN_BINARY_TEST), the
   four-operand scalar fused multiply-add family (fmadd, fnmadd, fmsub,
   fnmsub) via GEN_FOURVEC_TEST, and scalar fnmul.  In every spelled-out
   line the trailing numbers are the registers named in the instruction
   text, in the same order.  The GEN_* macros are defined outside this
   section. */
GEN_THREEVEC_TEST(fdiv_d_d_d, "fdiv d2, d11, d29", 2, 11, 29)
GEN_THREEVEC_TEST(fdiv_s_s_s, "fdiv s2, s11, s29", 2, 11, 29)
GEN_BINARY_TEST(fdiv, 2d, 2d, 2d)
GEN_BINARY_TEST(fdiv, 4s, 4s, 4s)
GEN_BINARY_TEST(fdiv, 2s, 2s, 2s)
GEN_FOURVEC_TEST(fmadd_d_d_d_d, "fmadd d2, d11, d29, d3", 2, 11, 29, 3)
GEN_FOURVEC_TEST(fmadd_s_s_s_s, "fmadd s2, s11, s29, s3", 2, 11, 29, 3)
GEN_FOURVEC_TEST(fnmadd_d_d_d_d, "fnmadd d2, d11, d29, d3", 2, 11, 29, 3)
GEN_FOURVEC_TEST(fnmadd_s_s_s_s, "fnmadd s2, s11, s29, s3", 2, 11, 29, 3)
GEN_FOURVEC_TEST(fmsub_d_d_d_d, "fmsub d2, d11, d29, d3", 2, 11, 29, 3)
GEN_FOURVEC_TEST(fmsub_s_s_s_s, "fmsub s2, s11, s29, s3", 2, 11, 29, 3)
GEN_FOURVEC_TEST(fnmsub_d_d_d_d, "fnmsub d2, d11, d29, d3", 2, 11, 29, 3)
GEN_FOURVEC_TEST(fnmsub_s_s_s_s, "fnmsub s2, s11, s29, s3", 2, 11, 29, 3)
GEN_THREEVEC_TEST(fnmul_d_d_d, "fnmul d2, d11, d29", 2, 11, 29)
GEN_THREEVEC_TEST(fnmul_s_s_s, "fnmul s2, s11, s29", 2, 11, 29)
/* Element-wise fmax, fmin, fmaxnm and fminnm.  Scalar d/s forms use
   registers 2, 11, 29; vector 2d/4s/2s forms use 2, 23, 11.  The
   trailing numbers in each line are the registers named in the
   instruction text, in the same order. */
GEN_THREEVEC_TEST(fmax_d_d_d, "fmax d2, d11, d29", 2, 11, 29)
GEN_THREEVEC_TEST(fmax_s_s_s, "fmax s2, s11, s29", 2, 11, 29)
GEN_THREEVEC_TEST(fmin_d_d_d, "fmin d2, d11, d29", 2, 11, 29)
GEN_THREEVEC_TEST(fmin_s_s_s, "fmin s2, s11, s29", 2, 11, 29)
GEN_THREEVEC_TEST(fmaxnm_d_d_d, "fmaxnm d2, d11, d29", 2, 11, 29)
GEN_THREEVEC_TEST(fmaxnm_s_s_s, "fmaxnm s2, s11, s29", 2, 11, 29)
GEN_THREEVEC_TEST(fminnm_d_d_d, "fminnm d2, d11, d29", 2, 11, 29)
GEN_THREEVEC_TEST(fminnm_s_s_s, "fminnm s2, s11, s29", 2, 11, 29)
GEN_THREEVEC_TEST(fmax_2d_2d_2d, "fmax v2.2d, v23.2d, v11.2d", 2, 23, 11)
GEN_THREEVEC_TEST(fmax_4s_4s_4s, "fmax v2.4s, v23.4s, v11.4s", 2, 23, 11)
GEN_THREEVEC_TEST(fmax_2s_2s_2s, "fmax v2.2s, v23.2s, v11.2s", 2, 23, 11)
GEN_THREEVEC_TEST(fmin_2d_2d_2d, "fmin v2.2d, v23.2d, v11.2d", 2, 23, 11)
GEN_THREEVEC_TEST(fmin_4s_4s_4s, "fmin v2.4s, v23.4s, v11.4s", 2, 23, 11)
GEN_THREEVEC_TEST(fmin_2s_2s_2s, "fmin v2.2s, v23.2s, v11.2s", 2, 23, 11)
GEN_THREEVEC_TEST(fmaxnm_2d_2d_2d, "fmaxnm v2.2d, v23.2d, v11.2d", 2, 23, 11)
GEN_THREEVEC_TEST(fmaxnm_4s_4s_4s, "fmaxnm v2.4s, v23.4s, v11.4s", 2, 23, 11)
GEN_THREEVEC_TEST(fmaxnm_2s_2s_2s, "fmaxnm v2.2s, v23.2s, v11.2s", 2, 23, 11)
GEN_THREEVEC_TEST(fminnm_2d_2d_2d, "fminnm v2.2d, v23.2d, v11.2d", 2, 23, 11)
GEN_THREEVEC_TEST(fminnm_4s_4s_4s, "fminnm v2.4s, v23.4s, v11.4s", 2, 23, 11)
GEN_THREEVEC_TEST(fminnm_2s_2s_2s, "fminnm v2.2s, v23.2s, v11.2s", 2, 23, 11)
/* Pairwise (…p) and across-vector (…v) max/min: fmaxnmp, fminnmp,
   fmaxnmv, fminnmv, fmaxp, fminp, fmaxv, fminv.  Forms with a scalar
   destination and one vector source go through GEN_TWOVEC_TEST
   (registers 2, 23); three-vector pairwise forms go through
   GEN_THREEVEC_TEST (registers 2, 23, 11).  The across-vector forms are
   exercised only with a 4s source here. */
GEN_TWOVEC_TEST(fmaxnmp_d_2d, "fmaxnmp d2, v23.2d", 2, 23)
GEN_TWOVEC_TEST(fmaxnmp_s_2s, "fmaxnmp s2, v23.2s", 2, 23)
GEN_TWOVEC_TEST(fminnmp_d_2d, "fminnmp d2, v23.2d", 2, 23)
GEN_TWOVEC_TEST(fminnmp_s_2s, "fminnmp s2, v23.2s", 2, 23)
GEN_THREEVEC_TEST(fmaxnmp_2d_2d_2d, "fmaxnmp v2.2d, v23.2d, v11.2d", 2, 23, 11)
GEN_THREEVEC_TEST(fmaxnmp_4s_4s_4s, "fmaxnmp v2.4s, v23.4s, v11.4s", 2, 23, 11)
GEN_THREEVEC_TEST(fmaxnmp_2s_2s_2s, "fmaxnmp v2.2s, v23.2s, v11.2s", 2, 23, 11)
GEN_THREEVEC_TEST(fminnmp_2d_2d_2d, "fminnmp v2.2d, v23.2d, v11.2d", 2, 23, 11)
GEN_THREEVEC_TEST(fminnmp_4s_4s_4s, "fminnmp v2.4s, v23.4s, v11.4s", 2, 23, 11)
GEN_THREEVEC_TEST(fminnmp_2s_2s_2s, "fminnmp v2.2s, v23.2s, v11.2s", 2, 23, 11)
GEN_TWOVEC_TEST(fmaxnmv_s_4s, "fmaxnmv s2, v23.4s", 2, 23)
GEN_TWOVEC_TEST(fminnmv_s_4s, "fminnmv s2, v23.4s", 2, 23)
GEN_TWOVEC_TEST(fmaxp_d_2d, "fmaxp d2, v23.2d", 2, 23)
GEN_TWOVEC_TEST(fmaxp_s_2s, "fmaxp s2, v23.2s", 2, 23)
GEN_TWOVEC_TEST(fminp_d_2d, "fminp d2, v23.2d", 2, 23)
GEN_TWOVEC_TEST(fminp_s_2s, "fminp s2, v23.2s", 2, 23)
GEN_THREEVEC_TEST(fmaxp_2d_2d_2d, "fmaxp v2.2d, v23.2d, v11.2d", 2, 23, 11)
GEN_THREEVEC_TEST(fmaxp_4s_4s_4s, "fmaxp v2.4s, v23.4s, v11.4s", 2, 23, 11)
GEN_THREEVEC_TEST(fmaxp_2s_2s_2s, "fmaxp v2.2s, v23.2s, v11.2s", 2, 23, 11)
GEN_THREEVEC_TEST(fminp_2d_2d_2d, "fminp v2.2d, v23.2d, v11.2d", 2, 23, 11)
GEN_THREEVEC_TEST(fminp_4s_4s_4s, "fminp v2.4s, v23.4s, v11.4s", 2, 23, 11)
GEN_THREEVEC_TEST(fminp_2s_2s_2s, "fminp v2.2s, v23.2s, v11.2s", 2, 23, 11)
GEN_TWOVEC_TEST(fmaxv_s_4s, "fmaxv s2, v23.4s", 2, 23)
GEN_TWOVEC_TEST(fminv_s_4s, "fminv s2, v23.4s", 2, 23)
/* fmla / fmls in three shapes:
     - vector x vector (2d, 4s, 2s), registers 2, 23, 11;
     - scalar x indexed element (d[0], d[1], s[0], s[3]),
       registers 2, 11, 29;
     - vector x indexed element (same lane choices), registers 2, 11, 29.
   For the indexed forms only the lowest and highest lane index of each
   element size is exercised. */
GEN_THREEVEC_TEST(fmla_2d_2d_2d, "fmla v2.2d, v23.2d, v11.2d", 2, 23, 11)
GEN_THREEVEC_TEST(fmla_4s_4s_4s, "fmla v2.4s, v23.4s, v11.4s", 2, 23, 11)
GEN_THREEVEC_TEST(fmla_2s_2s_2s, "fmla v2.2s, v23.2s, v11.2s", 2, 23, 11)
GEN_THREEVEC_TEST(fmls_2d_2d_2d, "fmls v2.2d, v23.2d, v11.2d", 2, 23, 11)
GEN_THREEVEC_TEST(fmls_4s_4s_4s, "fmls v2.4s, v23.4s, v11.4s", 2, 23, 11)
GEN_THREEVEC_TEST(fmls_2s_2s_2s, "fmls v2.2s, v23.2s, v11.2s", 2, 23, 11)
GEN_THREEVEC_TEST(fmla_d_d_d0, "fmla d2, d11, v29.d[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmla_d_d_d1, "fmla d2, d11, v29.d[1]", 2, 11, 29)
GEN_THREEVEC_TEST(fmla_s_s_s0, "fmla s2, s11, v29.s[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmla_s_s_s3, "fmla s2, s11, v29.s[3]", 2, 11, 29)
GEN_THREEVEC_TEST(fmls_d_d_d0, "fmls d2, d11, v29.d[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmls_d_d_d1, "fmls d2, d11, v29.d[1]", 2, 11, 29)
GEN_THREEVEC_TEST(fmls_s_s_s0, "fmls s2, s11, v29.s[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmls_s_s_s3, "fmls s2, s11, v29.s[3]", 2, 11, 29)
GEN_THREEVEC_TEST(fmla_2d_2d_d0, "fmla v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmla_2d_2d_d1, "fmla v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
GEN_THREEVEC_TEST(fmla_4s_4s_s0, "fmla v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmla_4s_4s_s3, "fmla v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
GEN_THREEVEC_TEST(fmla_2s_2s_s0, "fmla v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmla_2s_2s_s3, "fmla v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
GEN_THREEVEC_TEST(fmls_2d_2d_d0, "fmls v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmls_2d_2d_d1, "fmls v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
GEN_THREEVEC_TEST(fmls_4s_4s_s0, "fmls v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmls_4s_4s_s3, "fmls v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
GEN_THREEVEC_TEST(fmls_2s_2s_s0, "fmls v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmls_2s_2s_s3, "fmls v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
/* fmov in all tested forms: vector immediate (2d, 4s, 2s),
   register-to-register (d, s), integer<->FP register moves, and scalar
   immediate (d, s).  The vector-immediate lines only name v22 in the
   instruction text, so the second register number (23) passed to
   GEN_TWOVEC_TEST is not referenced by the instruction itself. */
GEN_TWOVEC_TEST(fmov_2d_imm_01, "fmov v22.2d, #0.125", 22, 23)
GEN_TWOVEC_TEST(fmov_2d_imm_02, "fmov v22.2d, #-4.0", 22, 23)
GEN_TWOVEC_TEST(fmov_2d_imm_03, "fmov v22.2d, #1.0", 22, 23)
GEN_TWOVEC_TEST(fmov_4s_imm_01, "fmov v22.4s, #0.125", 22, 23)
GEN_TWOVEC_TEST(fmov_4s_imm_02, "fmov v22.4s, #-4.0", 22, 23)
GEN_TWOVEC_TEST(fmov_4s_imm_03, "fmov v22.4s, #1.0", 22, 23)
GEN_TWOVEC_TEST(fmov_2s_imm_01, "fmov v22.2s, #0.125", 22, 23)
GEN_TWOVEC_TEST(fmov_2s_imm_02, "fmov v22.2s, #-4.0", 22, 23)
GEN_TWOVEC_TEST(fmov_2s_imm_03, "fmov v22.2s, #1.0", 22, 23)
GEN_TWOVEC_TEST(fmov_d_d, "fmov d22, d23", 22, 23)
GEN_TWOVEC_TEST(fmov_s_s, "fmov s22, s23", 22, 23)
/* Moves between an integer register (x15/w15) and a vector register
   (v7), in both directions, including the upper 64-bit lane v7.d[1].
   The numeric arguments are the integer register number followed by the
   vector register number. */
GEN_ONEINT_ONEVEC_TEST(fmov_s_w, "fmov s7, w15", 15, 7)
GEN_ONEINT_ONEVEC_TEST(fmov_d_x, "fmov d7, x15", 15, 7)
GEN_ONEINT_ONEVEC_TEST(fmov_d1_x, "fmov v7.d[1], x15", 15, 7)
GEN_ONEINT_ONEVEC_TEST(fmov_w_s, "fmov w15, s7", 15, 7)
GEN_ONEINT_ONEVEC_TEST(fmov_x_d, "fmov x15, d7", 15, 7)
GEN_ONEINT_ONEVEC_TEST(fmov_x_d1, "fmov x15, v7.d[1]", 15, 7)
/* Overkill: these scalar-immediate forms only touch register 22, so a
   one-vector test would be enough; GEN_TWOVEC_TEST is used anyway and
   its second register (23) is not referenced by the instruction.
   NOTE(review): fmov_s_imm_03 uses #-1.0 where the other *_imm_03 tests
   use #1.0 -- looks like an inconsistency, but changing it would alter
   the test's output; confirm before touching. */
GEN_TWOVEC_TEST(fmov_d_imm_01, "fmov d22, #0.125", 22, 23)
GEN_TWOVEC_TEST(fmov_d_imm_02, "fmov d22, #-4.0", 22, 23)
GEN_TWOVEC_TEST(fmov_d_imm_03, "fmov d22, #1.0", 22, 23)
GEN_TWOVEC_TEST(fmov_s_imm_01, "fmov s22, #0.125", 22, 23)
GEN_TWOVEC_TEST(fmov_s_imm_02, "fmov s22, #-4.0", 22, 23)
GEN_TWOVEC_TEST(fmov_s_imm_03, "fmov s22, #-1.0", 22, 23)
/* fmul: scalar x indexed element, vector x indexed element (lowest and
   highest lane index of each element size), then plain scalar d/s and
   vector 2d/4s forms.  All lines use registers 2, 11, 29, matching the
   instruction text. */
GEN_THREEVEC_TEST(fmul_d_d_d0, "fmul d2, d11, v29.d[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmul_d_d_d1, "fmul d2, d11, v29.d[1]", 2, 11, 29)
GEN_THREEVEC_TEST(fmul_s_s_s0, "fmul s2, s11, v29.s[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmul_s_s_s3, "fmul s2, s11, v29.s[3]", 2, 11, 29)
GEN_THREEVEC_TEST(fmul_2d_2d_d0, "fmul v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmul_2d_2d_d1, "fmul v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
GEN_THREEVEC_TEST(fmul_4s_4s_s0, "fmul v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmul_4s_4s_s3, "fmul v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
GEN_THREEVEC_TEST(fmul_2s_2s_s0, "fmul v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmul_2s_2s_s3, "fmul v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
GEN_THREEVEC_TEST(fmul_d_d_d, "fmul d2, d11, d29", 2, 11, 29)
GEN_THREEVEC_TEST(fmul_s_s_s, "fmul s2, s11, s29", 2, 11, 29)
GEN_THREEVEC_TEST(fmul_2d_2d_2d, "fmul v2.2d, v11.2d, v29.2d", 2, 11, 29)
GEN_THREEVEC_TEST(fmul_4s_4s_4s, "fmul v2.4s, v11.4s, v29.4s", 2, 11, 29)
GEN_THREEVEC_TEST(f