blob: 105a80d656a907f647203afb5a2d04d2bd8bf22e [file] [log] [blame]
/*
* Copyright (C) 2015 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package android.renderscript.cts;
import android.renderscript.*;
import android.util.Log;
import java.util.ArrayList;
public class IntrinsicBLAS extends IntrinsicBase {
// BLAS intrinsic under test; created in setUp().
private ScriptIntrinsicBLAS mBLAS;
// Set once setUp() has populated the allocation lists below.
private boolean mInitialized = false;
// Candidate F32 allocations and the coefficients passed to the S-prefixed routines.
private ArrayList<Allocation> mMatrixS;
private final float alphaS = 1.0f;
private final float betaS = 1.0f;
// Candidate F64 allocations and the coefficients passed to the D-prefixed routines.
private ArrayList<Allocation> mMatrixD;
private final double alphaD = 1.0;
private final double betaD = 1.0;
// Candidate F32_2 allocations and the coefficients passed to the C-prefixed routines.
private ArrayList<Allocation> mMatrixC;
private final Float2 alphaC = new Float2(1.0f, 0.0f);
private final Float2 betaC = new Float2(1.0f, 0.0f);
// Candidate F64_2 allocations and the coefficients passed to the Z-prefixed routines.
private ArrayList<Allocation> mMatrixZ;
private final Double2 alphaZ = new Double2(1.0, 0.0);
private final Double2 betaZ = new Double2(1.0, 0.0);
// Argument values swept by the API tests.
// NOTE(review): the trailing 0 in the four arrays below is presumably an invalid value used
// as a negative case -- confirm against the ScriptIntrinsicBLAS constant values.
private int[] mTranspose = {ScriptIntrinsicBLAS.NO_TRANSPOSE,
ScriptIntrinsicBLAS.TRANSPOSE,
ScriptIntrinsicBLAS.CONJ_TRANSPOSE,
0};
private int[] mUplo = {ScriptIntrinsicBLAS.UPPER,
ScriptIntrinsicBLAS.LOWER,
0};
private int[] mDiag = {ScriptIntrinsicBLAS.NON_UNIT,
ScriptIntrinsicBLAS.UNIT,
0};
private int[] mSide = {ScriptIntrinsicBLAS.LEFT,
ScriptIntrinsicBLAS.RIGHT,
0};
// Increments swept by the API tests; validateGEMV() treats values <= 0 as invalid.
private int[] mInc = {0, 1, 2};
// Band widths swept by the banded API tests, which treat negative values as invalid.
private int[] mK = {-1, 0, 1};
// X and Y sizes used by setUp() to build the candidate allocations.
private int[] mDim = {1, 2, 3, 256};
/**
 * Prepares the BLAS intrinsic and the allocation pools used by the API tests.
 *
 * <p>For every (x, y) pair taken from {@code mDim}, one allocation of each of the four element
 * types is appended to its pool. A single 1x1 U8 allocation is then appended to every pool so
 * the tests also see an element-type mismatch. Nothing is rebuilt once {@code mInitialized}
 * is set.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();
    if (mInitialized) {
        return;
    }

    mBLAS = ScriptIntrinsicBLAS.create(mRS);
    mMatrixS = new ArrayList<Allocation>();
    mMatrixD = new ArrayList<Allocation>();
    mMatrixC = new ArrayList<Allocation>();
    mMatrixZ = new ArrayList<Allocation>();

    // Populate the test matrices and vectors.
    for (int xi = 0; xi < mDim.length; xi++) {
        final int dimX = mDim[xi];
        for (int yi = 0; yi < mDim.length; yi++) {
            final int dimY = mDim[yi];
            mMatrixS.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, dimY)));
            mMatrixD.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, dimY)));
            mMatrixC.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, dimY)));
            mMatrixZ.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, dimY)));
        }
    }

    // One shared allocation whose element type matches none of the pools.
    final Type mismatchType = Type.createXY(mRS, Element.U8(mRS), 1, 1);
    final Allocation mismatch = Allocation.createTyped(mRS, mismatchType);
    mMatrixS.add(mismatch);
    mMatrixD.add(mismatch);
    mMatrixC.add(mismatch);
    mMatrixZ.add(mismatch);

    mInitialized = true;
}
/**
 * Delegates to the base-class teardown; this class adds no cleanup of its own here.
 */
@Override
protected void tearDown() throws Exception {
super.tearDown();
}
/**
 * Tells whether {@code Side} is one of the two side constants.
 *
 * @return {@code true} for LEFT or RIGHT, {@code false} for anything else
 */
private boolean validateSide(int Side) {
    return Side == ScriptIntrinsicBLAS.LEFT || Side == ScriptIntrinsicBLAS.RIGHT;
}
/**
 * Tells whether {@code Trans} is one of the three transpose constants.
 *
 * @return {@code true} for NO_TRANSPOSE, TRANSPOSE or CONJ_TRANSPOSE, {@code false} otherwise
 */
private boolean validateTranspose(int Trans) {
    return Trans == ScriptIntrinsicBLAS.NO_TRANSPOSE
            || Trans == ScriptIntrinsicBLAS.TRANSPOSE
            || Trans == ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
}
/**
 * Tells whether {@code Trans} is acceptable where plain TRANSPOSE is not allowed.
 *
 * @return {@code true} for NO_TRANSPOSE or CONJ_TRANSPOSE, {@code false} otherwise
 */
private boolean validateConjTranspose(int Trans) {
    return Trans == ScriptIntrinsicBLAS.NO_TRANSPOSE
            || Trans == ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
}
/**
 * Tells whether {@code Diag} is one of the two diagonal constants.
 *
 * @return {@code true} for NON_UNIT or UNIT, {@code false} otherwise
 */
private boolean validateDiag(int Diag) {
    return Diag == ScriptIntrinsicBLAS.NON_UNIT || Diag == ScriptIntrinsicBLAS.UNIT;
}
/**
 * Tells whether {@code Uplo} is one of the two triangle constants.
 *
 * @return {@code true} for UPPER or LOWER, {@code false} otherwise
 */
private boolean validateUplo(int Uplo) {
    return Uplo == ScriptIntrinsicBLAS.UPPER || Uplo == ScriptIntrinsicBLAS.LOWER;
}
/**
 * Filters the allocations that the API tests try as vector arguments.
 *
 * <p>A Y size of 2 is deliberately let through: the vector tests need a mismatching Y
 * dimension for complete coverage. Only allocations with Y greater than 2 are skipped.
 *
 * @return {@code true} when {@code X} has a Y size of at most 2
 */
private boolean validateVecInput(Allocation X) {
    return X.getType().getY() <= 2;
}
/**
 * Predicts whether a GEMV-style call with these arguments should be accepted.
 *
 * <p>Checks, in order: the transpose constant, element compatibility of A, X and Y with
 * {@code e}, that X and Y have a Y size of at most 1, that both increments are positive,
 * and that the X sizes of the two vectors equal {@code 1 + (len - 1) * inc} for the
 * matrix dimension each vector pairs with.
 *
 * @param e      element type that A, X and Y must all be compatible with
 * @param TransA transpose constant; selects which matrix dimension pairs with which vector
 * @param A      matrix allocation (rows taken from its Y size, columns from its X size)
 * @param X      input vector allocation
 * @param incX   increment for X
 * @param Y      output vector allocation
 * @param incY   increment for Y
 * @return {@code true} when every check passes
 */
private boolean validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) {
    if (!validateTranspose(TransA)) {
        return false;
    }

    final Type typeA = A.getType();
    final int rows = typeA.getY();
    final int cols = typeA.getX();

    final boolean elementsMatch = typeA.getElement().isCompatible(e)
            && X.getType().getElement().isCompatible(e)
            && Y.getType().getElement().isCompatible(e);
    if (!elementsMatch) {
        return false;
    }

    final Type typeX = X.getType();
    final Type typeY = Y.getType();
    if (typeX.getY() > 1 || typeY.getY() > 1) {
        return false;
    }
    if (incX <= 0 || incY <= 0) {
        return false;
    }

    // Without transposition X pairs with the columns and Y with the rows; otherwise swapped.
    final boolean untransposed = (TransA == ScriptIntrinsicBLAS.NO_TRANSPOSE);
    final int countForX = untransposed ? cols : rows;
    final int countForY = untransposed ? rows : cols;
    final int wantX = 1 + (countForX - 1) * incX;
    final int wantY = 1 + (countForY - 1) * incY;

    return typeX.getX() == wantX && typeY.getX() == wantY;
}
/**
 * Exercises argument validation of the four xGEMV routines for one (trans, incX, incY) setting.
 *
 * <p>Every (A, X, Y) triple from {@code mMatrix} is tried, skipping X/Y candidates rejected
 * by {@link #validateVecInput}. When A's element type has a GEMV routine and
 * {@link #validateGEMV} accepts the triple, the matching routine must complete without an
 * {@link RSRuntimeException}. In every other case all four routines are expected to throw.
 *
 * <p>A triple whose A element type has no GEMV routine (the U8 mismatch allocation) is
 * treated as invalid, so it is checked for rejection rather than silently skipped.
 *
 * @param trans   transpose value passed through to the routines (may be invalid on purpose)
 * @param incX    increment for X (may be invalid on purpose)
 * @param incY    increment for Y (may be invalid on purpose)
 * @param mMatrix candidate allocations used for A, X and Y
 */
private void xGEMV_API_test(int trans, int incX, int incY, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        // A's element type does not change across the X/Y loops, so classify it once.
        Element elemA = matA.getType().getElement();
        boolean isS = elemA.isCompatible(Element.F32(mRS));
        boolean isD = elemA.isCompatible(Element.F64(mRS));
        boolean isC = elemA.isCompatible(Element.F32_2(mRS));
        boolean isZ = elemA.isCompatible(Element.F64_2(mRS));
        boolean hasRoutine = isS || isD || isC || isZ;

        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            for (Allocation vecY : mMatrix) {
                if (!validateVecInput(vecY)) {
                    continue;
                }
                if (hasRoutine && validateGEMV(elemA, trans, matA, vecX, incX, vecY, incY)) {
                    try {
                        if (isS) {
                            mBLAS.SGEMV(trans, alphaS, matA, vecX, incX, betaS, vecY, incY);
                        } else if (isD) {
                            mBLAS.DGEMV(trans, alphaD, matA, vecX, incX, betaD, vecY, incY);
                        } else if (isC) {
                            mBLAS.CGEMV(trans, alphaC, matA, vecX, incX, betaC, vecY, incY);
                        } else {
                            mBLAS.ZGEMV(trans, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
                        }
                    } catch (RSRuntimeException e) {
                        // Keep the cause so a failure can be diagnosed from the report alone.
                        fail("should NOT throw RSRuntimeException: " + e.getMessage());
                    }
                } else {
                    try {
                        mBLAS.SGEMV(trans, alphaS, matA, vecX, incX, betaS, vecY, incY);
                        fail("should throw RSRuntimeException for SGEMV");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the expected outcome.
                    }
                    try {
                        mBLAS.DGEMV(trans, alphaD, matA, vecX, incX, betaD, vecY, incY);
                        fail("should throw RSRuntimeException for DGEMV");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the expected outcome.
                    }
                    try {
                        mBLAS.CGEMV(trans, alphaC, matA, vecX, incX, betaC, vecY, incY);
                        fail("should throw RSRuntimeException for CGEMV");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the expected outcome.
                    }
                    try {
                        mBLAS.ZGEMV(trans, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
                        fail("should throw RSRuntimeException for ZGEMV");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the expected outcome.
                    }
                }
            }
        }
    }
}
/**
 * Sweeps the xGEMV API test over every transpose value and every increment.
 * The same increment is used for both X and Y.
 *
 * @param mMatrix candidate allocations handed to {@code xGEMV_API_test}
 */
public void L2_xGEMV_API(ArrayList<Allocation> mMatrix) {
    for (int t = 0; t < mTranspose.length; t++) {
        final int transpose = mTranspose[t];
        for (int i = 0; i < mInc.length; i++) {
            final int increment = mInc[i];
            xGEMV_API_test(transpose, increment, increment, mMatrix);
        }
    }
}
/** Runs the xGEMV argument-validation sweep over the F32 allocation pool. */
public void test_L2_SGEMV_API() {
L2_xGEMV_API(mMatrixS);
}
/** Runs the xGEMV argument-validation sweep over the F64 allocation pool. */
public void test_L2_DGEMV_API() {
L2_xGEMV_API(mMatrixD);
}
/** Runs the xGEMV argument-validation sweep over the F32_2 allocation pool. */
public void test_L2_CGEMV_API() {
L2_xGEMV_API(mMatrixC);
}
/** Runs the xGEMV argument-validation sweep over the F64_2 allocation pool. */
public void test_L2_ZGEMV_API() {
L2_xGEMV_API(mMatrixZ);
}
/**
 * Compares SGEMV output with the reference vectors stored in {@code BLASData}.
 *
 * <p>Runs NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE with increments of 1, then
 * NO_TRANSPOSE again with increments of 2. Statement order matters: an allocation that a
 * call has written to is reloaded before it is used again.
 */
public void test_L2_SGEMV_Correctness() {
    final int unitInc = 1;

    // Inputs: A is created with X = dN and Y = dM; the two vectors are dN and dM long.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dM));
    Allocation vecN = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, 1));
    Allocation vecM = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dM, 1));
    matA.copyFrom(BLASData.L2_sGEMV_A_mn);
    vecN.copyFrom(BLASData.L2_sGEMV_x_n1);
    vecM.copyFrom(BLASData.L2_sGEMV_y_m1);

    // NO_TRANSPOSE: the dM-long vector receives the result.
    mBLAS.SGEMV(ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaS, matA, vecN, unitInc, betaS, vecM, unitInc);
    Allocation expectedM = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dM, 1));
    expectedM.copyFrom(BLASData.L2_sGEMV_o_N);
    mVerify.invoke_verify(expectedM, vecM, expectedM);

    // TRANSPOSE: the vectors swap roles, so first restore the dM-long vector that BLAS overwrote.
    vecM.copyFrom(BLASData.L2_sGEMV_y_m1);
    mBLAS.SGEMV(ScriptIntrinsicBLAS.TRANSPOSE, alphaS, matA, vecM, unitInc, betaS, vecN, unitInc);
    Allocation expectedN = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, 1));
    expectedN.copyFrom(BLASData.L2_sGEMV_o_T);
    mVerify.invoke_verify(expectedN, vecN, expectedN);

    // CONJ_TRANSPOSE: reload the dN-long vector before it is written again.
    vecN.copyFrom(BLASData.L2_sGEMV_x_n1);
    mBLAS.SGEMV(ScriptIntrinsicBLAS.CONJ_TRANSPOSE, alphaS, matA, vecM, unitInc, betaS, vecN, unitInc);
    expectedN.copyFrom(BLASData.L2_sGEMV_o_H);
    mVerify.invoke_verify(expectedN, vecN, expectedN);

    // NO_TRANSPOSE with increments of 2, using freshly sized vectors.
    final int wideInc = 2;
    final int lenX = 1 + (BLASData.dN - 1) * wideInc;
    final int lenY = 1 + (BLASData.dM - 1) * wideInc;
    Allocation stridedX = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), lenX, 1));
    Allocation stridedY = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), lenY, 1));
    stridedX.copyFrom(BLASData.L2_sGEMV_x_n2);
    stridedY.copyFrom(BLASData.L2_sGEMV_y_m2);
    mBLAS.SGEMV(ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaS, matA, stridedX, wideInc, betaS, stridedY, wideInc);
    Allocation expectedStrided = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), lenY, 1));
    expectedStrided.copyFrom(BLASData.L2_sGEMV_o_N2);
    mVerify.invoke_verify(expectedStrided, stridedY, expectedStrided);

    mRS.finish();
    checkError();
}
/**
 * Compares DGEMV output with the reference vectors stored in {@code BLASData}.
 *
 * <p>Runs NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE with increments of 1, then
 * NO_TRANSPOSE again with increments of 2. Statement order matters: an allocation that a
 * call has written to is reloaded before it is used again.
 */
public void test_L2_DGEMV_Correctness() {
    final int unitInc = 1;

    // Inputs: A is created with X = dN and Y = dM; the two vectors are dN and dM long.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dM));
    Allocation vecN = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, 1));
    Allocation vecM = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dM, 1));
    matA.copyFrom(BLASData.L2_dGEMV_A_mn);
    vecN.copyFrom(BLASData.L2_dGEMV_x_n1);
    vecM.copyFrom(BLASData.L2_dGEMV_y_m1);

    // NO_TRANSPOSE: the dM-long vector receives the result.
    mBLAS.DGEMV(ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaD, matA, vecN, unitInc, betaD, vecM, unitInc);
    Allocation expectedM = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dM, 1));
    expectedM.copyFrom(BLASData.L2_dGEMV_o_N);
    mVerify.invoke_verify(expectedM, vecM, expectedM);

    // TRANSPOSE: the vectors swap roles, so first restore the dM-long vector that BLAS overwrote.
    vecM.copyFrom(BLASData.L2_dGEMV_y_m1);
    mBLAS.DGEMV(ScriptIntrinsicBLAS.TRANSPOSE, alphaD, matA, vecM, unitInc, betaD, vecN, unitInc);
    Allocation expectedN = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, 1));
    expectedN.copyFrom(BLASData.L2_dGEMV_o_T);
    mVerify.invoke_verify(expectedN, vecN, expectedN);

    // CONJ_TRANSPOSE: reload the dN-long vector before it is written again.
    vecN.copyFrom(BLASData.L2_dGEMV_x_n1);
    mBLAS.DGEMV(ScriptIntrinsicBLAS.CONJ_TRANSPOSE, alphaD, matA, vecM, unitInc, betaD, vecN, unitInc);
    expectedN.copyFrom(BLASData.L2_dGEMV_o_H);
    mVerify.invoke_verify(expectedN, vecN, expectedN);

    // NO_TRANSPOSE with increments of 2, using freshly sized vectors.
    final int wideInc = 2;
    final int lenX = 1 + (BLASData.dN - 1) * wideInc;
    final int lenY = 1 + (BLASData.dM - 1) * wideInc;
    Allocation stridedX = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), lenX, 1));
    Allocation stridedY = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), lenY, 1));
    stridedX.copyFrom(BLASData.L2_dGEMV_x_n2);
    stridedY.copyFrom(BLASData.L2_dGEMV_y_m2);
    mBLAS.DGEMV(ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaD, matA, stridedX, wideInc, betaD, stridedY, wideInc);
    Allocation expectedStrided = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), lenY, 1));
    expectedStrided.copyFrom(BLASData.L2_dGEMV_o_N2);
    mVerify.invoke_verify(expectedStrided, stridedY, expectedStrided);

    mRS.finish();
    checkError();
}
/**
 * Compares CGEMV output with the reference vectors stored in {@code BLASData}.
 *
 * <p>Runs NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE with increments of 1, then
 * NO_TRANSPOSE again with increments of 2. Statement order matters: an allocation that a
 * call has written to is reloaded before it is used again.
 */
public void test_L2_CGEMV_Correctness() {
    final int unitInc = 1;

    // Inputs: A is created with X = dN and Y = dM; the two vectors are dN and dM long.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dM));
    Allocation vecN = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, 1));
    Allocation vecM = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dM, 1));
    matA.copyFrom(BLASData.L2_cGEMV_A_mn);
    vecN.copyFrom(BLASData.L2_cGEMV_x_n1);
    vecM.copyFrom(BLASData.L2_cGEMV_y_m1);

    // NO_TRANSPOSE: the dM-long vector receives the result.
    mBLAS.CGEMV(ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaC, matA, vecN, unitInc, betaC, vecM, unitInc);
    Allocation expectedM = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dM, 1));
    expectedM.copyFrom(BLASData.L2_cGEMV_o_N);
    mVerify.invoke_verify(expectedM, vecM, expectedM);

    // TRANSPOSE: the vectors swap roles, so first restore the dM-long vector that BLAS overwrote.
    vecM.copyFrom(BLASData.L2_cGEMV_y_m1);
    mBLAS.CGEMV(ScriptIntrinsicBLAS.TRANSPOSE, alphaC, matA, vecM, unitInc, betaC, vecN, unitInc);
    Allocation expectedN = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, 1));
    expectedN.copyFrom(BLASData.L2_cGEMV_o_T);
    mVerify.invoke_verify(expectedN, vecN, expectedN);

    // CONJ_TRANSPOSE: reload the dN-long vector before it is written again.
    vecN.copyFrom(BLASData.L2_cGEMV_x_n1);
    mBLAS.CGEMV(ScriptIntrinsicBLAS.CONJ_TRANSPOSE, alphaC, matA, vecM, unitInc, betaC, vecN, unitInc);
    expectedN.copyFrom(BLASData.L2_cGEMV_o_H);
    mVerify.invoke_verify(expectedN, vecN, expectedN);

    // NO_TRANSPOSE with increments of 2, using freshly sized vectors.
    final int wideInc = 2;
    final int lenX = 1 + (BLASData.dN - 1) * wideInc;
    final int lenY = 1 + (BLASData.dM - 1) * wideInc;
    Allocation stridedX = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), lenX, 1));
    Allocation stridedY = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), lenY, 1));
    stridedX.copyFrom(BLASData.L2_cGEMV_x_n2);
    stridedY.copyFrom(BLASData.L2_cGEMV_y_m2);
    mBLAS.CGEMV(ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaC, matA, stridedX, wideInc, betaC, stridedY, wideInc);
    Allocation expectedStrided = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), lenY, 1));
    expectedStrided.copyFrom(BLASData.L2_cGEMV_o_N2);
    mVerify.invoke_verify(expectedStrided, stridedY, expectedStrided);

    mRS.finish();
    checkError();
}
/**
 * Compares ZGEMV output with the reference vectors stored in {@code BLASData}.
 *
 * <p>Runs NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE with increments of 1, then
 * NO_TRANSPOSE again with increments of 2. Statement order matters: an allocation that a
 * call has written to is reloaded before it is used again.
 */
public void test_L2_ZGEMV_Correctness() {
    final int unitInc = 1;

    // Inputs: A is created with X = dN and Y = dM; the two vectors are dN and dM long.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dM));
    Allocation vecN = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, 1));
    Allocation vecM = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dM, 1));
    matA.copyFrom(BLASData.L2_zGEMV_A_mn);
    vecN.copyFrom(BLASData.L2_zGEMV_x_n1);
    vecM.copyFrom(BLASData.L2_zGEMV_y_m1);

    // NO_TRANSPOSE: the dM-long vector receives the result.
    mBLAS.ZGEMV(ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaZ, matA, vecN, unitInc, betaZ, vecM, unitInc);
    Allocation expectedM = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dM, 1));
    expectedM.copyFrom(BLASData.L2_zGEMV_o_N);
    mVerify.invoke_verify(expectedM, vecM, expectedM);

    // TRANSPOSE: the vectors swap roles, so first restore the dM-long vector that BLAS overwrote.
    vecM.copyFrom(BLASData.L2_zGEMV_y_m1);
    mBLAS.ZGEMV(ScriptIntrinsicBLAS.TRANSPOSE, alphaZ, matA, vecM, unitInc, betaZ, vecN, unitInc);
    Allocation expectedN = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, 1));
    expectedN.copyFrom(BLASData.L2_zGEMV_o_T);
    mVerify.invoke_verify(expectedN, vecN, expectedN);

    // CONJ_TRANSPOSE: reload the dN-long vector before it is written again.
    vecN.copyFrom(BLASData.L2_zGEMV_x_n1);
    mBLAS.ZGEMV(ScriptIntrinsicBLAS.CONJ_TRANSPOSE, alphaZ, matA, vecM, unitInc, betaZ, vecN, unitInc);
    expectedN.copyFrom(BLASData.L2_zGEMV_o_H);
    mVerify.invoke_verify(expectedN, vecN, expectedN);

    // NO_TRANSPOSE with increments of 2, using freshly sized vectors.
    final int wideInc = 2;
    final int lenX = 1 + (BLASData.dN - 1) * wideInc;
    final int lenY = 1 + (BLASData.dM - 1) * wideInc;
    Allocation stridedX = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), lenX, 1));
    Allocation stridedY = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), lenY, 1));
    stridedX.copyFrom(BLASData.L2_zGEMV_x_n2);
    stridedY.copyFrom(BLASData.L2_zGEMV_y_m2);
    mBLAS.ZGEMV(ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaZ, matA, stridedX, wideInc, betaZ, stridedY, wideInc);
    Allocation expectedStrided = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), lenY, 1));
    expectedStrided.copyFrom(BLASData.L2_zGEMV_o_N2);
    mVerify.invoke_verify(expectedStrided, stridedY, expectedStrided);

    mRS.finish();
    checkError();
}
/**
 * Exercises argument validation of the four xGBMV routines for one parameter setting.
 *
 * <p>Every (A, X, Y) triple from {@code mMatrix} is tried, skipping X/Y candidates rejected
 * by {@link #validateVecInput}. When A's element type has a GBMV routine,
 * {@link #validateGEMV} accepts the triple and both band widths are non-negative, the
 * matching routine must complete without an {@link RSRuntimeException}. In every other case
 * all four routines are expected to throw.
 *
 * <p>A triple whose A element type has no GBMV routine (the U8 mismatch allocation) is
 * treated as invalid, so it is checked for rejection rather than silently skipped.
 *
 * @param trans   transpose value passed through to the routines (may be invalid on purpose)
 * @param KL      lower band width (may be negative on purpose)
 * @param KU      upper band width (may be negative on purpose)
 * @param incX    increment for X (may be invalid on purpose)
 * @param incY    increment for Y (may be invalid on purpose)
 * @param mMatrix candidate allocations used for A, X and Y
 */
private void xGBMV_API_test(int trans, int KL, int KU, int incX, int incY, ArrayList<Allocation> mMatrix) {
    // The band widths are the same for every triple, so evaluate them once.
    boolean bandsValid = KU >= 0 && KL >= 0;
    for (Allocation matA : mMatrix) {
        // A's element type does not change across the X/Y loops, so classify it once.
        Element elemA = matA.getType().getElement();
        boolean isS = elemA.isCompatible(Element.F32(mRS));
        boolean isD = elemA.isCompatible(Element.F64(mRS));
        boolean isC = elemA.isCompatible(Element.F32_2(mRS));
        boolean isZ = elemA.isCompatible(Element.F64_2(mRS));
        boolean hasRoutine = isS || isD || isC || isZ;

        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            for (Allocation vecY : mMatrix) {
                if (!validateVecInput(vecY)) {
                    continue;
                }
                if (hasRoutine && bandsValid
                        && validateGEMV(elemA, trans, matA, vecX, incX, vecY, incY)) {
                    try {
                        if (isS) {
                            mBLAS.SGBMV(trans, KL, KU, alphaS, matA, vecX, incX, betaS, vecY, incY);
                        } else if (isD) {
                            mBLAS.DGBMV(trans, KL, KU, alphaD, matA, vecX, incX, betaD, vecY, incY);
                        } else if (isC) {
                            mBLAS.CGBMV(trans, KL, KU, alphaC, matA, vecX, incX, betaC, vecY, incY);
                        } else {
                            mBLAS.ZGBMV(trans, KL, KU, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
                        }
                    } catch (RSRuntimeException e) {
                        // Keep the cause so a failure can be diagnosed from the report alone.
                        fail("should NOT throw RSRuntimeException: " + e.getMessage());
                    }
                } else {
                    try {
                        mBLAS.SGBMV(trans, KL, KU, alphaS, matA, vecX, incX, betaS, vecY, incY);
                        fail("should throw RSRuntimeException for SGBMV");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the expected outcome.
                    }
                    try {
                        mBLAS.DGBMV(trans, KL, KU, alphaD, matA, vecX, incX, betaD, vecY, incY);
                        fail("should throw RSRuntimeException for DGBMV");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the expected outcome.
                    }
                    try {
                        mBLAS.CGBMV(trans, KL, KU, alphaC, matA, vecX, incX, betaC, vecY, incY);
                        fail("should throw RSRuntimeException for CGBMV");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the expected outcome.
                    }
                    try {
                        mBLAS.ZGBMV(trans, KL, KU, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
                        fail("should throw RSRuntimeException for ZGBMV");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the expected outcome.
                    }
                }
            }
        }
    }
}
/**
 * Sweeps the xGBMV API test over every transpose value, increment and band width.
 * The same increment is used for X and Y, and the same band width for KL and KU.
 *
 * @param mMatrix candidate allocations handed to {@code xGBMV_API_test}
 */
public void L2_xGBMV_API(ArrayList<Allocation> mMatrix) {
    for (int t = 0; t < mTranspose.length; t++) {
        final int transpose = mTranspose[t];
        for (int i = 0; i < mInc.length; i++) {
            final int increment = mInc[i];
            for (int k = 0; k < mK.length; k++) {
                final int band = mK[k];
                xGBMV_API_test(transpose, band, band, increment, increment, mMatrix);
            }
        }
    }
}
/** Runs the xGBMV argument-validation sweep over the F32 allocation pool. */
public void test_L2_SGBMV_API() {
L2_xGBMV_API(mMatrixS);
}
/** Runs the xGBMV argument-validation sweep over the F64 allocation pool. */
public void test_L2_DGBMV_API() {
L2_xGBMV_API(mMatrixD);
}
/** Runs the xGBMV argument-validation sweep over the F32_2 allocation pool. */
public void test_L2_CGBMV_API() {
L2_xGBMV_API(mMatrixC);
}
/** Runs the xGBMV argument-validation sweep over the F64_2 allocation pool. */
public void test_L2_ZGBMV_API() {
L2_xGBMV_API(mMatrixZ);
}
/**
 * Compares SGBMV output with the reference vectors stored in {@code BLASData}.
 *
 * <p>Runs NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE with increments of 1, then
 * NO_TRANSPOSE again with increments of 2, always with the band widths
 * {@code BLASData.KL} and {@code BLASData.KU}. Statement order matters: an allocation that
 * a call has written to is reloaded before it is used again.
 */
public void test_L2_SGBMV_Correctness() {
    final int unitInc = 1;

    // Inputs: A is created with X = dN and Y = dM; the two vectors are dN and dM long.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dM));
    Allocation vecN = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, 1));
    Allocation vecM = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dM, 1));
    // Only a (KL + KU + 1) wide, dM high region starting at (0, 0) is loaded with band data.
    matA.copy2DRangeFrom(0, 0, BLASData.KL + BLASData.KU + 1, BLASData.dM, BLASData.L2_sGBMV_A_mn);
    vecN.copyFrom(BLASData.L2_sGBMV_x_n1);
    vecM.copyFrom(BLASData.L2_sGBMV_y_m1);

    // NO_TRANSPOSE: the dM-long vector receives the result.
    mBLAS.SGBMV(ScriptIntrinsicBLAS.NO_TRANSPOSE, BLASData.KL, BLASData.KU, alphaS, matA, vecN, unitInc, betaS, vecM, unitInc);
    Allocation expectedM = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dM, 1));
    expectedM.copyFrom(BLASData.L2_sGBMV_o_N);
    mVerify.invoke_verify(expectedM, vecM, expectedM);

    // TRANSPOSE: the vectors swap roles, so first restore the dM-long vector that BLAS overwrote.
    vecM.copyFrom(BLASData.L2_sGBMV_y_m1);
    mBLAS.SGBMV(ScriptIntrinsicBLAS.TRANSPOSE, BLASData.KL, BLASData.KU, alphaS, matA, vecM, unitInc, betaS, vecN, unitInc);
    Allocation expectedN = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, 1));
    expectedN.copyFrom(BLASData.L2_sGBMV_o_T);
    mVerify.invoke_verify(expectedN, vecN, expectedN);

    // CONJ_TRANSPOSE: reload the dN-long vector before it is written again.
    vecN.copyFrom(BLASData.L2_sGBMV_x_n1);
    mBLAS.SGBMV(ScriptIntrinsicBLAS.CONJ_TRANSPOSE, BLASData.KL, BLASData.KU, alphaS, matA, vecM, unitInc, betaS, vecN, unitInc);
    expectedN.copyFrom(BLASData.L2_sGBMV_o_H);
    mVerify.invoke_verify(expectedN, vecN, expectedN);

    // NO_TRANSPOSE with increments of 2, using freshly sized vectors.
    final int wideInc = 2;
    final int lenX = 1 + (BLASData.dN - 1) * wideInc;
    final int lenY = 1 + (BLASData.dM - 1) * wideInc;
    Allocation stridedX = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), lenX, 1));
    Allocation stridedY = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), lenY, 1));
    stridedX.copyFrom(BLASData.L2_sGBMV_x_n2);
    stridedY.copyFrom(BLASData.L2_sGBMV_y_m2);
    mBLAS.SGBMV(ScriptIntrinsicBLAS.NO_TRANSPOSE, BLASData.KL, BLASData.KU, alphaS, matA, stridedX, wideInc, betaS, stridedY, wideInc);
    Allocation expectedStrided = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), lenY, 1));
    expectedStrided.copyFrom(BLASData.L2_sGBMV_o_N2);
    mVerify.invoke_verify(expectedStrided, stridedY, expectedStrided);

    mRS.finish();
    checkError();
}
/**
 * Compares DGBMV output with the reference vectors stored in {@code BLASData}.
 *
 * <p>Runs NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE with increments of 1, then
 * NO_TRANSPOSE again with increments of 2, always with the band widths
 * {@code BLASData.KL} and {@code BLASData.KU}. Statement order matters: an allocation that
 * a call has written to is reloaded before it is used again.
 */
public void test_L2_DGBMV_Correctness() {
    final int unitInc = 1;

    // Inputs: A is created with X = dN and Y = dM; the two vectors are dN and dM long.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dM));
    Allocation vecN = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, 1));
    Allocation vecM = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dM, 1));
    // Only a (KL + KU + 1) wide, dM high region starting at (0, 0) is loaded with band data.
    matA.copy2DRangeFrom(0, 0, BLASData.KL + BLASData.KU + 1, BLASData.dM, BLASData.L2_dGBMV_A_mn);
    vecN.copyFrom(BLASData.L2_dGBMV_x_n1);
    vecM.copyFrom(BLASData.L2_dGBMV_y_m1);

    // NO_TRANSPOSE: the dM-long vector receives the result.
    mBLAS.DGBMV(ScriptIntrinsicBLAS.NO_TRANSPOSE, BLASData.KL, BLASData.KU, alphaD, matA, vecN, unitInc, betaD, vecM, unitInc);
    Allocation expectedM = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dM, 1));
    expectedM.copyFrom(BLASData.L2_dGBMV_o_N);
    mVerify.invoke_verify(expectedM, vecM, expectedM);

    // TRANSPOSE: the vectors swap roles, so first restore the dM-long vector that BLAS overwrote.
    vecM.copyFrom(BLASData.L2_dGBMV_y_m1);
    mBLAS.DGBMV(ScriptIntrinsicBLAS.TRANSPOSE, BLASData.KL, BLASData.KU, alphaD, matA, vecM, unitInc, betaD, vecN, unitInc);
    Allocation expectedN = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, 1));
    expectedN.copyFrom(BLASData.L2_dGBMV_o_T);
    mVerify.invoke_verify(expectedN, vecN, expectedN);

    // CONJ_TRANSPOSE: reload the dN-long vector before it is written again.
    vecN.copyFrom(BLASData.L2_dGBMV_x_n1);
    mBLAS.DGBMV(ScriptIntrinsicBLAS.CONJ_TRANSPOSE, BLASData.KL, BLASData.KU, alphaD, matA, vecM, unitInc, betaD, vecN, unitInc);
    expectedN.copyFrom(BLASData.L2_dGBMV_o_H);
    mVerify.invoke_verify(expectedN, vecN, expectedN);

    // NO_TRANSPOSE with increments of 2, using freshly sized vectors.
    final int wideInc = 2;
    final int lenX = 1 + (BLASData.dN - 1) * wideInc;
    final int lenY = 1 + (BLASData.dM - 1) * wideInc;
    Allocation stridedX = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), lenX, 1));
    Allocation stridedY = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), lenY, 1));
    stridedX.copyFrom(BLASData.L2_dGBMV_x_n2);
    stridedY.copyFrom(BLASData.L2_dGBMV_y_m2);
    mBLAS.DGBMV(ScriptIntrinsicBLAS.NO_TRANSPOSE, BLASData.KL, BLASData.KU, alphaD, matA, stridedX, wideInc, betaD, stridedY, wideInc);
    Allocation expectedStrided = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), lenY, 1));
    expectedStrided.copyFrom(BLASData.L2_dGBMV_o_N2);
    mVerify.invoke_verify(expectedStrided, stridedY, expectedStrided);

    mRS.finish();
    checkError();
}
/**
 * Compares CGBMV output with the reference vectors stored in {@code BLASData}.
 *
 * <p>Runs NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE with increments of 1, then
 * NO_TRANSPOSE again with increments of 2, always with the band widths
 * {@code BLASData.KL} and {@code BLASData.KU}. Statement order matters: an allocation that
 * a call has written to is reloaded before it is used again.
 */
public void test_L2_CGBMV_Correctness() {
    final int unitInc = 1;

    // Inputs: A is created with X = dN and Y = dM; the two vectors are dN and dM long.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dM));
    Allocation vecN = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, 1));
    Allocation vecM = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dM, 1));
    // Only a (KL + KU + 1) wide, dM high region starting at (0, 0) is loaded with band data.
    matA.copy2DRangeFrom(0, 0, BLASData.KL + BLASData.KU + 1, BLASData.dM, BLASData.L2_cGBMV_A_mn);
    vecN.copyFrom(BLASData.L2_cGBMV_x_n1);
    vecM.copyFrom(BLASData.L2_cGBMV_y_m1);

    // NO_TRANSPOSE: the dM-long vector receives the result.
    mBLAS.CGBMV(ScriptIntrinsicBLAS.NO_TRANSPOSE, BLASData.KL, BLASData.KU, alphaC, matA, vecN, unitInc, betaC, vecM, unitInc);
    Allocation expectedM = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dM, 1));
    expectedM.copyFrom(BLASData.L2_cGBMV_o_N);
    mVerify.invoke_verify(expectedM, vecM, expectedM);

    // TRANSPOSE: the vectors swap roles, so first restore the dM-long vector that BLAS overwrote.
    vecM.copyFrom(BLASData.L2_cGBMV_y_m1);
    mBLAS.CGBMV(ScriptIntrinsicBLAS.TRANSPOSE, BLASData.KL, BLASData.KU, alphaC, matA, vecM, unitInc, betaC, vecN, unitInc);
    Allocation expectedN = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, 1));
    expectedN.copyFrom(BLASData.L2_cGBMV_o_T);
    mVerify.invoke_verify(expectedN, vecN, expectedN);

    // CONJ_TRANSPOSE: reload the dN-long vector before it is written again.
    vecN.copyFrom(BLASData.L2_cGBMV_x_n1);
    mBLAS.CGBMV(ScriptIntrinsicBLAS.CONJ_TRANSPOSE, BLASData.KL, BLASData.KU, alphaC, matA, vecM, unitInc, betaC, vecN, unitInc);
    expectedN.copyFrom(BLASData.L2_cGBMV_o_H);
    mVerify.invoke_verify(expectedN, vecN, expectedN);

    // NO_TRANSPOSE with increments of 2, using freshly sized vectors.
    final int wideInc = 2;
    final int lenX = 1 + (BLASData.dN - 1) * wideInc;
    final int lenY = 1 + (BLASData.dM - 1) * wideInc;
    Allocation stridedX = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), lenX, 1));
    Allocation stridedY = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), lenY, 1));
    stridedX.copyFrom(BLASData.L2_cGBMV_x_n2);
    stridedY.copyFrom(BLASData.L2_cGBMV_y_m2);
    mBLAS.CGBMV(ScriptIntrinsicBLAS.NO_TRANSPOSE, BLASData.KL, BLASData.KU, alphaC, matA, stridedX, wideInc, betaC, stridedY, wideInc);
    Allocation expectedStrided = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), lenY, 1));
    expectedStrided.copyFrom(BLASData.L2_cGBMV_o_N2);
    mVerify.invoke_verify(expectedStrided, stridedY, expectedStrided);

    mRS.finish();
    checkError();
}
/**
 * Compares ZGBMV output with the reference vectors stored in {@code BLASData}.
 *
 * <p>Runs NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE with increments of 1, then
 * NO_TRANSPOSE again with increments of 2, always with the band widths
 * {@code BLASData.KL} and {@code BLASData.KU}. Statement order matters: an allocation that
 * a call has written to is reloaded before it is used again.
 */
public void test_L2_ZGBMV_Correctness() {
    final int unitInc = 1;

    // Inputs: A is created with X = dN and Y = dM; the two vectors are dN and dM long.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dM));
    Allocation vecN = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, 1));
    Allocation vecM = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dM, 1));
    // Only a (KL + KU + 1) wide, dM high region starting at (0, 0) is loaded with band data.
    matA.copy2DRangeFrom(0, 0, BLASData.KL + BLASData.KU + 1, BLASData.dM, BLASData.L2_zGBMV_A_mn);
    vecN.copyFrom(BLASData.L2_zGBMV_x_n1);
    vecM.copyFrom(BLASData.L2_zGBMV_y_m1);

    // NO_TRANSPOSE: the dM-long vector receives the result.
    mBLAS.ZGBMV(ScriptIntrinsicBLAS.NO_TRANSPOSE, BLASData.KL, BLASData.KU, alphaZ, matA, vecN, unitInc, betaZ, vecM, unitInc);
    Allocation expectedM = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dM, 1));
    expectedM.copyFrom(BLASData.L2_zGBMV_o_N);
    mVerify.invoke_verify(expectedM, vecM, expectedM);

    // TRANSPOSE: the vectors swap roles, so first restore the dM-long vector that BLAS overwrote.
    vecM.copyFrom(BLASData.L2_zGBMV_y_m1);
    mBLAS.ZGBMV(ScriptIntrinsicBLAS.TRANSPOSE, BLASData.KL, BLASData.KU, alphaZ, matA, vecM, unitInc, betaZ, vecN, unitInc);
    Allocation expectedN = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, 1));
    expectedN.copyFrom(BLASData.L2_zGBMV_o_T);
    mVerify.invoke_verify(expectedN, vecN, expectedN);

    // CONJ_TRANSPOSE: reload the dN-long vector before it is written again.
    vecN.copyFrom(BLASData.L2_zGBMV_x_n1);
    mBLAS.ZGBMV(ScriptIntrinsicBLAS.CONJ_TRANSPOSE, BLASData.KL, BLASData.KU, alphaZ, matA, vecM, unitInc, betaZ, vecN, unitInc);
    expectedN.copyFrom(BLASData.L2_zGBMV_o_H);
    mVerify.invoke_verify(expectedN, vecN, expectedN);

    // NO_TRANSPOSE with increments of 2, using freshly sized vectors.
    final int wideInc = 2;
    final int lenX = 1 + (BLASData.dN - 1) * wideInc;
    final int lenY = 1 + (BLASData.dM - 1) * wideInc;
    Allocation stridedX = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), lenX, 1));
    Allocation stridedY = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), lenY, 1));
    stridedX.copyFrom(BLASData.L2_zGBMV_x_n2);
    stridedY.copyFrom(BLASData.L2_zGBMV_y_m2);
    mBLAS.ZGBMV(ScriptIntrinsicBLAS.NO_TRANSPOSE, BLASData.KL, BLASData.KU, alphaZ, matA, stridedX, wideInc, betaZ, stridedY, wideInc);
    Allocation expectedStrided = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), lenY, 1));
    expectedStrided.copyFrom(BLASData.L2_zGBMV_o_N2);
    mVerify.invoke_verify(expectedStrided, stridedY, expectedStrided);

    mRS.finish();
    checkError();
}
/**
 * Exercises argument validation of CHEMV and ZHEMV for one (Uplo, incX, incY) setting.
 *
 * <p>Every (A, X, Y) triple from {@code mMatrix} is tried, skipping X/Y candidates rejected
 * by {@link #validateVecInput}. When A's element type is F32_2 or F64_2 and
 * {@code validateSYR2} accepts the triple, the matching routine must complete without an
 * {@link RSRuntimeException}. In every other case both routines are expected to throw.
 *
 * <p>A triple whose A element type has no HEMV routine (for example the U8 mismatch
 * allocation) is treated as invalid, so it is checked for rejection rather than silently
 * skipped.
 *
 * @param Uplo    triangle selector passed through to the routines (may be invalid on purpose)
 * @param incX    increment for X (may be invalid on purpose)
 * @param incY    increment for Y (may be invalid on purpose)
 * @param mMatrix candidate allocations used for A, X and Y
 */
private void xHEMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        // A's element type does not change across the X/Y loops, so classify it once.
        Element elemA = matA.getType().getElement();
        boolean isC = elemA.isCompatible(Element.F32_2(mRS));
        boolean isZ = elemA.isCompatible(Element.F64_2(mRS));
        boolean hasRoutine = isC || isZ;

        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            for (Allocation vecY : mMatrix) {
                if (!validateVecInput(vecY)) {
                    continue;
                }
                if (hasRoutine && validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) {
                    try {
                        if (isC) {
                            mBLAS.CHEMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY);
                        } else {
                            mBLAS.ZHEMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
                        }
                    } catch (RSRuntimeException e) {
                        // Keep the cause so a failure can be diagnosed from the report alone.
                        fail("should NOT throw RSRuntimeException: " + e.getMessage());
                    }
                } else {
                    try {
                        mBLAS.CHEMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY);
                        fail("should throw RSRuntimeException for CHEMV");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the expected outcome.
                    }
                    try {
                        mBLAS.ZHEMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
                        fail("should throw RSRuntimeException for ZHEMV");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the expected outcome.
                    }
                }
            }
        }
    }
}
/**
 * Sweeps the xHEMV API test over every triangle selector and every increment.
 * The same increment is used for both X and Y.
 *
 * @param mMatrix candidate allocations handed to {@code xHEMV_API_test}
 */
public void L2_xHEMV_API(ArrayList<Allocation> mMatrix) {
    for (int u = 0; u < mUplo.length; u++) {
        final int triangle = mUplo[u];
        for (int i = 0; i < mInc.length; i++) {
            final int increment = mInc[i];
            xHEMV_API_test(triangle, increment, increment, mMatrix);
        }
    }
}
/** Runs the xHEMV argument-validation sweep over the F32_2 allocation pool. */
public void test_L2_CHEMV_API() {
L2_xHEMV_API(mMatrixC);
}
/** Runs the xHEMV argument-validation sweep over the F64_2 allocation pool. */
public void test_L2_ZHEMV_API() {
L2_xHEMV_API(mMatrixZ);
}
/**
 * Compares CHEMV output with the reference vectors stored in {@code BLASData}.
 *
 * <p>Runs once with increments of 1 and once with increments of 2, both times with the
 * UPPER triangle selector and the same dN x dN matrix.
 */
public void test_L2_CHEMV_Correctness() {
    final int triangle = ScriptIntrinsicBLAS.UPPER;
    final int unitInc = 1;

    // Inputs: a dN x dN matrix and two dN-long vectors.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, 1));
    matA.copyFrom(BLASData.L2_cHEMV_A_nn);
    vecX.copyFrom(BLASData.L2_cHEMV_x_n1);
    vecY.copyFrom(BLASData.L2_cHEMV_y_n1);

    // Increments of 1: the Y vector receives the result.
    mBLAS.CHEMV(triangle, alphaC, matA, vecX, unitInc, betaC, vecY, unitInc);
    Allocation expected = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, 1));
    expected.copyFrom(BLASData.L2_cHEMV_o_N);
    mVerify.invoke_verify(expected, vecY, expected);

    // Increments of 2, using freshly sized vectors.
    final int wideInc = 2;
    final int lenX = 1 + (BLASData.dN - 1) * wideInc;
    final int lenY = 1 + (BLASData.dN - 1) * wideInc;
    Allocation stridedX = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), lenX, 1));
    Allocation stridedY = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), lenY, 1));
    stridedX.copyFrom(BLASData.L2_cHEMV_x_n2);
    stridedY.copyFrom(BLASData.L2_cHEMV_y_n2);
    mBLAS.CHEMV(triangle, alphaC, matA, stridedX, wideInc, betaC, stridedY, wideInc);
    Allocation expectedStrided = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), lenY, 1));
    expectedStrided.copyFrom(BLASData.L2_cHEMV_o_N2);
    mVerify.invoke_verify(expectedStrided, stridedY, expectedStrided);

    mRS.finish();
    checkError();
}
/**
 * Compares ZHEMV output with the reference vectors stored in {@code BLASData}.
 *
 * <p>Runs once with increments of 1 and once with increments of 2, both times with the
 * UPPER triangle selector and the same dN x dN matrix.
 */
public void test_L2_ZHEMV_Correctness() {
    final int triangle = ScriptIntrinsicBLAS.UPPER;
    final int unitInc = 1;

    // Inputs: a dN x dN matrix and two dN-long vectors.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dN));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, 1));
    matA.copyFrom(BLASData.L2_zHEMV_A_nn);
    vecX.copyFrom(BLASData.L2_zHEMV_x_n1);
    vecY.copyFrom(BLASData.L2_zHEMV_y_n1);

    // Increments of 1: the Y vector receives the result.
    mBLAS.ZHEMV(triangle, alphaZ, matA, vecX, unitInc, betaZ, vecY, unitInc);
    Allocation expected = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, 1));
    expected.copyFrom(BLASData.L2_zHEMV_o_N);
    mVerify.invoke_verify(expected, vecY, expected);

    // Increments of 2, using freshly sized vectors.
    final int wideInc = 2;
    final int lenX = 1 + (BLASData.dN - 1) * wideInc;
    final int lenY = 1 + (BLASData.dN - 1) * wideInc;
    Allocation stridedX = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), lenX, 1));
    Allocation stridedY = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), lenY, 1));
    stridedX.copyFrom(BLASData.L2_zHEMV_x_n2);
    stridedY.copyFrom(BLASData.L2_zHEMV_y_n2);
    mBLAS.ZHEMV(triangle, alphaZ, matA, stridedX, wideInc, betaZ, stridedY, wideInc);
    Allocation expectedStrided = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), lenY, 1));
    expectedStrided.copyFrom(BLASData.L2_zHEMV_o_N2);
    mVerify.invoke_verify(expectedStrided, stridedY, expectedStrided);

    mRS.finish();
    checkError();
}
/**
 * Pushes every (A, x, y) triple drawn from {@code mMatrix} through
 * CHBMV/ZHBMV with the given arguments. Candidates for x and y that fail
 * {@code validateVecInput} are skipped. When {@code validateSYR2} accepts the
 * arguments and {@code K >= 0}, the call matching A's element type must not
 * throw; otherwise both CHBMV and ZHBMV must throw RSRuntimeException.
 */
private void xHBMV_API_test(int Uplo, int K, int incX, int incY, ArrayList<Allocation> mMatrix) {
    for (Allocation a : mMatrix) {
        final Element elemA = a.getType().getElement();
        for (Allocation x : mMatrix) {
            if (validateVecInput(x)) {
                for (Allocation y : mMatrix) {
                    if (validateVecInput(y)) {
                        final boolean argsAccepted =
                                validateSYR2(elemA, Uplo, x, incX, y, incY, a) && K >= 0;
                        if (!argsAccepted) {
                            // Invalid combination: both precisions must reject it.
                            try {
                                mBLAS.CHBMV(Uplo, K, alphaC, a, x, incX, betaC, y, incY);
                                fail("should throw RSRuntimeException for CHBMV");
                            } catch (RSRuntimeException expected) {
                                // expected
                            }
                            try {
                                mBLAS.ZHBMV(Uplo, K, alphaZ, a, x, incX, betaZ, y, incY);
                                fail("should throw RSRuntimeException for ZHBMV");
                            } catch (RSRuntimeException expected) {
                                // expected
                            }
                        } else {
                            // Valid combination: only the call matching A's element type is made.
                            try {
                                if (elemA.isCompatible(Element.F32_2(mRS))) {
                                    mBLAS.CHBMV(Uplo, K, alphaC, a, x, incX, betaC, y, incY);
                                } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                                    mBLAS.ZHBMV(Uplo, K, alphaZ, a, x, incX, betaZ, y, incY);
                                }
                            } catch (RSRuntimeException unexpected) {
                                fail("should NOT throw RSRuntimeException");
                            }
                        }
                    }
                }
            }
        }
    }
}
/**
 * Calls xHBMV_API_test for every combination of the mUplo, mK and mInc
 * candidates. The same increment is passed as both incX and incY.
 */
public void L2_xHBMV_API(ArrayList<Allocation> mMatrix) {
    for (int u = 0; u < mUplo.length; u++) {
        for (int k = 0; k < mK.length; k++) {
            for (int i = 0; i < mInc.length; i++) {
                final int inc = mInc[i];
                xHBMV_API_test(mUplo[u], mK[k], inc, inc, mMatrix);
            }
        }
    }
}
/** Runs the xHBMV API argument sweep (L2_xHBMV_API) over the mMatrixC allocations. */
public void test_L2_CHBMV_API() {
L2_xHBMV_API(mMatrixC);
}
/** Runs the xHBMV API argument sweep (L2_xHBMV_API) over the mMatrixZ allocations. */
public void test_L2_ZHBMV_API() {
L2_xHBMV_API(mMatrixZ);
}
/**
 * Runs CHBMV (uplo = UPPER, K = BLASData.KL) on the BLASData fixtures and
 * hands the result, together with the reference output, to
 * mVerify.invoke_verify. Covered cases: incX = incY = 1, then
 * incX = incY = 2.
 */
public void test_L2_CHBMV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int n = BLASData.dN;
    final Element elem = Element.F32_2(mRS);

    // A is allocated n x n, but only a (KL + 1)-wide, n-high region at the
    // origin is filled from the fixture.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, n));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    matA.copy2DRangeFrom(0, 0, BLASData.KL + 1, n, BLASData.L2_cHBMV_A_nn);
    vecX.copyFrom(BLASData.L2_cHBMV_x_n1);
    vecY.copyFrom(BLASData.L2_cHBMV_y_n1);

    // Unit-stride case.
    mBLAS.CHBMV(uplo, BLASData.KL, alphaC, matA, vecX, 1, betaC, vecY, 1);
    Allocation expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    expected.copyFrom(BLASData.L2_cHBMV_o_N);
    mVerify.invoke_verify(expected, vecY, expected);

    // Strided case: the vectors grow to 1 + (n - 1) * stride elements.
    final int strideX = 2;
    final int strideY = 2;
    final int lenX = 1 + (n - 1) * strideX;
    final int lenY = 1 + (n - 1) * strideY;
    vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenX, 1));
    vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    vecX.copyFrom(BLASData.L2_cHBMV_x_n2);
    vecY.copyFrom(BLASData.L2_cHBMV_y_n2);
    mBLAS.CHBMV(uplo, BLASData.KL, alphaC, matA, vecX, strideX, betaC, vecY, strideY);
    expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    expected.copyFrom(BLASData.L2_cHBMV_o_N2);
    mVerify.invoke_verify(expected, vecY, expected);

    mRS.finish();
    checkError();
}
/**
 * Runs ZHBMV (uplo = UPPER, K = BLASData.KL) on the BLASData fixtures and
 * hands the result, together with the reference output, to
 * mVerify.invoke_verify. Covered cases: incX = incY = 1, then
 * incX = incY = 2.
 */
public void test_L2_ZHBMV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int n = BLASData.dN;
    final Element elem = Element.F64_2(mRS);

    // A is allocated n x n, but only a (KL + 1)-wide, n-high region at the
    // origin is filled from the fixture.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, n));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    matA.copy2DRangeFrom(0, 0, BLASData.KL + 1, n, BLASData.L2_zHBMV_A_nn);
    vecX.copyFrom(BLASData.L2_zHBMV_x_n1);
    vecY.copyFrom(BLASData.L2_zHBMV_y_n1);

    // Unit-stride case.
    mBLAS.ZHBMV(uplo, BLASData.KL, alphaZ, matA, vecX, 1, betaZ, vecY, 1);
    Allocation expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    expected.copyFrom(BLASData.L2_zHBMV_o_N);
    mVerify.invoke_verify(expected, vecY, expected);

    // Strided case: the vectors grow to 1 + (n - 1) * stride elements.
    final int strideX = 2;
    final int strideY = 2;
    final int lenX = 1 + (n - 1) * strideX;
    final int lenY = 1 + (n - 1) * strideY;
    vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenX, 1));
    vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    vecX.copyFrom(BLASData.L2_zHBMV_x_n2);
    vecY.copyFrom(BLASData.L2_zHBMV_y_n2);
    mBLAS.ZHBMV(uplo, BLASData.KL, alphaZ, matA, vecX, strideX, betaZ, vecY, strideY);
    expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    expected.copyFrom(BLASData.L2_zHBMV_o_N2);
    mVerify.invoke_verify(expected, vecY, expected);

    mRS.finish();
    checkError();
}
/**
 * Pushes every (Ap, x, y) triple drawn from {@code mMatrix} through
 * CHPMV/ZHPMV with the given arguments. Candidates for x and y that fail
 * {@code validateVecInput} are skipped. When {@code validateSPR2} accepts the
 * arguments, the call matching Ap's element type must not throw; otherwise
 * both CHPMV and ZHPMV must throw RSRuntimeException.
 */
private void xHPMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
    for (Allocation ap : mMatrix) {
        final Element elemA = ap.getType().getElement();
        for (Allocation x : mMatrix) {
            if (!validateVecInput(x)) {
                continue;
            }
            for (Allocation y : mMatrix) {
                if (!validateVecInput(y)) {
                    continue;
                }
                if (!validateSPR2(elemA, Uplo, x, incX, y, incY, ap)) {
                    // Invalid combination: both precisions must reject it.
                    try {
                        mBLAS.CHPMV(Uplo, alphaC, ap, x, incX, betaC, y, incY);
                        fail("should throw RSRuntimeException for CHPMV");
                    } catch (RSRuntimeException expected) {
                        // expected
                    }
                    try {
                        mBLAS.ZHPMV(Uplo, alphaZ, ap, x, incX, betaZ, y, incY);
                        fail("should throw RSRuntimeException for ZHPMV");
                    } catch (RSRuntimeException expected) {
                        // expected
                    }
                    continue;
                }
                // Valid combination: only the call matching Ap's element type is made.
                try {
                    if (elemA.isCompatible(Element.F32_2(mRS))) {
                        mBLAS.CHPMV(Uplo, alphaC, ap, x, incX, betaC, y, incY);
                    } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                        mBLAS.ZHPMV(Uplo, alphaZ, ap, x, incX, betaZ, y, incY);
                    }
                } catch (RSRuntimeException unexpected) {
                    fail("should NOT throw RSRuntimeException");
                }
            }
        }
    }
}
/**
 * Calls xHPMV_API_test for every combination of the mUplo and mInc
 * candidates. The same increment is passed as both incX and incY.
 */
public void L2_xHPMV_API(ArrayList<Allocation> mMatrix) {
    for (int u = 0; u < mUplo.length; u++) {
        for (int i = 0; i < mInc.length; i++) {
            final int inc = mInc[i];
            xHPMV_API_test(mUplo[u], inc, inc, mMatrix);
        }
    }
}
/** Runs the xHPMV API argument sweep (L2_xHPMV_API) over the mMatrixC allocations. */
public void test_L2_CHPMV_API() {
L2_xHPMV_API(mMatrixC);
}
/** Runs the xHPMV API argument sweep (L2_xHPMV_API) over the mMatrixZ allocations. */
public void test_L2_ZHPMV_API() {
L2_xHPMV_API(mMatrixZ);
}
/**
 * Runs CHPMV (uplo = UPPER) and hands the result, together with the
 * reference output, to mVerify.invoke_verify. The matrix is a single row of
 * N * (N + 1) / 2 elements loaded from L2_cHEMV_A_nn_pu (presumably the
 * packed upper triangle of the HEMV matrix -- confirm against BLASData); the
 * vectors and expected outputs reuse the HEMV fixtures. Covered cases:
 * incX = incY = 1, then incX = incY = 2.
 */
public void test_L2_CHPMV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int order = BLASData.dN;
    final int packedLen = order * (order + 1) / 2;
    final Element elem = Element.F32_2(mRS);

    // Unit-stride case.
    Allocation packedA = Allocation.createTyped(mRS, Type.createXY(mRS, elem, packedLen, 1));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, order, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, order, 1));
    packedA.copyFrom(BLASData.L2_cHEMV_A_nn_pu);
    vecX.copyFrom(BLASData.L2_cHEMV_x_n1);
    vecY.copyFrom(BLASData.L2_cHEMV_y_n1);
    mBLAS.CHPMV(uplo, alphaC, packedA, vecX, 1, betaC, vecY, 1);

    Allocation expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, order, 1));
    expected.copyFrom(BLASData.L2_cHEMV_o_N);
    mVerify.invoke_verify(expected, vecY, expected);

    // Strided case: the vectors grow to 1 + (order - 1) * stride elements.
    final int strideX = 2;
    final int strideY = 2;
    final int lenX = 1 + (order - 1) * strideX;
    final int lenY = 1 + (order - 1) * strideY;
    vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenX, 1));
    vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    vecX.copyFrom(BLASData.L2_cHEMV_x_n2);
    vecY.copyFrom(BLASData.L2_cHEMV_y_n2);
    mBLAS.CHPMV(uplo, alphaC, packedA, vecX, strideX, betaC, vecY, strideY);

    expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    expected.copyFrom(BLASData.L2_cHEMV_o_N2);
    mVerify.invoke_verify(expected, vecY, expected);

    mRS.finish();
    checkError();
}
/**
 * Runs ZHPMV (uplo = UPPER) and hands the result, together with the
 * reference output, to mVerify.invoke_verify. The matrix is a single row of
 * N * (N + 1) / 2 elements loaded from L2_zHEMV_A_nn_pu (presumably the
 * packed upper triangle of the HEMV matrix -- confirm against BLASData); the
 * vectors and expected outputs reuse the HEMV fixtures. Covered cases:
 * incX = incY = 1, then incX = incY = 2.
 */
public void test_L2_ZHPMV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int order = BLASData.dN;
    final int packedLen = order * (order + 1) / 2;
    final Element elem = Element.F64_2(mRS);

    // Unit-stride case.
    Allocation packedA = Allocation.createTyped(mRS, Type.createXY(mRS, elem, packedLen, 1));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, order, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, order, 1));
    packedA.copyFrom(BLASData.L2_zHEMV_A_nn_pu);
    vecX.copyFrom(BLASData.L2_zHEMV_x_n1);
    vecY.copyFrom(BLASData.L2_zHEMV_y_n1);
    mBLAS.ZHPMV(uplo, alphaZ, packedA, vecX, 1, betaZ, vecY, 1);

    Allocation expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, order, 1));
    expected.copyFrom(BLASData.L2_zHEMV_o_N);
    mVerify.invoke_verify(expected, vecY, expected);

    // Strided case: the vectors grow to 1 + (order - 1) * stride elements.
    final int strideX = 2;
    final int strideY = 2;
    final int lenX = 1 + (order - 1) * strideX;
    final int lenY = 1 + (order - 1) * strideY;
    vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenX, 1));
    vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    vecX.copyFrom(BLASData.L2_zHEMV_x_n2);
    vecY.copyFrom(BLASData.L2_zHEMV_y_n2);
    mBLAS.ZHPMV(uplo, alphaZ, packedA, vecX, strideX, betaZ, vecY, strideY);

    expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    expected.copyFrom(BLASData.L2_zHEMV_o_N2);
    mVerify.invoke_verify(expected, vecY, expected);

    mRS.finish();
    checkError();
}
/**
 * Test-side check of the arguments for an xSYMV-style call.
 *
 * @return true only if Uplo passes validateUplo, A is square (N x N), the
 *         elements of A, X and Y are all compatible with {@code e}, X and Y
 *         have a Y extent of at most 1, both increments are positive, and
 *         the X extents of X and Y equal 1 + (N - 1) * incX and
 *         1 + (N - 1) * incY respectively
 */
private boolean validateSYMV(Element e, int Uplo, Allocation A, Allocation X, int incX, Allocation Y, int incY) {
    if (!validateUplo(Uplo)) {
        return false;
    }

    // A must be square; its order fixes the expected vector lengths.
    final int order = A.getType().getY();
    final boolean square = A.getType().getX() == order;
    if (!square) {
        return false;
    }

    final boolean elementsMatch = A.getType().getElement().isCompatible(e)
            && X.getType().getElement().isCompatible(e)
            && Y.getType().getElement().isCompatible(e);
    if (!elementsMatch) {
        return false;
    }

    final boolean bothAreVectors = X.getType().getY() <= 1 && Y.getType().getY() <= 1;
    if (!bothAreVectors) {
        return false;
    }

    final boolean stridesPositive = incX > 0 && incY > 0;
    if (!stridesPositive) {
        return false;
    }

    return X.getType().getX() == 1 + (order - 1) * incX
            && Y.getType().getX() == 1 + (order - 1) * incY;
}
/**
 * Pushes every (A, x, y) triple drawn from {@code mMatrix} through
 * SSYMV/DSYMV with the given arguments. Candidates for x and y that fail
 * {@code validateVecInput} are skipped. When {@code validateSYMV} accepts the
 * arguments, the call matching A's element type must not throw; otherwise
 * both SSYMV and DSYMV must throw RSRuntimeException.
 */
private void xSYMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
    for (Allocation a : mMatrix) {
        final Element elemA = a.getType().getElement();
        for (Allocation x : mMatrix) {
            if (validateVecInput(x)) {
                for (Allocation y : mMatrix) {
                    if (validateVecInput(y)) {
                        final boolean argsAccepted = validateSYMV(elemA, Uplo, a, x, incX, y, incY);
                        if (!argsAccepted) {
                            // Invalid combination: both precisions must reject it.
                            try {
                                mBLAS.SSYMV(Uplo, alphaS, a, x, incX, betaS, y, incY);
                                fail("should throw RSRuntimeException for SSYMV");
                            } catch (RSRuntimeException expected) {
                                // expected
                            }
                            try {
                                mBLAS.DSYMV(Uplo, alphaD, a, x, incX, betaD, y, incY);
                                fail("should throw RSRuntimeException for DSYMV");
                            } catch (RSRuntimeException expected) {
                                // expected
                            }
                        } else {
                            // Valid combination: only the call matching A's element type is made.
                            try {
                                if (elemA.isCompatible(Element.F32(mRS))) {
                                    mBLAS.SSYMV(Uplo, alphaS, a, x, incX, betaS, y, incY);
                                } else if (elemA.isCompatible(Element.F64(mRS))) {
                                    mBLAS.DSYMV(Uplo, alphaD, a, x, incX, betaD, y, incY);
                                }
                            } catch (RSRuntimeException unexpected) {
                                fail("should NOT throw RSRuntimeException");
                            }
                        }
                    }
                }
            }
        }
    }
}
/**
 * Calls xSYMV_API_test for every combination of the mUplo and mInc
 * candidates. The same increment is passed as both incX and incY.
 */
public void L2_xSYMV_API(ArrayList<Allocation> mMatrix) {
    for (int u = 0; u < mUplo.length; u++) {
        for (int i = 0; i < mInc.length; i++) {
            final int inc = mInc[i];
            xSYMV_API_test(mUplo[u], inc, inc, mMatrix);
        }
    }
}
/** Runs the xSYMV API argument sweep (L2_xSYMV_API) over the mMatrixS allocations. */
public void test_L2_SSYMV_API() {
L2_xSYMV_API(mMatrixS);
}
/** Runs the xSYMV API argument sweep (L2_xSYMV_API) over the mMatrixD allocations. */
public void test_L2_DSYMV_API() {
L2_xSYMV_API(mMatrixD);
}
/**
 * Runs SSYMV (uplo = UPPER) on the BLASData fixtures and hands the result,
 * together with the reference output, to mVerify.invoke_verify. Covered
 * cases: incX = incY = 1, then incX = incY = 2.
 */
public void test_L2_SSYMV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int n = BLASData.dN;
    final Element elem = Element.F32(mRS);

    // Unit-stride case: A is n x n, x and y each hold n elements.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, n));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    matA.copyFrom(BLASData.L2_sSYMV_A_nn);
    vecX.copyFrom(BLASData.L2_sSYMV_x_n1);
    vecY.copyFrom(BLASData.L2_sSYMV_y_n1);
    mBLAS.SSYMV(uplo, alphaS, matA, vecX, 1, betaS, vecY, 1);

    Allocation expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    expected.copyFrom(BLASData.L2_sSYMV_o_N);
    mVerify.invoke_verify(expected, vecY, expected);

    // Strided case: the vectors grow to 1 + (n - 1) * stride elements.
    final int strideX = 2;
    final int strideY = 2;
    final int lenX = 1 + (n - 1) * strideX;
    final int lenY = 1 + (n - 1) * strideY;
    vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenX, 1));
    vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    vecX.copyFrom(BLASData.L2_sSYMV_x_n2);
    vecY.copyFrom(BLASData.L2_sSYMV_y_n2);
    mBLAS.SSYMV(uplo, alphaS, matA, vecX, strideX, betaS, vecY, strideY);

    expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    expected.copyFrom(BLASData.L2_sSYMV_o_N2);
    mVerify.invoke_verify(expected, vecY, expected);

    mRS.finish();
    checkError();
}
/**
 * Runs DSYMV (uplo = UPPER) on the BLASData fixtures and hands the result,
 * together with the reference output, to mVerify.invoke_verify. Covered
 * cases: incX = incY = 1, then incX = incY = 2.
 */
public void test_L2_DSYMV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int n = BLASData.dN;
    final Element elem = Element.F64(mRS);

    // Unit-stride case: A is n x n, x and y each hold n elements.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, n));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    matA.copyFrom(BLASData.L2_dSYMV_A_nn);
    vecX.copyFrom(BLASData.L2_dSYMV_x_n1);
    vecY.copyFrom(BLASData.L2_dSYMV_y_n1);
    mBLAS.DSYMV(uplo, alphaD, matA, vecX, 1, betaD, vecY, 1);

    Allocation expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    expected.copyFrom(BLASData.L2_dSYMV_o_N);
    mVerify.invoke_verify(expected, vecY, expected);

    // Strided case: the vectors grow to 1 + (n - 1) * stride elements.
    final int strideX = 2;
    final int strideY = 2;
    final int lenX = 1 + (n - 1) * strideX;
    final int lenY = 1 + (n - 1) * strideY;
    vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenX, 1));
    vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    vecX.copyFrom(BLASData.L2_dSYMV_x_n2);
    vecY.copyFrom(BLASData.L2_dSYMV_y_n2);
    mBLAS.DSYMV(uplo, alphaD, matA, vecX, strideX, betaD, vecY, strideY);

    expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    expected.copyFrom(BLASData.L2_dSYMV_o_N2);
    mVerify.invoke_verify(expected, vecY, expected);

    mRS.finish();
    checkError();
}
/**
 * Pushes every (A, x, y) triple drawn from {@code mMatrix} through
 * SSBMV/DSBMV with the given arguments. Candidates for x and y that fail
 * {@code validateVecInput} are skipped. When {@code validateSYMV} accepts the
 * arguments and {@code K >= 0}, the call matching A's element type must not
 * throw; otherwise both SSBMV and DSBMV must throw RSRuntimeException.
 */
private void xSBMV_API_test(int Uplo, int K, int incX, int incY, ArrayList<Allocation> mMatrix) {
    for (Allocation a : mMatrix) {
        final Element elemA = a.getType().getElement();
        for (Allocation x : mMatrix) {
            if (!validateVecInput(x)) {
                continue;
            }
            for (Allocation y : mMatrix) {
                if (!validateVecInput(y)) {
                    continue;
                }
                final boolean argsAccepted =
                        validateSYMV(elemA, Uplo, a, x, incX, y, incY) && K >= 0;
                if (!argsAccepted) {
                    // Invalid combination: both precisions must reject it.
                    try {
                        mBLAS.SSBMV(Uplo, K, alphaS, a, x, incX, betaS, y, incY);
                        fail("should throw RSRuntimeException for SSBMV");
                    } catch (RSRuntimeException expected) {
                        // expected
                    }
                    try {
                        mBLAS.DSBMV(Uplo, K, alphaD, a, x, incX, betaD, y, incY);
                        fail("should throw RSRuntimeException for DSBMV");
                    } catch (RSRuntimeException expected) {
                        // expected
                    }
                    continue;
                }
                // Valid combination: only the call matching A's element type is made.
                try {
                    if (elemA.isCompatible(Element.F32(mRS))) {
                        mBLAS.SSBMV(Uplo, K, alphaS, a, x, incX, betaS, y, incY);
                    } else if (elemA.isCompatible(Element.F64(mRS))) {
                        mBLAS.DSBMV(Uplo, K, alphaD, a, x, incX, betaD, y, incY);
                    }
                } catch (RSRuntimeException unexpected) {
                    fail("should NOT throw RSRuntimeException");
                }
            }
        }
    }
}
/**
 * Calls xSBMV_API_test for every combination of the mUplo, mK and mInc
 * candidates. The same increment is passed as both incX and incY.
 */
public void L2_xSBMV_API(ArrayList<Allocation> mMatrix) {
    for (int u = 0; u < mUplo.length; u++) {
        for (int k = 0; k < mK.length; k++) {
            for (int i = 0; i < mInc.length; i++) {
                final int inc = mInc[i];
                xSBMV_API_test(mUplo[u], mK[k], inc, inc, mMatrix);
            }
        }
    }
}
/** Runs the xSBMV API argument sweep (L2_xSBMV_API) over the mMatrixS allocations. */
public void test_L2_SSBMV_API() {
L2_xSBMV_API(mMatrixS);
}
/** Runs the xSBMV API argument sweep (L2_xSBMV_API) over the mMatrixD allocations. */
public void test_L2_DSBMV_API() {
L2_xSBMV_API(mMatrixD);
}
/**
 * Runs SSBMV (uplo = UPPER, K = BLASData.KL) on the BLASData fixtures and
 * hands the result, together with the reference output, to
 * mVerify.invoke_verify. Covered cases: incX = incY = 1, then
 * incX = incY = 2.
 */
public void test_L2_SSBMV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int n = BLASData.dN;
    final Element elem = Element.F32(mRS);

    // A is allocated n x n, but only a (KL + 1)-wide, n-high region at the
    // origin is filled from the fixture.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, n));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    matA.copy2DRangeFrom(0, 0, BLASData.KL + 1, n, BLASData.L2_sSBMV_A_nn);
    vecX.copyFrom(BLASData.L2_sSBMV_x_n1);
    vecY.copyFrom(BLASData.L2_sSBMV_y_n1);

    // Unit-stride case.
    mBLAS.SSBMV(uplo, BLASData.KL, alphaS, matA, vecX, 1, betaS, vecY, 1);
    Allocation expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    expected.copyFrom(BLASData.L2_sSBMV_o_N);
    mVerify.invoke_verify(expected, vecY, expected);

    // Strided case: the vectors grow to 1 + (n - 1) * stride elements.
    final int strideX = 2;
    final int strideY = 2;
    final int lenX = 1 + (n - 1) * strideX;
    final int lenY = 1 + (n - 1) * strideY;
    vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenX, 1));
    vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    vecX.copyFrom(BLASData.L2_sSBMV_x_n2);
    vecY.copyFrom(BLASData.L2_sSBMV_y_n2);
    mBLAS.SSBMV(uplo, BLASData.KL, alphaS, matA, vecX, strideX, betaS, vecY, strideY);
    expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    expected.copyFrom(BLASData.L2_sSBMV_o_N2);
    mVerify.invoke_verify(expected, vecY, expected);

    mRS.finish();
    checkError();
}
/**
 * Runs DSBMV (uplo = UPPER, K = BLASData.KL) on the BLASData fixtures and
 * hands the result, together with the reference output, to
 * mVerify.invoke_verify. Covered cases: incX = incY = 1, then
 * incX = incY = 2.
 */
public void test_L2_DSBMV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int n = BLASData.dN;
    final Element elem = Element.F64(mRS);

    // A is allocated n x n, but only a (KL + 1)-wide, n-high region at the
    // origin is filled from the fixture.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, n));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    matA.copy2DRangeFrom(0, 0, BLASData.KL + 1, n, BLASData.L2_dSBMV_A_nn);
    vecX.copyFrom(BLASData.L2_dSBMV_x_n1);
    vecY.copyFrom(BLASData.L2_dSBMV_y_n1);

    // Unit-stride case.
    mBLAS.DSBMV(uplo, BLASData.KL, alphaD, matA, vecX, 1, betaD, vecY, 1);
    Allocation expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    expected.copyFrom(BLASData.L2_dSBMV_o_N);
    mVerify.invoke_verify(expected, vecY, expected);

    // Strided case: the vectors grow to 1 + (n - 1) * stride elements.
    final int strideX = 2;
    final int strideY = 2;
    final int lenX = 1 + (n - 1) * strideX;
    final int lenY = 1 + (n - 1) * strideY;
    vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenX, 1));
    vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    vecX.copyFrom(BLASData.L2_dSBMV_x_n2);
    vecY.copyFrom(BLASData.L2_dSBMV_y_n2);
    mBLAS.DSBMV(uplo, BLASData.KL, alphaD, matA, vecX, strideX, betaD, vecY, strideY);
    expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    expected.copyFrom(BLASData.L2_dSBMV_o_N2);
    mVerify.invoke_verify(expected, vecY, expected);

    mRS.finish();
    checkError();
}
/**
 * Test-side check of the arguments for an xSPMV-style (packed matrix) call.
 *
 * @return true only if Uplo passes validateUplo, the elements of Ap, X and Y
 *         are all compatible with {@code e}, all three allocations have a Y
 *         extent of at most 1, the X extent of Ap equals N * (N + 1) / 2 for
 *         N = (int) sqrt(2 * Ap.x), both increments are positive, and the X
 *         extents of X and Y equal 1 + (N - 1) * incX and 1 + (N - 1) * incY
 *         respectively
 */
private boolean validateSPMV(Element e, int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) {
    if (!validateUplo(Uplo)) {
        return false;
    }

    final boolean elementsMatch = Ap.getType().getElement().isCompatible(e)
            && X.getType().getElement().isCompatible(e)
            && Y.getType().getElement().isCompatible(e);
    if (!elementsMatch) {
        return false;
    }

    final boolean bothAreVectors = X.getType().getY() <= 1 && Y.getType().getY() <= 1;
    if (!bothAreVectors) {
        return false;
    }
    if (Ap.getType().getY() > 1) {
        return false;
    }

    // Recover the order N from the packed length and confirm the length is
    // exactly the N-th triangular number.
    final int order = (int) Math.sqrt((double) Ap.getType().getX() * 2);
    final int triangular = (order * (order + 1)) / 2;
    if (Ap.getType().getX() != triangular) {
        return false;
    }

    final boolean stridesPositive = incX > 0 && incY > 0;
    if (!stridesPositive) {
        return false;
    }

    return X.getType().getX() == 1 + (order - 1) * incX
            && Y.getType().getX() == 1 + (order - 1) * incY;
}
/**
 * Pushes every (Ap, x, y) triple drawn from {@code mMatrix} through
 * SSPMV/DSPMV with the given arguments. Candidates for x and y that fail
 * {@code validateVecInput} are skipped. When {@code validateSPMV} accepts the
 * arguments, the call matching Ap's element type must not throw; otherwise
 * both SSPMV and DSPMV must throw RSRuntimeException.
 */
private void xSPMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
    for (Allocation ap : mMatrix) {
        final Element elemA = ap.getType().getElement();
        for (Allocation x : mMatrix) {
            if (validateVecInput(x)) {
                for (Allocation y : mMatrix) {
                    if (validateVecInput(y)) {
                        final boolean argsAccepted = validateSPMV(elemA, Uplo, ap, x, incX, y, incY);
                        if (!argsAccepted) {
                            // Invalid combination: both precisions must reject it.
                            try {
                                mBLAS.SSPMV(Uplo, alphaS, ap, x, incX, betaS, y, incY);
                                fail("should throw RSRuntimeException for SSPMV");
                            } catch (RSRuntimeException expected) {
                                // expected
                            }
                            try {
                                mBLAS.DSPMV(Uplo, alphaD, ap, x, incX, betaD, y, incY);
                                fail("should throw RSRuntimeException for DSPMV");
                            } catch (RSRuntimeException expected) {
                                // expected
                            }
                        } else {
                            // Valid combination: only the call matching Ap's element type is made.
                            try {
                                if (elemA.isCompatible(Element.F32(mRS))) {
                                    mBLAS.SSPMV(Uplo, alphaS, ap, x, incX, betaS, y, incY);
                                } else if (elemA.isCompatible(Element.F64(mRS))) {
                                    mBLAS.DSPMV(Uplo, alphaD, ap, x, incX, betaD, y, incY);
                                }
                            } catch (RSRuntimeException unexpected) {
                                fail("should NOT throw RSRuntimeException");
                            }
                        }
                    }
                }
            }
        }
    }
}
/**
 * Calls xSPMV_API_test for every combination of the mUplo and mInc
 * candidates. The same increment is passed as both incX and incY.
 */
public void L2_xSPMV_API(ArrayList<Allocation> mMatrix) {
    for (int u = 0; u < mUplo.length; u++) {
        for (int i = 0; i < mInc.length; i++) {
            final int inc = mInc[i];
            xSPMV_API_test(mUplo[u], inc, inc, mMatrix);
        }
    }
}
/** Runs the xSPMV API argument sweep (L2_xSPMV_API) over the mMatrixS allocations. */
public void test_L2_SSPMV_API() {
L2_xSPMV_API(mMatrixS);
}
/** Runs the xSPMV API argument sweep (L2_xSPMV_API) over the mMatrixD allocations. */
public void test_L2_DSPMV_API() {
L2_xSPMV_API(mMatrixD);
}
/**
 * Runs SSPMV (uplo = UPPER) and hands the result, together with the
 * reference output, to mVerify.invoke_verify. The matrix is a single row of
 * N * (N + 1) / 2 elements loaded from L2_sSYMV_A_nn_pu (presumably the
 * packed upper triangle of the SYMV matrix -- confirm against BLASData); the
 * vectors and expected outputs reuse the SYMV fixtures. Covered cases:
 * incX = incY = 1, then incX = incY = 2.
 */
public void test_L2_SSPMV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int order = BLASData.dN;
    final int packedLen = order * (order + 1) / 2;
    final Element elem = Element.F32(mRS);

    // Unit-stride case.
    Allocation packedA = Allocation.createTyped(mRS, Type.createXY(mRS, elem, packedLen, 1));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, order, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, order, 1));
    packedA.copyFrom(BLASData.L2_sSYMV_A_nn_pu);
    vecX.copyFrom(BLASData.L2_sSYMV_x_n1);
    vecY.copyFrom(BLASData.L2_sSYMV_y_n1);
    mBLAS.SSPMV(uplo, alphaS, packedA, vecX, 1, betaS, vecY, 1);

    Allocation expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, order, 1));
    expected.copyFrom(BLASData.L2_sSYMV_o_N);
    mVerify.invoke_verify(expected, vecY, expected);

    // Strided case: the vectors grow to 1 + (order - 1) * stride elements.
    final int strideX = 2;
    final int strideY = 2;
    final int lenX = 1 + (order - 1) * strideX;
    final int lenY = 1 + (order - 1) * strideY;
    vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenX, 1));
    vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    vecX.copyFrom(BLASData.L2_sSYMV_x_n2);
    vecY.copyFrom(BLASData.L2_sSYMV_y_n2);
    mBLAS.SSPMV(uplo, alphaS, packedA, vecX, strideX, betaS, vecY, strideY);

    expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    expected.copyFrom(BLASData.L2_sSYMV_o_N2);
    mVerify.invoke_verify(expected, vecY, expected);

    mRS.finish();
    checkError();
}
/**
 * Runs DSPMV (uplo = UPPER) and hands the result, together with the
 * reference output, to mVerify.invoke_verify. The matrix is a single row of
 * N * (N + 1) / 2 elements loaded from L2_dSYMV_A_nn_pu (presumably the
 * packed upper triangle of the SYMV matrix -- confirm against BLASData); the
 * vectors and expected outputs reuse the SYMV fixtures. Covered cases:
 * incX = incY = 1, then incX = incY = 2.
 */
public void test_L2_DSPMV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int order = BLASData.dN;
    final int packedLen = order * (order + 1) / 2;
    final Element elem = Element.F64(mRS);

    // Unit-stride case.
    Allocation packedA = Allocation.createTyped(mRS, Type.createXY(mRS, elem, packedLen, 1));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, order, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, order, 1));
    packedA.copyFrom(BLASData.L2_dSYMV_A_nn_pu);
    vecX.copyFrom(BLASData.L2_dSYMV_x_n1);
    vecY.copyFrom(BLASData.L2_dSYMV_y_n1);
    mBLAS.DSPMV(uplo, alphaD, packedA, vecX, 1, betaD, vecY, 1);

    Allocation expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, order, 1));
    expected.copyFrom(BLASData.L2_dSYMV_o_N);
    mVerify.invoke_verify(expected, vecY, expected);

    // Strided case: the vectors grow to 1 + (order - 1) * stride elements.
    final int strideX = 2;
    final int strideY = 2;
    final int lenX = 1 + (order - 1) * strideX;
    final int lenY = 1 + (order - 1) * strideY;
    vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenX, 1));
    vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    vecX.copyFrom(BLASData.L2_dSYMV_x_n2);
    vecY.copyFrom(BLASData.L2_dSYMV_y_n2);
    mBLAS.DSPMV(uplo, alphaD, packedA, vecX, strideX, betaD, vecY, strideY);

    expected = Allocation.createTyped(mRS, Type.createXY(mRS, elem, lenY, 1));
    expected.copyFrom(BLASData.L2_dSYMV_o_N2);
    mVerify.invoke_verify(expected, vecY, expected);

    mRS.finish();
    checkError();
}
/**
 * Test-side check of the arguments for an xTRMV-style call.
 *
 * @return true only if Uplo, TransA and Diag pass validateUplo,
 *         validateTranspose and validateDiag, A is square (N x N), the
 *         elements of A and X are compatible with {@code e}, X has a Y
 *         extent of at most 1, incX is positive, and the X extent of X
 *         equals 1 + (N - 1) * incX
 */
private boolean validateTRMV(Element e, int Uplo, int TransA, int Diag, Allocation A, Allocation X, int incX) {
    final boolean flagsValid =
            validateUplo(Uplo) && validateTranspose(TransA) && validateDiag(Diag);
    if (!flagsValid) {
        return false;
    }

    // A must be square; its order fixes the expected length of X.
    final int order = A.getType().getY();
    final boolean square = A.getType().getX() == order;
    if (!square) {
        return false;
    }

    final boolean elementsMatch = A.getType().getElement().isCompatible(e)
            && X.getType().getElement().isCompatible(e);
    if (!elementsMatch) {
        return false;
    }

    if (X.getType().getY() > 1) {
        return false;
    }
    if (incX <= 0) {
        return false;
    }

    return X.getType().getX() == 1 + (order - 1) * incX;
}
/**
 * Pushes every (A, x) pair drawn from {@code mMatrix} through the xTRMV
 * calls with the given arguments. Candidates for x that fail
 * {@code validateVecInput} are skipped. When {@code validateTRMV} accepts the
 * arguments, the call matching A's element type must not throw; otherwise
 * STRMV, DTRMV, CTRMV and ZTRMV must all throw RSRuntimeException.
 */
private void xTRMV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) {
    for (Allocation a : mMatrix) {
        final Element elemA = a.getType().getElement();
        for (Allocation x : mMatrix) {
            if (!validateVecInput(x)) {
                continue;
            }
            if (!validateTRMV(elemA, Uplo, TransA, Diag, a, x, incX)) {
                // Invalid combination: every precision must reject it.
                try {
                    mBLAS.STRMV(Uplo, TransA, Diag, a, x, incX);
                    fail("should throw RSRuntimeException for STRMV");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                try {
                    mBLAS.DTRMV(Uplo, TransA, Diag, a, x, incX);
                    fail("should throw RSRuntimeException for DTRMV");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                try {
                    mBLAS.CTRMV(Uplo, TransA, Diag, a, x, incX);
                    fail("should throw RSRuntimeException for CTRMV");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                try {
                    mBLAS.ZTRMV(Uplo, TransA, Diag, a, x, incX);
                    fail("should throw RSRuntimeException for ZTRMV");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                continue;
            }
            // Valid combination: only the call matching A's element type is made.
            try {
                if (elemA.isCompatible(Element.F32(mRS))) {
                    mBLAS.STRMV(Uplo, TransA, Diag, a, x, incX);
                } else if (elemA.isCompatible(Element.F64(mRS))) {
                    mBLAS.DTRMV(Uplo, TransA, Diag, a, x, incX);
                } else if (elemA.isCompatible(Element.F32_2(mRS))) {
                    mBLAS.CTRMV(Uplo, TransA, Diag, a, x, incX);
                } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                    mBLAS.ZTRMV(Uplo, TransA, Diag, a, x, incX);
                }
            } catch (RSRuntimeException unexpected) {
                fail("should NOT throw RSRuntimeException");
            }
        }
    }
}
/**
 * Calls xTRMV_API_test for every combination of the mUplo, mTranspose,
 * mDiag and mInc candidates.
 */
public void L2_xTRMV_API(ArrayList<Allocation> mMatrix) {
    for (int u = 0; u < mUplo.length; u++) {
        for (int t = 0; t < mTranspose.length; t++) {
            for (int d = 0; d < mDiag.length; d++) {
                for (int i = 0; i < mInc.length; i++) {
                    xTRMV_API_test(mUplo[u], mTranspose[t], mDiag[d], mInc[i], mMatrix);
                }
            }
        }
    }
}
/** Runs the xTRMV API argument sweep (L2_xTRMV_API) over the mMatrixS allocations. */
public void test_L2_STRMV_API() {
L2_xTRMV_API(mMatrixS);
}
/** Runs the xTRMV API argument sweep (L2_xTRMV_API) over the mMatrixD allocations. */
public void test_L2_DTRMV_API() {
L2_xTRMV_API(mMatrixD);
}
/** Runs the xTRMV API argument sweep (L2_xTRMV_API) over the mMatrixC allocations. */
public void test_L2_CTRMV_API() {
L2_xTRMV_API(mMatrixC);
}
/** Runs the xTRMV API argument sweep (L2_xTRMV_API) over the mMatrixZ allocations. */
public void test_L2_ZTRMV_API() {
L2_xTRMV_API(mMatrixZ);
}
/**
 * Runs STRMV with uplo = UPPER and diag = NON_UNIT on the BLASData fixtures
 * and hands each result, together with its reference output, to
 * mVerify.invoke_verify. Covered cases: NO_TRANSPOSE, TRANSPOSE and
 * CONJ_TRANSPOSE with incX = 1, then NO_TRANSPOSE with incX = 2.
 * TRMV has no Y vector, so there is no incY to exercise.
 */
public void test_L2_STRMV_Correctness() {
    int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int diag = ScriptIntrinsicBLAS.NON_UNIT;
    int incX = 1;

    // Populate the input allocations.
    Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dN));
    Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, 1));
    matrixAS.copyFrom(BLASData.L2_sTRMV_A_nn);
    vectorXS.copyFrom(BLASData.L2_sTRMV_x_n1);

    // Default case: NO_TRANSPOSE. The result is written into X.
    mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX);
    Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, 1));
    vectorXRef.copyFrom(BLASData.L2_sTRMV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // TRANSPOSE: reload X first, since the previous call overwrote it.
    trans = ScriptIntrinsicBLAS.TRANSPOSE;
    vectorXS.copyFrom(BLASData.L2_sTRMV_x_n1);
    mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX);
    vectorXRef.copyFrom(BLASData.L2_sTRMV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // CONJ_TRANSPOSE: reload X again for the same reason.
    trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
    vectorXS.copyFrom(BLASData.L2_sTRMV_x_n1);
    mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX);
    vectorXRef.copyFrom(BLASData.L2_sTRMV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // Strided case: incX = 2, so X holds 1 + (dN - 1) * incX elements.
    trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    incX = 2;
    int dimX = 1 + (BLASData.dN - 1) * incX;
    vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
    vectorXS.copyFrom(BLASData.L2_sTRMV_x_n2);
    mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX);
    vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
    vectorXRef.copyFrom(BLASData.L2_sTRMV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Runs DTRMV with uplo = UPPER and diag = NON_UNIT on the BLASData fixtures
 * and hands each result, together with its reference output, to
 * mVerify.invoke_verify. Covered cases: NO_TRANSPOSE, TRANSPOSE and
 * CONJ_TRANSPOSE with incX = 1, then NO_TRANSPOSE with incX = 2.
 * TRMV has no Y vector, so there is no incY to exercise.
 */
public void test_L2_DTRMV_Correctness() {
    int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int diag = ScriptIntrinsicBLAS.NON_UNIT;
    int incX = 1;

    // Populate the input allocations.
    Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dN));
    Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, 1));
    matrixAD.copyFrom(BLASData.L2_dTRMV_A_nn);
    vectorXD.copyFrom(BLASData.L2_dTRMV_x_n1);

    // Default case: NO_TRANSPOSE. The result is written into X.
    mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX);
    Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, 1));
    vectorXRef.copyFrom(BLASData.L2_dTRMV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // TRANSPOSE: reload X first, since the previous call overwrote it.
    trans = ScriptIntrinsicBLAS.TRANSPOSE;
    vectorXD.copyFrom(BLASData.L2_dTRMV_x_n1);
    mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX);
    vectorXRef.copyFrom(BLASData.L2_dTRMV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // CONJ_TRANSPOSE: reload X again for the same reason.
    trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
    vectorXD.copyFrom(BLASData.L2_dTRMV_x_n1);
    mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX);
    vectorXRef.copyFrom(BLASData.L2_dTRMV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // Strided case: incX = 2, so X holds 1 + (dN - 1) * incX elements.
    trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    incX = 2;
    int dimX = 1 + (BLASData.dN - 1) * incX;
    vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
    vectorXD.copyFrom(BLASData.L2_dTRMV_x_n2);
    mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX);
    vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
    vectorXRef.copyFrom(BLASData.L2_dTRMV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Runs CTRMV with uplo = UPPER and diag = NON_UNIT on the BLASData fixtures
 * and hands each result, together with its reference output, to
 * mVerify.invoke_verify. Covered cases: NO_TRANSPOSE, TRANSPOSE and
 * CONJ_TRANSPOSE with incX = 1, then NO_TRANSPOSE with incX = 2.
 * TRMV has no Y vector, so there is no incY to exercise.
 */
public void test_L2_CTRMV_Correctness() {
    int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int diag = ScriptIntrinsicBLAS.NON_UNIT;
    int incX = 1;

    // Populate the input allocations.
    Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN));
    Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, 1));
    matrixAC.copyFrom(BLASData.L2_cTRMV_A_nn);
    vectorXC.copyFrom(BLASData.L2_cTRMV_x_n1);

    // Default case: NO_TRANSPOSE. The result is written into X.
    mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX);
    Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, 1));
    vectorXRef.copyFrom(BLASData.L2_cTRMV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // TRANSPOSE: reload X first, since the previous call overwrote it.
    trans = ScriptIntrinsicBLAS.TRANSPOSE;
    vectorXC.copyFrom(BLASData.L2_cTRMV_x_n1);
    mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX);
    vectorXRef.copyFrom(BLASData.L2_cTRMV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // CONJ_TRANSPOSE: reload X again for the same reason.
    trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
    vectorXC.copyFrom(BLASData.L2_cTRMV_x_n1);
    mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX);
    vectorXRef.copyFrom(BLASData.L2_cTRMV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // Strided case: incX = 2, so X holds 1 + (dN - 1) * incX elements.
    trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    incX = 2;
    int dimX = 1 + (BLASData.dN - 1) * incX;
    vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
    vectorXC.copyFrom(BLASData.L2_cTRMV_x_n2);
    mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX);
    vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
    vectorXRef.copyFrom(BLASData.L2_cTRMV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Compares ZTRMV results with the reference vectors stored in BLASData.
 *
 * The matrix is always treated as UPPER / NON_UNIT. NO_TRANSPOSE, TRANSPOSE and
 * CONJ_TRANSPOSE are exercised with incX = 1, then NO_TRANSPOSE again with incX = 2.
 */
public void test_L2_ZTRMV_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int diag = ScriptIntrinsicBLAS.NON_UNIT;
    int incX = 1;

    // Populate the input allocations.
    Allocation matrixAZ = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dN));
    Allocation vectorXZ = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, 1));
    matrixAZ.copyFrom(BLASData.L2_zTRMV_A_nn);
    vectorXZ.copyFrom(BLASData.L2_zTRMV_x_n1);

    // Default case: NO_TRANSPOSE.
    mBLAS.ZTRMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAZ, vectorXZ, incX);
    Allocation vectorXRef = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, 1));
    vectorXRef.copyFrom(BLASData.L2_zTRMV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous BLAS call overwrote it.
    vectorXZ.copyFrom(BLASData.L2_zTRMV_x_n1);
    mBLAS.ZTRMV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, matrixAZ, vectorXZ, incX);
    vectorXRef.copyFrom(BLASData.L2_zTRMV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // CONJ_TRANSPOSE.
    vectorXZ.copyFrom(BLASData.L2_zTRMV_x_n1);
    mBLAS.ZTRMV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, matrixAZ, vectorXZ, incX);
    vectorXRef.copyFrom(BLASData.L2_zTRMV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // Strided case: NO_TRANSPOSE with incX = 2, which needs longer X and reference vectors.
    incX = 2;
    int dimX = 1 + (BLASData.dN - 1) * incX;
    vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
    vectorXZ.copyFrom(BLASData.L2_zTRMV_x_n2);
    mBLAS.ZTRMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAZ, vectorXZ, incX);
    vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
    vectorXRef.copyFrom(BLASData.L2_zTRMV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Calls the xTBMV variants on every (matA, vecX) pair drawn from mMatrix and checks
 * whether an RSRuntimeException is raised.
 *
 * A pair that passes validateTRMV with K >= 0 must be accepted by the variant that
 * matches matA's element type. For any other pair, all four variants must throw.
 * Candidate X allocations rejected by validateVecInput are skipped, matching the
 * filtering done by the xTPMV_API_test and xTRSV_API_test helpers.
 *
 * @param Uplo    uplo argument forwarded to the BLAS calls
 * @param TransA  transpose argument forwarded to the BLAS calls
 * @param Diag    diag argument forwarded to the BLAS calls
 * @param K       K argument forwarded to the BLAS calls; negative values are expected to be rejected
 * @param incX    increment for X forwarded to the BLAS calls
 * @param mMatrix allocations used both as matrix A candidates and as vector X candidates
 */
private void xTBMV_API_test(int Uplo, int TransA, int Diag, int K, int incX, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            Element elemA = matA.getType().getElement();
            if (!validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX) || K < 0) {
                // Invalid combination: every variant has to reject it.
                try {
                    mBLAS.STBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
                    fail("should throw RSRuntimeException for STBMV");
                } catch (RSRuntimeException expected) {
                }
                try {
                    mBLAS.DTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
                    fail("should throw RSRuntimeException for DTBMV");
                } catch (RSRuntimeException expected) {
                }
                try {
                    mBLAS.CTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
                    fail("should throw RSRuntimeException for CTBMV");
                } catch (RSRuntimeException expected) {
                }
                try {
                    mBLAS.ZTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
                    fail("should throw RSRuntimeException for ZTBMV");
                } catch (RSRuntimeException expected) {
                }
                continue;
            }
            // Valid combination: only the variant matching the element type is invoked.
            try {
                if (elemA.isCompatible(Element.F32(mRS))) {
                    mBLAS.STBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
                } else if (elemA.isCompatible(Element.F64(mRS))) {
                    mBLAS.DTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
                } else if (elemA.isCompatible(Element.F32_2(mRS))) {
                    mBLAS.CTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
                } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                    mBLAS.ZTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
                }
            } catch (RSRuntimeException e) {
                fail("should NOT throw RSRuntimeException");
            }
        }
    }
}
/**
 * Sweeps xTBMV_API_test over every combination of the values held in
 * mUplo, mTranspose, mDiag, mK and mInc.
 *
 * @param mMatrix allocations handed through to xTBMV_API_test
 */
public void L2_xTBMV_API(ArrayList<Allocation> mMatrix) {
    for (int uploMode : mUplo) {
        for (int transMode : mTranspose) {
            for (int diagMode : mDiag) {
                for (int bandK : mK) {
                    for (int stride : mInc) {
                        xTBMV_API_test(uploMode, transMode, diagMode, bandK, stride, mMatrix);
                    }
                }
            }
        }
    }
}
// Runs the xTBMV API sweep (L2_xTBMV_API) over the allocations in mMatrixS.
public void test_L2_STBMV_API() {
L2_xTBMV_API(mMatrixS);
}
// Runs the xTBMV API sweep (L2_xTBMV_API) over the allocations in mMatrixD.
public void test_L2_DTBMV_API() {
L2_xTBMV_API(mMatrixD);
}
// Runs the xTBMV API sweep (L2_xTBMV_API) over the allocations in mMatrixC.
public void test_L2_CTBMV_API() {
L2_xTBMV_API(mMatrixC);
}
// Runs the xTBMV API sweep (L2_xTBMV_API) over the allocations in mMatrixZ.
public void test_L2_ZTBMV_API() {
L2_xTBMV_API(mMatrixZ);
}
/**
 * Compares STBMV results with the reference vectors stored in BLASData.
 *
 * The matrix allocation is dN x dN, but only a (KL + 1) x dN region starting at (0, 0)
 * is loaded from the test data. UPPER / NON_UNIT is used throughout; NO_TRANSPOSE,
 * TRANSPOSE and CONJ_TRANSPOSE run with incX = 1, then NO_TRANSPOSE again with incX = 2.
 */
public void test_L2_STBMV_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int diag = ScriptIntrinsicBLAS.NON_UNIT;
    int incX = 1;

    // Populate the input allocations.
    Allocation matrixAS = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dN));
    Allocation vectorXS = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32(mRS), BLASData.dN, 1));
    matrixAS.copy2DRangeFrom(0, 0, BLASData.KL + 1, BLASData.dN, BLASData.L2_sTBMV_A_nn);
    vectorXS.copyFrom(BLASData.L2_sTBMV_x_n1);

    // Default case: NO_TRANSPOSE.
    mBLAS.STBMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, BLASData.KL, matrixAS, vectorXS, incX);
    Allocation vectorXRef = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32(mRS), BLASData.dN, 1));
    vectorXRef.copyFrom(BLASData.L2_sTBMV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous BLAS call overwrote it.
    vectorXS.copyFrom(BLASData.L2_sTBMV_x_n1);
    mBLAS.STBMV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, BLASData.KL, matrixAS, vectorXS, incX);
    vectorXRef.copyFrom(BLASData.L2_sTBMV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // CONJ_TRANSPOSE.
    vectorXS.copyFrom(BLASData.L2_sTBMV_x_n1);
    mBLAS.STBMV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, BLASData.KL, matrixAS, vectorXS, incX);
    vectorXRef.copyFrom(BLASData.L2_sTBMV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // Strided case: NO_TRANSPOSE with incX = 2, which needs longer X and reference vectors.
    incX = 2;
    int dimX = 1 + (BLASData.dN - 1) * incX;
    vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
    vectorXS.copyFrom(BLASData.L2_sTBMV_x_n2);
    mBLAS.STBMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, BLASData.KL, matrixAS, vectorXS, incX);
    vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
    vectorXRef.copyFrom(BLASData.L2_sTBMV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Compares DTBMV results with the reference vectors stored in BLASData.
 *
 * The matrix allocation is dN x dN, but only a (KL + 1) x dN region starting at (0, 0)
 * is loaded from the test data. UPPER / NON_UNIT is used throughout; NO_TRANSPOSE,
 * TRANSPOSE and CONJ_TRANSPOSE run with incX = 1, then NO_TRANSPOSE again with incX = 2.
 */
public void test_L2_DTBMV_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int diag = ScriptIntrinsicBLAS.NON_UNIT;
    int incX = 1;

    // Populate the input allocations.
    Allocation matrixAD = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dN));
    Allocation vectorXD = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64(mRS), BLASData.dN, 1));
    matrixAD.copy2DRangeFrom(0, 0, BLASData.KL + 1, BLASData.dN, BLASData.L2_dTBMV_A_nn);
    vectorXD.copyFrom(BLASData.L2_dTBMV_x_n1);

    // Default case: NO_TRANSPOSE.
    mBLAS.DTBMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, BLASData.KL, matrixAD, vectorXD, incX);
    Allocation vectorXRef = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64(mRS), BLASData.dN, 1));
    vectorXRef.copyFrom(BLASData.L2_dTBMV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous BLAS call overwrote it.
    vectorXD.copyFrom(BLASData.L2_dTBMV_x_n1);
    mBLAS.DTBMV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, BLASData.KL, matrixAD, vectorXD, incX);
    vectorXRef.copyFrom(BLASData.L2_dTBMV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // CONJ_TRANSPOSE.
    vectorXD.copyFrom(BLASData.L2_dTBMV_x_n1);
    mBLAS.DTBMV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, BLASData.KL, matrixAD, vectorXD, incX);
    vectorXRef.copyFrom(BLASData.L2_dTBMV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // Strided case: NO_TRANSPOSE with incX = 2, which needs longer X and reference vectors.
    incX = 2;
    int dimX = 1 + (BLASData.dN - 1) * incX;
    vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
    vectorXD.copyFrom(BLASData.L2_dTBMV_x_n2);
    mBLAS.DTBMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, BLASData.KL, matrixAD, vectorXD, incX);
    vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
    vectorXRef.copyFrom(BLASData.L2_dTBMV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Compares CTBMV results with the reference vectors stored in BLASData.
 *
 * The matrix allocation is dN x dN, but only a (KL + 1) x dN region starting at (0, 0)
 * is loaded from the test data. UPPER / NON_UNIT is used throughout; NO_TRANSPOSE,
 * TRANSPOSE and CONJ_TRANSPOSE run with incX = 1, then NO_TRANSPOSE again with incX = 2.
 */
public void test_L2_CTBMV_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int diag = ScriptIntrinsicBLAS.NON_UNIT;
    int incX = 1;

    // Populate the input allocations.
    Allocation matrixAC = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN));
    Allocation vectorXC = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, 1));
    matrixAC.copy2DRangeFrom(0, 0, BLASData.KL + 1, BLASData.dN, BLASData.L2_cTBMV_A_nn);
    vectorXC.copyFrom(BLASData.L2_cTBMV_x_n1);

    // Default case: NO_TRANSPOSE.
    mBLAS.CTBMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, BLASData.KL, matrixAC, vectorXC, incX);
    Allocation vectorXRef = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, 1));
    vectorXRef.copyFrom(BLASData.L2_cTBMV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous BLAS call overwrote it.
    vectorXC.copyFrom(BLASData.L2_cTBMV_x_n1);
    mBLAS.CTBMV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, BLASData.KL, matrixAC, vectorXC, incX);
    vectorXRef.copyFrom(BLASData.L2_cTBMV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // CONJ_TRANSPOSE.
    vectorXC.copyFrom(BLASData.L2_cTBMV_x_n1);
    mBLAS.CTBMV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, BLASData.KL, matrixAC, vectorXC, incX);
    vectorXRef.copyFrom(BLASData.L2_cTBMV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // Strided case: NO_TRANSPOSE with incX = 2, which needs longer X and reference vectors.
    incX = 2;
    int dimX = 1 + (BLASData.dN - 1) * incX;
    vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
    vectorXC.copyFrom(BLASData.L2_cTBMV_x_n2);
    mBLAS.CTBMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, BLASData.KL, matrixAC, vectorXC, incX);
    vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
    vectorXRef.copyFrom(BLASData.L2_cTBMV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Compares ZTBMV results with the reference vectors stored in BLASData.
 *
 * The matrix allocation is dN x dN, but only a (KL + 1) x dN region starting at (0, 0)
 * is loaded from the test data. UPPER / NON_UNIT is used throughout; NO_TRANSPOSE,
 * TRANSPOSE and CONJ_TRANSPOSE run with incX = 1, then NO_TRANSPOSE again with incX = 2.
 */
public void test_L2_ZTBMV_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int diag = ScriptIntrinsicBLAS.NON_UNIT;
    int incX = 1;

    // Populate the input allocations.
    Allocation matrixAZ = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dN));
    Allocation vectorXZ = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, 1));
    matrixAZ.copy2DRangeFrom(0, 0, BLASData.KL + 1, BLASData.dN, BLASData.L2_zTBMV_A_nn);
    vectorXZ.copyFrom(BLASData.L2_zTBMV_x_n1);

    // Default case: NO_TRANSPOSE.
    mBLAS.ZTBMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, BLASData.KL, matrixAZ, vectorXZ, incX);
    Allocation vectorXRef = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, 1));
    vectorXRef.copyFrom(BLASData.L2_zTBMV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous BLAS call overwrote it.
    vectorXZ.copyFrom(BLASData.L2_zTBMV_x_n1);
    mBLAS.ZTBMV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, BLASData.KL, matrixAZ, vectorXZ, incX);
    vectorXRef.copyFrom(BLASData.L2_zTBMV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // CONJ_TRANSPOSE.
    vectorXZ.copyFrom(BLASData.L2_zTBMV_x_n1);
    mBLAS.ZTBMV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, BLASData.KL, matrixAZ, vectorXZ, incX);
    vectorXRef.copyFrom(BLASData.L2_zTBMV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // Strided case: NO_TRANSPOSE with incX = 2, which needs longer X and reference vectors.
    incX = 2;
    int dimX = 1 + (BLASData.dN - 1) * incX;
    vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
    vectorXZ.copyFrom(BLASData.L2_zTBMV_x_n2);
    mBLAS.ZTBMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, BLASData.KL, matrixAZ, vectorXZ, incX);
    vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
    vectorXRef.copyFrom(BLASData.L2_zTBMV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Decides whether an xTPMV argument combination is one the tests treat as valid.
 *
 * The combination is valid only when Uplo, TransA and Diag pass their validators, both
 * allocations hold elements compatible with e, neither allocation has more than one
 * row, Ap's length equals N * (N + 1) / 2 for some integer N, incX is positive, and
 * X's length is exactly 1 + (N - 1) * incX.
 *
 * @return true if every check passes, false otherwise
 */
private boolean validateTPMV(Element e, int Uplo, int TransA, int Diag, Allocation Ap, Allocation X, int incX) {
    if (!(validateUplo(Uplo) && validateTranspose(TransA) && validateDiag(Diag))) {
        return false;
    }
    boolean elementsMatch = Ap.getType().getElement().isCompatible(e)
            && X.getType().getElement().isCompatible(e);
    if (!elementsMatch) {
        return false;
    }
    // Both operands must be at most one row high.
    if (X.getType().getY() > 1 || Ap.getType().getY() > 1) {
        return false;
    }
    // Recover N from the length of Ap and confirm that length is exactly N * (N + 1) / 2.
    int packedLen = Ap.getType().getX();
    int N = (int) Math.sqrt(2.0 * packedLen);
    if (packedLen != (N * (N + 1)) / 2) {
        return false;
    }
    if (incX <= 0) {
        return false;
    }
    return X.getType().getX() == 1 + (N - 1) * incX;
}
/**
 * Calls the xTPMV variants on every (matA, vecX) pair drawn from mMatrix and checks
 * whether an RSRuntimeException is raised.
 *
 * Candidate X allocations rejected by validateVecInput are skipped. A pair accepted by
 * validateTPMV must be accepted by the variant that matches matA's element type; for a
 * rejected pair, all four variants must throw.
 */
private void xTPMV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            Element elemA = matA.getType().getElement();
            if (!validateTPMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) {
                // Invalid combination: every variant has to reject it.
                try {
                    mBLAS.STPMV(Uplo, TransA, Diag, matA, vecX, incX);
                    fail("should throw RSRuntimeException for STPMV");
                } catch (RSRuntimeException expected) {
                }
                try {
                    mBLAS.DTPMV(Uplo, TransA, Diag, matA, vecX, incX);
                    fail("should throw RSRuntimeException for DTPMV");
                } catch (RSRuntimeException expected) {
                }
                try {
                    mBLAS.CTPMV(Uplo, TransA, Diag, matA, vecX, incX);
                    fail("should throw RSRuntimeException for CTPMV");
                } catch (RSRuntimeException expected) {
                }
                try {
                    mBLAS.ZTPMV(Uplo, TransA, Diag, matA, vecX, incX);
                    fail("should throw RSRuntimeException for ZTPMV");
                } catch (RSRuntimeException expected) {
                }
                continue;
            }
            // Valid combination: only the variant matching the element type is invoked.
            try {
                if (elemA.isCompatible(Element.F32(mRS))) {
                    mBLAS.STPMV(Uplo, TransA, Diag, matA, vecX, incX);
                } else if (elemA.isCompatible(Element.F64(mRS))) {
                    mBLAS.DTPMV(Uplo, TransA, Diag, matA, vecX, incX);
                } else if (elemA.isCompatible(Element.F32_2(mRS))) {
                    mBLAS.CTPMV(Uplo, TransA, Diag, matA, vecX, incX);
                } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                    mBLAS.ZTPMV(Uplo, TransA, Diag, matA, vecX, incX);
                }
            } catch (RSRuntimeException unexpected) {
                fail("should NOT throw RSRuntimeException");
            }
        }
    }
}
/**
 * Sweeps xTPMV_API_test over every combination of the values held in
 * mUplo, mTranspose, mDiag and mInc.
 *
 * @param mMatrix allocations handed through to xTPMV_API_test
 */
public void L2_xTPMV_API(ArrayList<Allocation> mMatrix) {
    for (int uploMode : mUplo) {
        for (int transMode : mTranspose) {
            for (int diagMode : mDiag) {
                for (int stride : mInc) {
                    xTPMV_API_test(uploMode, transMode, diagMode, stride, mMatrix);
                }
            }
        }
    }
}
// Runs the xTPMV API sweep (L2_xTPMV_API) over the allocations in mMatrixS.
public void test_L2_STPMV_API() {
L2_xTPMV_API(mMatrixS);
}
// Runs the xTPMV API sweep (L2_xTPMV_API) over the allocations in mMatrixD.
public void test_L2_DTPMV_API() {
L2_xTPMV_API(mMatrixD);
}
// Runs the xTPMV API sweep (L2_xTPMV_API) over the allocations in mMatrixC.
public void test_L2_CTPMV_API() {
L2_xTPMV_API(mMatrixC);
}
// Runs the xTPMV API sweep (L2_xTPMV_API) over the allocations in mMatrixZ.
public void test_L2_ZTPMV_API() {
L2_xTPMV_API(mMatrixZ);
}
/**
 * Compares STPMV results with the reference vectors stored in BLASData.
 *
 * The matrix is supplied as a single row of N * (N + 1) / 2 elements; the X inputs and
 * expected outputs reuse the sTRMV data sets. UPPER / NON_UNIT is used throughout;
 * NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE run with incX = 1, then NO_TRANSPOSE
 * again with incX = 2.
 */
public void test_L2_STPMV_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int diag = ScriptIntrinsicBLAS.NON_UNIT;
    int incX = 1;

    // Populate the input allocations.
    int N = BLASData.dN;
    Allocation matrixAS = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32(mRS), N * (N + 1) / 2, 1));
    Allocation vectorXS = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32(mRS), N, 1));
    matrixAS.copyFrom(BLASData.L2_sTRMV_A_nn_pu);
    vectorXS.copyFrom(BLASData.L2_sTRMV_x_n1);

    // Default case: NO_TRANSPOSE.
    mBLAS.STPMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAS, vectorXS, incX);
    Allocation vectorXRef = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32(mRS), N, 1));
    vectorXRef.copyFrom(BLASData.L2_sTRMV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous BLAS call overwrote it.
    vectorXS.copyFrom(BLASData.L2_sTRMV_x_n1);
    mBLAS.STPMV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, matrixAS, vectorXS, incX);
    vectorXRef.copyFrom(BLASData.L2_sTRMV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // CONJ_TRANSPOSE.
    vectorXS.copyFrom(BLASData.L2_sTRMV_x_n1);
    mBLAS.STPMV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, matrixAS, vectorXS, incX);
    vectorXRef.copyFrom(BLASData.L2_sTRMV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // Strided case: NO_TRANSPOSE with incX = 2, which needs longer X and reference vectors.
    incX = 2;
    int dimX = 1 + (N - 1) * incX;
    vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
    vectorXS.copyFrom(BLASData.L2_sTRMV_x_n2);
    mBLAS.STPMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAS, vectorXS, incX);
    vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
    vectorXRef.copyFrom(BLASData.L2_sTRMV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Compares DTPMV results with the reference vectors stored in BLASData.
 *
 * The matrix is supplied as a single row of N * (N + 1) / 2 elements; the X inputs and
 * expected outputs reuse the dTRMV data sets. UPPER / NON_UNIT is used throughout;
 * NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE run with incX = 1, then NO_TRANSPOSE
 * again with incX = 2.
 */
public void test_L2_DTPMV_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int diag = ScriptIntrinsicBLAS.NON_UNIT;
    int incX = 1;

    // Populate the input allocations.
    int N = BLASData.dN;
    Allocation matrixAD = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64(mRS), N * (N + 1) / 2, 1));
    Allocation vectorXD = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64(mRS), N, 1));
    matrixAD.copyFrom(BLASData.L2_dTRMV_A_nn_pu);
    vectorXD.copyFrom(BLASData.L2_dTRMV_x_n1);

    // Default case: NO_TRANSPOSE.
    mBLAS.DTPMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAD, vectorXD, incX);
    Allocation vectorXRef = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64(mRS), N, 1));
    vectorXRef.copyFrom(BLASData.L2_dTRMV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous BLAS call overwrote it.
    vectorXD.copyFrom(BLASData.L2_dTRMV_x_n1);
    mBLAS.DTPMV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, matrixAD, vectorXD, incX);
    vectorXRef.copyFrom(BLASData.L2_dTRMV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // CONJ_TRANSPOSE.
    vectorXD.copyFrom(BLASData.L2_dTRMV_x_n1);
    mBLAS.DTPMV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, matrixAD, vectorXD, incX);
    vectorXRef.copyFrom(BLASData.L2_dTRMV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // Strided case: NO_TRANSPOSE with incX = 2, which needs longer X and reference vectors.
    incX = 2;
    int dimX = 1 + (N - 1) * incX;
    vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
    vectorXD.copyFrom(BLASData.L2_dTRMV_x_n2);
    mBLAS.DTPMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAD, vectorXD, incX);
    vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
    vectorXRef.copyFrom(BLASData.L2_dTRMV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Compares CTPMV results with the reference vectors stored in BLASData.
 *
 * The matrix is supplied as a single row of N * (N + 1) / 2 elements; the X inputs and
 * expected outputs reuse the cTRMV data sets. UPPER / NON_UNIT is used throughout;
 * NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE run with incX = 1, then NO_TRANSPOSE
 * again with incX = 2.
 */
public void test_L2_CTPMV_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int diag = ScriptIntrinsicBLAS.NON_UNIT;
    int incX = 1;

    // Populate the input allocations.
    int N = BLASData.dN;
    Allocation matrixAC = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32_2(mRS), N * (N + 1) / 2, 1));
    Allocation vectorXC = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32_2(mRS), N, 1));
    matrixAC.copyFrom(BLASData.L2_cTRMV_A_nn_pu);
    vectorXC.copyFrom(BLASData.L2_cTRMV_x_n1);

    // Default case: NO_TRANSPOSE.
    mBLAS.CTPMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAC, vectorXC, incX);
    Allocation vectorXRef = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32_2(mRS), N, 1));
    vectorXRef.copyFrom(BLASData.L2_cTRMV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous BLAS call overwrote it.
    vectorXC.copyFrom(BLASData.L2_cTRMV_x_n1);
    mBLAS.CTPMV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, matrixAC, vectorXC, incX);
    vectorXRef.copyFrom(BLASData.L2_cTRMV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // CONJ_TRANSPOSE.
    vectorXC.copyFrom(BLASData.L2_cTRMV_x_n1);
    mBLAS.CTPMV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, matrixAC, vectorXC, incX);
    vectorXRef.copyFrom(BLASData.L2_cTRMV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // Strided case: NO_TRANSPOSE with incX = 2, which needs longer X and reference vectors.
    incX = 2;
    int dimX = 1 + (N - 1) * incX;
    vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
    vectorXC.copyFrom(BLASData.L2_cTRMV_x_n2);
    mBLAS.CTPMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAC, vectorXC, incX);
    vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
    vectorXRef.copyFrom(BLASData.L2_cTRMV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Compares ZTPMV results with the reference vectors stored in BLASData.
 *
 * The matrix is supplied as a single row of N * (N + 1) / 2 elements; the X inputs and
 * expected outputs reuse the zTRMV data sets. UPPER / NON_UNIT is used throughout;
 * NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE run with incX = 1, then NO_TRANSPOSE
 * again with incX = 2.
 */
public void test_L2_ZTPMV_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int diag = ScriptIntrinsicBLAS.NON_UNIT;
    int incX = 1;

    // Populate the input allocations.
    int N = BLASData.dN;
    Allocation matrixAZ = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64_2(mRS), N * (N + 1) / 2, 1));
    Allocation vectorXZ = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64_2(mRS), N, 1));
    matrixAZ.copyFrom(BLASData.L2_zTRMV_A_nn_pu);
    vectorXZ.copyFrom(BLASData.L2_zTRMV_x_n1);

    // Default case: NO_TRANSPOSE.
    mBLAS.ZTPMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAZ, vectorXZ, incX);
    Allocation vectorXRef = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64_2(mRS), N, 1));
    vectorXRef.copyFrom(BLASData.L2_zTRMV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous BLAS call overwrote it.
    vectorXZ.copyFrom(BLASData.L2_zTRMV_x_n1);
    mBLAS.ZTPMV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, matrixAZ, vectorXZ, incX);
    vectorXRef.copyFrom(BLASData.L2_zTRMV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // CONJ_TRANSPOSE.
    vectorXZ.copyFrom(BLASData.L2_zTRMV_x_n1);
    mBLAS.ZTPMV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, matrixAZ, vectorXZ, incX);
    vectorXRef.copyFrom(BLASData.L2_zTRMV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // Strided case: NO_TRANSPOSE with incX = 2, which needs longer X and reference vectors.
    incX = 2;
    int dimX = 1 + (N - 1) * incX;
    vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
    vectorXZ.copyFrom(BLASData.L2_zTRMV_x_n2);
    mBLAS.ZTPMV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAZ, vectorXZ, incX);
    vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
    vectorXRef.copyFrom(BLASData.L2_zTRMV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Calls the xTRSV variants on every (matA, vecX) pair drawn from mMatrix and checks
 * whether an RSRuntimeException is raised.
 *
 * Candidate X allocations rejected by validateVecInput are skipped. A pair accepted by
 * validateTRMV must be accepted by the variant that matches matA's element type; for a
 * rejected pair, all four variants must throw.
 */
private void xTRSV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            Element elemA = matA.getType().getElement();
            if (!validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) {
                // Invalid combination: every variant has to reject it.
                try {
                    mBLAS.STRSV(Uplo, TransA, Diag, matA, vecX, incX);
                    fail("should throw RSRuntimeException for STRSV");
                } catch (RSRuntimeException expected) {
                }
                try {
                    mBLAS.DTRSV(Uplo, TransA, Diag, matA, vecX, incX);
                    fail("should throw RSRuntimeException for DTRSV");
                } catch (RSRuntimeException expected) {
                }
                try {
                    mBLAS.CTRSV(Uplo, TransA, Diag, matA, vecX, incX);
                    fail("should throw RSRuntimeException for CTRSV");
                } catch (RSRuntimeException expected) {
                }
                try {
                    mBLAS.ZTRSV(Uplo, TransA, Diag, matA, vecX, incX);
                    fail("should throw RSRuntimeException for ZTRSV");
                } catch (RSRuntimeException expected) {
                }
                continue;
            }
            // Valid combination: only the variant matching the element type is invoked.
            try {
                if (elemA.isCompatible(Element.F32(mRS))) {
                    mBLAS.STRSV(Uplo, TransA, Diag, matA, vecX, incX);
                } else if (elemA.isCompatible(Element.F64(mRS))) {
                    mBLAS.DTRSV(Uplo, TransA, Diag, matA, vecX, incX);
                } else if (elemA.isCompatible(Element.F32_2(mRS))) {
                    mBLAS.CTRSV(Uplo, TransA, Diag, matA, vecX, incX);
                } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                    mBLAS.ZTRSV(Uplo, TransA, Diag, matA, vecX, incX);
                }
            } catch (RSRuntimeException unexpected) {
                fail("should NOT throw RSRuntimeException");
            }
        }
    }
}
/**
 * Sweeps xTRSV_API_test over every combination of the values held in
 * mUplo, mTranspose, mDiag and mInc.
 *
 * @param mMatrix allocations handed through to xTRSV_API_test
 */
public void L2_xTRSV_API(ArrayList<Allocation> mMatrix) {
    for (int uploMode : mUplo) {
        for (int transMode : mTranspose) {
            for (int diagMode : mDiag) {
                for (int stride : mInc) {
                    xTRSV_API_test(uploMode, transMode, diagMode, stride, mMatrix);
                }
            }
        }
    }
}
// Runs the xTRSV API sweep (L2_xTRSV_API) over the allocations in mMatrixS.
public void test_L2_STRSV_API() {
L2_xTRSV_API(mMatrixS);
}
// Runs the xTRSV API sweep (L2_xTRSV_API) over the allocations in mMatrixD.
public void test_L2_DTRSV_API() {
L2_xTRSV_API(mMatrixD);
}
// Runs the xTRSV API sweep (L2_xTRSV_API) over the allocations in mMatrixC.
public void test_L2_CTRSV_API() {
L2_xTRSV_API(mMatrixC);
}
// Runs the xTRSV API sweep (L2_xTRSV_API) over the allocations in mMatrixZ.
public void test_L2_ZTRSV_API() {
L2_xTRSV_API(mMatrixZ);
}
/**
 * Compares STRSV results with the reference vectors stored in BLASData.
 *
 * The matrix is always treated as UPPER / NON_UNIT. NO_TRANSPOSE, TRANSPOSE and
 * CONJ_TRANSPOSE are exercised with incX = 1, then NO_TRANSPOSE again with incX = 2.
 */
public void test_L2_STRSV_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int diag = ScriptIntrinsicBLAS.NON_UNIT;
    int incX = 1;

    // Populate the input allocations.
    Allocation matrixAS = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dN));
    Allocation vectorXS = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32(mRS), BLASData.dN, 1));
    matrixAS.copyFrom(BLASData.L2_sTRSV_A_nn);
    vectorXS.copyFrom(BLASData.L2_sTRSV_x_n1);

    // Default case: NO_TRANSPOSE.
    mBLAS.STRSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAS, vectorXS, incX);
    Allocation vectorXRef = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32(mRS), BLASData.dN, 1));
    vectorXRef.copyFrom(BLASData.L2_sTRSV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous BLAS call overwrote it.
    vectorXS.copyFrom(BLASData.L2_sTRSV_x_n1);
    mBLAS.STRSV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, matrixAS, vectorXS, incX);
    vectorXRef.copyFrom(BLASData.L2_sTRSV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // CONJ_TRANSPOSE.
    vectorXS.copyFrom(BLASData.L2_sTRSV_x_n1);
    mBLAS.STRSV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, matrixAS, vectorXS, incX);
    vectorXRef.copyFrom(BLASData.L2_sTRSV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // Strided case: NO_TRANSPOSE with incX = 2, which needs longer X and reference vectors.
    incX = 2;
    int dimX = 1 + (BLASData.dN - 1) * incX;
    vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
    vectorXS.copyFrom(BLASData.L2_sTRSV_x_n2);
    mBLAS.STRSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAS, vectorXS, incX);
    vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
    vectorXRef.copyFrom(BLASData.L2_sTRSV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Compares DTRSV results with the reference vectors stored in BLASData.
 *
 * The matrix is always treated as UPPER / NON_UNIT. NO_TRANSPOSE, TRANSPOSE and
 * CONJ_TRANSPOSE are exercised with incX = 1, then NO_TRANSPOSE again with incX = 2.
 */
public void test_L2_DTRSV_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int diag = ScriptIntrinsicBLAS.NON_UNIT;
    int incX = 1;

    // Populate the input allocations.
    Allocation matrixAD = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dN));
    Allocation vectorXD = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64(mRS), BLASData.dN, 1));
    matrixAD.copyFrom(BLASData.L2_dTRSV_A_nn);
    vectorXD.copyFrom(BLASData.L2_dTRSV_x_n1);

    // Default case: NO_TRANSPOSE.
    mBLAS.DTRSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAD, vectorXD, incX);
    Allocation vectorXRef = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F64(mRS), BLASData.dN, 1));
    vectorXRef.copyFrom(BLASData.L2_dTRSV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous BLAS call overwrote it.
    vectorXD.copyFrom(BLASData.L2_dTRSV_x_n1);
    mBLAS.DTRSV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, matrixAD, vectorXD, incX);
    vectorXRef.copyFrom(BLASData.L2_dTRSV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // CONJ_TRANSPOSE.
    vectorXD.copyFrom(BLASData.L2_dTRSV_x_n1);
    mBLAS.DTRSV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, matrixAD, vectorXD, incX);
    vectorXRef.copyFrom(BLASData.L2_dTRSV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // Strided case: NO_TRANSPOSE with incX = 2, which needs longer X and reference vectors.
    incX = 2;
    int dimX = 1 + (BLASData.dN - 1) * incX;
    vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
    vectorXD.copyFrom(BLASData.L2_dTRSV_x_n2);
    mBLAS.DTRSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAD, vectorXD, incX);
    vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
    vectorXRef.copyFrom(BLASData.L2_dTRSV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Compares CTRSV results with the reference vectors stored in BLASData.
 *
 * The matrix is always treated as UPPER / NON_UNIT. NO_TRANSPOSE, TRANSPOSE and
 * CONJ_TRANSPOSE are exercised with incX = 1, then NO_TRANSPOSE again with incX = 2.
 */
public void test_L2_CTRSV_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int diag = ScriptIntrinsicBLAS.NON_UNIT;
    int incX = 1;

    // Populate the input allocations.
    Allocation matrixAC = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN));
    Allocation vectorXC = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, 1));
    matrixAC.copyFrom(BLASData.L2_cTRSV_A_nn);
    vectorXC.copyFrom(BLASData.L2_cTRSV_x_n1);

    // Default case: NO_TRANSPOSE.
    mBLAS.CTRSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAC, vectorXC, incX);
    Allocation vectorXRef = Allocation.createTyped(mRS,
            Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, 1));
    vectorXRef.copyFrom(BLASData.L2_cTRSV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous BLAS call overwrote it.
    vectorXC.copyFrom(BLASData.L2_cTRSV_x_n1);
    mBLAS.CTRSV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, matrixAC, vectorXC, incX);
    vectorXRef.copyFrom(BLASData.L2_cTRSV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // CONJ_TRANSPOSE.
    vectorXC.copyFrom(BLASData.L2_cTRSV_x_n1);
    mBLAS.CTRSV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, matrixAC, vectorXC, incX);
    vectorXRef.copyFrom(BLASData.L2_cTRSV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // Strided case: NO_TRANSPOSE with incX = 2, which needs longer X and reference vectors.
    incX = 2;
    int dimX = 1 + (BLASData.dN - 1) * incX;
    vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
    vectorXC.copyFrom(BLASData.L2_cTRSV_x_n2);
    mBLAS.CTRSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAC, vectorXC, incX);
    vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
    vectorXRef.copyFrom(BLASData.L2_cTRSV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Runs ZTRSV (uplo = UPPER, diag = NON_UNIT) on the BLASData fixtures and hands each result,
 * together with its precomputed reference vector, to mVerify.invoke_verify.
 *
 * Cases covered: NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE with incX = 1, followed by
 * NO_TRANSPOSE with incX = 2.
 */
public void test_L2_ZTRSV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int diag = ScriptIntrinsicBLAS.NON_UNIT;
    final int n = BLASData.dN;
    final Element elem = Element.F64_2(mRS);
    int incX = 1;

    // Populate the input allocations: an n x n matrix and n x 1 vectors.
    Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, n));
    matrixAZ.copyFrom(BLASData.L2_zTRSV_A_nn);
    Type vecType = Type.createXY(mRS, elem, n, 1);
    Allocation vectorXZ = Allocation.createTyped(mRS, vecType);
    Allocation vectorXRef = Allocation.createTyped(mRS, vecType);

    // Default case: NO_TRANSPOSE.
    vectorXZ.copyFrom(BLASData.L2_zTRSV_x_n1);
    mBLAS.ZTRSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAZ, vectorXZ, incX);
    vectorXRef.copyFrom(BLASData.L2_zTRSV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous call overwrote it.
    vectorXZ.copyFrom(BLASData.L2_zTRSV_x_n1);
    mBLAS.ZTRSV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, matrixAZ, vectorXZ, incX);
    vectorXRef.copyFrom(BLASData.L2_zTRSV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // CONJ_TRANSPOSE, again starting from a freshly loaded X.
    vectorXZ.copyFrom(BLASData.L2_zTRSV_x_n1);
    mBLAS.ZTRSV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, matrixAZ, vectorXZ, incX);
    vectorXRef.copyFrom(BLASData.L2_zTRSV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // Strided case: incX = 2. TRSV takes no Y vector, so there is no incY to vary.
    incX = 2;
    final int dimX = 1 + (n - 1) * incX;
    Type stridedType = Type.createXY(mRS, elem, dimX, 1);
    vectorXZ = Allocation.createTyped(mRS, stridedType);
    vectorXZ.copyFrom(BLASData.L2_zTRSV_x_n2);
    mBLAS.ZTRSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAZ, vectorXZ, incX);
    vectorXRef = Allocation.createTyped(mRS, stridedType);
    vectorXRef.copyFrom(BLASData.L2_zTRSV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Pushes every (matrix, vector) pairing from mMatrix through the TBSV entry points for a
 * single parameter combination.
 *
 * Pairings whose vector fails validateVecInput are skipped. When validateTRMV accepts the
 * arguments and K is non-negative, the variant matching the element type of the matrix must
 * not throw RSRuntimeException; in every other case all four variants are expected to throw.
 */
private void xTBSV_API_test(int Uplo, int TransA, int Diag, int K, int incX, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            final Element elemA = matA.getType().getElement();
            final boolean expectValid =
                    validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX) && K >= 0;

            if (!expectValid) {
                // Invalid arguments: every precision variant has to reject them.
                try {
                    mBLAS.STBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
                    fail("should throw RSRuntimeException for STBSV");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                try {
                    mBLAS.DTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
                    fail("should throw RSRuntimeException for DTBSV");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                try {
                    mBLAS.CTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
                    fail("should throw RSRuntimeException for CTBSV");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                try {
                    mBLAS.ZTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
                    fail("should throw RSRuntimeException for ZTBSV");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                continue;
            }

            // Valid arguments: call only the variant that matches the element type of A.
            try {
                if (elemA.isCompatible(Element.F32(mRS))) {
                    mBLAS.STBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
                } else if (elemA.isCompatible(Element.F64(mRS))) {
                    mBLAS.DTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
                } else if (elemA.isCompatible(Element.F32_2(mRS))) {
                    mBLAS.CTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
                } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                    mBLAS.ZTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
                }
            } catch (RSRuntimeException unexpected) {
                fail("should NOT throw RSRuntimeException");
            }
        }
    }
}
/**
 * Calls xTBSV_API_test once for every combination of the values in mUplo, mTranspose, mDiag,
 * mK and mInc, using the given allocation list.
 */
public void L2_xTBSV_API(ArrayList<Allocation> mMatrix) {
    for (int uploMode : mUplo) {
        for (int transMode : mTranspose) {
            for (int diagMode : mDiag) {
                for (int bandK : mK) {
                    for (int strideX : mInc) {
                        xTBSV_API_test(uploMode, transMode, diagMode, bandK, strideX, mMatrix);
                    }
                }
            }
        }
    }
}
// Runs the TBSV argument-validation sweep (L2_xTBSV_API) over the allocations in mMatrixS.
public void test_L2_STBSV_API() {
L2_xTBSV_API(mMatrixS);
}
// Runs the TBSV argument-validation sweep (L2_xTBSV_API) over the allocations in mMatrixD.
public void test_L2_DTBSV_API() {
L2_xTBSV_API(mMatrixD);
}
// Runs the TBSV argument-validation sweep (L2_xTBSV_API) over the allocations in mMatrixC.
public void test_L2_CTBSV_API() {
L2_xTBSV_API(mMatrixC);
}
// Runs the TBSV argument-validation sweep (L2_xTBSV_API) over the allocations in mMatrixZ.
public void test_L2_ZTBSV_API() {
L2_xTBSV_API(mMatrixZ);
}
/**
 * Runs STBSV (uplo = UPPER, diag = NON_UNIT, K = BLASData.KL) on the BLASData fixtures and
 * hands each result, together with its precomputed reference vector, to mVerify.invoke_verify.
 *
 * Cases covered: NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE with incX = 1, followed by
 * NO_TRANSPOSE with incX = 2.
 */
public void test_L2_STBSV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int diag = ScriptIntrinsicBLAS.NON_UNIT;
    final int n = BLASData.dN;
    final int k = BLASData.KL;
    final Element elem = Element.F32(mRS);
    int incX = 1;

    // Populate the input allocations. Only a (k + 1) x n region of the n x n matrix
    // allocation is filled from the fixture.
    Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, n));
    matrixAS.copy2DRangeFrom(0, 0, k + 1, n, BLASData.L2_sTBSV_A_nn);
    Type vecType = Type.createXY(mRS, elem, n, 1);
    Allocation vectorXS = Allocation.createTyped(mRS, vecType);
    Allocation vectorXRef = Allocation.createTyped(mRS, vecType);

    // Default case: NO_TRANSPOSE.
    vectorXS.copyFrom(BLASData.L2_sTBSV_x_n1);
    mBLAS.STBSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, k, matrixAS, vectorXS, incX);
    vectorXRef.copyFrom(BLASData.L2_sTBSV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous call overwrote it.
    vectorXS.copyFrom(BLASData.L2_sTBSV_x_n1);
    mBLAS.STBSV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, k, matrixAS, vectorXS, incX);
    vectorXRef.copyFrom(BLASData.L2_sTBSV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // CONJ_TRANSPOSE, again starting from a freshly loaded X.
    vectorXS.copyFrom(BLASData.L2_sTBSV_x_n1);
    mBLAS.STBSV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, k, matrixAS, vectorXS, incX);
    vectorXRef.copyFrom(BLASData.L2_sTBSV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // Strided case: incX = 2. TBSV takes no Y vector, so there is no incY to vary.
    incX = 2;
    final int dimX = 1 + (n - 1) * incX;
    Type stridedType = Type.createXY(mRS, elem, dimX, 1);
    vectorXS = Allocation.createTyped(mRS, stridedType);
    vectorXS.copyFrom(BLASData.L2_sTBSV_x_n2);
    mBLAS.STBSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, k, matrixAS, vectorXS, incX);
    vectorXRef = Allocation.createTyped(mRS, stridedType);
    vectorXRef.copyFrom(BLASData.L2_sTBSV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Runs DTBSV (uplo = UPPER, diag = NON_UNIT, K = BLASData.KL) on the BLASData fixtures and
 * hands each result, together with its precomputed reference vector, to mVerify.invoke_verify.
 *
 * Cases covered: NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE with incX = 1, followed by
 * NO_TRANSPOSE with incX = 2.
 */
public void test_L2_DTBSV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int diag = ScriptIntrinsicBLAS.NON_UNIT;
    final int n = BLASData.dN;
    final int k = BLASData.KL;
    final Element elem = Element.F64(mRS);
    int incX = 1;

    // Populate the input allocations. Only a (k + 1) x n region of the n x n matrix
    // allocation is filled from the fixture.
    Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, n));
    matrixAD.copy2DRangeFrom(0, 0, k + 1, n, BLASData.L2_dTBSV_A_nn);
    Type vecType = Type.createXY(mRS, elem, n, 1);
    Allocation vectorXD = Allocation.createTyped(mRS, vecType);
    Allocation vectorXRef = Allocation.createTyped(mRS, vecType);

    // Default case: NO_TRANSPOSE.
    vectorXD.copyFrom(BLASData.L2_dTBSV_x_n1);
    mBLAS.DTBSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, k, matrixAD, vectorXD, incX);
    vectorXRef.copyFrom(BLASData.L2_dTBSV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous call overwrote it.
    vectorXD.copyFrom(BLASData.L2_dTBSV_x_n1);
    mBLAS.DTBSV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, k, matrixAD, vectorXD, incX);
    vectorXRef.copyFrom(BLASData.L2_dTBSV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // CONJ_TRANSPOSE, again starting from a freshly loaded X.
    vectorXD.copyFrom(BLASData.L2_dTBSV_x_n1);
    mBLAS.DTBSV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, k, matrixAD, vectorXD, incX);
    vectorXRef.copyFrom(BLASData.L2_dTBSV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // Strided case: incX = 2. TBSV takes no Y vector, so there is no incY to vary.
    incX = 2;
    final int dimX = 1 + (n - 1) * incX;
    Type stridedType = Type.createXY(mRS, elem, dimX, 1);
    vectorXD = Allocation.createTyped(mRS, stridedType);
    vectorXD.copyFrom(BLASData.L2_dTBSV_x_n2);
    mBLAS.DTBSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, k, matrixAD, vectorXD, incX);
    vectorXRef = Allocation.createTyped(mRS, stridedType);
    vectorXRef.copyFrom(BLASData.L2_dTBSV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Runs CTBSV (uplo = UPPER, diag = NON_UNIT, K = BLASData.KL) on the BLASData fixtures and
 * hands each result, together with its precomputed reference vector, to mVerify.invoke_verify.
 *
 * Cases covered: NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE with incX = 1, followed by
 * NO_TRANSPOSE with incX = 2.
 */
public void test_L2_CTBSV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int diag = ScriptIntrinsicBLAS.NON_UNIT;
    final int n = BLASData.dN;
    final int k = BLASData.KL;
    final Element elem = Element.F32_2(mRS);
    int incX = 1;

    // Populate the input allocations. Only a (k + 1) x n region of the n x n matrix
    // allocation is filled from the fixture.
    Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, n));
    matrixAC.copy2DRangeFrom(0, 0, k + 1, n, BLASData.L2_cTBSV_A_nn);
    Type vecType = Type.createXY(mRS, elem, n, 1);
    Allocation vectorXC = Allocation.createTyped(mRS, vecType);
    Allocation vectorXRef = Allocation.createTyped(mRS, vecType);

    // Default case: NO_TRANSPOSE.
    vectorXC.copyFrom(BLASData.L2_cTBSV_x_n1);
    mBLAS.CTBSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, k, matrixAC, vectorXC, incX);
    vectorXRef.copyFrom(BLASData.L2_cTBSV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous call overwrote it.
    vectorXC.copyFrom(BLASData.L2_cTBSV_x_n1);
    mBLAS.CTBSV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, k, matrixAC, vectorXC, incX);
    vectorXRef.copyFrom(BLASData.L2_cTBSV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // CONJ_TRANSPOSE, again starting from a freshly loaded X.
    vectorXC.copyFrom(BLASData.L2_cTBSV_x_n1);
    mBLAS.CTBSV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, k, matrixAC, vectorXC, incX);
    vectorXRef.copyFrom(BLASData.L2_cTBSV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // Strided case: incX = 2. TBSV takes no Y vector, so there is no incY to vary.
    incX = 2;
    final int dimX = 1 + (n - 1) * incX;
    Type stridedType = Type.createXY(mRS, elem, dimX, 1);
    vectorXC = Allocation.createTyped(mRS, stridedType);
    vectorXC.copyFrom(BLASData.L2_cTBSV_x_n2);
    mBLAS.CTBSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, k, matrixAC, vectorXC, incX);
    vectorXRef = Allocation.createTyped(mRS, stridedType);
    vectorXRef.copyFrom(BLASData.L2_cTBSV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Runs ZTBSV (uplo = UPPER, diag = NON_UNIT, K = BLASData.KL) on the BLASData fixtures and
 * hands each result, together with its precomputed reference vector, to mVerify.invoke_verify.
 *
 * Cases covered: NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE with incX = 1, followed by
 * NO_TRANSPOSE with incX = 2.
 */
public void test_L2_ZTBSV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int diag = ScriptIntrinsicBLAS.NON_UNIT;
    final int n = BLASData.dN;
    final int k = BLASData.KL;
    final Element elem = Element.F64_2(mRS);
    int incX = 1;

    // Populate the input allocations. Only a (k + 1) x n region of the n x n matrix
    // allocation is filled from the fixture.
    Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, n));
    matrixAZ.copy2DRangeFrom(0, 0, k + 1, n, BLASData.L2_zTBSV_A_nn);
    Type vecType = Type.createXY(mRS, elem, n, 1);
    Allocation vectorXZ = Allocation.createTyped(mRS, vecType);
    Allocation vectorXRef = Allocation.createTyped(mRS, vecType);

    // Default case: NO_TRANSPOSE.
    vectorXZ.copyFrom(BLASData.L2_zTBSV_x_n1);
    mBLAS.ZTBSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, k, matrixAZ, vectorXZ, incX);
    vectorXRef.copyFrom(BLASData.L2_zTBSV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous call overwrote it.
    vectorXZ.copyFrom(BLASData.L2_zTBSV_x_n1);
    mBLAS.ZTBSV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, k, matrixAZ, vectorXZ, incX);
    vectorXRef.copyFrom(BLASData.L2_zTBSV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // CONJ_TRANSPOSE, again starting from a freshly loaded X.
    vectorXZ.copyFrom(BLASData.L2_zTBSV_x_n1);
    mBLAS.ZTBSV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, k, matrixAZ, vectorXZ, incX);
    vectorXRef.copyFrom(BLASData.L2_zTBSV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // Strided case: incX = 2. TBSV takes no Y vector, so there is no incY to vary.
    incX = 2;
    final int dimX = 1 + (n - 1) * incX;
    Type stridedType = Type.createXY(mRS, elem, dimX, 1);
    vectorXZ = Allocation.createTyped(mRS, stridedType);
    vectorXZ.copyFrom(BLASData.L2_zTBSV_x_n2);
    mBLAS.ZTBSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, k, matrixAZ, vectorXZ, incX);
    vectorXRef = Allocation.createTyped(mRS, stridedType);
    vectorXRef.copyFrom(BLASData.L2_zTBSV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Pushes every (matrix, vector) pairing from mMatrix through the TPSV entry points for a
 * single parameter combination.
 *
 * Pairings whose vector fails validateVecInput are skipped. When validateTPMV accepts the
 * arguments, the variant matching the element type of the matrix must not throw
 * RSRuntimeException; otherwise all four variants are expected to throw.
 */
private void xTPSV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            final Element elemA = matA.getType().getElement();
            final boolean expectValid = validateTPMV(elemA, Uplo, TransA, Diag, matA, vecX, incX);

            if (!expectValid) {
                // Invalid arguments: every precision variant has to reject them.
                try {
                    mBLAS.STPSV(Uplo, TransA, Diag, matA, vecX, incX);
                    fail("should throw RSRuntimeException for STPSV");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                try {
                    mBLAS.DTPSV(Uplo, TransA, Diag, matA, vecX, incX);
                    fail("should throw RSRuntimeException for DTPSV");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                try {
                    mBLAS.CTPSV(Uplo, TransA, Diag, matA, vecX, incX);
                    fail("should throw RSRuntimeException for CTPSV");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                try {
                    mBLAS.ZTPSV(Uplo, TransA, Diag, matA, vecX, incX);
                    fail("should throw RSRuntimeException for ZTPSV");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                continue;
            }

            // Valid arguments: call only the variant that matches the element type of A.
            try {
                if (elemA.isCompatible(Element.F32(mRS))) {
                    mBLAS.STPSV(Uplo, TransA, Diag, matA, vecX, incX);
                } else if (elemA.isCompatible(Element.F64(mRS))) {
                    mBLAS.DTPSV(Uplo, TransA, Diag, matA, vecX, incX);
                } else if (elemA.isCompatible(Element.F32_2(mRS))) {
                    mBLAS.CTPSV(Uplo, TransA, Diag, matA, vecX, incX);
                } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                    mBLAS.ZTPSV(Uplo, TransA, Diag, matA, vecX, incX);
                }
            } catch (RSRuntimeException unexpected) {
                fail("should NOT throw RSRuntimeException");
            }
        }
    }
}
/**
 * Calls xTPSV_API_test once for every combination of the values in mUplo, mTranspose, mDiag
 * and mInc, using the given allocation list.
 */
public void L2_xTPSV_API(ArrayList<Allocation> mMatrix) {
    for (int uploMode : mUplo) {
        for (int transMode : mTranspose) {
            for (int diagMode : mDiag) {
                for (int strideX : mInc) {
                    xTPSV_API_test(uploMode, transMode, diagMode, strideX, mMatrix);
                }
            }
        }
    }
}
// Runs the TPSV argument-validation sweep (L2_xTPSV_API) over the allocations in mMatrixS.
public void test_L2_STPSV_API() {
L2_xTPSV_API(mMatrixS);
}
// Runs the TPSV argument-validation sweep (L2_xTPSV_API) over the allocations in mMatrixD.
public void test_L2_DTPSV_API() {
L2_xTPSV_API(mMatrixD);
}
// Runs the TPSV argument-validation sweep (L2_xTPSV_API) over the allocations in mMatrixC.
public void test_L2_CTPSV_API() {
L2_xTPSV_API(mMatrixC);
}
// Runs the TPSV argument-validation sweep (L2_xTPSV_API) over the allocations in mMatrixZ.
public void test_L2_ZTPSV_API() {
L2_xTPSV_API(mMatrixZ);
}
/**
 * Runs STPSV (uplo = UPPER, diag = NON_UNIT) on the packed TRSV fixtures from BLASData and
 * hands each result, together with its precomputed reference vector, to mVerify.invoke_verify.
 *
 * Cases covered: NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE with incX = 1, followed by
 * NO_TRANSPOSE with incX = 2.
 */
public void test_L2_STPSV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int diag = ScriptIntrinsicBLAS.NON_UNIT;
    final int N = BLASData.dN;
    final Element elem = Element.F32(mRS);
    int incX = 1;

    // Populate the input allocations. The matrix is stored as a single row of
    // N * (N + 1) / 2 elements.
    Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, elem, N * (N + 1) / 2, 1));
    matrixAS.copyFrom(BLASData.L2_sTRSV_A_nn_pu);
    Type vecType = Type.createXY(mRS, elem, N, 1);
    Allocation vectorXS = Allocation.createTyped(mRS, vecType);
    Allocation vectorXRef = Allocation.createTyped(mRS, vecType);

    // Default case: NO_TRANSPOSE.
    vectorXS.copyFrom(BLASData.L2_sTRSV_x_n1);
    mBLAS.STPSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAS, vectorXS, incX);
    vectorXRef.copyFrom(BLASData.L2_sTRSV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous call overwrote it.
    vectorXS.copyFrom(BLASData.L2_sTRSV_x_n1);
    mBLAS.STPSV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, matrixAS, vectorXS, incX);
    vectorXRef.copyFrom(BLASData.L2_sTRSV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // CONJ_TRANSPOSE, again starting from a freshly loaded X.
    vectorXS.copyFrom(BLASData.L2_sTRSV_x_n1);
    mBLAS.STPSV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, matrixAS, vectorXS, incX);
    vectorXRef.copyFrom(BLASData.L2_sTRSV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    // Strided case: incX = 2. TPSV takes no Y vector, so there is no incY to vary.
    incX = 2;
    final int dimX = 1 + (N - 1) * incX;
    Type stridedType = Type.createXY(mRS, elem, dimX, 1);
    vectorXS = Allocation.createTyped(mRS, stridedType);
    vectorXS.copyFrom(BLASData.L2_sTRSV_x_n2);
    mBLAS.STPSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAS, vectorXS, incX);
    vectorXRef = Allocation.createTyped(mRS, stridedType);
    vectorXRef.copyFrom(BLASData.L2_sTRSV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXS, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Runs DTPSV (uplo = UPPER, diag = NON_UNIT) on the packed TRSV fixtures from BLASData and
 * hands each result, together with its precomputed reference vector, to mVerify.invoke_verify.
 *
 * Cases covered: NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE with incX = 1, followed by
 * NO_TRANSPOSE with incX = 2.
 */
public void test_L2_DTPSV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int diag = ScriptIntrinsicBLAS.NON_UNIT;
    final int N = BLASData.dN;
    final Element elem = Element.F64(mRS);
    int incX = 1;

    // Populate the input allocations. The matrix is stored as a single row of
    // N * (N + 1) / 2 elements.
    Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, elem, N * (N + 1) / 2, 1));
    matrixAD.copyFrom(BLASData.L2_dTRSV_A_nn_pu);
    Type vecType = Type.createXY(mRS, elem, N, 1);
    Allocation vectorXD = Allocation.createTyped(mRS, vecType);
    Allocation vectorXRef = Allocation.createTyped(mRS, vecType);

    // Default case: NO_TRANSPOSE.
    vectorXD.copyFrom(BLASData.L2_dTRSV_x_n1);
    mBLAS.DTPSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAD, vectorXD, incX);
    vectorXRef.copyFrom(BLASData.L2_dTRSV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous call overwrote it.
    vectorXD.copyFrom(BLASData.L2_dTRSV_x_n1);
    mBLAS.DTPSV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, matrixAD, vectorXD, incX);
    vectorXRef.copyFrom(BLASData.L2_dTRSV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // CONJ_TRANSPOSE, again starting from a freshly loaded X.
    vectorXD.copyFrom(BLASData.L2_dTRSV_x_n1);
    mBLAS.DTPSV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, matrixAD, vectorXD, incX);
    vectorXRef.copyFrom(BLASData.L2_dTRSV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    // Strided case: incX = 2. TPSV takes no Y vector, so there is no incY to vary.
    incX = 2;
    final int dimX = 1 + (N - 1) * incX;
    Type stridedType = Type.createXY(mRS, elem, dimX, 1);
    vectorXD = Allocation.createTyped(mRS, stridedType);
    vectorXD.copyFrom(BLASData.L2_dTRSV_x_n2);
    mBLAS.DTPSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAD, vectorXD, incX);
    vectorXRef = Allocation.createTyped(mRS, stridedType);
    vectorXRef.copyFrom(BLASData.L2_dTRSV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXD, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Runs CTPSV (uplo = UPPER, diag = NON_UNIT) on the packed TRSV fixtures from BLASData and
 * hands each result, together with its precomputed reference vector, to mVerify.invoke_verify.
 *
 * Cases covered: NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE with incX = 1, followed by
 * NO_TRANSPOSE with incX = 2.
 */
public void test_L2_CTPSV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int diag = ScriptIntrinsicBLAS.NON_UNIT;
    final int N = BLASData.dN;
    final Element elem = Element.F32_2(mRS);
    int incX = 1;

    // Populate the input allocations. The matrix is stored as a single row of
    // N * (N + 1) / 2 elements.
    Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, elem, N * (N + 1) / 2, 1));
    matrixAC.copyFrom(BLASData.L2_cTRSV_A_nn_pu);
    Type vecType = Type.createXY(mRS, elem, N, 1);
    Allocation vectorXC = Allocation.createTyped(mRS, vecType);
    Allocation vectorXRef = Allocation.createTyped(mRS, vecType);

    // Default case: NO_TRANSPOSE.
    vectorXC.copyFrom(BLASData.L2_cTRSV_x_n1);
    mBLAS.CTPSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAC, vectorXC, incX);
    vectorXRef.copyFrom(BLASData.L2_cTRSV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous call overwrote it.
    vectorXC.copyFrom(BLASData.L2_cTRSV_x_n1);
    mBLAS.CTPSV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, matrixAC, vectorXC, incX);
    vectorXRef.copyFrom(BLASData.L2_cTRSV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // CONJ_TRANSPOSE, again starting from a freshly loaded X.
    vectorXC.copyFrom(BLASData.L2_cTRSV_x_n1);
    mBLAS.CTPSV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, matrixAC, vectorXC, incX);
    vectorXRef.copyFrom(BLASData.L2_cTRSV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    // Strided case: incX = 2. TPSV takes no Y vector, so there is no incY to vary.
    incX = 2;
    final int dimX = 1 + (N - 1) * incX;
    Type stridedType = Type.createXY(mRS, elem, dimX, 1);
    vectorXC = Allocation.createTyped(mRS, stridedType);
    vectorXC.copyFrom(BLASData.L2_cTRSV_x_n2);
    mBLAS.CTPSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAC, vectorXC, incX);
    vectorXRef = Allocation.createTyped(mRS, stridedType);
    vectorXRef.copyFrom(BLASData.L2_cTRSV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXC, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Runs ZTPSV (uplo = UPPER, diag = NON_UNIT) on the packed TRSV fixtures from BLASData and
 * hands each result, together with its precomputed reference vector, to mVerify.invoke_verify.
 *
 * Cases covered: NO_TRANSPOSE, TRANSPOSE and CONJ_TRANSPOSE with incX = 1, followed by
 * NO_TRANSPOSE with incX = 2.
 */
public void test_L2_ZTPSV_Correctness() {
    final int uplo = ScriptIntrinsicBLAS.UPPER;
    final int diag = ScriptIntrinsicBLAS.NON_UNIT;
    final int N = BLASData.dN;
    final Element elem = Element.F64_2(mRS);
    int incX = 1;

    // Populate the input allocations. The matrix is stored as a single row of
    // N * (N + 1) / 2 elements.
    Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, elem, N * (N + 1) / 2, 1));
    matrixAZ.copyFrom(BLASData.L2_zTRSV_A_nn_pu);
    Type vecType = Type.createXY(mRS, elem, N, 1);
    Allocation vectorXZ = Allocation.createTyped(mRS, vecType);
    Allocation vectorXRef = Allocation.createTyped(mRS, vecType);

    // Default case: NO_TRANSPOSE.
    vectorXZ.copyFrom(BLASData.L2_zTRSV_x_n1);
    mBLAS.ZTPSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAZ, vectorXZ, incX);
    vectorXRef.copyFrom(BLASData.L2_zTRSV_o_UN);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // TRANSPOSE. X is reloaded first because the previous call overwrote it.
    vectorXZ.copyFrom(BLASData.L2_zTRSV_x_n1);
    mBLAS.ZTPSV(uplo, ScriptIntrinsicBLAS.TRANSPOSE, diag, matrixAZ, vectorXZ, incX);
    vectorXRef.copyFrom(BLASData.L2_zTRSV_o_UT);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // CONJ_TRANSPOSE, again starting from a freshly loaded X.
    vectorXZ.copyFrom(BLASData.L2_zTRSV_x_n1);
    mBLAS.ZTPSV(uplo, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, diag, matrixAZ, vectorXZ, incX);
    vectorXRef.copyFrom(BLASData.L2_zTRSV_o_UH);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    // Strided case: incX = 2. TPSV takes no Y vector, so there is no incY to vary.
    incX = 2;
    final int dimX = 1 + (N - 1) * incX;
    Type stridedType = Type.createXY(mRS, elem, dimX, 1);
    vectorXZ = Allocation.createTyped(mRS, stridedType);
    vectorXZ.copyFrom(BLASData.L2_zTRSV_x_n2);
    mBLAS.ZTPSV(uplo, ScriptIntrinsicBLAS.NO_TRANSPOSE, diag, matrixAZ, vectorXZ, incX);
    vectorXRef = Allocation.createTyped(mRS, stridedType);
    vectorXRef.copyFrom(BLASData.L2_zTRSV_o_UN2);
    mVerify.invoke_verify(vectorXRef, vectorXZ, vectorXRef);

    mRS.finish();
    checkError();
}
/**
 * Reports whether the arguments form an acceptable GER call for element type e.
 *
 * All of the following must hold: A, X and Y have elements compatible with e; X and Y have a
 * Y dimension of at most 1; both dimensions of A are at least 1; both increments are
 * positive; X has exactly 1 + (M - 1) * incX elements and Y exactly 1 + (N - 1) * incY, where
 * M and N are the Y and X dimensions of A.
 *
 * @return true when every check passes, false otherwise
 */
private boolean validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
    final boolean elementsMatch = A.getType().getElement().isCompatible(e)
            && X.getType().getElement().isCompatible(e)
            && Y.getType().getElement().isCompatible(e);
    if (!elementsMatch) {
        return false;
    }

    final boolean bothAreVectors = X.getType().getY() <= 1 && Y.getType().getY() <= 1;
    if (!bothAreVectors) {
        return false;
    }

    final int rows = A.getType().getY();
    final int cols = A.getType().getX();
    if (cols < 1 || rows < 1) {
        return false;
    }
    if (incX <= 0 || incY <= 0) {
        return false;
    }

    // Each vector has to be exactly as long as its stride requires.
    final int wantX = 1 + (rows - 1) * incX;
    final int wantY = 1 + (cols - 1) * incY;
    return X.getType().getX() == wantX && Y.getType().getX() == wantY;
}
/**
 * Pushes every (matrix, X, Y) triple from mMatrix through SGER/DGER for one pair of
 * increments.
 *
 * Triples whose X or Y fails validateVecInput are skipped. When validateGER accepts the
 * arguments, the variant matching the element type of the matrix must not throw
 * RSRuntimeException; otherwise both SGER and DGER are expected to throw.
 */
private void xGER_API_test(int incX, int incY, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            for (Allocation vecY : mMatrix) {
                if (!validateVecInput(vecY)) {
                    continue;
                }
                final Element elemA = matA.getType().getElement();
                final boolean expectValid = validateGER(elemA, vecX, incX, vecY, incY, matA);

                if (!expectValid) {
                    // Invalid arguments: both precision variants have to reject them.
                    try {
                        mBLAS.SGER(alphaS, vecX, incX, vecY, incY, matA);
                        fail("should throw RSRuntimeException for SGER");
                    } catch (RSRuntimeException expected) {
                        // expected
                    }
                    try {
                        mBLAS.DGER(alphaD, vecX, incX, vecY, incY, matA);
                        fail("should throw RSRuntimeException for DGER");
                    } catch (RSRuntimeException expected) {
                        // expected
                    }
                    continue;
                }

                // Valid arguments: call only the variant that matches the element type of A.
                try {
                    if (elemA.isCompatible(Element.F32(mRS))) {
                        mBLAS.SGER(alphaS, vecX, incX, vecY, incY, matA);
                    } else if (elemA.isCompatible(Element.F64(mRS))) {
                        mBLAS.DGER(alphaD, vecX, incX, vecY, incY, matA);
                    }
                } catch (RSRuntimeException unexpected) {
                    fail("should NOT throw RSRuntimeException");
                }
            }
        }
    }
}
/**
 * Calls the SGER/DGER argument-validation test once for every (incX, incY) pair drawn from
 * mInc, using the given allocation list.
 */
private void L2_xGER_API(ArrayList<Allocation> mMatrix) {
    for (int incX : mInc) {
        for (int incY : mInc) {
            // Must be xGER_API_test: xGERU_API_test only ever calls CGERU/ZGERU, so
            // delegating to it would leave SGER and DGER unexercised.
            xGER_API_test(incX, incY, mMatrix);
        }
    }
}
// Runs the L2_xGER_API increment sweep over the allocations in mMatrixS.
public void test_L2_SGER_API() {
L2_xGER_API(mMatrixS);
}
// Runs the L2_xGER_API increment sweep over the allocations in mMatrixD.
public void test_L2_DGER_API() {
L2_xGER_API(mMatrixD);
}
/**
 * Runs SGER on the BLASData fixtures, first with incX = incY = 1 and then with
 * incX = incY = 2, handing the updated matrix and the reference matrix to
 * mVerify.invoke_verify after each run.
 */
public void test_L2_SGER_Correctness() {
    final int m = BLASData.dM;
    final int n = BLASData.dN;
    final Element elem = Element.F32(mRS);

    // Populate the input allocations. A has X dimension n and Y dimension m.
    Type matType = Type.createXY(mRS, elem, n, m);
    Allocation matrixAS = Allocation.createTyped(mRS, matType);
    Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, elem, m, 1));
    Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    matrixAS.copyFrom(BLASData.L2_sGER_A_mn);
    vectorXS.copyFrom(BLASData.L2_sGER_x_m1);
    vectorYS.copyFrom(BLASData.L2_sGER_y_n1);

    // Unit-stride case.
    mBLAS.SGER(alphaS, vectorXS, 1, vectorYS, 1, matrixAS);
    Allocation matrixARef = Allocation.createTyped(mRS, matType);
    matrixARef.copyFrom(BLASData.L2_sGER_o_N);
    mVerify.invoke_verify(matrixARef, matrixAS, matrixARef);

    // Strided case: incX = 2 and incY = 2, checked against the same reference matrix.
    final int stride = 2;
    final int dimX = 1 + (m - 1) * stride;
    final int dimY = 1 + (n - 1) * stride;
    vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, elem, dimX, 1));
    vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, elem, dimY, 1));
    vectorXS.copyFrom(BLASData.L2_sGER_x_m2);
    vectorYS.copyFrom(BLASData.L2_sGER_y_n2);
    // A is reloaded so the second run starts from the same input as the first.
    matrixAS.copyFrom(BLASData.L2_sGER_A_mn);
    mBLAS.SGER(alphaS, vectorXS, stride, vectorYS, stride, matrixAS);
    mVerify.invoke_verify(matrixARef, matrixAS, matrixARef);

    mRS.finish();
    checkError();
}
/**
 * Runs DGER on the BLASData fixtures, first with incX = incY = 1 and then with
 * incX = incY = 2, handing the updated matrix and the reference matrix to
 * mVerify.invoke_verify after each run.
 */
public void test_L2_DGER_Correctness() {
    final int m = BLASData.dM;
    final int n = BLASData.dN;
    final Element elem = Element.F64(mRS);

    // Populate the input allocations. A has X dimension n and Y dimension m.
    Type matType = Type.createXY(mRS, elem, n, m);
    Allocation matrixAD = Allocation.createTyped(mRS, matType);
    Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, elem, m, 1));
    Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    matrixAD.copyFrom(BLASData.L2_dGER_A_mn);
    vectorXD.copyFrom(BLASData.L2_dGER_x_m1);
    vectorYD.copyFrom(BLASData.L2_dGER_y_n1);

    // Unit-stride case.
    mBLAS.DGER(alphaD, vectorXD, 1, vectorYD, 1, matrixAD);
    Allocation matrixARef = Allocation.createTyped(mRS, matType);
    matrixARef.copyFrom(BLASData.L2_dGER_o_N);
    mVerify.invoke_verify(matrixARef, matrixAD, matrixARef);

    // Strided case: incX = 2 and incY = 2, checked against the same reference matrix.
    final int stride = 2;
    final int dimX = 1 + (m - 1) * stride;
    final int dimY = 1 + (n - 1) * stride;
    vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, elem, dimX, 1));
    vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, elem, dimY, 1));
    vectorXD.copyFrom(BLASData.L2_dGER_x_m2);
    vectorYD.copyFrom(BLASData.L2_dGER_y_n2);
    // A is reloaded so the second run starts from the same input as the first.
    matrixAD.copyFrom(BLASData.L2_dGER_A_mn);
    mBLAS.DGER(alphaD, vectorXD, stride, vectorYD, stride, matrixAD);
    mVerify.invoke_verify(matrixARef, matrixAD, matrixARef);

    mRS.finish();
    checkError();
}
/**
 * Reports whether the arguments form an acceptable GERU/GERC call for element type e.
 *
 * All of the following must hold: A, X and Y have elements compatible with e; X and Y have a
 * Y dimension of at most 1; both increments are positive; X has exactly
 * 1 + (M - 1) * incX elements and Y exactly 1 + (N - 1) * incY, where M and N are the Y and X
 * dimensions of A.
 *
 * @return true when every check passes, false otherwise
 */
private boolean validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
    final boolean elementsMatch = A.getType().getElement().isCompatible(e)
            && X.getType().getElement().isCompatible(e)
            && Y.getType().getElement().isCompatible(e);
    if (!elementsMatch) {
        return false;
    }

    final boolean bothAreVectors = X.getType().getY() <= 1 && Y.getType().getY() <= 1;
    if (!bothAreVectors) {
        return false;
    }

    final int rows = A.getType().getY();
    final int cols = A.getType().getX();
    if (incX <= 0 || incY <= 0) {
        return false;
    }

    // Each vector has to be exactly as long as its stride requires.
    final int wantX = 1 + (rows - 1) * incX;
    final int wantY = 1 + (cols - 1) * incY;
    return X.getType().getX() == wantX && Y.getType().getX() == wantY;
}
/**
 * Pushes every (matrix, X, Y) triple from mMatrix through CGERU/ZGERU for one pair of
 * increments.
 *
 * Triples whose X or Y fails validateVecInput are skipped. When validateGERU accepts the
 * arguments, the variant matching the element type of the matrix must not throw
 * RSRuntimeException; otherwise both CGERU and ZGERU are expected to throw.
 */
private void xGERU_API_test(int incX, int incY, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            for (Allocation vecY : mMatrix) {
                if (!validateVecInput(vecY)) {
                    continue;
                }
                final Element elemA = matA.getType().getElement();
                final boolean expectValid = validateGERU(elemA, vecX, incX, vecY, incY, matA);

                if (!expectValid) {
                    // Invalid arguments: both precision variants have to reject them.
                    try {
                        mBLAS.CGERU(alphaC, vecX, incX, vecY, incY, matA);
                        fail("should throw RSRuntimeException for CGERU");
                    } catch (RSRuntimeException expected) {
                        // expected
                    }
                    try {
                        mBLAS.ZGERU(alphaZ, vecX, incX, vecY, incY, matA);
                        fail("should throw RSRuntimeException for ZGERU");
                    } catch (RSRuntimeException expected) {
                        // expected
                    }
                    continue;
                }

                // Valid arguments: call only the variant that matches the element type of A.
                try {
                    if (elemA.isCompatible(Element.F32_2(mRS))) {
                        mBLAS.CGERU(alphaC, vecX, incX, vecY, incY, matA);
                    } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                        mBLAS.ZGERU(alphaZ, vecX, incX, vecY, incY, matA);
                    }
                } catch (RSRuntimeException unexpected) {
                    fail("should NOT throw RSRuntimeException");
                }
            }
        }
    }
}
/**
 * Calls xGERU_API_test once for every (incX, incY) pair drawn from mInc, using the given
 * allocation list.
 */
private void L2_xGERU_API(ArrayList<Allocation> mMatrix) {
    for (int strideX : mInc) {
        for (int strideY : mInc) {
            xGERU_API_test(strideX, strideY, mMatrix);
        }
    }
}
// Runs the GERU argument-validation sweep (L2_xGERU_API) over the allocations in mMatrixC.
public void test_L2_CGERU_API() {
L2_xGERU_API(mMatrixC);
}
// Runs the GERU argument-validation sweep (L2_xGERU_API) over the allocations in mMatrixZ.
public void test_L2_ZGERU_API() {
L2_xGERU_API(mMatrixZ);
}
/**
 * Runs CGERU on the BLASData fixtures, first with incX = incY = 1 and then with
 * incX = incY = 2, handing the updated matrix and the reference matrix to
 * mVerify.invoke_verify after each run.
 */
public void test_L2_CGERU_Correctness() {
    final int m = BLASData.dM;
    final int n = BLASData.dN;
    final Element elem = Element.F32_2(mRS);

    // Populate the input allocations. A has X dimension n and Y dimension m.
    Type matType = Type.createXY(mRS, elem, n, m);
    Allocation matrixAC = Allocation.createTyped(mRS, matType);
    Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, elem, m, 1));
    Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    matrixAC.copyFrom(BLASData.L2_cGERU_A_mn);
    vectorXC.copyFrom(BLASData.L2_cGERU_x_m1);
    vectorYC.copyFrom(BLASData.L2_cGERU_y_n1);

    // Unit-stride case.
    mBLAS.CGERU(alphaC, vectorXC, 1, vectorYC, 1, matrixAC);
    Allocation matrixARef = Allocation.createTyped(mRS, matType);
    matrixARef.copyFrom(BLASData.L2_cGERU_o_N);
    mVerify.invoke_verify(matrixARef, matrixAC, matrixARef);

    // Strided case: incX = 2 and incY = 2, checked against the same reference matrix.
    final int stride = 2;
    final int dimX = 1 + (m - 1) * stride;
    final int dimY = 1 + (n - 1) * stride;
    vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, elem, dimX, 1));
    vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, elem, dimY, 1));
    vectorXC.copyFrom(BLASData.L2_cGERU_x_m2);
    vectorYC.copyFrom(BLASData.L2_cGERU_y_n2);
    // A is reloaded so the second run starts from the same input as the first.
    matrixAC.copyFrom(BLASData.L2_cGERU_A_mn);
    mBLAS.CGERU(alphaC, vectorXC, stride, vectorYC, stride, matrixAC);
    mVerify.invoke_verify(matrixARef, matrixAC, matrixARef);

    mRS.finish();
    checkError();
}
/**
 * Runs ZGERU on the BLASData fixtures, first with incX = incY = 1 and then with
 * incX = incY = 2, handing the updated matrix and the reference matrix to
 * mVerify.invoke_verify after each run.
 */
public void test_L2_ZGERU_Correctness() {
    final int m = BLASData.dM;
    final int n = BLASData.dN;
    final Element elem = Element.F64_2(mRS);

    // Populate the input allocations. A has X dimension n and Y dimension m.
    Type matType = Type.createXY(mRS, elem, n, m);
    Allocation matrixAZ = Allocation.createTyped(mRS, matType);
    Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, elem, m, 1));
    Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, elem, n, 1));
    matrixAZ.copyFrom(BLASData.L2_zGERU_A_mn);
    vectorXZ.copyFrom(BLASData.L2_zGERU_x_m1);
    vectorYZ.copyFrom(BLASData.L2_zGERU_y_n1);

    // Unit-stride case.
    mBLAS.ZGERU(alphaZ, vectorXZ, 1, vectorYZ, 1, matrixAZ);
    Allocation matrixARef = Allocation.createTyped(mRS, matType);
    matrixARef.copyFrom(BLASData.L2_zGERU_o_N);
    mVerify.invoke_verify(matrixARef, matrixAZ, matrixARef);

    // Strided case: incX = 2 and incY = 2, checked against the same reference matrix.
    final int stride = 2;
    final int dimX = 1 + (m - 1) * stride;
    final int dimY = 1 + (n - 1) * stride;
    vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, elem, dimX, 1));
    vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, elem, dimY, 1));
    vectorXZ.copyFrom(BLASData.L2_zGERU_x_m2);
    vectorYZ.copyFrom(BLASData.L2_zGERU_y_n2);
    // A is reloaded so the second run starts from the same input as the first.
    matrixAZ.copyFrom(BLASData.L2_zGERU_A_mn);
    mBLAS.ZGERU(alphaZ, vectorXZ, stride, vectorYZ, stride, matrixAZ);
    mVerify.invoke_verify(matrixARef, matrixAZ, matrixARef);

    mRS.finish();
    checkError();
}
/**
 * Tries CGERC/ZGERC on every (A, x, y) combination drawn from {@code mMatrix}
 * and checks that the intrinsic accepts or rejects each one in agreement with
 * {@code validateGERU}.
 *
 * @param incX increment passed for vector x
 * @param incY increment passed for vector y
 * @param mMatrix allocations used as candidates for A, x and y
 */
private void xGERC_API_test(int incX, int incY, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        final Element elemA = matA.getType().getElement();
        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            for (Allocation vecY : mMatrix) {
                if (!validateVecInput(vecY)) {
                    continue;
                }
                if (!validateGERU(elemA, vecX, incX, vecY, incY, matA)) {
                    // Combination judged invalid: both variants must reject it.
                    try {
                        mBLAS.CGERC(alphaC, vecX, incX, vecY, incY, matA);
                        fail("should throw RSRuntimeException for CGERC");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the outcome being tested for.
                    }
                    try {
                        mBLAS.ZGERC(alphaZ, vecX, incX, vecY, incY, matA);
                        fail("should throw RSRuntimeException for ZGERC");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the outcome being tested for.
                    }
                    continue;
                }
                // Combination judged valid: the variant matching A's element must succeed.
                try {
                    if (elemA.isCompatible(Element.F32_2(mRS))) {
                        mBLAS.CGERC(alphaC, vecX, incX, vecY, incY, matA);
                    } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                        mBLAS.ZGERC(alphaZ, vecX, incX, vecY, incY, matA);
                    }
                } catch (RSRuntimeException unexpected) {
                    fail("should NOT throw RSRuntimeException");
                }
            }
        }
    }
}
/**
 * Calls xGERC_API_test for every (incX, incY) pair drawn from mInc.
 *
 * @param mMatrix allocations forwarded to xGERC_API_test
 */
private void L2_xGERC_API(ArrayList<Allocation> mMatrix) {
    for (int i = 0; i < mInc.length; i++) {
        for (int j = 0; j < mInc.length; j++) {
            xGERC_API_test(mInc[i], mInc[j], mMatrix);
        }
    }
}
/** Runs the GERC API checks over the allocations held in mMatrixC. */
public void test_L2_CGERC_API() {
    final ArrayList<Allocation> candidates = mMatrixC;
    L2_xGERC_API(candidates);
}
/** Runs the GERC API checks over the allocations held in mMatrixZ. */
public void test_L2_ZGERC_API() {
    final ArrayList<Allocation> candidates = mMatrixZ;
    L2_xGERC_API(candidates);
}
/**
 * Runs CGERC on the BLASData fixtures and passes the result to mVerify together
 * with the data from BLASData.L2_cGERC_o_N, once with increments of 1 and once
 * with incX = incY = 2.
 */
public void test_L2_CGERC_Correctness() {
    final Element elemC = Element.F32_2(mRS);
    final int m = BLASData.dM;
    final int n = BLASData.dN;

    // Inputs for the increment-1 run.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, n, m));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, m, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, n, 1));
    matA.copyFrom(BLASData.L2_cGERC_A_mn);
    vecX.copyFrom(BLASData.L2_cGERC_x_m1);
    vecY.copyFrom(BLASData.L2_cGERC_y_n1);
    mBLAS.CGERC(alphaC, vecX, 1, vecY, 1, matA);

    // Allocation holding the data the result is verified against.
    Allocation matRef = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, n, m));
    matRef.copyFrom(BLASData.L2_cGERC_o_N);
    mVerify.invoke_verify(matRef, matA, matRef);

    // Second run: both vectors use an increment of 2, so they are re-created
    // with the larger length 1 + (count - 1) * inc and A is reset to its input.
    final int inc = 2;
    Allocation vecX2 = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, 1 + (m - 1) * inc, 1));
    Allocation vecY2 = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, 1 + (n - 1) * inc, 1));
    vecX2.copyFrom(BLASData.L2_cGERC_x_m2);
    vecY2.copyFrom(BLASData.L2_cGERC_y_n2);
    matA.copyFrom(BLASData.L2_cGERC_A_mn);
    mBLAS.CGERC(alphaC, vecX2, inc, vecY2, inc, matA);
    mVerify.invoke_verify(matRef, matA, matRef);

    mRS.finish();
    checkError();
}
/**
 * Runs ZGERC on the BLASData fixtures and passes the result to mVerify together
 * with the data from BLASData.L2_zGERC_o_N, once with increments of 1 and once
 * with incX = incY = 2.
 */
public void test_L2_ZGERC_Correctness() {
    final Element elemZ = Element.F64_2(mRS);
    final int m = BLASData.dM;
    final int n = BLASData.dN;

    // Inputs for the increment-1 run.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, n, m));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, m, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, n, 1));
    matA.copyFrom(BLASData.L2_zGERC_A_mn);
    vecX.copyFrom(BLASData.L2_zGERC_x_m1);
    vecY.copyFrom(BLASData.L2_zGERC_y_n1);
    mBLAS.ZGERC(alphaZ, vecX, 1, vecY, 1, matA);

    // Allocation holding the data the result is verified against.
    Allocation matRef = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, n, m));
    matRef.copyFrom(BLASData.L2_zGERC_o_N);
    mVerify.invoke_verify(matRef, matA, matRef);

    // Second run: both vectors use an increment of 2, so they are re-created
    // with the larger length 1 + (count - 1) * inc and A is reset to its input.
    final int inc = 2;
    Allocation vecX2 = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, 1 + (m - 1) * inc, 1));
    Allocation vecY2 = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, 1 + (n - 1) * inc, 1));
    vecX2.copyFrom(BLASData.L2_zGERC_x_m2);
    vecY2.copyFrom(BLASData.L2_zGERC_y_n2);
    matA.copyFrom(BLASData.L2_zGERC_A_mn);
    mBLAS.ZGERC(alphaZ, vecX2, inc, vecY2, inc, matA);
    mVerify.invoke_verify(matRef, matA, matRef);

    mRS.finish();
    checkError();
}
/**
 * Tries CHER/ZHER on every (A, x) combination drawn from {@code mMatrix} and
 * checks that the intrinsic accepts or rejects each one in agreement with
 * {@code validateSYR}.
 *
 * @param Uplo value passed as the Uplo argument
 * @param incX increment passed for vector x
 * @param mMatrix allocations used as candidates for A and x
 */
private void xHER_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        final Element elemA = matA.getType().getElement();
        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            if (!validateSYR(elemA, Uplo, vecX, incX, matA)) {
                // Combination judged invalid: both variants must reject it.
                try {
                    mBLAS.CHER(Uplo, alphaS, vecX, incX, matA);
                    fail("should throw RSRuntimeException for CHER");
                } catch (RSRuntimeException expected) {
                    // Rejection is the outcome being tested for.
                }
                try {
                    mBLAS.ZHER(Uplo, alphaD, vecX, incX, matA);
                    fail("should throw RSRuntimeException for ZHER");
                } catch (RSRuntimeException expected) {
                    // Rejection is the outcome being tested for.
                }
                continue;
            }
            // Combination judged valid: the variant matching A's element must succeed.
            try {
                if (elemA.isCompatible(Element.F32_2(mRS))) {
                    mBLAS.CHER(Uplo, alphaS, vecX, incX, matA);
                } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                    mBLAS.ZHER(Uplo, alphaD, vecX, incX, matA);
                }
            } catch (RSRuntimeException unexpected) {
                fail("should NOT throw RSRuntimeException");
            }
        }
    }
}
/**
 * Calls xHER_API_test for every combination of an mUplo entry and an mInc entry.
 *
 * @param mMatrix allocations forwarded to xHER_API_test
 */
public void L2_xHER_API(ArrayList<Allocation> mMatrix) {
    for (int u = 0; u < mUplo.length; u++) {
        for (int i = 0; i < mInc.length; i++) {
            xHER_API_test(mUplo[u], mInc[i], mMatrix);
        }
    }
}
/** Runs the HER API checks over the allocations held in mMatrixC. */
public void test_L2_CHER_API() {
    final ArrayList<Allocation> candidates = mMatrixC;
    L2_xHER_API(candidates);
}
/** Runs the HER API checks over the allocations held in mMatrixZ. */
public void test_L2_ZHER_API() {
    final ArrayList<Allocation> candidates = mMatrixZ;
    L2_xHER_API(candidates);
}
/**
 * Runs CHER with Uplo = UPPER on the BLASData fixtures and passes the result to
 * mVerify.invoke_verifyUpLo together with the data from BLASData.L2_cHER_o_N,
 * once with incX = 1 and once with incX = 2.
 */
public void test_L2_CHER_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int incX = 1;
    //populate input allocations
    Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN));
    Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, 1));
    matrixAC.copyFrom(BLASData.L2_cHER_A_nn);
    vectorXC.copyFrom(BLASData.L2_cHER_x_n1);
    //Test for the default case: incX = 1 (CHER takes no transpose or incY argument)
    mBLAS.CHER(uplo, alphaS, vectorXC, incX, matrixAC);
    Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN));
    matrixARef.copyFrom(BLASData.L2_cHER_o_N);
    // NOTE(review): the trailing 'true' presumably selects the upper triangle - confirm against the verify script.
    mVerify.invoke_verifyUpLo(matrixARef, matrixAC, matrixARef, true);
    //Test for incX = 2: x is re-created with length 1 + (N-1) * incX and A is reset to its input
    incX = 2;
    int dimX = 1 + (BLASData.dN-1) * incX;
    vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
    vectorXC.copyFrom(BLASData.L2_cHER_x_n2);
    matrixAC.copyFrom(BLASData.L2_cHER_A_nn);
    mBLAS.CHER(uplo, alphaS, vectorXC, incX, matrixAC);
    mVerify.invoke_verifyUpLo(matrixARef, matrixAC, matrixARef, true);
    mRS.finish();
    checkError();
}
/**
 * Runs ZHER with Uplo = UPPER on the BLASData fixtures and passes the result to
 * mVerify.invoke_verifyUpLo together with the data from BLASData.L2_zHER_o_N,
 * once with incX = 1 and once with incX = 2.
 */
public void test_L2_ZHER_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int incX = 1;
    //populate input allocations
    Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dN));
    Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, 1));
    matrixAZ.copyFrom(BLASData.L2_zHER_A_nn);
    vectorXZ.copyFrom(BLASData.L2_zHER_x_n1);
    //Test for the default case: incX = 1 (ZHER takes no transpose or incY argument)
    mBLAS.ZHER(uplo, alphaD, vectorXZ, incX, matrixAZ);
    Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dN));
    matrixARef.copyFrom(BLASData.L2_zHER_o_N);
    // NOTE(review): the trailing 'true' presumably selects the upper triangle - confirm against the verify script.
    mVerify.invoke_verifyUpLo(matrixARef, matrixAZ, matrixARef, true);
    //Test for incX = 2: x is re-created with length 1 + (N-1) * incX and A is reset to its input
    incX = 2;
    int dimX = 1 + (BLASData.dN-1) * incX;
    vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
    vectorXZ.copyFrom(BLASData.L2_zHER_x_n2);
    matrixAZ.copyFrom(BLASData.L2_zHER_A_nn);
    mBLAS.ZHER(uplo, alphaD, vectorXZ, incX, matrixAZ);
    mVerify.invoke_verifyUpLo(matrixARef, matrixAZ, matrixARef, true);
    mRS.finish();
    checkError();
}
/**
 * Tries CHPR/ZHPR on every (Ap, x) combination drawn from {@code mMatrix} and
 * checks that the intrinsic accepts or rejects each one in agreement with
 * {@code validateSPR}.
 *
 * @param Uplo value passed as the Uplo argument
 * @param incX increment passed for vector x
 * @param mMatrix allocations used as candidates for Ap and x
 */
private void xHPR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        final Element elemA = matA.getType().getElement();
        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            if (!validateSPR(elemA, Uplo, vecX, incX, matA)) {
                // Combination judged invalid: both variants must reject it.
                try {
                    mBLAS.CHPR(Uplo, alphaS, vecX, incX, matA);
                    fail("should throw RSRuntimeException for CHPR");
                } catch (RSRuntimeException expected) {
                    // Rejection is the outcome being tested for.
                }
                try {
                    mBLAS.ZHPR(Uplo, alphaD, vecX, incX, matA);
                    fail("should throw RSRuntimeException for ZHPR");
                } catch (RSRuntimeException expected) {
                    // Rejection is the outcome being tested for.
                }
                continue;
            }
            // Combination judged valid: the variant matching Ap's element must succeed.
            try {
                if (elemA.isCompatible(Element.F32_2(mRS))) {
                    mBLAS.CHPR(Uplo, alphaS, vecX, incX, matA);
                } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                    mBLAS.ZHPR(Uplo, alphaD, vecX, incX, matA);
                }
            } catch (RSRuntimeException unexpected) {
                fail("should NOT throw RSRuntimeException");
            }
        }
    }
}
/**
 * Calls xHPR_API_test for every combination of an mUplo entry and an mInc entry.
 *
 * @param mMatrix allocations forwarded to xHPR_API_test
 */
public void L2_xHPR_API(ArrayList<Allocation> mMatrix) {
    for (int u = 0; u < mUplo.length; u++) {
        for (int i = 0; i < mInc.length; i++) {
            xHPR_API_test(mUplo[u], mInc[i], mMatrix);
        }
    }
}
/** Runs the HPR API checks over the allocations held in mMatrixC. */
public void test_L2_CHPR_API() {
    final ArrayList<Allocation> candidates = mMatrixC;
    L2_xHPR_API(candidates);
}
/** Runs the HPR API checks over the allocations held in mMatrixZ. */
public void test_L2_ZHPR_API() {
    final ArrayList<Allocation> candidates = mMatrixZ;
    L2_xHPR_API(candidates);
}
/**
 * Runs CHPR with Uplo = UPPER on the packed BLASData fixtures and passes the
 * result to mVerify.invoke_verifyUpLo together with the data from
 * BLASData.L2_cHER_o_N_pu, once with incX = 1 and once with incX = 2.
 */
public void test_L2_CHPR_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int incX = 1;
    //populate input allocations
    int N = BLASData.dN;
    // The packed matrix is stored as a single row of N * (N+1) / 2 elements.
    Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1));
    Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
    matrixAC.copyFrom(BLASData.L2_cHER_A_nn_pu);
    vectorXC.copyFrom(BLASData.L2_cHER_x_n1);
    //Test for the default case: incX = 1 (CHPR takes no transpose or incY argument)
    mBLAS.CHPR(uplo, alphaS, vectorXC, incX, matrixAC);
    Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1));
    matrixARef.copyFrom(BLASData.L2_cHER_o_N_pu);
    // NOTE(review): the trailing 'true' presumably selects the upper triangle - confirm against the verify script.
    mVerify.invoke_verifyUpLo(matrixARef, matrixAC, matrixARef, true);
    //Test for incX = 2: x is re-created with length 1 + (N-1) * incX and Ap is reset to its input
    incX = 2;
    int dimX = 1 + (N-1) * incX;
    vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
    vectorXC.copyFrom(BLASData.L2_cHER_x_n2);
    matrixAC.copyFrom(BLASData.L2_cHER_A_nn_pu);
    mBLAS.CHPR(uplo, alphaS, vectorXC, incX, matrixAC);
    mVerify.invoke_verifyUpLo(matrixARef, matrixAC, matrixARef, true);
    mRS.finish();
    checkError();
}
/**
 * Runs ZHPR with Uplo = UPPER on the packed BLASData fixtures and passes the
 * result to mVerify.invoke_verifyUpLo together with the data from
 * BLASData.L2_zHER_o_N_pu, once with incX = 1 and once with incX = 2.
 */
public void test_L2_ZHPR_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int incX = 1;
    //populate input allocations
    int N = BLASData.dN;
    // The packed matrix is stored as a single row of N * (N+1) / 2 elements.
    Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1));
    Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
    matrixAZ.copyFrom(BLASData.L2_zHER_A_nn_pu);
    vectorXZ.copyFrom(BLASData.L2_zHER_x_n1);
    //Test for the default case: incX = 1 (ZHPR takes no transpose or incY argument)
    mBLAS.ZHPR(uplo, alphaD, vectorXZ, incX, matrixAZ);
    Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1));
    matrixARef.copyFrom(BLASData.L2_zHER_o_N_pu);
    // NOTE(review): the trailing 'true' presumably selects the upper triangle - confirm against the verify script.
    mVerify.invoke_verifyUpLo(matrixARef, matrixAZ, matrixARef, true);
    //Test for incX = 2: x is re-created with length 1 + (N-1) * incX and Ap is reset to its input
    incX = 2;
    int dimX = 1 + (N-1) * incX;
    vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
    vectorXZ.copyFrom(BLASData.L2_zHER_x_n2);
    matrixAZ.copyFrom(BLASData.L2_zHER_A_nn_pu);
    mBLAS.ZHPR(uplo, alphaD, vectorXZ, incX, matrixAZ);
    mVerify.invoke_verifyUpLo(matrixARef, matrixAZ, matrixARef, true);
    mRS.finish();
    checkError();
}
/**
 * Tries CHER2/ZHER2 on every (A, x, y) combination drawn from {@code mMatrix}
 * and checks that the intrinsic accepts or rejects each one in agreement with
 * {@code validateSYR2}.
 *
 * @param Uplo value passed as the Uplo argument
 * @param incX increment passed for vector x
 * @param incY increment passed for vector y
 * @param mMatrix allocations used as candidates for A, x and y
 */
private void xHER2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        final Element elemA = matA.getType().getElement();
        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            for (Allocation vecY : mMatrix) {
                if (!validateVecInput(vecY)) {
                    continue;
                }
                if (!validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) {
                    // Combination judged invalid: both variants must reject it.
                    try {
                        mBLAS.CHER2(Uplo, alphaC, vecX, incX, vecY, incY, matA);
                        fail("should throw RSRuntimeException for CHER2");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the outcome being tested for.
                    }
                    try {
                        mBLAS.ZHER2(Uplo, alphaZ, vecX, incX, vecY, incY, matA);
                        fail("should throw RSRuntimeException for ZHER2");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the outcome being tested for.
                    }
                    continue;
                }
                // Combination judged valid: the variant matching A's element must succeed.
                try {
                    if (elemA.isCompatible(Element.F32_2(mRS))) {
                        mBLAS.CHER2(Uplo, alphaC, vecX, incX, vecY, incY, matA);
                    } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                        mBLAS.ZHER2(Uplo, alphaZ, vecX, incX, vecY, incY, matA);
                    }
                } catch (RSRuntimeException unexpected) {
                    fail("should NOT throw RSRuntimeException");
                }
            }
        }
    }
}
/**
 * Calls xHER2_API_test for every mUplo entry and every (incX, incY) pair drawn
 * from mInc. The two increments are varied independently, as L2_xGERC_API does,
 * so that combinations with incX != incY are exercised as well.
 *
 * @param mMatrix allocations forwarded to xHER2_API_test
 */
public void L2_xHER2_API(ArrayList<Allocation> mMatrix) {
    for (int Uplo : mUplo) {
        for (int incX : mInc) {
            for (int incY : mInc) {
                xHER2_API_test(Uplo, incX, incY, mMatrix);
            }
        }
    }
}
/** Runs the HER2 API checks over the allocations held in mMatrixC. */
public void test_L2_CHER2_API() {
    final ArrayList<Allocation> candidates = mMatrixC;
    L2_xHER2_API(candidates);
}
/** Runs the HER2 API checks over the allocations held in mMatrixZ. */
public void test_L2_ZHER2_API() {
    final ArrayList<Allocation> candidates = mMatrixZ;
    L2_xHER2_API(candidates);
}
/**
 * Runs CHER2 with Uplo = UPPER on the BLASData fixtures and passes the result
 * to mVerify.invoke_verifyUpLo together with the data from
 * BLASData.L2_cHER2_o_N, once with increments of 1 and once with
 * incX = incY = 2.
 */
public void test_L2_CHER2_Correctness() {
    final int triangle = ScriptIntrinsicBLAS.UPPER;
    final Element elemC = Element.F32_2(mRS);
    final int n = BLASData.dN;

    // Inputs for the increment-1 run.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, n, n));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, n, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, n, 1));
    matA.copyFrom(BLASData.L2_cHER2_A_nn);
    vecX.copyFrom(BLASData.L2_cHER2_x_n1);
    vecY.copyFrom(BLASData.L2_cHER2_y_n1);
    mBLAS.CHER2(triangle, alphaC, vecX, 1, vecY, 1, matA);

    // Allocation holding the data the result is verified against.
    Allocation matRef = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, n, n));
    matRef.copyFrom(BLASData.L2_cHER2_o_N);
    mVerify.invoke_verifyUpLo(matRef, matA, matRef, true);

    // Second run: both vectors use an increment of 2, so they are re-created
    // with length 1 + (n - 1) * inc and A is reset to its input.
    final int inc = 2;
    Allocation vecX2 = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, 1 + (n - 1) * inc, 1));
    Allocation vecY2 = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, 1 + (n - 1) * inc, 1));
    vecX2.copyFrom(BLASData.L2_cHER2_x_n2);
    vecY2.copyFrom(BLASData.L2_cHER2_y_n2);
    matA.copyFrom(BLASData.L2_cHER2_A_nn);
    mBLAS.CHER2(triangle, alphaC, vecX2, inc, vecY2, inc, matA);
    mVerify.invoke_verifyUpLo(matRef, matA, matRef, true);

    mRS.finish();
    checkError();
}
/**
 * Runs ZHER2 with Uplo = UPPER on the BLASData fixtures and passes the result
 * to mVerify.invoke_verifyUpLo together with the data from
 * BLASData.L2_zHER2_o_N, once with increments of 1 and once with
 * incX = incY = 2.
 */
public void test_L2_ZHER2_Correctness() {
    final int triangle = ScriptIntrinsicBLAS.UPPER;
    final Element elemZ = Element.F64_2(mRS);
    final int n = BLASData.dN;

    // Inputs for the increment-1 run.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, n, n));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, n, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, n, 1));
    matA.copyFrom(BLASData.L2_zHER2_A_nn);
    vecX.copyFrom(BLASData.L2_zHER2_x_n1);
    vecY.copyFrom(BLASData.L2_zHER2_y_n1);
    mBLAS.ZHER2(triangle, alphaZ, vecX, 1, vecY, 1, matA);

    // Allocation holding the data the result is verified against.
    Allocation matRef = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, n, n));
    matRef.copyFrom(BLASData.L2_zHER2_o_N);
    mVerify.invoke_verifyUpLo(matRef, matA, matRef, true);

    // Second run: both vectors use an increment of 2, so they are re-created
    // with length 1 + (n - 1) * inc and A is reset to its input.
    final int inc = 2;
    Allocation vecX2 = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, 1 + (n - 1) * inc, 1));
    Allocation vecY2 = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, 1 + (n - 1) * inc, 1));
    vecX2.copyFrom(BLASData.L2_zHER2_x_n2);
    vecY2.copyFrom(BLASData.L2_zHER2_y_n2);
    matA.copyFrom(BLASData.L2_zHER2_A_nn);
    mBLAS.ZHER2(triangle, alphaZ, vecX2, inc, vecY2, inc, matA);
    mVerify.invoke_verifyUpLo(matRef, matA, matRef, true);

    mRS.finish();
    checkError();
}
/**
 * Tries CHPR2/ZHPR2 on every (Ap, x, y) combination drawn from {@code mMatrix}
 * and checks that the intrinsic accepts or rejects each one in agreement with
 * {@code validateSPR2}.
 *
 * @param Uplo value passed as the Uplo argument
 * @param incX increment passed for vector x
 * @param incY increment passed for vector y
 * @param mMatrix allocations used as candidates for Ap, x and y
 */
private void xHPR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        final Element elemA = matA.getType().getElement();
        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            for (Allocation vecY : mMatrix) {
                if (!validateVecInput(vecY)) {
                    continue;
                }
                if (!validateSPR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) {
                    // Combination judged invalid: both variants must reject it.
                    try {
                        mBLAS.CHPR2(Uplo, alphaC, vecX, incX, vecY, incY, matA);
                        fail("should throw RSRuntimeException for CHPR2");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the outcome being tested for.
                    }
                    try {
                        mBLAS.ZHPR2(Uplo, alphaZ, vecX, incX, vecY, incY, matA);
                        fail("should throw RSRuntimeException for ZHPR2");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the outcome being tested for.
                    }
                    continue;
                }
                // Combination judged valid: the variant matching Ap's element must succeed.
                try {
                    if (elemA.isCompatible(Element.F32_2(mRS))) {
                        mBLAS.CHPR2(Uplo, alphaC, vecX, incX, vecY, incY, matA);
                    } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                        mBLAS.ZHPR2(Uplo, alphaZ, vecX, incX, vecY, incY, matA);
                    }
                } catch (RSRuntimeException unexpected) {
                    fail("should NOT throw RSRuntimeException");
                }
            }
        }
    }
}
/**
 * Calls xHPR2_API_test for every mUplo entry and every (incX, incY) pair drawn
 * from mInc. The two increments are varied independently, as L2_xGERC_API does,
 * so that combinations with incX != incY are exercised as well.
 *
 * @param mMatrix allocations forwarded to xHPR2_API_test
 */
public void L2_xHPR2_API(ArrayList<Allocation> mMatrix) {
    for (int Uplo : mUplo) {
        for (int incX : mInc) {
            for (int incY : mInc) {
                xHPR2_API_test(Uplo, incX, incY, mMatrix);
            }
        }
    }
}
/** Runs the HPR2 API checks over the allocations held in mMatrixC. */
public void test_L2_CHPR2_API() {
    final ArrayList<Allocation> candidates = mMatrixC;
    L2_xHPR2_API(candidates);
}
/** Runs the HPR2 API checks over the allocations held in mMatrixZ. */
public void test_L2_ZHPR2_API() {
    final ArrayList<Allocation> candidates = mMatrixZ;
    L2_xHPR2_API(candidates);
}
/**
 * Runs CHPR2 with Uplo = UPPER on the packed BLASData fixtures and passes the
 * result to mVerify.invoke_verifyUpLo together with the data from
 * BLASData.L2_cHER2_o_N_pu, once with increments of 1 and once with
 * incX = incY = 2.
 */
public void test_L2_CHPR2_Correctness() {
    final int triangle = ScriptIntrinsicBLAS.UPPER;
    final Element elemC = Element.F32_2(mRS);
    final int n = BLASData.dN;
    // The packed matrix is stored as a single row of n * (n + 1) / 2 elements.
    final int packedLen = n * (n + 1) / 2;

    // Inputs for the increment-1 run.
    Allocation matAp = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, packedLen, 1));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, n, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, n, 1));
    matAp.copyFrom(BLASData.L2_cHER2_A_nn_pu);
    vecX.copyFrom(BLASData.L2_cHER2_x_n1);
    vecY.copyFrom(BLASData.L2_cHER2_y_n1);
    mBLAS.CHPR2(triangle, alphaC, vecX, 1, vecY, 1, matAp);

    // Allocation holding the data the result is verified against.
    Allocation matRef = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, packedLen, 1));
    matRef.copyFrom(BLASData.L2_cHER2_o_N_pu);
    mVerify.invoke_verifyUpLo(matRef, matAp, matRef, true);

    // Second run: both vectors use an increment of 2, so they are re-created
    // with length 1 + (n - 1) * inc and Ap is reset to its input.
    final int inc = 2;
    Allocation vecX2 = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, 1 + (n - 1) * inc, 1));
    Allocation vecY2 = Allocation.createTyped(mRS, Type.createXY(mRS, elemC, 1 + (n - 1) * inc, 1));
    vecX2.copyFrom(BLASData.L2_cHER2_x_n2);
    vecY2.copyFrom(BLASData.L2_cHER2_y_n2);
    matAp.copyFrom(BLASData.L2_cHER2_A_nn_pu);
    mBLAS.CHPR2(triangle, alphaC, vecX2, inc, vecY2, inc, matAp);
    mVerify.invoke_verifyUpLo(matRef, matAp, matRef, true);

    mRS.finish();
    checkError();
}
/**
 * Runs ZHPR2 with Uplo = UPPER on the packed BLASData fixtures and passes the
 * result to mVerify.invoke_verifyUpLo together with the data from
 * BLASData.L2_zHER2_o_N_pu, once with increments of 1 and once with
 * incX = incY = 2.
 */
public void test_L2_ZHPR2_Correctness() {
    final int triangle = ScriptIntrinsicBLAS.UPPER;
    final Element elemZ = Element.F64_2(mRS);
    final int n = BLASData.dN;
    // The packed matrix is stored as a single row of n * (n + 1) / 2 elements.
    final int packedLen = n * (n + 1) / 2;

    // Inputs for the increment-1 run.
    Allocation matAp = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, packedLen, 1));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, n, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, n, 1));
    matAp.copyFrom(BLASData.L2_zHER2_A_nn_pu);
    vecX.copyFrom(BLASData.L2_zHER2_x_n1);
    vecY.copyFrom(BLASData.L2_zHER2_y_n1);
    mBLAS.ZHPR2(triangle, alphaZ, vecX, 1, vecY, 1, matAp);

    // Allocation holding the data the result is verified against.
    Allocation matRef = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, packedLen, 1));
    matRef.copyFrom(BLASData.L2_zHER2_o_N_pu);
    mVerify.invoke_verifyUpLo(matRef, matAp, matRef, true);

    // Second run: both vectors use an increment of 2, so they are re-created
    // with length 1 + (n - 1) * inc and Ap is reset to its input.
    final int inc = 2;
    Allocation vecX2 = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, 1 + (n - 1) * inc, 1));
    Allocation vecY2 = Allocation.createTyped(mRS, Type.createXY(mRS, elemZ, 1 + (n - 1) * inc, 1));
    vecX2.copyFrom(BLASData.L2_zHER2_x_n2);
    vecY2.copyFrom(BLASData.L2_zHER2_y_n2);
    matAp.copyFrom(BLASData.L2_zHER2_A_nn_pu);
    mBLAS.ZHPR2(triangle, alphaZ, vecX2, inc, vecY2, inc, matAp);
    mVerify.invoke_verifyUpLo(matRef, matAp, matRef, true);

    mRS.finish();
    checkError();
}
/**
 * Decides whether (Uplo, X, incX, A) is an argument set the test expects to be
 * accepted: Uplo must pass validateUplo, A and X must have elements compatible
 * with {@code e}, X must have Y-dimension at most 1, A must be square, incX
 * must be positive, and X's length must equal 1 + (N - 1) * incX where N is
 * A's X-dimension.
 *
 * @return true when every check passes, false otherwise
 */
private boolean validateSYR(Element e, int Uplo, Allocation X, int incX, Allocation A) {
    if (!validateUplo(Uplo)) {
        return false;
    }
    final Type typeA = A.getType();
    final Type typeX = X.getType();

    final boolean elementsMatch =
            typeA.getElement().isCompatible(e) && typeX.getElement().isCompatible(e);
    if (!elementsMatch) {
        return false;
    }

    final int n = typeA.getX();
    final boolean shapesOk = typeX.getY() <= 1 && n == typeA.getY();
    if (!shapesOk || incX <= 0) {
        return false;
    }

    return typeX.getX() == 1 + (n - 1) * incX;
}
/**
 * Tries SSYR/DSYR on every (A, x) combination drawn from {@code mMatrix} and
 * checks that the intrinsic accepts or rejects each one in agreement with
 * {@code validateSYR}.
 *
 * @param Uplo value passed as the Uplo argument
 * @param incX increment passed for vector x
 * @param mMatrix allocations used as candidates for A and x
 */
private void xSYR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        final Element elemA = matA.getType().getElement();
        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            if (!validateSYR(elemA, Uplo, vecX, incX, matA)) {
                // Combination judged invalid: both variants must reject it.
                try {
                    mBLAS.SSYR(Uplo, alphaS, vecX, incX, matA);
                    fail("should throw RSRuntimeException for SSYR");
                } catch (RSRuntimeException expected) {
                    // Rejection is the outcome being tested for.
                }
                try {
                    mBLAS.DSYR(Uplo, alphaD, vecX, incX, matA);
                    fail("should throw RSRuntimeException for DSYR");
                } catch (RSRuntimeException expected) {
                    // Rejection is the outcome being tested for.
                }
                continue;
            }
            // Combination judged valid: the variant matching A's element must succeed.
            try {
                if (elemA.isCompatible(Element.F32(mRS))) {
                    mBLAS.SSYR(Uplo, alphaS, vecX, incX, matA);
                } else if (elemA.isCompatible(Element.F64(mRS))) {
                    mBLAS.DSYR(Uplo, alphaD, vecX, incX, matA);
                }
            } catch (RSRuntimeException unexpected) {
                fail("should NOT throw RSRuntimeException");
            }
        }
    }
}
/**
 * Calls xSYR_API_test for every combination of an mUplo entry and an mInc entry.
 *
 * @param mMatrix allocations forwarded to xSYR_API_test
 */
public void L2_xSYR_API(ArrayList<Allocation> mMatrix) {
    for (int u = 0; u < mUplo.length; u++) {
        for (int i = 0; i < mInc.length; i++) {
            xSYR_API_test(mUplo[u], mInc[i], mMatrix);
        }
    }
}
/** Runs the SYR API checks over the allocations held in mMatrixS. */
public void test_L2_SSYR_API() {
    final ArrayList<Allocation> candidates = mMatrixS;
    L2_xSYR_API(candidates);
}
/** Runs the SYR API checks over the allocations held in mMatrixD. */
public void test_L2_DSYR_API() {
    final ArrayList<Allocation> candidates = mMatrixD;
    L2_xSYR_API(candidates);
}
/**
 * Runs SSYR with Uplo = UPPER on the BLASData fixtures and passes the result to
 * mVerify.invoke_verifyUpLo together with the data from BLASData.L2_sSYR_o_N,
 * once with incX = 1 and once with incX = 2.
 */
public void test_L2_SSYR_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int incX = 1;
    //populate input allocations
    Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dN));
    Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, 1));
    matrixAS.copyFrom(BLASData.L2_sSYR_A_nn);
    vectorXS.copyFrom(BLASData.L2_sSYR_x_n1);
    //Test for the default case: incX = 1 (SSYR takes no transpose or incY argument)
    mBLAS.SSYR(uplo, alphaS, vectorXS, incX, matrixAS);
    Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dN));
    matrixARef.copyFrom(BLASData.L2_sSYR_o_N);
    // NOTE(review): the trailing 'true' presumably selects the upper triangle - confirm against the verify script.
    mVerify.invoke_verifyUpLo(matrixARef, matrixAS, matrixARef, true);
    //Test for incX = 2: x is re-created with length 1 + (N-1) * incX and A is reset to its input
    incX = 2;
    int dimX = 1 + (BLASData.dN-1) * incX;
    vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
    vectorXS.copyFrom(BLASData.L2_sSYR_x_n2);
    matrixAS.copyFrom(BLASData.L2_sSYR_A_nn);
    mBLAS.SSYR(uplo, alphaS, vectorXS, incX, matrixAS);
    mVerify.invoke_verifyUpLo(matrixARef, matrixAS, matrixARef, true);
    mRS.finish();
    checkError();
}
/**
 * Runs DSYR with Uplo = UPPER on the BLASData fixtures and passes the result to
 * mVerify.invoke_verifyUpLo together with the data from BLASData.L2_dSYR_o_N,
 * once with incX = 1 and once with incX = 2.
 */
public void test_L2_DSYR_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int incX = 1;
    //populate input allocations
    Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dN));
    Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, 1));
    matrixAD.copyFrom(BLASData.L2_dSYR_A_nn);
    vectorXD.copyFrom(BLASData.L2_dSYR_x_n1);
    //Test for the default case: incX = 1 (DSYR takes no transpose or incY argument)
    mBLAS.DSYR(uplo, alphaD, vectorXD, incX, matrixAD);
    Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dN));
    matrixARef.copyFrom(BLASData.L2_dSYR_o_N);
    // NOTE(review): the trailing 'true' presumably selects the upper triangle - confirm against the verify script.
    mVerify.invoke_verifyUpLo(matrixARef, matrixAD, matrixARef, true);
    //Test for incX = 2: x is re-created with length 1 + (N-1) * incX and A is reset to its input
    incX = 2;
    int dimX = 1 + (BLASData.dN-1) * incX;
    vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
    vectorXD.copyFrom(BLASData.L2_dSYR_x_n2);
    matrixAD.copyFrom(BLASData.L2_dSYR_A_nn);
    mBLAS.DSYR(uplo, alphaD, vectorXD, incX, matrixAD);
    mVerify.invoke_verifyUpLo(matrixARef, matrixAD, matrixARef, true);
    mRS.finish();
    checkError();
}
/**
 * Decides whether (Uplo, X, incX, Ap) is an argument set the test expects to be
 * accepted: Uplo must pass validateUplo, Ap and X must have elements compatible
 * with {@code e}, both must have Y-dimension at most 1, Ap's length must equal
 * N * (N + 1) / 2 for N = (int) sqrt(2 * length), incX must be positive, and
 * X's length must equal 1 + (N - 1) * incX.
 *
 * @return true when every check passes, false otherwise
 */
private boolean validateSPR(Element e, int Uplo, Allocation X, int incX, Allocation Ap) {
    if (!validateUplo(Uplo)) {
        return false;
    }
    final Type typeAp = Ap.getType();
    final Type typeX = X.getType();

    final boolean elementsMatch =
            typeAp.getElement().isCompatible(e) && typeX.getElement().isCompatible(e);
    if (!elementsMatch) {
        return false;
    }
    if (typeX.getY() > 1 || typeAp.getY() > 1) {
        return false;
    }

    // Recover N from the packed length and confirm the length is exactly N * (N + 1) / 2.
    final int packedLen = typeAp.getX();
    final int n = (int) Math.sqrt((double) packedLen * 2);
    if (packedLen != (n * (n + 1)) / 2) {
        return false;
    }
    if (incX <= 0) {
        return false;
    }

    return typeX.getX() == 1 + (n - 1) * incX;
}
/**
 * Tries SSPR/DSPR on every (Ap, x) combination drawn from {@code mMatrix} and
 * checks that the intrinsic accepts or rejects each one in agreement with
 * {@code validateSPR}.
 *
 * @param Uplo value passed as the Uplo argument
 * @param incX increment passed for vector x
 * @param mMatrix allocations used as candidates for Ap and x
 */
private void xSPR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        final Element elemA = matA.getType().getElement();
        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            if (!validateSPR(elemA, Uplo, vecX, incX, matA)) {
                // Combination judged invalid: both variants must reject it.
                try {
                    mBLAS.SSPR(Uplo, alphaS, vecX, incX, matA);
                    fail("should throw RSRuntimeException for SSPR");
                } catch (RSRuntimeException expected) {
                    // Rejection is the outcome being tested for.
                }
                try {
                    mBLAS.DSPR(Uplo, alphaD, vecX, incX, matA);
                    fail("should throw RSRuntimeException for DSPR");
                } catch (RSRuntimeException expected) {
                    // Rejection is the outcome being tested for.
                }
                continue;
            }
            // Combination judged valid: the variant matching Ap's element must succeed.
            try {
                if (elemA.isCompatible(Element.F32(mRS))) {
                    mBLAS.SSPR(Uplo, alphaS, vecX, incX, matA);
                } else if (elemA.isCompatible(Element.F64(mRS))) {
                    mBLAS.DSPR(Uplo, alphaD, vecX, incX, matA);
                }
            } catch (RSRuntimeException unexpected) {
                fail("should NOT throw RSRuntimeException");
            }
        }
    }
}
/**
 * Calls xSPR_API_test for every combination of an mUplo entry and an mInc entry.
 *
 * @param mMatrix allocations forwarded to xSPR_API_test
 */
public void L2_xSPR_API(ArrayList<Allocation> mMatrix) {
    for (int u = 0; u < mUplo.length; u++) {
        for (int i = 0; i < mInc.length; i++) {
            xSPR_API_test(mUplo[u], mInc[i], mMatrix);
        }
    }
}
/** Runs the SPR API checks over the allocations held in mMatrixS. */
public void test_L2_SSPR_API() {
    final ArrayList<Allocation> candidates = mMatrixS;
    L2_xSPR_API(candidates);
}
/** Runs the SPR API checks over the allocations held in mMatrixD. */
public void test_L2_DSPR_API() {
    final ArrayList<Allocation> candidates = mMatrixD;
    L2_xSPR_API(candidates);
}
/**
 * Runs SSPR with Uplo = UPPER on the packed BLASData fixtures and passes the
 * result to mVerify.invoke_verifyUpLo together with the data from
 * BLASData.L2_sSYR_o_N_pu, once with incX = 1 and once with incX = 2.
 */
public void test_L2_SSPR_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int incX = 1;
    //populate input allocations
    int N = BLASData.dN;
    // The packed matrix is stored as a single row of N * (N+1) / 2 elements.
    Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
    Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
    matrixAS.copyFrom(BLASData.L2_sSYR_A_nn_pu);
    vectorXS.copyFrom(BLASData.L2_sSYR_x_n1);
    //Test for the default case: incX = 1 (SSPR takes no transpose or incY argument)
    mBLAS.SSPR(uplo, alphaS, vectorXS, incX, matrixAS);
    Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
    matrixARef.copyFrom(BLASData.L2_sSYR_o_N_pu);
    // NOTE(review): the trailing 'true' presumably selects the upper triangle - confirm against the verify script.
    mVerify.invoke_verifyUpLo(matrixARef, matrixAS, matrixARef, true);
    //Test for incX = 2: x is re-created with length 1 + (N-1) * incX and Ap is reset to its input
    incX = 2;
    int dimX = 1 + (N-1) * incX;
    vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
    vectorXS.copyFrom(BLASData.L2_sSYR_x_n2);
    matrixAS.copyFrom(BLASData.L2_sSYR_A_nn_pu);
    mBLAS.SSPR(uplo, alphaS, vectorXS, incX, matrixAS);
    mVerify.invoke_verifyUpLo(matrixARef, matrixAS, matrixARef, true);
    mRS.finish();
    checkError();
}
/**
 * Runs DSPR with Uplo = UPPER on the packed BLASData fixtures and passes the
 * result to mVerify.invoke_verifyUpLo together with the data from
 * BLASData.L2_dSYR_o_N_pu, once with incX = 1 and once with incX = 2.
 */
public void test_L2_DSPR_Correctness() {
    int uplo = ScriptIntrinsicBLAS.UPPER;
    int incX = 1;
    //populate input allocations
    int N = BLASData.dN;
    // The packed matrix is stored as a single row of N * (N+1) / 2 elements.
    Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
    Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
    matrixAD.copyFrom(BLASData.L2_dSYR_A_nn_pu);
    vectorXD.copyFrom(BLASData.L2_dSYR_x_n1);
    //Test for the default case: incX = 1 (DSPR takes no transpose or incY argument)
    mBLAS.DSPR(uplo, alphaD, vectorXD, incX, matrixAD);
    Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
    matrixARef.copyFrom(BLASData.L2_dSYR_o_N_pu);
    // NOTE(review): the trailing 'true' presumably selects the upper triangle - confirm against the verify script.
    mVerify.invoke_verifyUpLo(matrixARef, matrixAD, matrixARef, true);
    //Test for incX = 2: x is re-created with length 1 + (N-1) * incX and Ap is reset to its input
    incX = 2;
    int dimX = 1 + (N-1) * incX;
    vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
    vectorXD.copyFrom(BLASData.L2_dSYR_x_n2);
    matrixAD.copyFrom(BLASData.L2_dSYR_A_nn_pu);
    mBLAS.DSPR(uplo, alphaD, vectorXD, incX, matrixAD);
    mVerify.invoke_verifyUpLo(matrixARef, matrixAD, matrixARef, true);
    mRS.finish();
    checkError();
}
/**
 * Decides whether (Uplo, X, incX, Y, incY, A) is an argument set the test
 * expects to be accepted: Uplo must pass validateUplo, A, X and Y must have
 * elements compatible with {@code e}, X and Y must have Y-dimension at most 1,
 * A must be square, both increments must be positive, and the lengths of X and
 * Y must equal 1 + (N - 1) * inc for their respective increments, where N is
 * A's X-dimension.
 *
 * @return true when every check passes, false otherwise
 */
private boolean validateSYR2(Element e, int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
    if (!validateUplo(Uplo)) {
        return false;
    }
    final Type typeA = A.getType();
    final Type typeX = X.getType();
    final Type typeY = Y.getType();

    final boolean elementsMatch = typeA.getElement().isCompatible(e)
            && typeX.getElement().isCompatible(e)
            && typeY.getElement().isCompatible(e);
    if (!elementsMatch) {
        return false;
    }
    if (typeX.getY() > 1 || typeY.getY() > 1) {
        return false;
    }

    final int n = typeA.getX();
    if (n != typeA.getY()) {
        return false;
    }
    if (incX <= 0 || incY <= 0) {
        return false;
    }

    final int wantX = 1 + (n - 1) * incX;
    final int wantY = 1 + (n - 1) * incY;
    return typeX.getX() == wantX && typeY.getX() == wantY;
}
/**
 * Tries SSYR2/DSYR2 on every (A, x, y) combination drawn from {@code mMatrix}
 * and checks that the intrinsic accepts or rejects each one in agreement with
 * {@code validateSYR2}.
 *
 * @param Uplo value passed as the Uplo argument
 * @param incX increment passed for vector x
 * @param incY increment passed for vector y
 * @param mMatrix allocations used as candidates for A, x and y
 */
private void xSYR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        final Element elemA = matA.getType().getElement();
        for (Allocation vecX : mMatrix) {
            if (!validateVecInput(vecX)) {
                continue;
            }
            for (Allocation vecY : mMatrix) {
                if (!validateVecInput(vecY)) {
                    continue;
                }
                if (!validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) {
                    // Combination judged invalid: both variants must reject it.
                    try {
                        mBLAS.SSYR2(Uplo, alphaS, vecX, incX, vecY, incY, matA);
                        fail("should throw RSRuntimeException for SSYR2");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the outcome being tested for.
                    }
                    try {
                        mBLAS.DSYR2(Uplo, alphaD, vecX, incX, vecY, incY, matA);
                        fail("should throw RSRuntimeException for DSYR2");
                    } catch (RSRuntimeException expected) {
                        // Rejection is the outcome being tested for.
                    }
                    continue;
                }
                // Combination judged valid: the variant matching A's element must succeed.
                try {
                    if (elemA.isCompatible(Element.F32(mRS))) {
                        mBLAS.SSYR2(Uplo, alphaS, vecX, incX, vecY, incY, matA);
                    } else if (elemA.isCompatible(Element.F64(mRS))) {
                        mBLAS.DSYR2(Uplo, alphaD, vecX, incX, vecY, incY, matA);
                    }
                } catch (RSRuntimeException unexpected) {
                    fail("should NOT throw RSRuntimeException");
                }
            }
        }
    }
}
/**
 * Calls xSYR2_API_test for every mUplo entry and every (incX, incY) pair drawn
 * from mInc. The two increments are varied independently, as L2_xGERC_API does,
 * so that combinations with incX != incY are exercised as well.
 *
 * @param mMatrix allocations forwarded to xSYR2_API_test
 */
public void L2_xSYR2_API(ArrayList<Allocation> mMatrix) {
    for (int Uplo : mUplo) {
        for (int incX : mInc) {
            for (int incY : mInc) {
                xSYR2_API_test(Uplo, incX, incY, mMatrix);
            }
        }
    }
}
/** Runs the SYR2 API checks over the allocations held in mMatrixS. */
public void test_L2_SSYR2_API() {
    final ArrayList<Allocation> candidates = mMatrixS;
    L2_xSYR2_API(candidates);
}
/** Runs the SYR2 API checks over the allocations held in mMatrixD. */
public void test_L2_DSYR2_API() {
    final ArrayList<Allocation> candidates = mMatrixD;
    L2_xSYR2_API(candidates);
}
/**
 * Runs SSYR2 with Uplo = UPPER on the BLASData fixtures and passes the result
 * to mVerify.invoke_verifyUpLo together with the data from
 * BLASData.L2_sSYR2_o_N, once with increments of 1 and once with
 * incX = incY = 2.
 */
public void test_L2_SSYR2_Correctness() {
    final int triangle = ScriptIntrinsicBLAS.UPPER;
    final Element elemS = Element.F32(mRS);
    final int n = BLASData.dN;

    // Inputs for the increment-1 run.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, elemS, n, n));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elemS, n, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elemS, n, 1));
    matA.copyFrom(BLASData.L2_sSYR2_A_nn);
    vecX.copyFrom(BLASData.L2_sSYR2_x_n1);
    vecY.copyFrom(BLASData.L2_sSYR2_y_n1);
    mBLAS.SSYR2(triangle, alphaS, vecX, 1, vecY, 1, matA);

    // Allocation holding the data the result is verified against.
    Allocation matRef = Allocation.createTyped(mRS, Type.createXY(mRS, elemS, n, n));
    matRef.copyFrom(BLASData.L2_sSYR2_o_N);
    mVerify.invoke_verifyUpLo(matRef, matA, matRef, true);

    // Second run: both vectors use an increment of 2, so they are re-created
    // with length 1 + (n - 1) * inc and A is reset to its input.
    final int inc = 2;
    Allocation vecX2 = Allocation.createTyped(mRS, Type.createXY(mRS, elemS, 1 + (n - 1) * inc, 1));
    Allocation vecY2 = Allocation.createTyped(mRS, Type.createXY(mRS, elemS, 1 + (n - 1) * inc, 1));
    vecX2.copyFrom(BLASData.L2_sSYR2_x_n2);
    vecY2.copyFrom(BLASData.L2_sSYR2_y_n2);
    matA.copyFrom(BLASData.L2_sSYR2_A_nn);
    mBLAS.SSYR2(triangle, alphaS, vecX2, inc, vecY2, inc, matA);
    mVerify.invoke_verifyUpLo(matRef, matA, matRef, true);

    mRS.finish();
    checkError();
}
/**
 * Runs DSYR2 with Uplo = UPPER on the BLASData fixtures and passes the result
 * to mVerify.invoke_verifyUpLo together with the data from
 * BLASData.L2_dSYR2_o_N, once with increments of 1 and once with
 * incX = incY = 2.
 */
public void test_L2_DSYR2_Correctness() {
    final int triangle = ScriptIntrinsicBLAS.UPPER;
    final Element elemD = Element.F64(mRS);
    final int n = BLASData.dN;

    // Inputs for the increment-1 run.
    Allocation matA = Allocation.createTyped(mRS, Type.createXY(mRS, elemD, n, n));
    Allocation vecX = Allocation.createTyped(mRS, Type.createXY(mRS, elemD, n, 1));
    Allocation vecY = Allocation.createTyped(mRS, Type.createXY(mRS, elemD, n, 1));
    matA.copyFrom(BLASData.L2_dSYR2_A_nn);
    vecX.copyFrom(BLASData.L2_dSYR2_x_n1);
    vecY.copyFrom(BLASData.L2_dSYR2_y_n1);
    mBLAS.DSYR2(triangle, alphaD, vecX, 1, vecY, 1, matA);

    // Allocation holding the data the result is verified against.
    Allocation matRef = Allocation.createTyped(mRS, Type.createXY(mRS, elemD, n, n));
    matRef.copyFrom(BLASData.L2_dSYR2_o_N);
    mVerify.invoke_verifyUpLo(matRef, matA, matRef, true);

    // Second run: both vectors use an increment of 2, so they are re-created
    // with length 1 + (n - 1) * inc and A is reset to its input.
    final int inc = 2;
    Allocation vecX2 = Allocation.createTyped(mRS, Type.createXY(mRS, elemD, 1 + (n - 1) * inc, 1));
    Allocation vecY2 = Allocation.createTyped(mRS, Type.createXY(mRS, elemD, 1 + (n - 1) * inc, 1));
    vecX2.copyFrom(BLASData.L2_dSYR2_x_n2);
    vecY2.copyFrom(BLASData.L2_dSYR2_y_n2);
    matA.copyFrom(BLASData.L2_dSYR2_A_nn);
    mBLAS.DSYR2(triangle, alphaD, vecX2, inc, vecY2, inc, matA);
    mVerify.invoke_verifyUpLo(matRef, matA, matRef, true);

    mRS.finish();
    checkError();
}
/**
 * Decides whether an xSPR2 call with these arguments is expected to be accepted.
 *
 * @param e    element type all three allocations must be compatible with
 * @param Uplo triangle selector, checked by validateUplo
 * @param X    vector x; must be a single row of 1 + (N - 1) * incX elements
 * @param incX stride for x; must be positive
 * @param Y    vector y; must be a single row of 1 + (N - 1) * incY elements
 * @param incY stride for y; must be positive
 * @param Ap   packed matrix; must be a single row of N * (N + 1) / 2 elements
 * @return true when every check passes, false otherwise
 */
private boolean validateSPR2(Element e, int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
    if (!validateUplo(Uplo)) {
        return false;
    }

    final boolean elementsCompatible = Ap.getType().getElement().isCompatible(e)
            && X.getType().getElement().isCompatible(e)
            && Y.getType().getElement().isCompatible(e);
    if (!elementsCompatible) {
        return false;
    }

    // X, Y and the packed matrix must all be laid out as a single row.
    final boolean singleRow = X.getType().getY() <= 1
            && Y.getType().getY() <= 1
            && Ap.getType().getY() <= 1;
    if (!singleRow) {
        return false;
    }

    // Recover the matrix order N from the packed length and make sure the
    // length really is N * (N + 1) / 2.
    final int packedLength = Ap.getType().getX();
    final int order = (int) Math.sqrt((double) packedLength * 2);
    if (packedLength != ((order * (order + 1)) / 2)) {
        return false;
    }

    if (incX <= 0 || incY <= 0) {
        return false;
    }

    return X.getType().getX() == 1 + (order - 1) * incX
            && Y.getType().getX() == 1 + (order - 1) * incY;
}
/**
 * Tries every (packed matrix, x, y) triple drawn from mMatrix with the given
 * Uplo / incX / incY. Triples accepted by validateSPR2 must run without an
 * RSRuntimeException in the precision matching the matrix element; all other
 * triples must be rejected by both SSPR2 and DSPR2.
 * Candidates for x and y that fail validateVecInput are skipped entirely.
 */
private void xSPR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
    for (Allocation packedA : mMatrix) {
        final Element elemA = packedA.getType().getElement();
        for (Allocation x : mMatrix) {
            if (!validateVecInput(x)) {
                continue;
            }
            for (Allocation y : mMatrix) {
                if (!validateVecInput(y)) {
                    continue;
                }

                if (!validateSPR2(elemA, Uplo, x, incX, y, incY, packedA)) {
                    // Invalid arguments: both precisions have to refuse the call.
                    try {
                        mBLAS.SSPR2(Uplo, alphaS, x, incX, y, incY, packedA);
                        fail("should throw RSRuntimeException for SSPR2");
                    } catch (RSRuntimeException expected) {
                        // rejection is the expected outcome
                    }
                    try {
                        mBLAS.DSPR2(Uplo, alphaD, x, incX, y, incY, packedA);
                        fail("should throw RSRuntimeException for DSPR2");
                    } catch (RSRuntimeException expected) {
                        // rejection is the expected outcome
                    }
                    continue;
                }

                // Valid arguments: the variant matching the element type must succeed.
                try {
                    if (elemA.isCompatible(Element.F32(mRS))) {
                        mBLAS.SSPR2(Uplo, alphaS, x, incX, y, incY, packedA);
                    } else if (elemA.isCompatible(Element.F64(mRS))) {
                        mBLAS.DSPR2(Uplo, alphaD, x, incX, y, incY, packedA);
                    }
                } catch (RSRuntimeException unexpected) {
                    fail("should NOT throw RSRuntimeException");
                }
            }
        }
    }
}
/**
 * Runs xSPR2_API_test for every combination of mUplo and mInc entries.
 * The same increment value is passed for both incX and incY.
 */
public void L2_xSPR2_API(ArrayList<Allocation> mMatrix) {
    for (int uploMode : mUplo) {
        for (int stride : mInc) {
            xSPR2_API_test(uploMode, stride, stride, mMatrix);
        }
    }
}
/** API validation sweep for xSPR2 over the allocations in mMatrixS. */
public void test_L2_SSPR2_API() {
    L2_xSPR2_API(mMatrixS);
}
/** API validation sweep for xSPR2 over the allocations in mMatrixD. */
public void test_L2_DSPR2_API() {
    L2_xSPR2_API(mMatrixD);
}
/**
 * Correctness test for SSPR2 with uplo = UPPER and alpha = alphaS.
 * The matrix is supplied in packed form as a single row of N * (N+1) / 2 elements.
 * The test reuses the SYR2 vector data together with the "_pu" matrix and reference
 * arrays, and runs once with unit increments and once with incX = incY = 2.
 */
public void test_L2_SSPR2_Correctness() {
int uplo = ScriptIntrinsicBLAS.UPPER;
int incX = 1;
int incY = 1;
//populate input allocations
int N = BLASData.dN;
Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
matrixAS.copyFrom(BLASData.L2_sSYR2_A_nn_pu);
vectorXS.copyFrom(BLASData.L2_sSYR2_x_n1);
vectorYS.copyFrom(BLASData.L2_sSYR2_y_n1);
//Test for the default case: incX = 1 & incY = 1 (SSPR2 takes no transpose argument)
mBLAS.SSPR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS);
Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
matrixARef.copyFrom(BLASData.L2_sSYR2_o_N_pu);
//Compare the packed result with the packed reference.
mVerify.invoke_verifyUpLo(matrixARef, matrixAS, matrixARef, true);
//Test for incX = 2 & incY = 2;
incX = 2;
incY = 2;
//N elements spaced inc apart need 1 + (N-1) * inc slots.
int dimX = 1 + (N-1) * incX;
int dimY = 1 + (N-1) * incY;
vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
vectorXS.copyFrom(BLASData.L2_sSYR2_x_n2);
vectorYS.copyFrom(BLASData.L2_sSYR2_y_n2);
//Reload the packed matrix, since it was overwritten by the first SSPR2 call.
matrixAS.copyFrom(BLASData.L2_sSYR2_A_nn_pu);
mBLAS.SSPR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS);
//The strided run is checked against the same reference output as the unit-stride run.
mVerify.invoke_verifyUpLo(matrixARef, matrixAS, matrixARef, true);
mRS.finish();
checkError();
}
/**
 * Correctness test for DSPR2 with uplo = UPPER and alpha = alphaD.
 * The matrix is supplied in packed form as a single row of N * (N+1) / 2 elements.
 * The test reuses the SYR2 vector data together with the "_pu" matrix and reference
 * arrays, and runs once with unit increments and once with incX = incY = 2.
 */
public void test_L2_DSPR2_Correctness() {
int uplo = ScriptIntrinsicBLAS.UPPER;
int incX = 1;
int incY = 1;
//populate input allocations
int N = BLASData.dN;
Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
matrixAD.copyFrom(BLASData.L2_dSYR2_A_nn_pu);
vectorXD.copyFrom(BLASData.L2_dSYR2_x_n1);
vectorYD.copyFrom(BLASData.L2_dSYR2_y_n1);
//Test for the default case: incX = 1 & incY = 1 (DSPR2 takes no transpose argument)
mBLAS.DSPR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD);
Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
matrixARef.copyFrom(BLASData.L2_dSYR2_o_N_pu);
//Compare the packed result with the packed reference.
mVerify.invoke_verifyUpLo(matrixARef, matrixAD, matrixARef, true);
//Test for incX = 2 & incY = 2;
incX = 2;
incY = 2;
//N elements spaced inc apart need 1 + (N-1) * inc slots.
int dimX = 1 + (N-1) * incX;
int dimY = 1 + (N-1) * incY;
vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
vectorXD.copyFrom(BLASData.L2_dSYR2_x_n2);
vectorYD.copyFrom(BLASData.L2_dSYR2_y_n2);
//Reload the packed matrix, since it was overwritten by the first DSPR2 call.
matrixAD.copyFrom(BLASData.L2_dSYR2_A_nn_pu);
mBLAS.DSPR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD);
//The strided run is checked against the same reference output as the unit-stride run.
mVerify.invoke_verifyUpLo(matrixARef, matrixAD, matrixARef, true);
mRS.finish();
checkError();
}
/**
 * Shared shape/element check used by the level-3 validators.
 *
 * @param e      element type every supplied allocation must be compatible with
 * @param TransA transpose mode for A (only consulted when Side is not RIGHT)
 * @param TransB transpose mode for B (only consulted when Side is not RIGHT)
 * @param Side   when RIGHT, A and B trade roles for the dimension checks
 * @param A      first operand, may be null
 * @param B      second operand, may be null
 * @param C      result matrix, must not be null
 * @return true when the supplied operands have mutually consistent dimensions
 */
private boolean validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) {
    // Whatever is supplied has to hold elements compatible with e.
    if (A != null && !A.getType().getElement().isCompatible(e)) {
        return false;
    }
    if (B != null && !B.getType().getElement().isCompatible(e)) {
        return false;
    }
    if (C != null && !C.getType().getElement().isCompatible(e)) {
        return false;
    }
    // C stores the result, so it cannot be omitted.
    if (C == null) {
        return false;
    }

    final int cM = C.getType().getY();
    final int cN = C.getType().getX();
    int aM = -1;
    int aN = -1;
    int bM = -1;
    int bN = -1;

    if (Side == ScriptIntrinsicBLAS.RIGHT) {
        // A and B must be given together or not at all.
        if ((A == null) != (B == null)) {
            return false;
        }
        if (A != null) {
            // Roles are exchanged: B provides the "a" dimensions and A the "b" dimensions.
            bM = A.getType().getY();
            bN = A.getType().getX();
            aM = B.getType().getY();
            aN = B.getType().getX();
        }
    } else {
        if (A != null) {
            final boolean flipA = TransA == ScriptIntrinsicBLAS.TRANSPOSE
                    || TransA == ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
            aM = flipA ? A.getType().getX() : A.getType().getY();
            aN = flipA ? A.getType().getY() : A.getType().getX();
        }
        if (B != null) {
            final boolean flipB = TransB == ScriptIntrinsicBLAS.TRANSPOSE
                    || TransB == ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
            bM = flipB ? B.getType().getX() : B.getType().getY();
            bN = flipB ? B.getType().getY() : B.getType().getX();
        }
    }

    // C is known to be non-null from here on, so only A and B decide which rule applies.
    if (A != null && B != null) {
        // Full product: inner dimensions agree and the result matches C.
        return aN == bM && aM == cM && bN == cN;
    }
    if (A != null) {
        // A and C only, for SYRK: C must be square and match A's row count.
        return cM == cN && aM == cM;
    }
    return true;
}
/**
 * Expected-validity check for xGEMM: both transpose modes must be valid and
 * the operand shapes must pass validateL3 (with Side passed as 0).
 * All three checks are always evaluated.
 */
private boolean validateL3_xGEMM(Element e, int TransA, int TransB, Allocation A, Allocation B, Allocation C) {
    final boolean transAOk = validateTranspose(TransA);
    final boolean transBOk = validateTranspose(TransB);
    final boolean shapesOk = validateL3(e, TransA, TransB, 0, A, B, C);
    return transAOk && transBOk && shapesOk;
}
/**
 * Tries every (A, B, C) triple drawn from mMatrix with the given transpose modes.
 * Triples accepted by validateL3_xGEMM must run without an RSRuntimeException in
 * the precision matching A's element; all other triples must be rejected by
 * SGEMM, DGEMM, CGEMM and ZGEMM alike.
 */
private void xGEMM_API_test(int transA, int transB, ArrayList<Allocation> mMatrix) {
    for (Allocation a : mMatrix) {
        final Element elemA = a.getType().getElement();
        for (Allocation b : mMatrix) {
            for (Allocation c : mMatrix) {
                if (!validateL3_xGEMM(elemA, transA, transB, a, b, c)) {
                    // Invalid arguments: every precision variant has to refuse the call.
                    try {
                        mBLAS.SGEMM(transA, transB, alphaS, a, b, betaS, c);
                        fail("should throw RSRuntimeException for SGEMM");
                    } catch (RSRuntimeException expected) {
                        // rejection is the expected outcome
                    }
                    try {
                        mBLAS.DGEMM(transA, transB, alphaD, a, b, betaD, c);
                        fail("should throw RSRuntimeException for DGEMM");
                    } catch (RSRuntimeException expected) {
                        // rejection is the expected outcome
                    }
                    try {
                        mBLAS.CGEMM(transA, transB, alphaC, a, b, betaC, c);
                        fail("should throw RSRuntimeException for CGEMM");
                    } catch (RSRuntimeException expected) {
                        // rejection is the expected outcome
                    }
                    try {
                        mBLAS.ZGEMM(transA, transB, alphaZ, a, b, betaZ, c);
                        fail("should throw RSRuntimeException for ZGEMM");
                    } catch (RSRuntimeException expected) {
                        // rejection is the expected outcome
                    }
                    continue;
                }

                // Valid arguments: the variant matching the element type must succeed.
                try {
                    if (elemA.isCompatible(Element.F32(mRS))) {
                        mBLAS.SGEMM(transA, transB, alphaS, a, b, betaS, c);
                    } else if (elemA.isCompatible(Element.F64(mRS))) {
                        mBLAS.DGEMM(transA, transB, alphaD, a, b, betaD, c);
                    } else if (elemA.isCompatible(Element.F32_2(mRS))) {
                        mBLAS.CGEMM(transA, transB, alphaC, a, b, betaC, c);
                    } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                        mBLAS.ZGEMM(transA, transB, alphaZ, a, b, betaZ, c);
                    }
                } catch (RSRuntimeException unexpected) {
                    fail("should NOT throw RSRuntimeException");
                }
            }
        }
    }
}
/** Runs xGEMM_API_test for every (transA, transB) pair taken from mTranspose. */
private void L3_xGEMM_API(ArrayList<Allocation> mMatrix) {
    for (int modeA : mTranspose) {
        for (int modeB : mTranspose) {
            xGEMM_API_test(modeA, modeB, mMatrix);
        }
    }
}
/** API validation sweep for xGEMM over the allocations in mMatrixS. */
public void test_L3_SGEMM_API() {
    L3_xGEMM_API(mMatrixS);
}
/** API validation sweep for xGEMM over the allocations in mMatrixD. */
public void test_L3_DGEMM_API() {
    L3_xGEMM_API(mMatrixD);
}
/** API validation sweep for xGEMM over the allocations in mMatrixC. */
public void test_L3_CGEMM_API() {
    L3_xGEMM_API(mMatrixC);
}
/** API validation sweep for xGEMM over the allocations in mMatrixZ. */
public void test_L3_ZGEMM_API() {
    L3_xGEMM_API(mMatrixZ);
}
/**
 * Correctness test for SGEMM with alphaS / betaS.
 * Covers three transpose combinations -- (NO_TRANSPOSE, NO_TRANSPOSE),
 * (TRANSPOSE, TRANSPOSE) and (CONJ_TRANSPOSE, CONJ_TRANSPOSE) -- and compares C
 * against BLASData.L3_sGEMM_o_NN, L3_sGEMM_o_TT and L3_sGEMM_o_HH respectively.
 */
public void test_L3_SGEMM_Correctness() {
int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE;
int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE;
//populate input allocations (createXY is given X first, then Y: A is X=dK,Y=dM; B is X=dN,Y=dK; C is X=dN,Y=dM)
Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dK, BLASData.dM));
Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dK));
Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dM));
matrixAS.copyFrom(BLASData.L3_sGEMM_A_mk);
matrixBS.copyFrom(BLASData.L3_sGEMM_B_kn);
matrixCS.copyFrom(BLASData.L3_sGEMM_C_mn);
//Test for the default case: NO_TRANS
mBLAS.SGEMM(transA, transB, alphaS, matrixAS, matrixBS, betaS, matrixCS);
Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dM));
matrixCRef.copyFrom(BLASData.L3_sGEMM_o_NN);
mVerify.invoke_verify(matrixCRef, matrixCS, matrixCRef);
//test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
//A and B are re-created with X and Y swapped and loaded with the _km / _nk data.
matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dM, BLASData.dK));
matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dK, BLASData.dN));
matrixAS.copyFrom(BLASData.L3_sGEMM_A_km);
matrixBS.copyFrom(BLASData.L3_sGEMM_B_nk);
transA = ScriptIntrinsicBLAS.TRANSPOSE;
transB = ScriptIntrinsicBLAS.TRANSPOSE;
//Reload matrix C, since it was overwritten by BLAS.
matrixCS.copyFrom(BLASData.L3_sGEMM_C_mn);
mBLAS.SGEMM(transA, transB, alphaS, matrixAS, matrixBS, betaS, matrixCS);
matrixCRef.copyFrom(BLASData.L3_sGEMM_o_TT);
mVerify.invoke_verify(matrixCRef, matrixCS, matrixCRef);
//CONJ_TRANSPOSE reuses the transposed A and B; only C is reloaded.
transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
matrixCS.copyFrom(BLASData.L3_sGEMM_C_mn);
mBLAS.SGEMM(transA, transB, alphaS, matrixAS, matrixBS, betaS, matrixCS);
matrixCRef.copyFrom(BLASData.L3_sGEMM_o_HH);
mVerify.invoke_verify(matrixCRef, matrixCS, matrixCRef);
mRS.finish();
checkError();
}
/**
 * Correctness test for DGEMM with alphaD / betaD.
 * Covers three transpose combinations -- (NO_TRANSPOSE, NO_TRANSPOSE),
 * (TRANSPOSE, TRANSPOSE) and (CONJ_TRANSPOSE, CONJ_TRANSPOSE) -- and compares C
 * against BLASData.L3_dGEMM_o_NN, L3_dGEMM_o_TT and L3_dGEMM_o_HH respectively.
 */
public void test_L3_DGEMM_Correctness() {
int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE;
int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE;
//populate input allocations (createXY is given X first, then Y: A is X=dK,Y=dM; B is X=dN,Y=dK; C is X=dN,Y=dM)
Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dK, BLASData.dM));
Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dK));
Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dM));
matrixAD.copyFrom(BLASData.L3_dGEMM_A_mk);
matrixBD.copyFrom(BLASData.L3_dGEMM_B_kn);
matrixCD.copyFrom(BLASData.L3_dGEMM_C_mn);
//Test for the default case: NO_TRANS
mBLAS.DGEMM(transA, transB, alphaD, matrixAD, matrixBD, betaD, matrixCD);
Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dM));
matrixCRef.copyFrom(BLASData.L3_dGEMM_o_NN);
mVerify.invoke_verify(matrixCRef, matrixCD, matrixCRef);
//test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
//A and B are re-created with X and Y swapped and loaded with the _km / _nk data.
matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dM, BLASData.dK));
matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dK, BLASData.dN));
matrixAD.copyFrom(BLASData.L3_dGEMM_A_km);
matrixBD.copyFrom(BLASData.L3_dGEMM_B_nk);
transA = ScriptIntrinsicBLAS.TRANSPOSE;
transB = ScriptIntrinsicBLAS.TRANSPOSE;
//Reload matrix C, since it was overwritten by BLAS.
matrixCD.copyFrom(BLASData.L3_dGEMM_C_mn);
mBLAS.DGEMM(transA, transB, alphaD, matrixAD, matrixBD, betaD, matrixCD);
matrixCRef.copyFrom(BLASData.L3_dGEMM_o_TT);
mVerify.invoke_verify(matrixCRef, matrixCD, matrixCRef);
//CONJ_TRANSPOSE reuses the transposed A and B; only C is reloaded.
transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
matrixCD.copyFrom(BLASData.L3_dGEMM_C_mn);
mBLAS.DGEMM(transA, transB, alphaD, matrixAD, matrixBD, betaD, matrixCD);
matrixCRef.copyFrom(BLASData.L3_dGEMM_o_HH);
mVerify.invoke_verify(matrixCRef, matrixCD, matrixCRef);
mRS.finish();
checkError();
}
/**
 * Correctness test for CGEMM (F32_2 elements) with alphaC / betaC.
 * Covers three transpose combinations -- (NO_TRANSPOSE, NO_TRANSPOSE),
 * (TRANSPOSE, TRANSPOSE) and (CONJ_TRANSPOSE, CONJ_TRANSPOSE) -- and compares C
 * against BLASData.L3_cGEMM_o_NN, L3_cGEMM_o_TT and L3_cGEMM_o_HH respectively.
 */
public void test_L3_CGEMM_Correctness() {
int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE;
int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE;
//populate input allocations (createXY is given X first, then Y: A is X=dK,Y=dM; B is X=dN,Y=dK; C is X=dN,Y=dM)
Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dK, BLASData.dM));
Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dK));
Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dM));
matrixAC.copyFrom(BLASData.L3_cGEMM_A_mk);
matrixBC.copyFrom(BLASData.L3_cGEMM_B_kn);
matrixCC.copyFrom(BLASData.L3_cGEMM_C_mn);
//Test for the default case: NO_TRANS
mBLAS.CGEMM(transA, transB, alphaC, matrixAC, matrixBC, betaC, matrixCC);
Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dM));
matrixCRef.copyFrom(BLASData.L3_cGEMM_o_NN);
mVerify.invoke_verify(matrixCRef, matrixCC, matrixCRef);
//test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
//A and B are re-created with X and Y swapped and loaded with the _km / _nk data.
matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dM, BLASData.dK));
matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dK, BLASData.dN));
matrixAC.copyFrom(BLASData.L3_cGEMM_A_km);
matrixBC.copyFrom(BLASData.L3_cGEMM_B_nk);
transA = ScriptIntrinsicBLAS.TRANSPOSE;
transB = ScriptIntrinsicBLAS.TRANSPOSE;
//Reload matrix C, since it was overwritten by BLAS.
matrixCC.copyFrom(BLASData.L3_cGEMM_C_mn);
mBLAS.CGEMM(transA, transB, alphaC, matrixAC, matrixBC, betaC, matrixCC);
matrixCRef.copyFrom(BLASData.L3_cGEMM_o_TT);
mVerify.invoke_verify(matrixCRef, matrixCC, matrixCRef);
//CONJ_TRANSPOSE reuses the transposed A and B; only C is reloaded.
transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
matrixCC.copyFrom(BLASData.L3_cGEMM_C_mn);
mBLAS.CGEMM(transA, transB, alphaC, matrixAC, matrixBC, betaC, matrixCC);
matrixCRef.copyFrom(BLASData.L3_cGEMM_o_HH);
mVerify.invoke_verify(matrixCRef, matrixCC, matrixCRef);
mRS.finish();
checkError();
}
/**
 * Correctness test for ZGEMM (F64_2 elements) with alphaZ / betaZ.
 * Covers three transpose combinations -- (NO_TRANSPOSE, NO_TRANSPOSE),
 * (TRANSPOSE, TRANSPOSE) and (CONJ_TRANSPOSE, CONJ_TRANSPOSE) -- and compares C
 * against BLASData.L3_zGEMM_o_NN, L3_zGEMM_o_TT and L3_zGEMM_o_HH respectively.
 */
public void test_L3_ZGEMM_Correctness() {
int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE;
int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE;
//populate input allocations (createXY is given X first, then Y: A is X=dK,Y=dM; B is X=dN,Y=dK; C is X=dN,Y=dM)
Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dK, BLASData.dM));
Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dK));
Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dM));
matrixAZ.copyFrom(BLASData.L3_zGEMM_A_mk);
matrixBZ.copyFrom(BLASData.L3_zGEMM_B_kn);
matrixCZ.copyFrom(BLASData.L3_zGEMM_C_mn);
//Test for the default case: NO_TRANS
mBLAS.ZGEMM(transA, transB, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dM));
matrixCRef.copyFrom(BLASData.L3_zGEMM_o_NN);
mVerify.invoke_verify(matrixCRef, matrixCZ, matrixCRef);
//test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
//A and B are re-created with X and Y swapped and loaded with the _km / _nk data.
matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dM, BLASData.dK));
matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dK, BLASData.dN));
matrixAZ.copyFrom(BLASData.L3_zGEMM_A_km);
matrixBZ.copyFrom(BLASData.L3_zGEMM_B_nk);
transA = ScriptIntrinsicBLAS.TRANSPOSE;
transB = ScriptIntrinsicBLAS.TRANSPOSE;
//Reload matrix C, since it was overwritten by BLAS.
matrixCZ.copyFrom(BLASData.L3_zGEMM_C_mn);
mBLAS.ZGEMM(transA, transB, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
matrixCRef.copyFrom(BLASData.L3_zGEMM_o_TT);
mVerify.invoke_verify(matrixCRef, matrixCZ, matrixCRef);
//CONJ_TRANSPOSE reuses the transposed A and B; only C is reloaded.
transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
matrixCZ.copyFrom(BLASData.L3_zGEMM_C_mn);
mBLAS.ZGEMM(transA, transB, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
matrixCRef.copyFrom(BLASData.L3_zGEMM_o_HH);
mVerify.invoke_verify(matrixCRef, matrixCZ, matrixCRef);
mRS.finish();
checkError();
}
/**
 * Expected-validity check for xSYMM: Side and Uplo must be valid, the operand
 * shapes must pass validateL3 (transpose arguments passed as 0), and A must be
 * square. All four checks are always evaluated.
 */
private boolean validateL3_xSYMM(Element e, int Side, int Uplo, Allocation A, Allocation B, Allocation C) {
    final boolean sideOk = validateSide(Side);
    final boolean uploOk = validateUplo(Uplo);
    final boolean shapesOk = validateL3(e, 0, 0, Side, A, B, C);
    final boolean squareA = A.getType().getX() == A.getType().getY();
    return sideOk && uploOk && shapesOk && squareA;
}
/**
 * Tries every (A, B, C) triple drawn from mMatrix with the given Side and Uplo.
 * Triples accepted by validateL3_xSYMM must run without an RSRuntimeException in
 * the precision matching A's element; all other triples must be rejected by
 * SSYMM, DSYMM, CSYMM and ZSYMM alike.
 */
private void xSYMM_API_test(int Side, int Uplo, ArrayList<Allocation> mMatrix) {
    for (Allocation a : mMatrix) {
        final Element elemA = a.getType().getElement();
        for (Allocation b : mMatrix) {
            for (Allocation c : mMatrix) {
                if (!validateL3_xSYMM(elemA, Side, Uplo, a, b, c)) {
                    // Invalid arguments: every precision variant has to refuse the call.
                    try {
                        mBLAS.SSYMM(Side, Uplo, alphaS, a, b, betaS, c);
                        fail("should throw RSRuntimeException for SSYMM");
                    } catch (RSRuntimeException expected) {
                        // rejection is the expected outcome
                    }
                    try {
                        mBLAS.DSYMM(Side, Uplo, alphaD, a, b, betaD, c);
                        fail("should throw RSRuntimeException for DSYMM");
                    } catch (RSRuntimeException expected) {
                        // rejection is the expected outcome
                    }
                    try {
                        mBLAS.CSYMM(Side, Uplo, alphaC, a, b, betaC, c);
                        fail("should throw RSRuntimeException for CSYMM");
                    } catch (RSRuntimeException expected) {
                        // rejection is the expected outcome
                    }
                    try {
                        mBLAS.ZSYMM(Side, Uplo, alphaZ, a, b, betaZ, c);
                        fail("should throw RSRuntimeException for ZSYMM");
                    } catch (RSRuntimeException expected) {
                        // rejection is the expected outcome
                    }
                    continue;
                }

                // Valid arguments: the variant matching the element type must succeed.
                try {
                    if (elemA.isCompatible(Element.F32(mRS))) {
                        mBLAS.SSYMM(Side, Uplo, alphaS, a, b, betaS, c);
                    } else if (elemA.isCompatible(Element.F64(mRS))) {
                        mBLAS.DSYMM(Side, Uplo, alphaD, a, b, betaD, c);
                    } else if (elemA.isCompatible(Element.F32_2(mRS))) {
                        mBLAS.CSYMM(Side, Uplo, alphaC, a, b, betaC, c);
                    } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                        mBLAS.ZSYMM(Side, Uplo, alphaZ, a, b, betaZ, c);
                    }
                } catch (RSRuntimeException unexpected) {
                    fail("should NOT throw RSRuntimeException");
                }
            }
        }
    }
}
/** Runs xSYMM_API_test for every (Side, Uplo) pair taken from mSide and mUplo. */
private void L3_xSYMM_API(ArrayList<Allocation> mMatrix) {
    for (int sideMode : mSide) {
        for (int uploMode : mUplo) {
            xSYMM_API_test(sideMode, uploMode, mMatrix);
        }
    }
}
/** API validation sweep for xSYMM over the allocations in mMatrixS. */
public void test_L3_SSYMM_API() {
    L3_xSYMM_API(mMatrixS);
}
/** API validation sweep for xSYMM over the allocations in mMatrixD. */
public void test_L3_DSYMM_API() {
    L3_xSYMM_API(mMatrixD);
}
/** API validation sweep for xSYMM over the allocations in mMatrixC. */
public void test_L3_CSYMM_API() {
    L3_xSYMM_API(mMatrixC);
}
/** API validation sweep for xSYMM over the allocations in mMatrixZ. */
public void test_L3_ZSYMM_API() {
    L3_xSYMM_API(mMatrixZ);
}
/**
 * Correctness test for SSYMM with uplo = UPPER and alphaS / betaS.
 * Runs once with side = LEFT (A is dM x dM) and once with side = RIGHT
 * (A is dN x dN), comparing C against BLASData.L3_sSYMM_o_L and
 * L3_sSYMM_o_R respectively. B is shared by both runs.
 */
public void test_L3_SSYMM_Correctness() {
int side = ScriptIntrinsicBLAS.LEFT;
int uplo = ScriptIntrinsicBLAS.UPPER;
//populate input allocations
Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dM, BLASData.dM));
Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dM));
Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dM));
matrixAS.copyFrom(BLASData.L3_sSYMM_A_mm);
matrixBS.copyFrom(BLASData.L3_sSYMM_B_mn);
matrixCS.copyFrom(BLASData.L3_sSYMM_C_mn);
//default case: SIDE = LEFT
mBLAS.SSYMM(side, uplo, alphaS, matrixAS, matrixBS, betaS, matrixCS);
Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dM));
matrixCRef.copyFrom(BLASData.L3_sSYMM_o_L);
mVerify.invoke_verify(matrixCRef, matrixCS, matrixCRef);
//SIDE = RIGHT: A is re-created as dN x dN and loaded with the _nn data
side = ScriptIntrinsicBLAS.RIGHT;
matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dN));
matrixAS.copyFrom(BLASData.L3_sSYMM_A_nn);
//Reload matrix C, since it was overwritten by BLAS.
matrixCS.copyFrom(BLASData.L3_sSYMM_C_mn);
mBLAS.SSYMM(side, uplo, alphaS, matrixAS, matrixBS, betaS, matrixCS);
matrixCRef.copyFrom(BLASData.L3_sSYMM_o_R);
mVerify.invoke_verify(matrixCRef, matrixCS, matrixCRef);
mRS.finish();
checkError();
}
/**
 * Correctness test for DSYMM with uplo = UPPER and alphaD / betaD.
 * Runs once with side = LEFT (A is dM x dM) and once with side = RIGHT
 * (A is dN x dN), comparing C against BLASData.L3_dSYMM_o_L and
 * L3_dSYMM_o_R respectively. B is shared by both runs.
 */
public void test_L3_DSYMM_Correctness() {
int side = ScriptIntrinsicBLAS.LEFT;
int uplo = ScriptIntrinsicBLAS.UPPER;
//populate input allocations
Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dM, BLASData.dM));
Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dM));
Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dM));
matrixAD.copyFrom(BLASData.L3_dSYMM_A_mm);
matrixBD.copyFrom(BLASData.L3_dSYMM_B_mn);
matrixCD.copyFrom(BLASData.L3_dSYMM_C_mn);
//default case: SIDE = LEFT
mBLAS.DSYMM(side, uplo, alphaD, matrixAD, matrixBD, betaD, matrixCD);
Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dM));
matrixCRef.copyFrom(BLASData.L3_dSYMM_o_L);
mVerify.invoke_verify(matrixCRef, matrixCD, matrixCRef);
//SIDE = RIGHT: A is re-created as dN x dN and loaded with the _nn data
side = ScriptIntrinsicBLAS.RIGHT;
matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dN));
matrixAD.copyFrom(BLASData.L3_dSYMM_A_nn);
//Reload matrix C, since it was overwritten by BLAS.
matrixCD.copyFrom(BLASData.L3_dSYMM_C_mn);
mBLAS.DSYMM(side, uplo, alphaD, matrixAD, matrixBD, betaD, matrixCD);
matrixCRef.copyFrom(BLASData.L3_dSYMM_o_R);
mVerify.invoke_verify(matrixCRef, matrixCD, matrixCRef);
mRS.finish();
checkError();
}
/**
 * Correctness test for CSYMM (F32_2 elements) with uplo = UPPER and alphaC / betaC.
 * Runs once with side = LEFT (A is dM x dM) and once with side = RIGHT
 * (A is dN x dN), comparing C against BLASData.L3_cSYMM_o_L and
 * L3_cSYMM_o_R respectively. B is shared by both runs.
 */
public void test_L3_CSYMM_Correctness() {
int side = ScriptIntrinsicBLAS.LEFT;
int uplo = ScriptIntrinsicBLAS.UPPER;
//populate input allocations
Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dM, BLASData.dM));
Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dM));
Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dM));
matrixAC.copyFrom(BLASData.L3_cSYMM_A_mm);
matrixBC.copyFrom(BLASData.L3_cSYMM_B_mn);
matrixCC.copyFrom(BLASData.L3_cSYMM_C_mn);
//default case: SIDE = LEFT
mBLAS.CSYMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC);
Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dM));
matrixCRef.copyFrom(BLASData.L3_cSYMM_o_L);
mVerify.invoke_verify(matrixCRef, matrixCC, matrixCRef);
//SIDE = RIGHT: A is re-created as dN x dN and loaded with the _nn data
side = ScriptIntrinsicBLAS.RIGHT;
matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN));
matrixAC.copyFrom(BLASData.L3_cSYMM_A_nn);
//Reload matrix C, since it was overwritten by BLAS.
matrixCC.copyFrom(BLASData.L3_cSYMM_C_mn);
mBLAS.CSYMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC);
matrixCRef.copyFrom(BLASData.L3_cSYMM_o_R);
mVerify.invoke_verify(matrixCRef, matrixCC, matrixCRef);
mRS.finish();
checkError();
}
/**
 * Correctness test for ZSYMM (F64_2 elements) with uplo = UPPER and alphaZ / betaZ.
 * Runs once with side = LEFT (A is dM x dM) and once with side = RIGHT
 * (A is dN x dN), comparing C against BLASData.L3_zSYMM_o_L and
 * L3_zSYMM_o_R respectively. B is shared by both runs.
 */
public void test_L3_ZSYMM_Correctness() {
int side = ScriptIntrinsicBLAS.LEFT;
int uplo = ScriptIntrinsicBLAS.UPPER;
//populate input allocations
Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dM, BLASData.dM));
Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dM));
Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dM));
matrixAZ.copyFrom(BLASData.L3_zSYMM_A_mm);
matrixBZ.copyFrom(BLASData.L3_zSYMM_B_mn);
matrixCZ.copyFrom(BLASData.L3_zSYMM_C_mn);
//default case: SIDE = LEFT
mBLAS.ZSYMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dM));
matrixCRef.copyFrom(BLASData.L3_zSYMM_o_L);
mVerify.invoke_verify(matrixCRef, matrixCZ, matrixCRef);
//SIDE = RIGHT: A is re-created as dN x dN and loaded with the _nn data
side = ScriptIntrinsicBLAS.RIGHT;
matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dN));
matrixAZ.copyFrom(BLASData.L3_zSYMM_A_nn);
//Reload matrix C, since it was overwritten by BLAS.
matrixCZ.copyFrom(BLASData.L3_zSYMM_C_mn);
mBLAS.ZSYMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
matrixCRef.copyFrom(BLASData.L3_zSYMM_o_R);
mVerify.invoke_verify(matrixCRef, matrixCZ, matrixCRef);
mRS.finish();
checkError();
}
/**
 * Decides whether an xHEMM call with these arguments is expected to be accepted.
 *
 * @param e    element type A, B and C must all be compatible with
 * @param Side LEFT or RIGHT; selects which dimension of B must match A
 * @param Uplo triangle selector, checked by validateUplo
 * @param A    must be square
 * @param B    row count (LEFT) or column count (RIGHT) must equal A's dimension
 * @param C    must have the same dimensions as B
 * @return true when every check passes, false otherwise
 */
private boolean validateHEMM(Element e, int Side, int Uplo, Allocation A, Allocation B, Allocation C) {
    if (!(validateSide(Side) && validateUplo(Uplo))) {
        return false;
    }

    final boolean elementsCompatible = A.getType().getElement().isCompatible(e)
            && B.getType().getElement().isCompatible(e)
            && C.getType().getElement().isCompatible(e);
    if (!elementsCompatible) {
        return false;
    }

    // A must be square; can potentially be relaxed similar to TRSM
    final int adim = A.getType().getX();
    if (adim != A.getType().getY()) {
        return false;
    }

    final int bCols = B.getType().getX();
    final int bRows = B.getType().getY();
    if (Side == ScriptIntrinsicBLAS.LEFT && adim != bRows) {
        return false;
    }
    if (Side == ScriptIntrinsicBLAS.RIGHT && adim != bCols) {
        return false;
    }

    // B and C must have identical dimensions.
    return bCols == C.getType().getX() && bRows == C.getType().getY();
}
/**
 * Tries every (A, B, C) triple drawn from mMatrix with the given Side and Uplo.
 * Triples accepted by validateHEMM must run without an RSRuntimeException in the
 * precision matching A's element; all other triples must be rejected by both
 * CHEMM and ZHEMM.
 */
private void xHEMM_API_test(int Side, int Uplo, ArrayList<Allocation> mMatrix) {
    for (Allocation a : mMatrix) {
        final Element elemA = a.getType().getElement();
        for (Allocation b : mMatrix) {
            for (Allocation c : mMatrix) {
                if (!validateHEMM(elemA, Side, Uplo, a, b, c)) {
                    // Invalid arguments: both precisions have to refuse the call.
                    try {
                        mBLAS.CHEMM(Side, Uplo, alphaC, a, b, betaC, c);
                        fail("should throw RSRuntimeException for CHEMM");
                    } catch (RSRuntimeException expected) {
                        // rejection is the expected outcome
                    }
                    try {
                        mBLAS.ZHEMM(Side, Uplo, alphaZ, a, b, betaZ, c);
                        fail("should throw RSRuntimeException for ZHEMM");
                    } catch (RSRuntimeException expected) {
                        // rejection is the expected outcome
                    }
                    continue;
                }

                // Valid arguments: the variant matching the element type must succeed.
                try {
                    if (elemA.isCompatible(Element.F32_2(mRS))) {
                        mBLAS.CHEMM(Side, Uplo, alphaC, a, b, betaC, c);
                    } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                        mBLAS.ZHEMM(Side, Uplo, alphaZ, a, b, betaZ, c);
                    }
                } catch (RSRuntimeException unexpected) {
                    fail("should NOT throw RSRuntimeException");
                }
            }
        }
    }
}
/** Runs xHEMM_API_test for every (Side, Uplo) pair taken from mSide and mUplo. */
public void L3_xHEMM_API(ArrayList<Allocation> mMatrix) {
    for (int sideMode : mSide) {
        for (int uploMode : mUplo) {
            xHEMM_API_test(sideMode, uploMode, mMatrix);
        }
    }
}
/** API validation sweep for xHEMM over the allocations in mMatrixC. */
public void test_L3_CHEMM_API() {
    L3_xHEMM_API(mMatrixC);
}
/** API validation sweep for xHEMM over the allocations in mMatrixZ. */
public void test_L3_ZHEMM_API() {
    L3_xHEMM_API(mMatrixZ);
}
/**
 * Correctness test for CHEMM (F32_2 elements) with uplo = UPPER and alphaC / betaC.
 * Runs once with side = LEFT (A is dM x dM) and once with side = RIGHT
 * (A is dN x dN), comparing C against BLASData.L3_cHEMM_o_L and
 * L3_cHEMM_o_R respectively. B is shared by both runs.
 */
public void test_L3_CHEMM_Correctness() {
int side = ScriptIntrinsicBLAS.LEFT;
int uplo = ScriptIntrinsicBLAS.UPPER;
//populate input allocations
Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dM, BLASData.dM));
Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dM));
Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dM));
matrixAC.copyFrom(BLASData.L3_cHEMM_A_mm);
matrixBC.copyFrom(BLASData.L3_cHEMM_B_mn);
matrixCC.copyFrom(BLASData.L3_cHEMM_C_mn);
//default case: SIDE = LEFT
mBLAS.CHEMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC);
Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dM));
matrixCRef.copyFrom(BLASData.L3_cHEMM_o_L);
mVerify.invoke_verify(matrixCRef, matrixCC, matrixCRef);
//SIDE = RIGHT: A is re-created as dN x dN and loaded with the _nn data
side = ScriptIntrinsicBLAS.RIGHT;
matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN));
matrixAC.copyFrom(BLASData.L3_cHEMM_A_nn);
//Reload matrix C, since it was overwritten by BLAS.
matrixCC.copyFrom(BLASData.L3_cHEMM_C_mn);
mBLAS.CHEMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC);
matrixCRef.copyFrom(BLASData.L3_cHEMM_o_R);
mVerify.invoke_verify(matrixCRef, matrixCC, matrixCRef);
mRS.finish();
checkError();
}
/**
 * Correctness test for ZHEMM (F64_2 elements) with uplo = UPPER and alphaZ / betaZ.
 * Runs once with side = LEFT (A is dM x dM) and once with side = RIGHT
 * (A is dN x dN), comparing C against BLASData.L3_zHEMM_o_L and
 * L3_zHEMM_o_R respectively. B is shared by both runs.
 */
public void test_L3_ZHEMM_Correctness() {
int side = ScriptIntrinsicBLAS.LEFT;
int uplo = ScriptIntrinsicBLAS.UPPER;
//populate input allocations
Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dM, BLASData.dM));
Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dM));
Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dM));
matrixAZ.copyFrom(BLASData.L3_zHEMM_A_mm);
matrixBZ.copyFrom(BLASData.L3_zHEMM_B_mn);
matrixCZ.copyFrom(BLASData.L3_zHEMM_C_mn);
//default case: SIDE = LEFT
mBLAS.ZHEMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dM));
matrixCRef.copyFrom(BLASData.L3_zHEMM_o_L);
mVerify.invoke_verify(matrixCRef, matrixCZ, matrixCRef);
//SIDE = RIGHT: A is re-created as dN x dN and loaded with the _nn data
side = ScriptIntrinsicBLAS.RIGHT;
matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dN));
matrixAZ.copyFrom(BLASData.L3_zHEMM_A_nn);
//Reload matrix C, since it was overwritten by BLAS.
matrixCZ.copyFrom(BLASData.L3_zHEMM_C_mn);
mBLAS.ZHEMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
matrixCRef.copyFrom(BLASData.L3_zHEMM_o_R);
mVerify.invoke_verify(matrixCRef, matrixCZ, matrixCRef);
mRS.finish();
checkError();
}
/**
 * Expected-validity check for xSYRK: Trans and Uplo must be valid and the
 * shapes of A and C must pass validateL3 (B passed as null, TransB and Side
 * as 0). All three checks are always evaluated.
 */
private boolean validateL3_xSYRK(Element e, int Uplo, int Trans, Allocation A, Allocation C) {
    final boolean transOk = validateTranspose(Trans);
    final boolean uploOk = validateUplo(Uplo);
    final boolean shapesOk = validateL3(e, Trans, 0, 0, A, null, C);
    return transOk && uploOk && shapesOk;
}
/**
 * Tries every (A, C) pair drawn from mMatrix with the given Uplo and Trans.
 * Pairs accepted by validateL3_xSYRK must run without an RSRuntimeException in
 * the precision matching A's element; all other pairs must be rejected by
 * SSYRK, DSYRK, CSYRK and ZSYRK alike.
 */
private void xSYRK_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) {
    for (Allocation a : mMatrix) {
        final Element elemA = a.getType().getElement();
        for (Allocation c : mMatrix) {
            if (!validateL3_xSYRK(elemA, Uplo, Trans, a, c)) {
                // Invalid arguments: every precision variant has to refuse the call.
                try {
                    mBLAS.SSYRK(Uplo, Trans, alphaS, a, betaS, c);
                    fail("should throw RSRuntimeException for SSYRK");
                } catch (RSRuntimeException expected) {
                    // rejection is the expected outcome
                }
                try {
                    mBLAS.DSYRK(Uplo, Trans, alphaD, a, betaD, c);
                    fail("should throw RSRuntimeException for DSYRK");
                } catch (RSRuntimeException expected) {
                    // rejection is the expected outcome
                }
                try {
                    mBLAS.CSYRK(Uplo, Trans, alphaC, a, betaC, c);
                    fail("should throw RSRuntimeException for CSYRK");
                } catch (RSRuntimeException expected) {
                    // rejection is the expected outcome
                }
                try {
                    mBLAS.ZSYRK(Uplo, Trans, alphaZ, a, betaZ, c);
                    fail("should throw RSRuntimeException for ZSYRK");
                } catch (RSRuntimeException expected) {
                    // rejection is the expected outcome
                }
                continue;
            }

            // Valid arguments: the variant matching the element type must succeed.
            try {
                if (elemA.isCompatible(Element.F32(mRS))) {
                    mBLAS.SSYRK(Uplo, Trans, alphaS, a, betaS, c);
                } else if (elemA.isCompatible(Element.F64(mRS))) {
                    mBLAS.DSYRK(Uplo, Trans, alphaD, a, betaD, c);
                } else if (elemA.isCompatible(Element.F32_2(mRS))) {
                    mBLAS.CSYRK(Uplo, Trans, alphaC, a, betaC, c);
                } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                    mBLAS.ZSYRK(Uplo, Trans, alphaZ, a, betaZ, c);
                }
            } catch (RSRuntimeException unexpected) {
                fail("should NOT throw RSRuntimeException");
            }
        }
    }
}
/**
 * Calls xSYRK_API_test once for every combination of the mUplo and mTranspose entries.
 *
 * @param mMatrix allocations handed through to xSYRK_API_test
 */
public void L3_xSYRK_API(ArrayList<Allocation> mMatrix) {
    final int[] uploChoices = mUplo;
    final int[] transChoices = mTranspose;
    for (int u = 0; u < uploChoices.length; u++) {
        for (int t = 0; t < transChoices.length; t++) {
            xSYRK_API_test(uploChoices[u], transChoices[t], mMatrix);
        }
    }
}
/** Runs the xSYRK API sweep over the mMatrixS allocations. */
public void test_L3_SSYRK_API() {
    this.L3_xSYRK_API(this.mMatrixS);
}
/** Runs the xSYRK API sweep over the mMatrixD allocations. */
public void test_L3_DSYRK_API() {
    this.L3_xSYRK_API(this.mMatrixD);
}
/** Runs the xSYRK API sweep over the mMatrixC allocations. */
public void test_L3_CSYRK_API() {
    this.L3_xSYRK_API(this.mMatrixC);
}
/** Runs the xSYRK API sweep over the mMatrixZ allocations. */
public void test_L3_ZSYRK_API() {
    this.L3_xSYRK_API(this.mMatrixZ);
}
/**
 * Runs SSYRK on the BLASData fixtures for UPPER/NO_TRANSPOSE and UPPER/TRANSPOSE and
 * passes each result with its expected output to mVerify.invoke_verifyUpLo.
 */
public void test_L3_SSYRK_Correctness() {
    final int upper = ScriptIntrinsicBLAS.UPPER;

    // NO_TRANSPOSE pass: A is created with X = dK, Y = dN; C with X = Y = dN.
    Type typeA = Type.createXY(mRS, Element.F32(mRS), BLASData.dK, BLASData.dN);
    Allocation inA = Allocation.createTyped(mRS, typeA);
    Type typeC = Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dN);
    Allocation inOutC = Allocation.createTyped(mRS, typeC);
    inA.copyFrom(BLASData.L3_sSYRK_A_nk);
    inOutC.copyFrom(BLASData.L3_sSYRK_C_nn);
    mBLAS.SSYRK(upper, ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaS, inA, betaS, inOutC);

    Type typeRef = Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dN);
    Allocation expected = Allocation.createTyped(mRS, typeRef);
    expected.copyFrom(BLASData.L3_sSYRK_o_N);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    // TRANSPOSE pass: A is recreated with its two dimensions swapped.
    typeA = Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dK);
    inA = Allocation.createTyped(mRS, typeA);
    inA.copyFrom(BLASData.L3_sSYRK_A_kn);
    // The previous BLAS call wrote into C, so restore its initial contents.
    inOutC.copyFrom(BLASData.L3_sSYRK_C_nn);
    mBLAS.SSYRK(upper, ScriptIntrinsicBLAS.TRANSPOSE, alphaS, inA, betaS, inOutC);
    expected.copyFrom(BLASData.L3_sSYRK_o_T);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    mRS.finish();
    checkError();
}
/**
 * Runs DSYRK on the BLASData fixtures for UPPER/NO_TRANSPOSE and UPPER/TRANSPOSE and
 * passes each result with its expected output to mVerify.invoke_verifyUpLo.
 */
public void test_L3_DSYRK_Correctness() {
    final int upper = ScriptIntrinsicBLAS.UPPER;

    // NO_TRANSPOSE pass: A is created with X = dK, Y = dN; C with X = Y = dN.
    Type typeA = Type.createXY(mRS, Element.F64(mRS), BLASData.dK, BLASData.dN);
    Allocation inA = Allocation.createTyped(mRS, typeA);
    Type typeC = Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dN);
    Allocation inOutC = Allocation.createTyped(mRS, typeC);
    inA.copyFrom(BLASData.L3_dSYRK_A_nk);
    inOutC.copyFrom(BLASData.L3_dSYRK_C_nn);
    mBLAS.DSYRK(upper, ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaD, inA, betaD, inOutC);

    Type typeRef = Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dN);
    Allocation expected = Allocation.createTyped(mRS, typeRef);
    expected.copyFrom(BLASData.L3_dSYRK_o_N);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    // TRANSPOSE pass: A is recreated with its two dimensions swapped.
    typeA = Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dK);
    inA = Allocation.createTyped(mRS, typeA);
    inA.copyFrom(BLASData.L3_dSYRK_A_kn);
    // The previous BLAS call wrote into C, so restore its initial contents.
    inOutC.copyFrom(BLASData.L3_dSYRK_C_nn);
    mBLAS.DSYRK(upper, ScriptIntrinsicBLAS.TRANSPOSE, alphaD, inA, betaD, inOutC);
    expected.copyFrom(BLASData.L3_dSYRK_o_T);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    mRS.finish();
    checkError();
}
/**
 * Runs CSYRK on the BLASData fixtures for UPPER/NO_TRANSPOSE and UPPER/TRANSPOSE and
 * passes each result with its expected output to mVerify.invoke_verifyUpLo.
 */
public void test_L3_CSYRK_Correctness() {
    final int upper = ScriptIntrinsicBLAS.UPPER;

    // NO_TRANSPOSE pass: A is created with X = dK, Y = dN; C with X = Y = dN.
    Type typeA = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dK, BLASData.dN);
    Allocation inA = Allocation.createTyped(mRS, typeA);
    Type typeC = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN);
    Allocation inOutC = Allocation.createTyped(mRS, typeC);
    inA.copyFrom(BLASData.L3_cSYRK_A_nk);
    inOutC.copyFrom(BLASData.L3_cSYRK_C_nn);
    mBLAS.CSYRK(upper, ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaC, inA, betaC, inOutC);

    Type typeRef = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN);
    Allocation expected = Allocation.createTyped(mRS, typeRef);
    expected.copyFrom(BLASData.L3_cSYRK_o_N);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    // TRANSPOSE pass: A is recreated with its two dimensions swapped.
    typeA = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dK);
    inA = Allocation.createTyped(mRS, typeA);
    inA.copyFrom(BLASData.L3_cSYRK_A_kn);
    // The previous BLAS call wrote into C, so restore its initial contents.
    inOutC.copyFrom(BLASData.L3_cSYRK_C_nn);
    mBLAS.CSYRK(upper, ScriptIntrinsicBLAS.TRANSPOSE, alphaC, inA, betaC, inOutC);
    expected.copyFrom(BLASData.L3_cSYRK_o_T);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    mRS.finish();
    checkError();
}
/**
 * Runs ZSYRK on the BLASData fixtures for UPPER/NO_TRANSPOSE and UPPER/TRANSPOSE and
 * passes each result with its expected output to mVerify.invoke_verifyUpLo.
 */
public void test_L3_ZSYRK_Correctness() {
    final int upper = ScriptIntrinsicBLAS.UPPER;

    // NO_TRANSPOSE pass: A is created with X = dK, Y = dN; C with X = Y = dN.
    Type typeA = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dK, BLASData.dN);
    Allocation inA = Allocation.createTyped(mRS, typeA);
    Type typeC = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dN);
    Allocation inOutC = Allocation.createTyped(mRS, typeC);
    inA.copyFrom(BLASData.L3_zSYRK_A_nk);
    inOutC.copyFrom(BLASData.L3_zSYRK_C_nn);
    mBLAS.ZSYRK(upper, ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaZ, inA, betaZ, inOutC);

    Type typeRef = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dN);
    Allocation expected = Allocation.createTyped(mRS, typeRef);
    expected.copyFrom(BLASData.L3_zSYRK_o_N);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    // TRANSPOSE pass: A is recreated with its two dimensions swapped.
    typeA = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dK);
    inA = Allocation.createTyped(mRS, typeA);
    inA.copyFrom(BLASData.L3_zSYRK_A_kn);
    // The previous BLAS call wrote into C, so restore its initial contents.
    inOutC.copyFrom(BLASData.L3_zSYRK_C_nn);
    mBLAS.ZSYRK(upper, ScriptIntrinsicBLAS.TRANSPOSE, alphaZ, inA, betaZ, inOutC);
    expected.copyFrom(BLASData.L3_zSYRK_o_T);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    mRS.finish();
    checkError();
}
/**
 * Predicts whether an xHERK call with these arguments should be accepted.
 *
 * @param e     element type both matrices must be compatible with
 * @param Uplo  upper/lower selector, checked by validateUplo
 * @param Trans transpose selector, checked by validateConjTranspose
 * @param A     input matrix
 * @param C     output matrix; must be square
 * @return true when every check passes, false at the first failing check
 */
private boolean validateHERK(Element e, int Uplo, int Trans, Allocation A, Allocation C) {
    if (!validateUplo(Uplo)) {
        return false;
    }
    final boolean elementsMatch = A.getType().getElement().isCompatible(e)
            && C.getType().getElement().isCompatible(e);
    if (!elementsMatch) {
        return false;
    }
    if (!validateConjTranspose(Trans)) {
        return false;
    }
    final int cdim = C.getType().getX();
    if (cdim != C.getType().getY()) {
        return false;
    }
    // The A dimension that has to equal C's size: Y for NO_TRANSPOSE, X for anything else.
    final int matchingDimOfA = (Trans == ScriptIntrinsicBLAS.NO_TRANSPOSE)
            ? A.getType().getY()
            : A.getType().getX();
    return cdim == matchingDimOfA;
}
/**
 * Runs the xHERK argument-validation check for one (Uplo, Trans) combination over every
 * ordered pair (A, C) taken from mMatrix.
 *
 * If validateHERK accepts the pair, CHERK or ZHERK is invoked according to A's element
 * type and must not throw RSRuntimeException. Otherwise both routines are invoked and
 * each one must throw RSRuntimeException.
 *
 * @param Uplo    upper/lower selector passed to the BLAS calls
 * @param Trans   transpose selector passed to the BLAS calls
 * @param mMatrix allocations used both as A candidates and as C candidates
 */
private void xHERK_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        for (Allocation matC : mMatrix) {
            Element elemA = matA.getType().getElement();
            if (validateHERK(elemA, Uplo, Trans, matA, matC)) {
                try {
                    // HERK takes real-valued alpha/beta, hence the S and D scalars.
                    if (elemA.isCompatible(Element.F32_2(mRS))) {
                        mBLAS.CHERK(Uplo, Trans, alphaS, matA, betaS, matC);
                    } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                        mBLAS.ZHERK(Uplo, Trans, alphaD, matA, betaD, matC);
                    }
                } catch (RSRuntimeException e) {
                    // Report the exception itself so the rejected argument can be diagnosed.
                    fail("should NOT throw RSRuntimeException: " + e);
                }
            } else {
                // Invalid combination: both precision variants have to reject it.
                try {
                    mBLAS.CHERK(Uplo, Trans, alphaS, matA, betaS, matC);
                    fail("should throw RSRuntimeException for CHERK");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                try {
                    mBLAS.ZHERK(Uplo, Trans, alphaD, matA, betaD, matC);
                    fail("should throw RSRuntimeException for ZHERK");
                } catch (RSRuntimeException expected) {
                    // expected
                }
            }
        }
    }
}
/**
 * Calls xHERK_API_test once for every combination of the mUplo and mTranspose entries.
 *
 * @param mMatrix allocations handed through to xHERK_API_test
 */
public void L3_xHERK_API(ArrayList<Allocation> mMatrix) {
    final int[] uploChoices = mUplo;
    final int[] transChoices = mTranspose;
    for (int u = 0; u < uploChoices.length; u++) {
        for (int t = 0; t < transChoices.length; t++) {
            xHERK_API_test(uploChoices[u], transChoices[t], mMatrix);
        }
    }
}
/** Runs the xHERK API sweep over the mMatrixC allocations. */
public void test_L3_CHERK_API() {
    this.L3_xHERK_API(this.mMatrixC);
}
/** Runs the xHERK API sweep over the mMatrixZ allocations. */
public void test_L3_ZHERK_API() {
    this.L3_xHERK_API(this.mMatrixZ);
}
/**
 * Runs CHERK on the BLASData fixtures for UPPER/NO_TRANSPOSE and UPPER/CONJ_TRANSPOSE and
 * passes each result with its expected output to mVerify.invoke_verifyUpLo.
 */
public void test_L3_CHERK_Correctness() {
    final int upper = ScriptIntrinsicBLAS.UPPER;

    // NO_TRANSPOSE pass: A is created with X = dK, Y = dN; C with X = Y = dN.
    Type typeA = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dK, BLASData.dN);
    Allocation inA = Allocation.createTyped(mRS, typeA);
    Type typeC = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN);
    Allocation inOutC = Allocation.createTyped(mRS, typeC);
    inA.copyFrom(BLASData.L3_cHERK_A_nk);
    inOutC.copyFrom(BLASData.L3_cHERK_C_nn);
    mBLAS.CHERK(upper, ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaS, inA, betaS, inOutC);

    Type typeRef = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN);
    Allocation expected = Allocation.createTyped(mRS, typeRef);
    expected.copyFrom(BLASData.L3_cHERK_o_N);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    // CONJ_TRANSPOSE pass: A is recreated with its two dimensions swapped.
    typeA = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dK);
    inA = Allocation.createTyped(mRS, typeA);
    inA.copyFrom(BLASData.L3_cHERK_A_kn);
    // The previous BLAS call wrote into C, so restore its initial contents.
    inOutC.copyFrom(BLASData.L3_cHERK_C_nn);
    mBLAS.CHERK(upper, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, alphaS, inA, betaS, inOutC);
    expected.copyFrom(BLASData.L3_cHERK_o_H);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    mRS.finish();
    checkError();
}
/**
 * Runs ZHERK on the BLASData fixtures for UPPER/NO_TRANSPOSE and UPPER/CONJ_TRANSPOSE and
 * passes each result with its expected output to mVerify.invoke_verifyUpLo.
 */
public void test_L3_ZHERK_Correctness() {
    final int upper = ScriptIntrinsicBLAS.UPPER;

    // NO_TRANSPOSE pass: A is created with X = dK, Y = dN; C with X = Y = dN.
    Type typeA = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dK, BLASData.dN);
    Allocation inA = Allocation.createTyped(mRS, typeA);
    Type typeC = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dN);
    Allocation inOutC = Allocation.createTyped(mRS, typeC);
    inA.copyFrom(BLASData.L3_zHERK_A_nk);
    inOutC.copyFrom(BLASData.L3_zHERK_C_nn);
    mBLAS.ZHERK(upper, ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaD, inA, betaD, inOutC);

    Type typeRef = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dN);
    Allocation expected = Allocation.createTyped(mRS, typeRef);
    expected.copyFrom(BLASData.L3_zHERK_o_N);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    // CONJ_TRANSPOSE pass: A is recreated with its two dimensions swapped.
    typeA = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dK);
    inA = Allocation.createTyped(mRS, typeA);
    inA.copyFrom(BLASData.L3_zHERK_A_kn);
    // The previous BLAS call wrote into C, so restore its initial contents.
    inOutC.copyFrom(BLASData.L3_zHERK_C_nn);
    mBLAS.ZHERK(upper, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, alphaD, inA, betaD, inOutC);
    expected.copyFrom(BLASData.L3_zHERK_o_H);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    mRS.finish();
    checkError();
}
/**
 * Predicts whether an xSYR2K call with these arguments should be accepted.
 *
 * @param e     element type all three matrices must be compatible with
 * @param Uplo  upper/lower selector, checked by validateUplo
 * @param Trans transpose selector, checked by validateTranspose
 * @param A     first input matrix
 * @param B     second input matrix; must have the same dimensions as A
 * @param C     output matrix; must be square
 * @return true when every check passes, false at the first failing check
 */
private boolean validateSYR2K(Element e, int Uplo, int Trans, Allocation A, Allocation B, Allocation C) {
    if (!validateTranspose(Trans) || !validateUplo(Uplo)) {
        return false;
    }
    final boolean elementsMatch = A.getType().getElement().isCompatible(e)
            && B.getType().getElement().isCompatible(e)
            && C.getType().getElement().isCompatible(e);
    if (!elementsMatch) {
        return false;
    }
    // A is n x k without transpose and k x n with it, while C is n x n: compare C with
    // A's column count (X) for TRANSPOSE and with A's row count (Y) for any other value.
    final int expectedCdim = (Trans == ScriptIntrinsicBLAS.TRANSPOSE)
            ? A.getType().getX()
            : A.getType().getY();
    if (C.getType().getX() != expectedCdim || C.getType().getY() != expectedCdim) {
        return false;
    }
    // A and B must agree in both dimensions.
    return A.getType().getX() == B.getType().getX()
            && A.getType().getY() == B.getType().getY();
}
/**
 * Runs the xSYR2K argument-validation check for one (Uplo, Trans) combination over every
 * ordered triple (A, B, C) taken from mMatrix.
 *
 * If validateSYR2K accepts the triple, the routine matching A's element type is invoked
 * and must not throw RSRuntimeException. Otherwise all four routines (S, D, C, Z) are
 * invoked and each one must throw RSRuntimeException.
 *
 * @param Uplo    upper/lower selector passed to the BLAS calls
 * @param Trans   transpose selector passed to the BLAS calls
 * @param mMatrix allocations used as candidates for A, B and C
 */
private void xSYR2K_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        for (Allocation matB : mMatrix) {
            for (Allocation matC : mMatrix) {
                Element elemA = matA.getType().getElement();
                if (validateSYR2K(elemA, Uplo, Trans, matA, matB, matC)) {
                    try {
                        if (elemA.isCompatible(Element.F32(mRS))) {
                            mBLAS.SSYR2K(Uplo, Trans, alphaS, matA, matB, betaS, matC);
                        } else if (elemA.isCompatible(Element.F64(mRS))) {
                            mBLAS.DSYR2K(Uplo, Trans, alphaD, matA, matB, betaD, matC);
                        } else if (elemA.isCompatible(Element.F32_2(mRS))) {
                            mBLAS.CSYR2K(Uplo, Trans, alphaC, matA, matB, betaC, matC);
                        } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                            mBLAS.ZSYR2K(Uplo, Trans, alphaZ, matA, matB, betaZ, matC);
                        }
                    } catch (RSRuntimeException e) {
                        // Report the exception itself so the rejected argument can be diagnosed.
                        fail("should NOT throw RSRuntimeException: " + e);
                    }
                } else {
                    // Invalid combination: every precision variant has to reject it.
                    try {
                        mBLAS.SSYR2K(Uplo, Trans, alphaS, matA, matB, betaS, matC);
                        fail("should throw RSRuntimeException for SSYR2K");
                    } catch (RSRuntimeException expected) {
                        // expected
                    }
                    try {
                        mBLAS.DSYR2K(Uplo, Trans, alphaD, matA, matB, betaD, matC);
                        fail("should throw RSRuntimeException for DSYR2K");
                    } catch (RSRuntimeException expected) {
                        // expected
                    }
                    try {
                        mBLAS.CSYR2K(Uplo, Trans, alphaC, matA, matB, betaC, matC);
                        fail("should throw RSRuntimeException for CSYR2K");
                    } catch (RSRuntimeException expected) {
                        // expected
                    }
                    try {
                        mBLAS.ZSYR2K(Uplo, Trans, alphaZ, matA, matB, betaZ, matC);
                        fail("should throw RSRuntimeException for ZSYR2K");
                    } catch (RSRuntimeException expected) {
                        // expected
                    }
                }
            }
        }
    }
}
/**
 * Calls xSYR2K_API_test once for every combination of the mUplo and mTranspose entries.
 *
 * @param mMatrix allocations handed through to xSYR2K_API_test
 */
public void L3_xSYR2K_API(ArrayList<Allocation> mMatrix) {
    final int[] uploChoices = mUplo;
    final int[] transChoices = mTranspose;
    for (int u = 0; u < uploChoices.length; u++) {
        for (int t = 0; t < transChoices.length; t++) {
            xSYR2K_API_test(uploChoices[u], transChoices[t], mMatrix);
        }
    }
}
/** Runs the xSYR2K API sweep over the mMatrixS allocations. */
public void test_L3_SSYR2K_API() {
    this.L3_xSYR2K_API(this.mMatrixS);
}
/** Runs the xSYR2K API sweep over the mMatrixD allocations. */
public void test_L3_DSYR2K_API() {
    this.L3_xSYR2K_API(this.mMatrixD);
}
/** Runs the xSYR2K API sweep over the mMatrixC allocations. */
public void test_L3_CSYR2K_API() {
    this.L3_xSYR2K_API(this.mMatrixC);
}
/** Runs the xSYR2K API sweep over the mMatrixZ allocations. */
public void test_L3_ZSYR2K_API() {
    this.L3_xSYR2K_API(this.mMatrixZ);
}
/**
 * Runs SSYR2K on the BLASData fixtures for UPPER/NO_TRANSPOSE and UPPER/TRANSPOSE and
 * passes each result with its expected output to mVerify.invoke_verifyUpLo.
 */
public void test_L3_SSYR2K_Correctness() {
    final int upper = ScriptIntrinsicBLAS.UPPER;

    // NO_TRANSPOSE pass: A and B are created with X = dK, Y = dN; C with X = Y = dN.
    Type typeA = Type.createXY(mRS, Element.F32(mRS), BLASData.dK, BLASData.dN);
    Allocation inA = Allocation.createTyped(mRS, typeA);
    Type typeB = Type.createXY(mRS, Element.F32(mRS), BLASData.dK, BLASData.dN);
    Allocation inB = Allocation.createTyped(mRS, typeB);
    Type typeC = Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dN);
    Allocation inOutC = Allocation.createTyped(mRS, typeC);
    inA.copyFrom(BLASData.L3_sSYR2K_A_nk);
    inB.copyFrom(BLASData.L3_sSYR2K_B_nk);
    inOutC.copyFrom(BLASData.L3_sSYR2K_C_nn);
    mBLAS.SSYR2K(upper, ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaS, inA, inB, betaS, inOutC);

    Type typeRef = Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dN);
    Allocation expected = Allocation.createTyped(mRS, typeRef);
    expected.copyFrom(BLASData.L3_sSYR2K_o_N);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    // TRANSPOSE pass: A and B are recreated with their two dimensions swapped.
    typeA = Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dK);
    inA = Allocation.createTyped(mRS, typeA);
    typeB = Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dK);
    inB = Allocation.createTyped(mRS, typeB);
    inA.copyFrom(BLASData.L3_sSYR2K_A_kn);
    inB.copyFrom(BLASData.L3_sSYR2K_B_kn);
    // The previous BLAS call wrote into C, so restore its initial contents.
    inOutC.copyFrom(BLASData.L3_sSYR2K_C_nn);
    mBLAS.SSYR2K(upper, ScriptIntrinsicBLAS.TRANSPOSE, alphaS, inA, inB, betaS, inOutC);
    expected.copyFrom(BLASData.L3_sSYR2K_o_T);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    mRS.finish();
    checkError();
}
/**
 * Runs DSYR2K on the BLASData fixtures for UPPER/NO_TRANSPOSE and UPPER/TRANSPOSE and
 * passes each result with its expected output to mVerify.invoke_verifyUpLo.
 */
public void test_L3_DSYR2K_Correctness() {
    final int upper = ScriptIntrinsicBLAS.UPPER;

    // NO_TRANSPOSE pass: A and B are created with X = dK, Y = dN; C with X = Y = dN.
    Type typeA = Type.createXY(mRS, Element.F64(mRS), BLASData.dK, BLASData.dN);
    Allocation inA = Allocation.createTyped(mRS, typeA);
    Type typeB = Type.createXY(mRS, Element.F64(mRS), BLASData.dK, BLASData.dN);
    Allocation inB = Allocation.createTyped(mRS, typeB);
    Type typeC = Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dN);
    Allocation inOutC = Allocation.createTyped(mRS, typeC);
    inA.copyFrom(BLASData.L3_dSYR2K_A_nk);
    inB.copyFrom(BLASData.L3_dSYR2K_B_nk);
    inOutC.copyFrom(BLASData.L3_dSYR2K_C_nn);
    mBLAS.DSYR2K(upper, ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaD, inA, inB, betaD, inOutC);

    Type typeRef = Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dN);
    Allocation expected = Allocation.createTyped(mRS, typeRef);
    expected.copyFrom(BLASData.L3_dSYR2K_o_N);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    // TRANSPOSE pass: A and B are recreated with their two dimensions swapped.
    typeA = Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dK);
    inA = Allocation.createTyped(mRS, typeA);
    typeB = Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dK);
    inB = Allocation.createTyped(mRS, typeB);
    inA.copyFrom(BLASData.L3_dSYR2K_A_kn);
    inB.copyFrom(BLASData.L3_dSYR2K_B_kn);
    // The previous BLAS call wrote into C, so restore its initial contents.
    inOutC.copyFrom(BLASData.L3_dSYR2K_C_nn);
    mBLAS.DSYR2K(upper, ScriptIntrinsicBLAS.TRANSPOSE, alphaD, inA, inB, betaD, inOutC);
    expected.copyFrom(BLASData.L3_dSYR2K_o_T);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    mRS.finish();
    checkError();
}
/**
 * Runs CSYR2K on the BLASData fixtures for UPPER/NO_TRANSPOSE and UPPER/TRANSPOSE and
 * passes each result with its expected output to mVerify.invoke_verifyUpLo.
 */
public void test_L3_CSYR2K_Correctness() {
    final int upper = ScriptIntrinsicBLAS.UPPER;

    // NO_TRANSPOSE pass: A and B are created with X = dK, Y = dN; C with X = Y = dN.
    Type typeA = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dK, BLASData.dN);
    Allocation inA = Allocation.createTyped(mRS, typeA);
    Type typeB = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dK, BLASData.dN);
    Allocation inB = Allocation.createTyped(mRS, typeB);
    Type typeC = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN);
    Allocation inOutC = Allocation.createTyped(mRS, typeC);
    inA.copyFrom(BLASData.L3_cSYR2K_A_nk);
    inB.copyFrom(BLASData.L3_cSYR2K_B_nk);
    inOutC.copyFrom(BLASData.L3_cSYR2K_C_nn);
    mBLAS.CSYR2K(upper, ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaC, inA, inB, betaC, inOutC);

    Type typeRef = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN);
    Allocation expected = Allocation.createTyped(mRS, typeRef);
    expected.copyFrom(BLASData.L3_cSYR2K_o_N);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    // TRANSPOSE pass: A and B are recreated with their two dimensions swapped.
    typeA = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dK);
    inA = Allocation.createTyped(mRS, typeA);
    typeB = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dK);
    inB = Allocation.createTyped(mRS, typeB);
    inA.copyFrom(BLASData.L3_cSYR2K_A_kn);
    inB.copyFrom(BLASData.L3_cSYR2K_B_kn);
    // The previous BLAS call wrote into C, so restore its initial contents.
    inOutC.copyFrom(BLASData.L3_cSYR2K_C_nn);
    mBLAS.CSYR2K(upper, ScriptIntrinsicBLAS.TRANSPOSE, alphaC, inA, inB, betaC, inOutC);
    expected.copyFrom(BLASData.L3_cSYR2K_o_T);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    mRS.finish();
    checkError();
}
/**
 * Runs ZSYR2K on the BLASData fixtures for UPPER/NO_TRANSPOSE and UPPER/TRANSPOSE and
 * passes each result with its expected output to mVerify.invoke_verifyUpLo.
 */
public void test_L3_ZSYR2K_Correctness() {
    final int upper = ScriptIntrinsicBLAS.UPPER;

    // NO_TRANSPOSE pass: A and B are created with X = dK, Y = dN; C with X = Y = dN.
    Type typeA = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dK, BLASData.dN);
    Allocation inA = Allocation.createTyped(mRS, typeA);
    Type typeB = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dK, BLASData.dN);
    Allocation inB = Allocation.createTyped(mRS, typeB);
    Type typeC = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dN);
    Allocation inOutC = Allocation.createTyped(mRS, typeC);
    inA.copyFrom(BLASData.L3_zSYR2K_A_nk);
    inB.copyFrom(BLASData.L3_zSYR2K_B_nk);
    inOutC.copyFrom(BLASData.L3_zSYR2K_C_nn);
    mBLAS.ZSYR2K(upper, ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaZ, inA, inB, betaZ, inOutC);

    Type typeRef = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dN);
    Allocation expected = Allocation.createTyped(mRS, typeRef);
    expected.copyFrom(BLASData.L3_zSYR2K_o_N);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    // TRANSPOSE pass: A and B are recreated with their two dimensions swapped.
    typeA = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dK);
    inA = Allocation.createTyped(mRS, typeA);
    typeB = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dK);
    inB = Allocation.createTyped(mRS, typeB);
    inA.copyFrom(BLASData.L3_zSYR2K_A_kn);
    inB.copyFrom(BLASData.L3_zSYR2K_B_kn);
    // The previous BLAS call wrote into C, so restore its initial contents.
    inOutC.copyFrom(BLASData.L3_zSYR2K_C_nn);
    mBLAS.ZSYR2K(upper, ScriptIntrinsicBLAS.TRANSPOSE, alphaZ, inA, inB, betaZ, inOutC);
    expected.copyFrom(BLASData.L3_zSYR2K_o_T);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    mRS.finish();
    checkError();
}
/**
 * Predicts whether an xHER2K call with these arguments should be accepted.
 *
 * @param e     element type all three matrices must be compatible with
 * @param Uplo  upper/lower selector, checked by validateUplo
 * @param Trans transpose selector, checked by validateConjTranspose
 * @param A     first input matrix
 * @param B     second input matrix; must have the same dimensions as A
 * @param C     output matrix; must be square
 * @return true when every check passes, false at the first failing check
 */
private boolean validateHER2K(Element e, int Uplo, int Trans, Allocation A, Allocation B, Allocation C) {
    if (!validateUplo(Uplo)) {
        return false;
    }
    final boolean elementsMatch = A.getType().getElement().isCompatible(e)
            && B.getType().getElement().isCompatible(e)
            && C.getType().getElement().isCompatible(e);
    if (!elementsMatch) {
        return false;
    }
    if (!validateConjTranspose(Trans)) {
        return false;
    }
    final int cdim = C.getType().getX();
    if (cdim != C.getType().getY()) {
        return false;
    }
    // The A dimension that has to equal C's size: Y for NO_TRANSPOSE, X for anything else.
    final int matchingDimOfA = (Trans == ScriptIntrinsicBLAS.NO_TRANSPOSE)
            ? A.getType().getY()
            : A.getType().getX();
    if (matchingDimOfA != cdim) {
        return false;
    }
    // A and B must agree in both dimensions.
    return A.getType().getX() == B.getType().getX()
            && A.getType().getY() == B.getType().getY();
}
/**
 * Runs the xHER2K argument-validation check for one (Uplo, Trans) combination over every
 * ordered triple (A, B, C) taken from mMatrix.
 *
 * If validateHER2K accepts the triple, CHER2K or ZHER2K is invoked according to A's
 * element type and must not throw RSRuntimeException. Otherwise both routines are
 * invoked and each one must throw RSRuntimeException.
 *
 * @param Uplo    upper/lower selector passed to the BLAS calls
 * @param Trans   transpose selector passed to the BLAS calls
 * @param mMatrix allocations used as candidates for A, B and C
 */
private void xHER2K_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        for (Allocation matB : mMatrix) {
            for (Allocation matC : mMatrix) {
                Element elemA = matA.getType().getElement();
                if (validateHER2K(elemA, Uplo, Trans, matA, matB, matC)) {
                    try {
                        // HER2K takes a complex alpha but a real beta (betaS / betaD).
                        if (elemA.isCompatible(Element.F32_2(mRS))) {
                            mBLAS.CHER2K(Uplo, Trans, alphaC, matA, matB, betaS, matC);
                        } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                            mBLAS.ZHER2K(Uplo, Trans, alphaZ, matA, matB, betaD, matC);
                        }
                    } catch (RSRuntimeException e) {
                        // Report the exception itself so the rejected argument can be diagnosed.
                        fail("should NOT throw RSRuntimeException: " + e);
                    }
                } else {
                    // Invalid combination: both precision variants have to reject it.
                    try {
                        mBLAS.CHER2K(Uplo, Trans, alphaC, matA, matB, betaS, matC);
                        fail("should throw RSRuntimeException for CHER2K");
                    } catch (RSRuntimeException expected) {
                        // expected
                    }
                    try {
                        mBLAS.ZHER2K(Uplo, Trans, alphaZ, matA, matB, betaD, matC);
                        fail("should throw RSRuntimeException for ZHER2K");
                    } catch (RSRuntimeException expected) {
                        // expected
                    }
                }
            }
        }
    }
}
/**
 * Calls xHER2K_API_test once for every combination of the mUplo and mTranspose entries.
 *
 * @param mMatrix allocations handed through to xHER2K_API_test
 */
public void L3_xHER2K_API(ArrayList<Allocation> mMatrix) {
    final int[] uploChoices = mUplo;
    final int[] transChoices = mTranspose;
    for (int u = 0; u < uploChoices.length; u++) {
        for (int t = 0; t < transChoices.length; t++) {
            xHER2K_API_test(uploChoices[u], transChoices[t], mMatrix);
        }
    }
}
/** Runs the xHER2K API sweep over the mMatrixC allocations. */
public void test_L3_CHER2K_API() {
    this.L3_xHER2K_API(this.mMatrixC);
}
/** Runs the xHER2K API sweep over the mMatrixZ allocations. */
public void test_L3_ZHER2K_API() {
    this.L3_xHER2K_API(this.mMatrixZ);
}
/**
 * Runs CHER2K on the BLASData fixtures for UPPER/NO_TRANSPOSE and UPPER/CONJ_TRANSPOSE
 * and passes each result with its expected output to mVerify.invoke_verifyUpLo.
 */
public void test_L3_CHER2K_Correctness() {
    final int upper = ScriptIntrinsicBLAS.UPPER;

    // NO_TRANSPOSE pass: A and B are created with X = dK, Y = dN; C with X = Y = dN.
    Type typeA = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dK, BLASData.dN);
    Allocation inA = Allocation.createTyped(mRS, typeA);
    Type typeB = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dK, BLASData.dN);
    Allocation inB = Allocation.createTyped(mRS, typeB);
    Type typeC = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN);
    Allocation inOutC = Allocation.createTyped(mRS, typeC);
    inA.copyFrom(BLASData.L3_cHER2K_A_nk);
    inB.copyFrom(BLASData.L3_cHER2K_B_nk);
    inOutC.copyFrom(BLASData.L3_cHER2K_C_nn);
    mBLAS.CHER2K(upper, ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaC, inA, inB, betaS, inOutC);

    Type typeRef = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN);
    Allocation expected = Allocation.createTyped(mRS, typeRef);
    expected.copyFrom(BLASData.L3_cHER2K_o_N);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    // CONJ_TRANSPOSE pass: A and B are recreated with their two dimensions swapped.
    typeA = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dK);
    inA = Allocation.createTyped(mRS, typeA);
    typeB = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dK);
    inB = Allocation.createTyped(mRS, typeB);
    inA.copyFrom(BLASData.L3_cHER2K_A_kn);
    inB.copyFrom(BLASData.L3_cHER2K_B_kn);
    // The previous BLAS call wrote into C, so restore its initial contents.
    inOutC.copyFrom(BLASData.L3_cHER2K_C_nn);
    mBLAS.CHER2K(upper, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, alphaC, inA, inB, betaS, inOutC);
    expected.copyFrom(BLASData.L3_cHER2K_o_H);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    mRS.finish();
    checkError();
}
/**
 * Runs ZHER2K on the BLASData fixtures for UPPER/NO_TRANSPOSE and UPPER/CONJ_TRANSPOSE
 * and passes each result with its expected output to mVerify.invoke_verifyUpLo.
 */
public void test_L3_ZHER2K_Correctness() {
    final int upper = ScriptIntrinsicBLAS.UPPER;

    // NO_TRANSPOSE pass: A and B are created with X = dK, Y = dN; C with X = Y = dN.
    Type typeA = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dK, BLASData.dN);
    Allocation inA = Allocation.createTyped(mRS, typeA);
    Type typeB = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dK, BLASData.dN);
    Allocation inB = Allocation.createTyped(mRS, typeB);
    Type typeC = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dN);
    Allocation inOutC = Allocation.createTyped(mRS, typeC);
    inA.copyFrom(BLASData.L3_zHER2K_A_nk);
    inB.copyFrom(BLASData.L3_zHER2K_B_nk);
    inOutC.copyFrom(BLASData.L3_zHER2K_C_nn);
    mBLAS.ZHER2K(upper, ScriptIntrinsicBLAS.NO_TRANSPOSE, alphaZ, inA, inB, betaD, inOutC);

    Type typeRef = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dN);
    Allocation expected = Allocation.createTyped(mRS, typeRef);
    expected.copyFrom(BLASData.L3_zHER2K_o_N);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    // CONJ_TRANSPOSE pass: A and B are recreated with their two dimensions swapped.
    typeA = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dK);
    inA = Allocation.createTyped(mRS, typeA);
    typeB = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dK);
    inB = Allocation.createTyped(mRS, typeB);
    inA.copyFrom(BLASData.L3_zHER2K_A_kn);
    inB.copyFrom(BLASData.L3_zHER2K_B_kn);
    // The previous BLAS call wrote into C, so restore its initial contents.
    inOutC.copyFrom(BLASData.L3_zHER2K_C_nn);
    mBLAS.ZHER2K(upper, ScriptIntrinsicBLAS.CONJ_TRANSPOSE, alphaZ, inA, inB, betaD, inOutC);
    expected.copyFrom(BLASData.L3_zHER2K_o_H);
    mVerify.invoke_verifyUpLo(expected, inOutC, expected, true);

    mRS.finish();
    checkError();
}
/**
 * Predicts whether an xTRMM call with these arguments should be accepted.
 *
 * @param e      element type both matrices must be compatible with
 * @param Side   left/right selector, checked by validateSide
 * @param Uplo   upper/lower selector, checked by validateUplo
 * @param TransA transpose selector, checked by validateTranspose
 * @param Diag   diagonal selector, checked by validateDiag
 * @param A      matrix that must be square
 * @param B      matrix whose row count (LEFT) or column count (otherwise) must match A
 * @return true when every check passes, false at the first failing check
 */
private boolean validateTRMM(Element e, int Side, int Uplo, int TransA, int Diag, Allocation A, Allocation B) {
    if (!validateSide(Side) || !validateUplo(Uplo)) {
        return false;
    }
    if (!validateTranspose(TransA) || !validateDiag(Diag)) {
        return false;
    }
    final boolean elementsMatch = A.getType().getElement().isCompatible(e)
            && B.getType().getElement().isCompatible(e);
    if (!elementsMatch) {
        return false;
    }
    // Y is used as the row count and X as the column count.
    final int rowsA = A.getType().getY();
    final int colsA = A.getType().getX();
    if (rowsA != colsA) {
        return false;
    }
    final int rowsB = B.getType().getY();
    final int colsB = B.getType().getX();
    return (Side == ScriptIntrinsicBLAS.LEFT) ? (colsA == rowsB) : (colsB == rowsA);
}
/**
 * Runs the xTRMM argument-validation check for one (Side, Uplo, TransA, Diag) combination
 * over every ordered pair (A, B) taken from mMatrix.
 *
 * If validateTRMM accepts the pair, the routine matching A's element type is invoked and
 * must not throw RSRuntimeException. Otherwise all four routines (S, D, C, Z) are invoked
 * and each one must throw RSRuntimeException.
 *
 * @param Side    left/right selector passed to the BLAS calls
 * @param Uplo    upper/lower selector passed to the BLAS calls
 * @param TransA  transpose selector passed to the BLAS calls
 * @param Diag    diagonal selector passed to the BLAS calls
 * @param mMatrix allocations used both as A candidates and as B candidates
 */
private void xTRMM_API_test(int Side, int Uplo, int TransA, int Diag, ArrayList<Allocation> mMatrix) {
    for (Allocation matA : mMatrix) {
        for (Allocation matB : mMatrix) {
            Element elemA = matA.getType().getElement();
            if (validateTRMM(elemA, Side, Uplo, TransA, Diag, matA, matB)) {
                try {
                    if (elemA.isCompatible(Element.F32(mRS))) {
                        mBLAS.STRMM(Side, Uplo, TransA, Diag, alphaS, matA, matB);
                    } else if (elemA.isCompatible(Element.F64(mRS))) {
                        mBLAS.DTRMM(Side, Uplo, TransA, Diag, alphaD, matA, matB);
                    } else if (elemA.isCompatible(Element.F32_2(mRS))) {
                        mBLAS.CTRMM(Side, Uplo, TransA, Diag, alphaC, matA, matB);
                    } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                        mBLAS.ZTRMM(Side, Uplo, TransA, Diag, alphaZ, matA, matB);
                    }
                } catch (RSRuntimeException e) {
                    // Report the exception itself so the rejected argument can be diagnosed.
                    fail("should NOT throw RSRuntimeException: " + e);
                }
            } else {
                // Invalid combination: every precision variant has to reject it.
                try {
                    mBLAS.STRMM(Side, Uplo, TransA, Diag, alphaS, matA, matB);
                    fail("should throw RSRuntimeException for STRMM");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                try {
                    mBLAS.DTRMM(Side, Uplo, TransA, Diag, alphaD, matA, matB);
                    fail("should throw RSRuntimeException for DTRMM");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                try {
                    mBLAS.CTRMM(Side, Uplo, TransA, Diag, alphaC, matA, matB);
                    fail("should throw RSRuntimeException for CTRMM");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                try {
                    mBLAS.ZTRMM(Side, Uplo, TransA, Diag, alphaZ, matA, matB);
                    fail("should throw RSRuntimeException for ZTRMM");
                } catch (RSRuntimeException expected) {
                    // expected
                }
            }
        }
    }
}
/**
 * Calls xTRMM_API_test once for every combination of the mSide, mUplo, mTranspose and
 * mDiag entries (iterated in that nesting order, mDiag innermost).
 *
 * @param mMatrix allocations handed through to xTRMM_API_test
 */
public void L3_xTRMM_API(ArrayList<Allocation> mMatrix) {
    final int[] sideChoices = mSide;
    final int[] uploChoices = mUplo;
    final int[] transChoices = mTranspose;
    final int[] diagChoices = mDiag;
    for (int s = 0; s < sideChoices.length; s++) {
        for (int u = 0; u < uploChoices.length; u++) {
            for (int t = 0; t < transChoices.length; t++) {
                for (int d = 0; d < diagChoices.length; d++) {
                    xTRMM_API_test(sideChoices[s], uploChoices[u], transChoices[t],
                            diagChoices[d], mMatrix);
                }
            }
        }
    }
}
/** Runs the xTRMM API sweep over the mMatrixS allocations. */
public void test_L3_STRMM_API() {
    this.L3_xTRMM_API(this.mMatrixS);
}
/** Runs the xTRMM API sweep over the mMatrixD allocations. */
public void test_L3_DTRMM_API() {
    this.L3_xTRMM_API(this.mMatrixD);
}
/** Runs the xTRMM API sweep over the mMatrixC allocations. */
public void test_L3_CTRMM_API() {
    this.L3_xTRMM_API(this.mMatrixC);
}
/** Runs the xTRMM API sweep over the mMatrixZ allocations. */
public void test_L3_ZTRMM_API() {
    this.L3_xTRMM_API(this.mMatrixZ);
}
/**
 * Runs STRMM on the BLASData fixtures for LEFT/UPPER/NO_TRANSPOSE and
 * RIGHT/LOWER/TRANSPOSE (both NON_UNIT) and passes each result with its expected output
 * to mVerify.invoke_verify.
 */
public void test_L3_STRMM_Correctness() {
    final int nonUnit = ScriptIntrinsicBLAS.NON_UNIT;

    // First pass: A is created with X = Y = dM; B with X = dN, Y = dM.
    Type typeA = Type.createXY(mRS, Element.F32(mRS), BLASData.dM, BLASData.dM);
    Allocation inA = Allocation.createTyped(mRS, typeA);
    Type typeB = Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dM);
    Allocation inOutB = Allocation.createTyped(mRS, typeB);
    inA.copyFrom(BLASData.L3_sTRMM_A_mm);
    inOutB.copyFrom(BLASData.L3_sTRMM_B_mn);
    mBLAS.STRMM(ScriptIntrinsicBLAS.LEFT, ScriptIntrinsicBLAS.UPPER,
            ScriptIntrinsicBLAS.NO_TRANSPOSE, nonUnit, alphaS, inA, inOutB);

    Type typeRef = Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dM);
    Allocation expected = Allocation.createTyped(mRS, typeRef);
    expected.copyFrom(BLASData.L3_sTRMM_o_LUN);
    mVerify.invoke_verify(expected, inOutB, expected);

    // Second pass: A is recreated with X = Y = dN.
    typeA = Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dN);
    inA = Allocation.createTyped(mRS, typeA);
    inA.copyFrom(BLASData.L3_sTRMM_A_nn);
    // The previous BLAS call wrote into B, so restore its initial contents.
    inOutB.copyFrom(BLASData.L3_sTRMM_B_mn);
    mBLAS.STRMM(ScriptIntrinsicBLAS.RIGHT, ScriptIntrinsicBLAS.LOWER,
            ScriptIntrinsicBLAS.TRANSPOSE, nonUnit, alphaS, inA, inOutB);
    expected.copyFrom(BLASData.L3_sTRMM_o_RLT);
    mVerify.invoke_verify(expected, inOutB, expected);

    mRS.finish();
    checkError();
}
/**
 * Runs DTRMM on the BLASData fixtures for LEFT/UPPER/NO_TRANSPOSE and
 * RIGHT/LOWER/TRANSPOSE (both NON_UNIT) and passes each result with its expected output
 * to mVerify.invoke_verify.
 */
public void test_L3_DTRMM_Correctness() {
    final int nonUnit = ScriptIntrinsicBLAS.NON_UNIT;

    // First pass: A is created with X = Y = dM; B with X = dN, Y = dM.
    Type typeA = Type.createXY(mRS, Element.F64(mRS), BLASData.dM, BLASData.dM);
    Allocation inA = Allocation.createTyped(mRS, typeA);
    Type typeB = Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dM);
    Allocation inOutB = Allocation.createTyped(mRS, typeB);
    inA.copyFrom(BLASData.L3_dTRMM_A_mm);
    inOutB.copyFrom(BLASData.L3_dTRMM_B_mn);
    mBLAS.DTRMM(ScriptIntrinsicBLAS.LEFT, ScriptIntrinsicBLAS.UPPER,
            ScriptIntrinsicBLAS.NO_TRANSPOSE, nonUnit, alphaD, inA, inOutB);

    Type typeRef = Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dM);
    Allocation expected = Allocation.createTyped(mRS, typeRef);
    expected.copyFrom(BLASData.L3_dTRMM_o_LUN);
    mVerify.invoke_verify(expected, inOutB, expected);

    // Second pass: A is recreated with X = Y = dN.
    typeA = Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dN);
    inA = Allocation.createTyped(mRS, typeA);
    inA.copyFrom(BLASData.L3_dTRMM_A_nn);
    // The previous BLAS call wrote into B, so restore its initial contents.
    inOutB.copyFrom(BLASData.L3_dTRMM_B_mn);
    mBLAS.DTRMM(ScriptIntrinsicBLAS.RIGHT, ScriptIntrinsicBLAS.LOWER,
            ScriptIntrinsicBLAS.TRANSPOSE, nonUnit, alphaD, inA, inOutB);
    expected.copyFrom(BLASData.L3_dTRMM_o_RLT);
    mVerify.invoke_verify(expected, inOutB, expected);

    mRS.finish();
    checkError();
}
/**
 * Runs CTRMM on the BLASData fixtures for LEFT/UPPER/NO_TRANSPOSE and
 * RIGHT/LOWER/TRANSPOSE (both NON_UNIT) and passes each result with its expected output
 * to mVerify.invoke_verify.
 */
public void test_L3_CTRMM_Correctness() {
    final int nonUnit = ScriptIntrinsicBLAS.NON_UNIT;

    // First pass: A is created with X = Y = dM; B with X = dN, Y = dM.
    Type typeA = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dM, BLASData.dM);
    Allocation inA = Allocation.createTyped(mRS, typeA);
    Type typeB = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dM);
    Allocation inOutB = Allocation.createTyped(mRS, typeB);
    inA.copyFrom(BLASData.L3_cTRMM_A_mm);
    inOutB.copyFrom(BLASData.L3_cTRMM_B_mn);
    mBLAS.CTRMM(ScriptIntrinsicBLAS.LEFT, ScriptIntrinsicBLAS.UPPER,
            ScriptIntrinsicBLAS.NO_TRANSPOSE, nonUnit, alphaC, inA, inOutB);

    Type typeRef = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dM);
    Allocation expected = Allocation.createTyped(mRS, typeRef);
    expected.copyFrom(BLASData.L3_cTRMM_o_LUN);
    mVerify.invoke_verify(expected, inOutB, expected);

    // Second pass: A is recreated with X = Y = dN.
    typeA = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN);
    inA = Allocation.createTyped(mRS, typeA);
    inA.copyFrom(BLASData.L3_cTRMM_A_nn);
    // The previous BLAS call wrote into B, so restore its initial contents.
    inOutB.copyFrom(BLASData.L3_cTRMM_B_mn);
    mBLAS.CTRMM(ScriptIntrinsicBLAS.RIGHT, ScriptIntrinsicBLAS.LOWER,
            ScriptIntrinsicBLAS.TRANSPOSE, nonUnit, alphaC, inA, inOutB);
    expected.copyFrom(BLASData.L3_cTRMM_o_RLT);
    mVerify.invoke_verify(expected, inOutB, expected);

    mRS.finish();
    checkError();
}
/**
 * Runs ZTRMM on the BLASData fixtures for LEFT/UPPER/NO_TRANSPOSE and
 * RIGHT/LOWER/TRANSPOSE (both NON_UNIT) and passes each result with its expected output
 * to mVerify.invoke_verify.
 */
public void test_L3_ZTRMM_Correctness() {
    final int nonUnit = ScriptIntrinsicBLAS.NON_UNIT;

    // First pass: A is created with X = Y = dM; B with X = dN, Y = dM.
    Type typeA = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dM, BLASData.dM);
    Allocation inA = Allocation.createTyped(mRS, typeA);
    Type typeB = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dM);
    Allocation inOutB = Allocation.createTyped(mRS, typeB);
    inA.copyFrom(BLASData.L3_zTRMM_A_mm);
    inOutB.copyFrom(BLASData.L3_zTRMM_B_mn);
    mBLAS.ZTRMM(ScriptIntrinsicBLAS.LEFT, ScriptIntrinsicBLAS.UPPER,
            ScriptIntrinsicBLAS.NO_TRANSPOSE, nonUnit, alphaZ, inA, inOutB);

    Type typeRef = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dM);
    Allocation expected = Allocation.createTyped(mRS, typeRef);
    expected.copyFrom(BLASData.L3_zTRMM_o_LUN);
    mVerify.invoke_verify(expected, inOutB, expected);

    // Second pass: A is recreated with X = Y = dN.
    typeA = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dN);
    inA = Allocation.createTyped(mRS, typeA);
    inA.copyFrom(BLASData.L3_zTRMM_A_nn);
    // The previous BLAS call wrote into B, so restore its initial contents.
    inOutB.copyFrom(BLASData.L3_zTRMM_B_mn);
    mBLAS.ZTRMM(ScriptIntrinsicBLAS.RIGHT, ScriptIntrinsicBLAS.LOWER,
            ScriptIntrinsicBLAS.TRANSPOSE, nonUnit, alphaZ, inA, inOutB);
    expected.copyFrom(BLASData.L3_zTRMM_o_RLT);
    mVerify.invoke_verify(expected, inOutB, expected);

    mRS.finish();
    checkError();
}
/**
 * Test-side check of whether a set of xTRSM arguments is expected to be accepted.
 *
 * @param e      element type both allocations must be compatible with
 * @param Side   side flag, checked by validateSide
 * @param Uplo   upper/lower flag, checked by validateUplo
 * @param TransA transpose flag, checked by validateTranspose
 * @param Diag   diagonal flag, checked by validateDiag
 * @param A      matrix A; must have equal X and Y dimensions
 * @param B      matrix B; its Y (for LEFT) or X (otherwise) must equal A's dimension
 * @return true if all checks pass, false otherwise
 */
private boolean validateTRSM(Element e, int Side, int Uplo, int TransA, int Diag, Allocation A, Allocation B) {
    // All four flags must pass their individual validators (checked in the same order as before).
    boolean flagsOk = validateSide(Side)
            && validateTranspose(TransA)
            && validateUplo(Uplo)
            && validateDiag(Diag);
    if (!flagsOk) {
        return false;
    }

    // Both allocations must hold elements compatible with the requested type.
    boolean elementsOk = A.getType().getElement().isCompatible(e)
            && B.getType().getElement().isCompatible(e);
    if (!elementsOk) {
        return false;
    }

    // A is required to be square.
    // this may be unnecessary, the restriction could potentially be relaxed
    // A needs to contain at least that symmetric matrix but could theoretically be larger
    // for now we assume adapters are sufficient, will reevaluate in the future
    final int dimA = A.getType().getX();
    if (dimA != A.getType().getY()) {
        return false;
    }

    final int rowsB = B.getType().getY();
    final int colsB = B.getType().getX();
    // LEFT: A is M*M and must match B's Y. Otherwise: A is N*N and must match B's X.
    final int required = (Side == ScriptIntrinsicBLAS.LEFT) ? rowsB : colsB;
    return dimA == required;
}
/**
 * Invokes the xTRSM entry points for every ordered pair (A, B) taken from mMatrix with the
 * given flags, and checks the outcome against validateTRSM.
 *
 * If validateTRSM accepts the pair, the variant matching A's element type is called and must
 * not throw RSRuntimeException. If it rejects the pair, all four variants (S, D, C, Z) are
 * called and each one must throw RSRuntimeException.
 */
private void xTRSM_API_test(int Side, int Uplo, int TransA, int Diag, ArrayList<Allocation> mMatrix) {
    for (Allocation a : mMatrix) {
        for (Allocation b : mMatrix) {
            Element elemA = a.getType().getElement();

            if (!validateTRSM(elemA, Side, Uplo, TransA, Diag, a, b)) {
                // Rejected arguments: every precision variant has to refuse them.
                try {
                    mBLAS.STRSM(Side, Uplo, TransA, Diag, alphaS, a, b);
                    fail("should throw RSRuntimeException for STRSM");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                try {
                    mBLAS.DTRSM(Side, Uplo, TransA, Diag, alphaD, a, b);
                    fail("should throw RSRuntimeException for DTRSM");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                try {
                    mBLAS.CTRSM(Side, Uplo, TransA, Diag, alphaC, a, b);
                    fail("should throw RSRuntimeException for CTRSM");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                try {
                    mBLAS.ZTRSM(Side, Uplo, TransA, Diag, alphaZ, a, b);
                    fail("should throw RSRuntimeException for ZTRSM");
                } catch (RSRuntimeException expected) {
                    // expected
                }
                continue;
            }

            // Accepted arguments: dispatch on A's element type; the call must succeed.
            try {
                if (elemA.isCompatible(Element.F32(mRS))) {
                    mBLAS.STRSM(Side, Uplo, TransA, Diag, alphaS, a, b);
                } else if (elemA.isCompatible(Element.F64(mRS))) {
                    mBLAS.DTRSM(Side, Uplo, TransA, Diag, alphaD, a, b);
                } else if (elemA.isCompatible(Element.F32_2(mRS))) {
                    mBLAS.CTRSM(Side, Uplo, TransA, Diag, alphaC, a, b);
                } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                    mBLAS.ZTRSM(Side, Uplo, TransA, Diag, alphaZ, a, b);
                }
            } catch (RSRuntimeException unexpected) {
                fail("should NOT throw RSRuntimeException");
            }
        }
    }
}
/**
 * Runs xTRSM_API_test once for every combination of the entries in mSide, mUplo,
 * mTranspose and mDiag, using the supplied list of allocations.
 *
 * @param mMatrix allocations from which the (A, B) pairs are drawn
 */
public void L3_xTRSM_API(ArrayList<Allocation> mMatrix) {
    for (int side : mSide) {
        for (int uplo : mUplo) {
            for (int transA : mTranspose) {
                for (int diag : mDiag) {
                    xTRSM_API_test(side, uplo, transA, diag, mMatrix);
                }
            }
        }
    }
}
/** Runs the xTRSM API sweep over the allocations held in mMatrixS. */
public void test_L3_STRSM_API() {
    final ArrayList<Allocation> allocations = mMatrixS;
    L3_xTRSM_API(allocations);
}
/** Runs the xTRSM API sweep over the allocations held in mMatrixD. */
public void test_L3_DTRSM_API() {
    final ArrayList<Allocation> allocations = mMatrixD;
    L3_xTRSM_API(allocations);
}
/** Runs the xTRSM API sweep over the allocations held in mMatrixC. */
public void test_L3_CTRSM_API() {
    final ArrayList<Allocation> allocations = mMatrixC;
    L3_xTRSM_API(allocations);
}
/** Runs the xTRSM API sweep over the allocations held in mMatrixZ. */
public void test_L3_ZTRSM_API() {
    final ArrayList<Allocation> allocations = mMatrixZ;
    L3_xTRSM_API(allocations);
}
/**
 * Compares STRSM output with the reference data stored in BLASData for two configurations,
 * both with NON_UNIT diagonal: (LEFT, UPPER, NO_TRANSPOSE) and (RIGHT, LOWER, TRANSPOSE).
 */
public void test_L3_STRSM_Correctness() {
    // Inputs for the first configuration: A has X = Y = dM, B has X = dN and Y = dM.
    Type squareM = Type.createXY(mRS, Element.F32(mRS), BLASData.dM, BLASData.dM);
    Allocation a = Allocation.createTyped(mRS, squareM);
    Type shapeB = Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dM);
    Allocation b = Allocation.createTyped(mRS, shapeB);
    a.copyFrom(BLASData.L3_sTRSM_A_mm);
    b.copyFrom(BLASData.L3_sTRSM_B_mn);

    // Configuration 1: LEFT, UPPER, NO_TRANSPOSE.
    mBLAS.STRSM(ScriptIntrinsicBLAS.LEFT, ScriptIntrinsicBLAS.UPPER,
            ScriptIntrinsicBLAS.NO_TRANSPOSE, ScriptIntrinsicBLAS.NON_UNIT,
            alphaS, a, b);
    Type shapeRef = Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dM);
    Allocation expected = Allocation.createTyped(mRS, shapeRef);
    expected.copyFrom(BLASData.L3_sTRSM_o_LUN);
    mVerify.invoke_verify(expected, b, expected);

    // Configuration 2: RIGHT, LOWER, TRANSPOSE. A becomes a dN x dN allocation.
    Type squareN = Type.createXY(mRS, Element.F32(mRS), BLASData.dN, BLASData.dN);
    a = Allocation.createTyped(mRS, squareN);
    a.copyFrom(BLASData.L3_sTRSM_A_nn);
    // B held the previous result, so restore its original input data.
    b.copyFrom(BLASData.L3_sTRSM_B_mn);
    mBLAS.STRSM(ScriptIntrinsicBLAS.RIGHT, ScriptIntrinsicBLAS.LOWER,
            ScriptIntrinsicBLAS.TRANSPOSE, ScriptIntrinsicBLAS.NON_UNIT,
            alphaS, a, b);
    expected.copyFrom(BLASData.L3_sTRSM_o_RLT);
    mVerify.invoke_verify(expected, b, expected);

    // Let pending RenderScript work complete before running checkError().
    mRS.finish();
    checkError();
}
/**
 * Compares DTRSM output with the reference data stored in BLASData for two configurations,
 * both with NON_UNIT diagonal: (LEFT, UPPER, NO_TRANSPOSE) and (RIGHT, LOWER, TRANSPOSE).
 */
public void test_L3_DTRSM_Correctness() {
    // Inputs for the first configuration: A has X = Y = dM, B has X = dN and Y = dM.
    Type squareM = Type.createXY(mRS, Element.F64(mRS), BLASData.dM, BLASData.dM);
    Allocation a = Allocation.createTyped(mRS, squareM);
    Type shapeB = Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dM);
    Allocation b = Allocation.createTyped(mRS, shapeB);
    a.copyFrom(BLASData.L3_dTRSM_A_mm);
    b.copyFrom(BLASData.L3_dTRSM_B_mn);

    // Configuration 1: LEFT, UPPER, NO_TRANSPOSE.
    mBLAS.DTRSM(ScriptIntrinsicBLAS.LEFT, ScriptIntrinsicBLAS.UPPER,
            ScriptIntrinsicBLAS.NO_TRANSPOSE, ScriptIntrinsicBLAS.NON_UNIT,
            alphaD, a, b);
    Type shapeRef = Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dM);
    Allocation expected = Allocation.createTyped(mRS, shapeRef);
    expected.copyFrom(BLASData.L3_dTRSM_o_LUN);
    mVerify.invoke_verify(expected, b, expected);

    // Configuration 2: RIGHT, LOWER, TRANSPOSE. A becomes a dN x dN allocation.
    Type squareN = Type.createXY(mRS, Element.F64(mRS), BLASData.dN, BLASData.dN);
    a = Allocation.createTyped(mRS, squareN);
    a.copyFrom(BLASData.L3_dTRSM_A_nn);
    // B held the previous result, so restore its original input data.
    b.copyFrom(BLASData.L3_dTRSM_B_mn);
    mBLAS.DTRSM(ScriptIntrinsicBLAS.RIGHT, ScriptIntrinsicBLAS.LOWER,
            ScriptIntrinsicBLAS.TRANSPOSE, ScriptIntrinsicBLAS.NON_UNIT,
            alphaD, a, b);
    expected.copyFrom(BLASData.L3_dTRSM_o_RLT);
    mVerify.invoke_verify(expected, b, expected);

    // Let pending RenderScript work complete before running checkError().
    mRS.finish();
    checkError();
}
/**
 * Compares CTRSM output with the reference data stored in BLASData for two configurations,
 * both with NON_UNIT diagonal: (LEFT, UPPER, NO_TRANSPOSE) and (RIGHT, LOWER, TRANSPOSE).
 */
public void test_L3_CTRSM_Correctness() {
    // Inputs for the first configuration: A has X = Y = dM, B has X = dN and Y = dM.
    Type squareM = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dM, BLASData.dM);
    Allocation a = Allocation.createTyped(mRS, squareM);
    Type shapeB = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dM);
    Allocation b = Allocation.createTyped(mRS, shapeB);
    a.copyFrom(BLASData.L3_cTRSM_A_mm);
    b.copyFrom(BLASData.L3_cTRSM_B_mn);

    // Configuration 1: LEFT, UPPER, NO_TRANSPOSE.
    mBLAS.CTRSM(ScriptIntrinsicBLAS.LEFT, ScriptIntrinsicBLAS.UPPER,
            ScriptIntrinsicBLAS.NO_TRANSPOSE, ScriptIntrinsicBLAS.NON_UNIT,
            alphaC, a, b);
    Type shapeRef = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dM);
    Allocation expected = Allocation.createTyped(mRS, shapeRef);
    expected.copyFrom(BLASData.L3_cTRSM_o_LUN);
    mVerify.invoke_verify(expected, b, expected);

    // Configuration 2: RIGHT, LOWER, TRANSPOSE. A becomes a dN x dN allocation.
    Type squareN = Type.createXY(mRS, Element.F32_2(mRS), BLASData.dN, BLASData.dN);
    a = Allocation.createTyped(mRS, squareN);
    a.copyFrom(BLASData.L3_cTRSM_A_nn);
    // B held the previous result, so restore its original input data.
    b.copyFrom(BLASData.L3_cTRSM_B_mn);
    mBLAS.CTRSM(ScriptIntrinsicBLAS.RIGHT, ScriptIntrinsicBLAS.LOWER,
            ScriptIntrinsicBLAS.TRANSPOSE, ScriptIntrinsicBLAS.NON_UNIT,
            alphaC, a, b);
    expected.copyFrom(BLASData.L3_cTRSM_o_RLT);
    mVerify.invoke_verify(expected, b, expected);

    // Let pending RenderScript work complete before running checkError().
    mRS.finish();
    checkError();
}
/**
 * Compares ZTRSM output with the reference data stored in BLASData for two configurations,
 * both with NON_UNIT diagonal: (LEFT, UPPER, NO_TRANSPOSE) and (RIGHT, LOWER, TRANSPOSE).
 */
public void test_L3_ZTRSM_Correctness() {
    // Inputs for the first configuration: A has X = Y = dM, B has X = dN and Y = dM.
    Type squareM = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dM, BLASData.dM);
    Allocation a = Allocation.createTyped(mRS, squareM);
    Type shapeB = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dM);
    Allocation b = Allocation.createTyped(mRS, shapeB);
    a.copyFrom(BLASData.L3_zTRSM_A_mm);
    b.copyFrom(BLASData.L3_zTRSM_B_mn);

    // Configuration 1: LEFT, UPPER, NO_TRANSPOSE.
    mBLAS.ZTRSM(ScriptIntrinsicBLAS.LEFT, ScriptIntrinsicBLAS.UPPER,
            ScriptIntrinsicBLAS.NO_TRANSPOSE, ScriptIntrinsicBLAS.NON_UNIT,
            alphaZ, a, b);
    Type shapeRef = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dM);
    Allocation expected = Allocation.createTyped(mRS, shapeRef);
    expected.copyFrom(BLASData.L3_zTRSM_o_LUN);
    mVerify.invoke_verify(expected, b, expected);

    // Configuration 2: RIGHT, LOWER, TRANSPOSE. A becomes a dN x dN allocation.
    Type squareN = Type.createXY(mRS, Element.F64_2(mRS), BLASData.dN, BLASData.dN);
    a = Allocation.createTyped(mRS, squareN);
    a.copyFrom(BLASData.L3_zTRSM_A_nn);
    // B held the previous result, so restore its original input data.
    b.copyFrom(BLASData.L3_zTRSM_B_mn);
    mBLAS.ZTRSM(ScriptIntrinsicBLAS.RIGHT, ScriptIntrinsicBLAS.LOWER,
            ScriptIntrinsicBLAS.TRANSPOSE, ScriptIntrinsicBLAS.NON_UNIT,
            alphaZ, a, b);
    expected.copyFrom(BLASData.L3_zTRSM_o_RLT);
    mVerify.invoke_verify(expected, b, expected);

    // Let pending RenderScript work complete before running checkError().
    mRS.finish();
    checkError();
}
}