Make libRSSupport able to optionally bundle libblas (V8) through dlopen
and dlsym.

Change-Id: I3ade3ad2802f3b8e5fc5661319b98a6212e6d8a2
diff --git a/cpu_ref/rsCpuBLAS.inc b/cpu_ref/rsCpuBLAS.inc
new file mode 100644
index 0000000..17fdcd3
--- /dev/null
+++ b/cpu_ref/rsCpuBLAS.inc
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Helper file that applies the macro RS_APPLY_MACRO_TO to each cblas routine listed below.
+// It is meant to be included multiple times, with RS_APPLY_MACRO_TO defined by the includer.
+
+#if !defined(RS_APPLY_MACRO_TO)
+#error "You must define the macro RS_APPLY_MACRO_TO to include this file"
+#endif
+// Level 2 routines with S, D, C and Z prefixes.
+RS_APPLY_MACRO_TO(cblas_sgemv)
+RS_APPLY_MACRO_TO(cblas_sgbmv)
+RS_APPLY_MACRO_TO(cblas_strmv)
+RS_APPLY_MACRO_TO(cblas_stbmv)
+RS_APPLY_MACRO_TO(cblas_stpmv)
+RS_APPLY_MACRO_TO(cblas_strsv)
+RS_APPLY_MACRO_TO(cblas_stbsv)
+RS_APPLY_MACRO_TO(cblas_stpsv)
+
+RS_APPLY_MACRO_TO(cblas_dgemv)
+RS_APPLY_MACRO_TO(cblas_dgbmv)
+RS_APPLY_MACRO_TO(cblas_dtrmv)
+RS_APPLY_MACRO_TO(cblas_dtbmv)
+RS_APPLY_MACRO_TO(cblas_dtpmv)
+RS_APPLY_MACRO_TO(cblas_dtrsv)
+RS_APPLY_MACRO_TO(cblas_dtbsv)
+RS_APPLY_MACRO_TO(cblas_dtpsv)
+
+RS_APPLY_MACRO_TO(cblas_cgemv)
+RS_APPLY_MACRO_TO(cblas_cgbmv)
+RS_APPLY_MACRO_TO(cblas_ctrmv)
+RS_APPLY_MACRO_TO(cblas_ctbmv)
+RS_APPLY_MACRO_TO(cblas_ctpmv)
+RS_APPLY_MACRO_TO(cblas_ctrsv)
+RS_APPLY_MACRO_TO(cblas_ctbsv)
+RS_APPLY_MACRO_TO(cblas_ctpsv)
+
+RS_APPLY_MACRO_TO(cblas_zgemv)
+RS_APPLY_MACRO_TO(cblas_zgbmv)
+RS_APPLY_MACRO_TO(cblas_ztrmv)
+RS_APPLY_MACRO_TO(cblas_ztbmv)
+RS_APPLY_MACRO_TO(cblas_ztpmv)
+RS_APPLY_MACRO_TO(cblas_ztrsv)
+RS_APPLY_MACRO_TO(cblas_ztbsv)
+RS_APPLY_MACRO_TO(cblas_ztpsv)
+// Level 2 routines with S and D prefixes only.
+RS_APPLY_MACRO_TO(cblas_ssymv)
+RS_APPLY_MACRO_TO(cblas_ssbmv)
+RS_APPLY_MACRO_TO(cblas_sspmv)
+RS_APPLY_MACRO_TO(cblas_sger)
+RS_APPLY_MACRO_TO(cblas_ssyr)
+RS_APPLY_MACRO_TO(cblas_sspr)
+RS_APPLY_MACRO_TO(cblas_ssyr2)
+RS_APPLY_MACRO_TO(cblas_sspr2)
+
+RS_APPLY_MACRO_TO(cblas_dsymv)
+RS_APPLY_MACRO_TO(cblas_dsbmv)
+RS_APPLY_MACRO_TO(cblas_dspmv)
+RS_APPLY_MACRO_TO(cblas_dger)
+RS_APPLY_MACRO_TO(cblas_dsyr)
+RS_APPLY_MACRO_TO(cblas_dspr)
+RS_APPLY_MACRO_TO(cblas_dsyr2)
+RS_APPLY_MACRO_TO(cblas_dspr2)
+// Level 2 routines with C and Z prefixes only.
+RS_APPLY_MACRO_TO(cblas_chemv)
+RS_APPLY_MACRO_TO(cblas_chbmv)
+RS_APPLY_MACRO_TO(cblas_chpmv)
+RS_APPLY_MACRO_TO(cblas_cgeru)
+RS_APPLY_MACRO_TO(cblas_cgerc)
+RS_APPLY_MACRO_TO(cblas_cher)
+RS_APPLY_MACRO_TO(cblas_chpr)
+RS_APPLY_MACRO_TO(cblas_cher2)
+RS_APPLY_MACRO_TO(cblas_chpr2)
+
+RS_APPLY_MACRO_TO(cblas_zhemv)
+RS_APPLY_MACRO_TO(cblas_zhbmv)
+RS_APPLY_MACRO_TO(cblas_zhpmv)
+RS_APPLY_MACRO_TO(cblas_zgeru)
+RS_APPLY_MACRO_TO(cblas_zgerc)
+RS_APPLY_MACRO_TO(cblas_zher)
+RS_APPLY_MACRO_TO(cblas_zhpr)
+RS_APPLY_MACRO_TO(cblas_zher2)
+RS_APPLY_MACRO_TO(cblas_zhpr2)
+
+// Level 3 routines with S, D, C and Z prefixes.
+RS_APPLY_MACRO_TO(cblas_sgemm)
+RS_APPLY_MACRO_TO(cblas_ssymm)
+RS_APPLY_MACRO_TO(cblas_ssyrk)
+RS_APPLY_MACRO_TO(cblas_ssyr2k)
+RS_APPLY_MACRO_TO(cblas_strmm)
+RS_APPLY_MACRO_TO(cblas_strsm)
+
+RS_APPLY_MACRO_TO(cblas_dgemm)
+RS_APPLY_MACRO_TO(cblas_dsymm)
+RS_APPLY_MACRO_TO(cblas_dsyrk)
+RS_APPLY_MACRO_TO(cblas_dsyr2k)
+RS_APPLY_MACRO_TO(cblas_dtrmm)
+RS_APPLY_MACRO_TO(cblas_dtrsm)
+
+RS_APPLY_MACRO_TO(cblas_cgemm)
+RS_APPLY_MACRO_TO(cblas_csymm)
+RS_APPLY_MACRO_TO(cblas_csyrk)
+RS_APPLY_MACRO_TO(cblas_csyr2k)
+RS_APPLY_MACRO_TO(cblas_ctrmm)
+RS_APPLY_MACRO_TO(cblas_ctrsm)
+
+RS_APPLY_MACRO_TO(cblas_zgemm)
+RS_APPLY_MACRO_TO(cblas_zsymm)
+RS_APPLY_MACRO_TO(cblas_zsyrk)
+RS_APPLY_MACRO_TO(cblas_zsyr2k)
+RS_APPLY_MACRO_TO(cblas_ztrmm)
+RS_APPLY_MACRO_TO(cblas_ztrsm)
+
+RS_APPLY_MACRO_TO(cblas_chemm)
+RS_APPLY_MACRO_TO(cblas_cherk)
+RS_APPLY_MACRO_TO(cblas_cher2k)
+
+RS_APPLY_MACRO_TO(cblas_zhemm)
+RS_APPLY_MACRO_TO(cblas_zherk)
+RS_APPLY_MACRO_TO(cblas_zher2k)
+
+// Undefine the macro so that we can include this file multiple times to generate different functionality.
+#undef RS_APPLY_MACRO_TO
diff --git a/cpu_ref/rsCpuBLASDispatch.h b/cpu_ref/rsCpuBLASDispatch.h
new file mode 100644
index 0000000..4602135
--- /dev/null
+++ b/cpu_ref/rsCpuBLASDispatch.h
@@ -0,0 +1,468 @@
+#ifndef RS_COMPATIBILITY_LIB
+#include "cblas.h"
+#else
+#include <dlfcn.h>
+/*
+ * The following enums and function pointer types are based on cblas.h.
+ * ===========================================================================
+ * Prototypes for level 2 BLAS
+ * ===========================================================================
+ */
+
+/*
+ * Argument enums for the function pointer types below; like them, based on cblas.h.
+ */
+enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102};
+enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113};
+enum CBLAS_UPLO {CblasUpper=121, CblasLower=122};
+enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132};
+enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
+/* Routines with standard 4 prefixes (S, D, C, Z) */
+typedef void (*FnPtr_cblas_sgemv)(const enum CBLAS_ORDER order,
+                                  const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+                                  const float alpha, const float *A, const int lda,
+                                  const float *X, const int incX, const float beta,
+                                  float *Y, const int incY);
+typedef void (*FnPtr_cblas_sgbmv)(const enum CBLAS_ORDER order,
+                                  const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+                                  const int KL, const int KU, const float alpha,
+                                  const float *A, const int lda, const float *X,
+                                  const int incX, const float beta, float *Y, const int incY);
+typedef void (*FnPtr_cblas_strmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const float *A, const int lda,
+                                  float *X, const int incX);
+typedef void (*FnPtr_cblas_stbmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const int K, const float *A, const int lda,
+                                  float *X, const int incX);
+typedef void (*FnPtr_cblas_stpmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const float *Ap, float *X, const int incX);
+typedef void (*FnPtr_cblas_strsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const float *A, const int lda, float *X,
+                                  const int incX);
+typedef void (*FnPtr_cblas_stbsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const int K, const float *A, const int lda,
+                                  float *X, const int incX);
+typedef void (*FnPtr_cblas_stpsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const float *Ap, float *X, const int incX);
+/* Same eight routines with the D prefix: scalars and arrays are double. */
+typedef void (*FnPtr_cblas_dgemv)(const enum CBLAS_ORDER order,
+                                  const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+                                  const double alpha, const double *A, const int lda,
+                                  const double *X, const int incX, const double beta,
+                                  double *Y, const int incY);
+typedef void (*FnPtr_cblas_dgbmv)(const enum CBLAS_ORDER order,
+                                  const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+                                  const int KL, const int KU, const double alpha,
+                                  const double *A, const int lda, const double *X,
+                                  const int incX, const double beta, double *Y, const int incY);
+typedef void (*FnPtr_cblas_dtrmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const double *A, const int lda,
+                                  double *X, const int incX);
+typedef void (*FnPtr_cblas_dtbmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const int K, const double *A, const int lda,
+                                  double *X, const int incX);
+typedef void (*FnPtr_cblas_dtpmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const double *Ap, double *X, const int incX);
+typedef void (*FnPtr_cblas_dtrsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const double *A, const int lda, double *X,
+                                  const int incX);
+typedef void (*FnPtr_cblas_dtbsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const int K, const double *A, const int lda,
+                                  double *X, const int incX);
+typedef void (*FnPtr_cblas_dtpsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const double *Ap, double *X, const int incX);
+/* Same eight routines with the C prefix: scalars and arrays are passed as void pointers. */
+typedef void (*FnPtr_cblas_cgemv)(const enum CBLAS_ORDER order,
+                                  const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+                                  const void *alpha, const void *A, const int lda,
+                                  const void *X, const int incX, const void *beta,
+                                  void *Y, const int incY);
+typedef void (*FnPtr_cblas_cgbmv)(const enum CBLAS_ORDER order,
+                                  const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+                                  const int KL, const int KU, const void *alpha,
+                                  const void *A, const int lda, const void *X,
+                                  const int incX, const void *beta, void *Y, const int incY);
+typedef void (*FnPtr_cblas_ctrmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const void *A, const int lda,
+                                  void *X, const int incX);
+typedef void (*FnPtr_cblas_ctbmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const int K, const void *A, const int lda,
+                                  void *X, const int incX);
+typedef void (*FnPtr_cblas_ctpmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const void *Ap, void *X, const int incX);
+typedef void (*FnPtr_cblas_ctrsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const void *A, const int lda, void *X,
+                                  const int incX);
+typedef void (*FnPtr_cblas_ctbsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const int K, const void *A, const int lda,
+                                  void *X, const int incX);
+typedef void (*FnPtr_cblas_ctpsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const void *Ap, void *X, const int incX);
+/* Same eight routines with the Z prefix: void pointers again, as for the C prefix. */
+typedef void (*FnPtr_cblas_zgemv)(const enum CBLAS_ORDER order,
+                                  const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+                                  const void *alpha, const void *A, const int lda,
+                                  const void *X, const int incX, const void *beta,
+                                  void *Y, const int incY);
+typedef void (*FnPtr_cblas_zgbmv)(const enum CBLAS_ORDER order,
+                                  const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+                                  const int KL, const int KU, const void *alpha,
+                                  const void *A, const int lda, const void *X,
+                                  const int incX, const void *beta, void *Y, const int incY);
+typedef void (*FnPtr_cblas_ztrmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const void *A, const int lda,
+                                  void *X, const int incX);
+typedef void (*FnPtr_cblas_ztbmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const int K, const void *A, const int lda,
+                                  void *X, const int incX);
+typedef void (*FnPtr_cblas_ztpmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const void *Ap, void *X, const int incX);
+typedef void (*FnPtr_cblas_ztrsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const void *A, const int lda, void *X,
+                                  const int incX);
+typedef void (*FnPtr_cblas_ztbsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const int K, const void *A, const int lda,
+                                  void *X, const int incX);
+typedef void (*FnPtr_cblas_ztpsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+                                  const int N, const void *Ap, void *X, const int incX);
+
+
+/*
+ * Routines with S and D prefixes only (S variants take float data, D variants double)
+ */
+typedef void (*FnPtr_cblas_ssymv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const int N, const float alpha, const float *A,
+                                  const int lda, const float *X, const int incX,
+                                  const float beta, float *Y, const int incY);
+typedef void (*FnPtr_cblas_ssbmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const int N, const int K, const float alpha, const float *A,
+                                  const int lda, const float *X, const int incX,
+                                  const float beta, float *Y, const int incY);
+typedef void (*FnPtr_cblas_sspmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const int N, const float alpha, const float *Ap,
+                                  const float *X, const int incX,
+                                  const float beta, float *Y, const int incY);
+typedef void (*FnPtr_cblas_sger)(const enum CBLAS_ORDER order, const int M, const int N,
+                                 const float alpha, const float *X, const int incX,
+                                 const float *Y, const int incY, float *A, const int lda);
+typedef void (*FnPtr_cblas_ssyr)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                 const int N, const float alpha, const float *X,
+                                 const int incX, float *A, const int lda);
+typedef void (*FnPtr_cblas_sspr)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                 const int N, const float alpha, const float *X,
+                                 const int incX, float *Ap);
+typedef void (*FnPtr_cblas_ssyr2)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const int N, const float alpha, const float *X,
+                                  const int incX, const float *Y, const int incY, float *A,
+                                  const int lda);
+typedef void (*FnPtr_cblas_sspr2)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const int N, const float alpha, const float *X,
+                                  const int incX, const float *Y, const int incY, float *A);
+/* D-prefix counterparts of the eight S routines above */
+typedef void (*FnPtr_cblas_dsymv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const int N, const double alpha, const double *A,
+                                  const int lda, const double *X, const int incX,
+                                  const double beta, double *Y, const int incY);
+typedef void (*FnPtr_cblas_dsbmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const int N, const int K, const double alpha, const double *A,
+                                  const int lda, const double *X, const int incX,
+                                  const double beta, double *Y, const int incY);
+typedef void (*FnPtr_cblas_dspmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const int N, const double alpha, const double *Ap,
+                                  const double *X, const int incX,
+                                  const double beta, double *Y, const int incY);
+typedef void (*FnPtr_cblas_dger)(const enum CBLAS_ORDER order, const int M, const int N,
+                                 const double alpha, const double *X, const int incX,
+                                 const double *Y, const int incY, double *A, const int lda);
+typedef void (*FnPtr_cblas_dsyr)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                 const int N, const double alpha, const double *X,
+                                 const int incX, double *A, const int lda);
+typedef void (*FnPtr_cblas_dspr)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                 const int N, const double alpha, const double *X,
+                                 const int incX, double *Ap);
+typedef void (*FnPtr_cblas_dsyr2)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const int N, const double alpha, const double *X,
+                                  const int incX, const double *Y, const int incY, double *A,
+                                  const int lda);
+typedef void (*FnPtr_cblas_dspr2)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const int N, const double alpha, const double *X,
+                                  const int incX, const double *Y, const int incY, double *A);
+
+
+/*
+ * Routines with C and Z prefixes only (matrix and vector data is passed as void pointers)
+ */
+typedef void (*FnPtr_cblas_chemv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const int N, const void *alpha, const void *A,
+                                  const int lda, const void *X, const int incX,
+                                  const void *beta, void *Y, const int incY);
+typedef void (*FnPtr_cblas_chbmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const int N, const int K, const void *alpha, const void *A,
+                                  const int lda, const void *X, const int incX,
+                                  const void *beta, void *Y, const int incY);
+typedef void (*FnPtr_cblas_chpmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const int N, const void *alpha, const void *Ap,
+                                  const void *X, const int incX,
+                                  const void *beta, void *Y, const int incY);
+typedef void (*FnPtr_cblas_cgeru)(const enum CBLAS_ORDER order, const int M, const int N,
+                                  const void *alpha, const void *X, const int incX,
+                                  const void *Y, const int incY, void *A, const int lda);
+typedef void (*FnPtr_cblas_cgerc)(const enum CBLAS_ORDER order, const int M, const int N,
+                                  const void *alpha, const void *X, const int incX,
+                                  const void *Y, const int incY, void *A, const int lda);
+typedef void (*FnPtr_cblas_cher)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                 const int N, const float alpha, const void *X, const int incX,
+                                 void *A, const int lda);
+typedef void (*FnPtr_cblas_chpr)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                 const int N, const float alpha, const void *X,
+                                 const int incX, void *A);
+typedef void (*FnPtr_cblas_cher2)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N,
+                                  const void *alpha, const void *X, const int incX,
+                                  const void *Y, const int incY, void *A, const int lda);
+typedef void (*FnPtr_cblas_chpr2)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N,
+                                  const void *alpha, const void *X, const int incX,
+                                  const void *Y, const int incY, void *Ap);
+/* Z-prefix counterparts of the nine C routines above; zher/zhpr take a double alpha */
+typedef void (*FnPtr_cblas_zhemv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const int N, const void *alpha, const void *A,
+                                  const int lda, const void *X, const int incX,
+                                  const void *beta, void *Y, const int incY);
+typedef void (*FnPtr_cblas_zhbmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const int N, const int K, const void *alpha, const void *A,
+                                  const int lda, const void *X, const int incX,
+                                  const void *beta, void *Y, const int incY);
+typedef void (*FnPtr_cblas_zhpmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                  const int N, const void *alpha, const void *Ap,
+                                  const void *X, const int incX,
+                                  const void *beta, void *Y, const int incY);
+typedef void (*FnPtr_cblas_zgeru)(const enum CBLAS_ORDER order, const int M, const int N,
+                                  const void *alpha, const void *X, const int incX,
+                                  const void *Y, const int incY, void *A, const int lda);
+typedef void (*FnPtr_cblas_zgerc)(const enum CBLAS_ORDER order, const int M, const int N,
+                                  const void *alpha, const void *X, const int incX,
+                                  const void *Y, const int incY, void *A, const int lda);
+typedef void (*FnPtr_cblas_zher)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                 const int N, const double alpha, const void *X, const int incX,
+                                 void *A, const int lda);
+typedef void (*FnPtr_cblas_zhpr)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
+                                 const int N, const double alpha, const void *X,
+                                 const int incX, void *A);
+typedef void (*FnPtr_cblas_zher2)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N,
+                                  const void *alpha, const void *X, const int incX,
+                                  const void *Y, const int incY, void *A, const int lda);
+typedef void (*FnPtr_cblas_zhpr2)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N,
+                                  const void *alpha, const void *X, const int incX,
+                                  const void *Y, const int incY, void *Ap);
+
+/*
+ * ===========================================================================
+ * Prototypes for level 3 BLAS
+ * ===========================================================================
+ */
+
+/*
+ * Level 3 routines with standard 4 prefixes (S, D, C, Z); the S variants come first
+ */
+typedef void (*FnPtr_cblas_sgemm)(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
+                                  const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
+                                  const int K, const float alpha, const float *A,
+                                  const int lda, const float *B, const int ldb,
+                                  const float beta, float *C, const int ldc);
+typedef void (*FnPtr_cblas_ssymm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+                                  const enum CBLAS_UPLO Uplo, const int M, const int N,
+                                  const float alpha, const float *A, const int lda,
+                                  const float *B, const int ldb, const float beta,
+                                  float *C, const int ldc);
+typedef void (*FnPtr_cblas_ssyrk)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+                                  const float alpha, const float *A, const int lda,
+                                  const float beta, float *C, const int ldc);
+typedef void (*FnPtr_cblas_ssyr2k)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+                                   const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+                                   const float alpha, const float *A, const int lda,
+                                   const float *B, const int ldb, const float beta,
+                                   float *C, const int ldc);
+typedef void (*FnPtr_cblas_strmm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+                                  const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+                                  const enum CBLAS_DIAG Diag, const int M, const int N,
+                                  const float alpha, const float *A, const int lda,
+                                  float *B, const int ldb);
+typedef void (*FnPtr_cblas_strsm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+                                  const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+                                  const enum CBLAS_DIAG Diag, const int M, const int N,
+                                  const float alpha, const float *A, const int lda,
+                                  float *B, const int ldb);
+/* D prefix: same six routines with double scalars and matrices */
+typedef void (*FnPtr_cblas_dgemm)(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
+                                  const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
+                                  const int K, const double alpha, const double *A,
+                                  const int lda, const double *B, const int ldb,
+                                  const double beta, double *C, const int ldc);
+typedef void (*FnPtr_cblas_dsymm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+                                  const enum CBLAS_UPLO Uplo, const int M, const int N,
+                                  const double alpha, const double *A, const int lda,
+                                  const double *B, const int ldb, const double beta,
+                                  double *C, const int ldc);
+typedef void (*FnPtr_cblas_dsyrk)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+                                  const double alpha, const double *A, const int lda,
+                                  const double beta, double *C, const int ldc);
+typedef void (*FnPtr_cblas_dsyr2k)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+                                   const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+                                   const double alpha, const double *A, const int lda,
+                                   const double *B, const int ldb, const double beta,
+                                   double *C, const int ldc);
+typedef void (*FnPtr_cblas_dtrmm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+                                  const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+                                  const enum CBLAS_DIAG Diag, const int M, const int N,
+                                  const double alpha, const double *A, const int lda,
+                                  double *B, const int ldb);
+typedef void (*FnPtr_cblas_dtrsm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+                                  const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+                                  const enum CBLAS_DIAG Diag, const int M, const int N,
+                                  const double alpha, const double *A, const int lda,
+                                  double *B, const int ldb);
+/* C prefix: same six routines; scalars and matrices are passed as void pointers */
+typedef void (*FnPtr_cblas_cgemm)(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
+                                  const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
+                                  const int K, const void *alpha, const void *A,
+                                  const int lda, const void *B, const int ldb,
+                                  const void *beta, void *C, const int ldc);
+typedef void (*FnPtr_cblas_csymm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+                                  const enum CBLAS_UPLO Uplo, const int M, const int N,
+                                  const void *alpha, const void *A, const int lda,
+                                  const void *B, const int ldb, const void *beta,
+                                  void *C, const int ldc);
+typedef void (*FnPtr_cblas_csyrk)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+                                  const void *alpha, const void *A, const int lda,
+                                  const void *beta, void *C, const int ldc);
+typedef void (*FnPtr_cblas_csyr2k)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+                                   const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+                                   const void *alpha, const void *A, const int lda,
+                                   const void *B, const int ldb, const void *beta,
+                                   void *C, const int ldc);
+typedef void (*FnPtr_cblas_ctrmm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+                                  const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+                                  const enum CBLAS_DIAG Diag, const int M, const int N,
+                                  const void *alpha, const void *A, const int lda,
+                                  void *B, const int ldb);
+typedef void (*FnPtr_cblas_ctrsm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+                                  const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+                                  const enum CBLAS_DIAG Diag, const int M, const int N,
+                                  const void *alpha, const void *A, const int lda,
+                                  void *B, const int ldb);
+
+typedef void (*FnPtr_cblas_zgemm)(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
+                                  const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
+                                  const int K, const void *alpha, const void *A,
+                                  const int lda, const void *B, const int ldb,
+                                  const void *beta, void *C, const int ldc);
+typedef void (*FnPtr_cblas_zsymm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+                                  const enum CBLAS_UPLO Uplo, const int M, const int N,
+                                  const void *alpha, const void *A, const int lda,
+                                  const void *B, const int ldb, const void *beta,
+                                  void *C, const int ldc);
+typedef void (*FnPtr_cblas_zsyrk)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+                                  const void *alpha, const void *A, const int lda,
+                                  const void *beta, void *C, const int ldc);
+typedef void (*FnPtr_cblas_zsyr2k)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+                                   const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+                                   const void *alpha, const void *A, const int lda,
+                                   const void *B, const int ldb, const void *beta,
+                                   void *C, const int ldc);
+typedef void (*FnPtr_cblas_ztrmm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+                                  const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+                                  const enum CBLAS_DIAG Diag, const int M, const int N,
+                                  const void *alpha, const void *A, const int lda,
+                                  void *B, const int ldb);
+typedef void (*FnPtr_cblas_ztrsm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+                                  const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+                                  const enum CBLAS_DIAG Diag, const int M, const int N,
+                                  const void *alpha, const void *A, const int lda,
+                                  void *B, const int ldb);
+
+
+/* Routines with prefixes C and Z only (hemm, herk, her2k). Unlike the other scalars, which are
+ * const void pointers, herk's alpha/beta and her2k's beta are passed by value (float for C,
+ * double for Z). */
+typedef void (*FnPtr_cblas_chemm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+                                  const enum CBLAS_UPLO Uplo, const int M, const int N,
+                                  const void *alpha, const void *A, const int lda,
+                                  const void *B, const int ldb, const void *beta,
+                                  void *C, const int ldc);
+typedef void (*FnPtr_cblas_cherk)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+                                  const float alpha, const void *A, const int lda,
+                                  const float beta, void *C, const int ldc);
+typedef void (*FnPtr_cblas_cher2k)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+                                   const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+                                   const void *alpha, const void *A, const int lda,
+                                   const void *B, const int ldb, const float beta,
+                                   void *C, const int ldc);
+
+typedef void (*FnPtr_cblas_zhemm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+                                  const enum CBLAS_UPLO Uplo, const int M, const int N,
+                                  const void *alpha, const void *A, const int lda,
+                                  const void *B, const int ldb, const void *beta,
+                                  void *C, const int ldc);
+typedef void (*FnPtr_cblas_zherk)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+                                  const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+                                  const double alpha, const void *A, const int lda,
+                                  const double beta, void *C, const int ldc);
+typedef void (*FnPtr_cblas_zher2k)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+                                   const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+                                   const void *alpha, const void *A, const int lda,
+                                   const void *B, const int ldb, const double beta,
+                                   void *C, const int ldc);
+
+// Define one global FnPtr_<routine> pointer per cblas routine listed in rsCpuBLAS.inc.
+#define RS_APPLY_MACRO_TO(x) \
+    FnPtr_##x x;
+#include "rsCpuBLAS.inc"
+
+// Opens libblasV8.so and binds each routine in rsCpuBLAS.inc; returns false on any failure.
+bool loadBLASLib() {
+    void* handle = dlopen("libblasV8.so", RTLD_LAZY | RTLD_LOCAL);
+    if (handle == nullptr) {
+        ALOGE("Failed to dlopen libblasV8.so: %s", dlerror());
+        return false;
+    }
+// Bind one routine; the handle stays open on success and is closed before a failed return.
+#define RS_APPLY_MACRO_TO(x) \
+    x = (FnPtr_##x)dlsym(handle, #x); \
+    if (x == nullptr) { \
+        ALOGE("Failed to load " #x " for RS BLAS implementation."); \
+        dlclose(handle); \
+        return false; \
+    }
+#include "rsCpuBLAS.inc"
+    return true;
+}
+
+#endif
diff --git a/cpu_ref/rsCpuIntrinsicBLAS.cpp b/cpu_ref/rsCpuIntrinsicBLAS.cpp
index c020f2f..9364328 100644
--- a/cpu_ref/rsCpuIntrinsicBLAS.cpp
+++ b/cpu_ref/rsCpuIntrinsicBLAS.cpp
@@ -17,7 +17,7 @@
 
 #include "rsCpuIntrinsic.h"
 #include "rsCpuIntrinsicInlines.h"
-#include "cblas.h"
+#include "rsCpuBLASDispatch.h"
 #include "eight_bit_int_gemm.h"
 
 using namespace android;
@@ -47,6 +47,9 @@
     uint8_t b_offset = 0;
     uint8_t c_offset = 0;
 
+#ifdef RS_COMPATIBILITY_LIB
+    bool isBlasLibInitialized = false;
+#endif
     static void kernelBNNM(size_t m, size_t n, size_t k,
                            const uint8_t* a, uint8_t a_offset, size_t lda,
                            const uint8_t* b, uint8_t b_offset, size_t ldb,
@@ -112,6 +115,17 @@
 
     int lda = 0, ldb = 0, ldc = 0;
 
+#ifdef RS_COMPATIBILITY_LIB
+    // Allow BNNM even without libblas
+    if (call->func != RsBlas_bnnm && !isBlasLibInitialized) {
+        if (!loadBLASLib()) {
+            ALOGE("Failed to load the BLAS lib, IntrinsicBLAS NOT supported!\n");
+            return;
+        }
+        isBlasLibInitialized = true;
+    }
+#endif
+
     switch (call->func) {
 
     // Level 1 BLAS: returns into a 1D Allocation