blob: 7537f6679d1904641bdcdc5565d6f99d8673bd8c [file] [log] [blame]
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/THVectorDispatch.c"
#else
/* For now there are only SIMD implementations for FLOAT and DOUBLE.
* Hopefully in the future this can be made totally generic (e.g., once there are SIMD
* implementations for a lot of functions). */
/* Each function with multiple implementations has:
* 1. A DISPATCHPTR which will be initialized to point to the best available implementation for the host
* 2. A DISPATCHTABLE which holds pointers to each implementation of a function, and a value indicating
* which SIMD extension a given implementation uses
* 3. A dispatch stub, which is what is actually called by clients, that simply wraps the dispatch pointer.
*/
/* fill: dispatch pointer, candidate table and public stub.
 * The pointer starts out at the portable implementation. */
static void (*THVector_(fill_DISPATCHPTR))(real *, const real, const ptrdiff_t) =
    &THVector_(fill_DEFAULT);

/* Candidate implementations, each tagged with the SIMD extension it uses.
 * The DEFAULT entry is always present and always last. */
static FunctionDescription THVector_(fill_DISPATCHTABLE)[] = {
#if defined(__NEON__) && defined(TH_REAL_IS_FLOAT)
  FUNCTION_IMPL(THVector_(fill_NEON), SIMDExtension_NEON),
#endif
#if defined(__PPC64__) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  FUNCTION_IMPL(THVector_(fill_VSX), SIMDExtension_VSX),
#endif
#if (defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
     || defined(USE_SSE4_1) || defined(USE_SSE4_2)) \
    && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  FUNCTION_IMPL(THVector_(fill_SSE), SIMDExtension_SSE),
#endif
  FUNCTION_IMPL(THVector_(fill_DEFAULT), SIMDExtension_DEFAULT)
};

/* Public stub: forwards its arguments to the currently selected implementation. */
void THVector_(fill)(real *x, const real c, const ptrdiff_t n)
{
  (*THVector_(fill_DISPATCHPTR))(x, c, n);
}
/* cadd: dispatch pointer, candidate table and public stub.
 * The pointer starts out at the portable implementation. */
static void (*THVector_(cadd_DISPATCHPTR))(real *, const real *, const real *,
                                           const real, const ptrdiff_t) =
    &THVector_(cadd_DEFAULT);

/* Candidate implementations, each tagged with the SIMD extension it uses.
 * The DEFAULT entry is always present and always last. */
static FunctionDescription THVector_(cadd_DISPATCHTABLE)[] = {
#if defined(__NEON__) && defined(TH_REAL_IS_FLOAT)
  FUNCTION_IMPL(THVector_(cadd_NEON), SIMDExtension_NEON),
#endif
#if defined(USE_AVX) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  FUNCTION_IMPL(THVector_(cadd_AVX), SIMDExtension_AVX),
#endif
#if (defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
     || defined(USE_SSE4_1) || defined(USE_SSE4_2)) \
    && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  FUNCTION_IMPL(THVector_(cadd_SSE), SIMDExtension_SSE),
#endif
  FUNCTION_IMPL(THVector_(cadd_DEFAULT), SIMDExtension_DEFAULT)
};

/* Public stub: forwards its arguments to the currently selected implementation. */
void THVector_(cadd)(real *z, const real *x, const real *y, const real c, const ptrdiff_t n)
{
  (*THVector_(cadd_DISPATCHPTR))(z, x, y, c, n);
}
/* add: dispatch pointer, candidate table and public stub.
 * The pointer starts out at the portable implementation. */
static void (*THVector_(add_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) =
    &THVector_(add_DEFAULT);

/* Candidate implementations, each tagged with the SIMD extension it uses.
 * The DEFAULT entry is always present and always last. */
static FunctionDescription THVector_(add_DISPATCHTABLE)[] = {
#if defined(__NEON__) && defined(TH_REAL_IS_FLOAT)
  FUNCTION_IMPL(THVector_(add_NEON), SIMDExtension_NEON),
#endif
#if defined(USE_AVX) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  FUNCTION_IMPL(THVector_(add_AVX), SIMDExtension_AVX),
#endif
#if (defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
     || defined(USE_SSE4_1) || defined(USE_SSE4_2)) \
    && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  FUNCTION_IMPL(THVector_(add_SSE), SIMDExtension_SSE),
#endif
  FUNCTION_IMPL(THVector_(add_DEFAULT), SIMDExtension_DEFAULT)
};

/* Exported stub: does nothing except call through the dispatch pointer. */
TH_API void THVector_(add)(real *r_, const real *t, const real value, const ptrdiff_t n)
{
  (*THVector_(add_DISPATCHPTR))(r_, t, value, n);
}
/* cmul: dispatch pointer, candidate table and public stub.
 * The pointer starts out at the portable implementation. */
static void (*THVector_(cmul_DISPATCHPTR))(real *, const real *, const real *, const ptrdiff_t) =
    &THVector_(cmul_DEFAULT);

/* Candidate implementations, each tagged with the SIMD extension it uses.
 * The DEFAULT entry is always present and always last.
 *
 * Every entry must be a cmul implementation, i.e. callable through
 * cmul_DISPATCHPTR's type. This table used to register THVector_(mul_VSX)
 * for __PPC64__; that is the VSX kernel of a different operation ("mul",
 * which takes a scalar operand rather than a second vector), so selecting it
 * would call a function through an incompatible pointer type. The entry has
 * been removed, so PPC64 hosts use cmul_DEFAULT.
 * NOTE(review): if a VSX kernel with the cmul signature exists, register it
 * here as THVector_(cmul_VSX). */
static FunctionDescription THVector_(cmul_DISPATCHTABLE)[] = {
#if defined(__NEON__) && defined(TH_REAL_IS_FLOAT)
  FUNCTION_IMPL(THVector_(cmul_NEON), SIMDExtension_NEON),
#endif
#if defined(USE_AVX) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  FUNCTION_IMPL(THVector_(cmul_AVX), SIMDExtension_AVX),
#endif
#if (defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
     || defined(USE_SSE4_1) || defined(USE_SSE4_2)) \
    && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  FUNCTION_IMPL(THVector_(cmul_SSE), SIMDExtension_SSE),
#endif
  FUNCTION_IMPL(THVector_(cmul_DEFAULT), SIMDExtension_DEFAULT)
};

/* Public stub: forwards its arguments to the currently selected implementation. */
void THVector_(cmul)(real *z, const real *x, const real *y, const ptrdiff_t n)
{
  (*THVector_(cmul_DISPATCHPTR))(z, x, y, n);
}
/* mul: dispatch pointer, candidate table and public stub.
 * The pointer starts out at the portable implementation. */
static void (*THVector_(mul_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) =
    &THVector_(mul_DEFAULT);

/* Candidate implementations, each tagged with the SIMD extension it uses.
 * The DEFAULT entry is always present and always last. */
static FunctionDescription THVector_(mul_DISPATCHTABLE)[] = {
#if defined(__NEON__) && defined(TH_REAL_IS_FLOAT)
  FUNCTION_IMPL(THVector_(mul_NEON), SIMDExtension_NEON),
#endif
#if defined(USE_AVX) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  FUNCTION_IMPL(THVector_(mul_AVX), SIMDExtension_AVX),
#endif
#if (defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
     || defined(USE_SSE4_1) || defined(USE_SSE4_2)) \
    && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  FUNCTION_IMPL(THVector_(mul_SSE), SIMDExtension_SSE),
#endif
  FUNCTION_IMPL(THVector_(mul_DEFAULT), SIMDExtension_DEFAULT)
};

/* Public stub: forwards its arguments to the currently selected implementation. */
void THVector_(mul)(real *y, const real *x, const real c, const ptrdiff_t n)
{
  (*THVector_(mul_DISPATCHPTR))(y, x, c, n);
}
/* cdiv: dispatch pointer, candidate table and public stub.
 * The pointer starts out at the portable implementation. */
static void (*THVector_(cdiv_DISPATCHPTR))(real *, const real *, const real *, const ptrdiff_t) =
    &THVector_(cdiv_DEFAULT);

/* Candidate implementations, each tagged with the SIMD extension it uses.
 * The DEFAULT entry is always present and always last. */
static FunctionDescription THVector_(cdiv_DISPATCHTABLE)[] = {
#if defined(__NEON__) && defined(TH_REAL_IS_FLOAT)
  FUNCTION_IMPL(THVector_(cdiv_NEON), SIMDExtension_NEON),
#endif
#if defined(USE_AVX) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  FUNCTION_IMPL(THVector_(cdiv_AVX), SIMDExtension_AVX),
#endif
#if (defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
     || defined(USE_SSE4_1) || defined(USE_SSE4_2)) \
    && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  FUNCTION_IMPL(THVector_(cdiv_SSE), SIMDExtension_SSE),
#endif
  FUNCTION_IMPL(THVector_(cdiv_DEFAULT), SIMDExtension_DEFAULT)
};

/* Public stub: forwards its arguments to the currently selected implementation. */
void THVector_(cdiv)(real *z, const real *x, const real *y, const ptrdiff_t n)
{
  (*THVector_(cdiv_DISPATCHPTR))(z, x, y, n);
}
/* div: dispatch pointer, candidate table and public stub.
 * The pointer starts out at the portable implementation. */
static void (*THVector_(div_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) =
    &THVector_(div_DEFAULT);

/* Candidate implementations, each tagged with the SIMD extension it uses.
 * The DEFAULT entry is always present and always last. */
static FunctionDescription THVector_(div_DISPATCHTABLE)[] = {
#if defined(__NEON__) && defined(TH_REAL_IS_FLOAT)
  FUNCTION_IMPL(THVector_(div_NEON), SIMDExtension_NEON),
#endif
#if defined(USE_AVX) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  FUNCTION_IMPL(THVector_(div_AVX), SIMDExtension_AVX),
#endif
#if (defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
     || defined(USE_SSE4_1) || defined(USE_SSE4_2)) \
    && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  FUNCTION_IMPL(THVector_(div_SSE), SIMDExtension_SSE),
#endif
  FUNCTION_IMPL(THVector_(div_DEFAULT), SIMDExtension_DEFAULT)
};

/* Public stub: forwards its arguments to the currently selected implementation. */
void THVector_(div)(real *y, const real *x, const real c, const ptrdiff_t n)
{
  (*THVector_(div_DISPATCHPTR))(y, x, c, n);
}
/* Initializes the dispatch pointers at runtime. This needs to be called for any
* SIMD implementation to be selected; until then every dispatch pointer refers to
* its *_DEFAULT implementation.
* The function checks once which SIMD extensions the host supports, and then runs
* INIT_DISPATCH_PTR for each operation that has a dispatch table (fill, cadd, add,
* cmul, mul, cdiv, div) to choose the best function.
* NOTE: As implemented, each dispatch pointer is initialized to the first supported
* function in its table. This means that in the dispatch tables, implementations
* supporting more recent extensions need to come first.
*/
void THVector_(vectorDispatchInit)(void)
{
/* NOTE(review): hostSimdExts is not referenced directly below, so INIT_DISPATCH_PTR
* presumably reads it by name -- confirm against the macro before renaming it. */
uint32_t hostSimdExts = detectHostSIMDExtensions();
INIT_DISPATCH_PTR(fill);
INIT_DISPATCH_PTR(cadd);
INIT_DISPATCH_PTR(add);
INIT_DISPATCH_PTR(cmul);
INIT_DISPATCH_PTR(mul);
INIT_DISPATCH_PTR(cdiv);
INIT_DISPATCH_PTR(div);
}
#endif