blob: 7920fb13b1429f0ba5e25da0665f004e756deb10 [file] [log] [blame]
static void THFloatVector_fill_NEON(float *x, const float c, const ptrdiff_t n) {
long i = 0;
for(; i < n-4; i += 4)
{
x[i] = c;
x[i+1] = c;
x[i+2] = c;
x[i+3] = c;
}
for(; i < n; i++)
x[i] = c;
}
static void THFloatVector_cmul_NEON(float *z, const float *x, const float* y, const ptrdiff_t n) {
long i = 0;
for(; i < n-4; i += 4)
{
z[i] = x[i] * y[i];
z[i+1] = x[i+1] * y[i+1];
z[i+2] = x[i+2] * y[i+2];
z[i+3] = x[i+3] * y[i+3];
}
for(; i < n; i++)
z[i] = x[i] * y[i];
}
static void THFloatVector_muls_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
long i = 0;
for(; i < n-4; i += 4)
{
y[i] = x[i] * c;
y[i+1] = x[i+1] * c;
y[i+2] = x[i+2] * c;
y[i+3] = x[i+3] * c;
}
for(; i < n; i++)
y[i] = x[i] * c;
}
static void THFloatVector_cadd_NEON(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
long i = 0;
for(;i < n-4; i += 4)
{
z[i] = x[i] + c * y[i];
z[i+1] = x[i+1] + c * y[i+1];
z[i+2] = x[i+2] + c * y[i+2];
z[i+3] = x[i+3] + c * y[i+3];
}
for(; i < n; i++)
z[i] = x[i] + c * y[i];
}
static void THFloatVector_adds_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
long i = 0;
for(;i < n-4; i += 4)
{
y[i] = x[i] + c;
y[i+1] = x[i+1] + c;
y[i+2] = x[i+2] + c;
y[i+3] = x[i+3] + c;
}
for(; i < n; i++)
y[i] = x[i] + c;
}
static void THFloatVector_cdiv_NEON(float *z, const float *x, const float *y, const ptrdiff_t n) {
long i = 0;
for(;i < n-4; i += 4)
{
z[i] = x[i] / y[i];
z[i+1] = x[i+1] / y[i+1];
z[i+2] = x[i+2] / y[i+2];
z[i+3] = x[i+3] / y[i+3];
}
for(; i < n; i++)
z[i] = x[i] / y[i];
}
static void THFloatVector_divs_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
long i = 0;
for(;i < n-4; i += 4)
{
y[i] = x[i] / c;
y[i+1] = x[i+1] / c;
y[i+2] = x[i+2] / c;
y[i+3] = x[i+3] / c;
}
for(; i < n; i++)
y[i] = x[i] / c;
}