blob: 327b006accc2188f80aec931aac11d3e14012c12 [file] [log] [blame]
/*
 * Store the constant c into x[0] .. x[n-1].
 *
 * x: destination array, written for indices 0 .. n-1.
 * c: value stored into every element.
 * n: number of elements; nothing is written when n <= 0.
 *
 * The main loop handles four elements per iteration and the trailing loop
 * picks up the remaining 0-3 elements.
 */
static void THFloatVector_fill_NEON(float *x, const float c, const ptrdiff_t n) {
  /* The index has the same type as n so it cannot overflow before reaching
     n on platforms where long is narrower than ptrdiff_t. */
  ptrdiff_t i = 0;

  /* "n - i >= 4" keeps every full group of four in the unrolled loop
     (including the last one) and cannot overflow for any n. */
  for (; n - i >= 4; i += 4) {
    x[i] = c;
    x[i + 1] = c;
    x[i + 2] = c;
    x[i + 3] = c;
  }

  /* Remainder: fewer than four elements left. */
  for (; i < n; i++) {
    x[i] = c;
  }
}
/*
 * Element-wise difference: z[i] = x[i] - y[i] for i in 0 .. n-1.
 *
 * z: destination array, written for indices 0 .. n-1.
 * x: minuend array, read for indices 0 .. n-1.
 * y: subtrahend array, read for indices 0 .. n-1.
 * n: number of elements; nothing is read or written when n <= 0.
 *
 * The main loop handles four elements per iteration and the trailing loop
 * picks up the remaining 0-3 elements.
 */
static void THFloatVector_diff_NEON(float *z, const float *x, const float *y, const ptrdiff_t n) {
  /* The index has the same type as n so it cannot overflow before reaching
     n on platforms where long is narrower than ptrdiff_t. */
  ptrdiff_t i = 0;

  /* "n - i >= 4" keeps every full group of four in the unrolled loop
     (including the last one) and cannot overflow for any n. */
  for (; n - i >= 4; i += 4) {
    z[i] = x[i] - y[i];
    z[i + 1] = x[i + 1] - y[i + 1];
    z[i + 2] = x[i + 2] - y[i + 2];
    z[i + 3] = x[i + 3] - y[i + 3];
  }

  /* Remainder: fewer than four elements left. */
  for (; i < n; i++) {
    z[i] = x[i] - y[i];
  }
}
/*
 * In-place scaling: y[i] *= c for i in 0 .. n-1.
 *
 * y: array updated in place for indices 0 .. n-1.
 * c: scale factor applied to every element.
 * n: number of elements; nothing is touched when n <= 0.
 *
 * The main loop handles four elements per iteration and the trailing loop
 * picks up the remaining 0-3 elements.
 */
static void THFloatVector_scale_NEON(float *y, const float c, const ptrdiff_t n) {
  /* The index has the same type as n so it cannot overflow before reaching
     n on platforms where long is narrower than ptrdiff_t. */
  ptrdiff_t i = 0;

  /* "n - i >= 4" keeps every full group of four in the unrolled loop
     (including the last one) and cannot overflow for any n. */
  for (; n - i >= 4; i += 4) {
    y[i] *= c;
    y[i + 1] *= c;
    y[i + 2] *= c;
    y[i + 3] *= c;
  }

  /* Remainder: fewer than four elements left. */
  for (; i < n; i++) {
    y[i] *= c;
  }
}
/*
 * In-place element-wise product: y[i] *= x[i] for i in 0 .. n-1.
 *
 * y: array updated in place for indices 0 .. n-1.
 * x: multiplier array, read for indices 0 .. n-1.
 * n: number of elements; nothing is touched when n <= 0.
 *
 * The main loop handles four elements per iteration and the trailing loop
 * picks up the remaining 0-3 elements.
 */
static void THFloatVector_mul_NEON(float *y, const float *x, const ptrdiff_t n) {
  /* The index has the same type as n so it cannot overflow before reaching
     n on platforms where long is narrower than ptrdiff_t. */
  ptrdiff_t i = 0;

  /* "n - i >= 4" keeps every full group of four in the unrolled loop
     (including the last one) and cannot overflow for any n. */
  for (; n - i >= 4; i += 4) {
    y[i] *= x[i];
    y[i + 1] *= x[i + 1];
    y[i + 2] *= x[i + 2];
    y[i + 3] *= x[i + 3];
  }

  /* Remainder: fewer than four elements left. */
  for (; i < n; i++) {
    y[i] *= x[i];
  }
}
/*
 * Scaled accumulate: y[i] += c * x[i] for i in 0 .. n-1.
 *
 * y: array updated in place for indices 0 .. n-1.
 * x: source array, read for indices 0 .. n-1.
 * c: factor applied to each x[i] before it is added to y[i].
 * n: number of elements; nothing is touched when n <= 0.
 *
 * The main loop handles four elements per iteration and the trailing loop
 * picks up the remaining 0-3 elements.
 */
static void THFloatVector_add_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
  /* The index has the same type as n so it cannot overflow before reaching
     n on platforms where long is narrower than ptrdiff_t. */
  ptrdiff_t i = 0;

  /* "n - i >= 4" keeps every full group of four in the unrolled loop
     (including the last one) and cannot overflow for any n. */
  for (; n - i >= 4; i += 4) {
    y[i] += c * x[i];
    y[i + 1] += c * x[i + 1];
    y[i + 2] += c * x[i + 2];
    y[i + 3] += c * x[i + 3];
  }

  /* Remainder: fewer than four elements left. */
  for (; i < n; i++) {
    y[i] += c * x[i];
  }
}