/**
 * Store the constant c into each of the first n elements of x.
 *
 * The body is plain C: a main loop unrolled by four followed by a scalar
 * tail loop for the leftover elements.
 *
 * @param x  Destination array; x[0] .. x[n-1] are overwritten.
 * @param c  Value written to every element.
 * @param n  Number of elements to write; nothing is written when n <= 0.
 */
static void THFloatVector_fill_NEON(float *x, const float c, const ptrdiff_t n) {
  /* Same type as n, so the index cannot overflow on targets where long
     is narrower than ptrdiff_t. */
  ptrdiff_t i = 0;

  /* Unrolled by four; "n - i >= 4" keeps the final complete group of
     four in this loop instead of handing it to the tail. */
  for (; n - i >= 4; i += 4) {
    x[i]     = c;
    x[i + 1] = c;
    x[i + 2] = c;
    x[i + 3] = c;
  }

  /* Tail: the remaining 0..3 elements. */
  for (; i < n; i++) {
    x[i] = c;
  }
}
| |
| |
/**
 * Element-wise difference: z[i] = x[i] - y[i] for i in [0, n).
 *
 * The body is plain C: a main loop unrolled by four followed by a scalar
 * tail loop for the leftover elements.
 *
 * @param z  Destination array; z[0] .. z[n-1] are overwritten.
 * @param x  Minuend array, read at indices 0 .. n-1.
 * @param y  Subtrahend array, read at indices 0 .. n-1.
 * @param n  Number of elements to process; nothing is done when n <= 0.
 */
static void THFloatVector_diff_NEON(float *z, const float *x, const float *y, const ptrdiff_t n) {
  /* Same type as n, so the index cannot overflow on targets where long
     is narrower than ptrdiff_t. */
  ptrdiff_t i = 0;

  /* Unrolled by four; "n - i >= 4" keeps the final complete group of
     four in this loop instead of handing it to the tail. */
  for (; n - i >= 4; i += 4) {
    z[i]     = x[i]     - y[i];
    z[i + 1] = x[i + 1] - y[i + 1];
    z[i + 2] = x[i + 2] - y[i + 2];
    z[i + 3] = x[i + 3] - y[i + 3];
  }

  /* Tail: the remaining 0..3 elements. */
  for (; i < n; i++) {
    z[i] = x[i] - y[i];
  }
}
| |
| |
/**
 * Scale a vector in place: y[i] *= c for i in [0, n).
 *
 * The body is plain C: a main loop unrolled by four followed by a scalar
 * tail loop for the leftover elements.
 *
 * @param y  Array updated in place at indices 0 .. n-1.
 * @param c  Multiplier applied to every element.
 * @param n  Number of elements to process; nothing is done when n <= 0.
 */
static void THFloatVector_scale_NEON(float *y, const float c, const ptrdiff_t n) {
  /* Same type as n, so the index cannot overflow on targets where long
     is narrower than ptrdiff_t. */
  ptrdiff_t i = 0;

  /* Unrolled by four; "n - i >= 4" keeps the final complete group of
     four in this loop instead of handing it to the tail. */
  for (; n - i >= 4; i += 4) {
    y[i]     *= c;
    y[i + 1] *= c;
    y[i + 2] *= c;
    y[i + 3] *= c;
  }

  /* Tail: the remaining 0..3 elements. */
  for (; i < n; i++) {
    y[i] *= c;
  }
}
| |
/**
 * Element-wise in-place product: y[i] *= x[i] for i in [0, n).
 *
 * The body is plain C: a main loop unrolled by four followed by a scalar
 * tail loop for the leftover elements.
 *
 * @param y  Array updated in place at indices 0 .. n-1.
 * @param x  Multiplier array, read at indices 0 .. n-1.
 * @param n  Number of elements to process; nothing is done when n <= 0.
 */
static void THFloatVector_mul_NEON(float *y, const float *x, const ptrdiff_t n) {
  /* Same type as n, so the index cannot overflow on targets where long
     is narrower than ptrdiff_t. */
  ptrdiff_t i = 0;

  /* Unrolled by four; "n - i >= 4" keeps the final complete group of
     four in this loop instead of handing it to the tail. */
  for (; n - i >= 4; i += 4) {
    y[i]     *= x[i];
    y[i + 1] *= x[i + 1];
    y[i + 2] *= x[i + 2];
    y[i + 3] *= x[i + 3];
  }

  /* Tail: the remaining 0..3 elements. */
  for (; i < n; i++) {
    y[i] *= x[i];
  }
}
| |
/**
 * Scaled accumulate: y[i] += c * x[i] for i in [0, n).
 *
 * The body is plain C: a main loop unrolled by four followed by a scalar
 * tail loop for the leftover elements.
 *
 * @param y  Accumulator array updated in place at indices 0 .. n-1.
 * @param x  Input array, read at indices 0 .. n-1.
 * @param c  Scalar that multiplies each x[i] before it is added to y[i].
 * @param n  Number of elements to process; nothing is done when n <= 0.
 */
static void THFloatVector_add_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
  /* Same type as n, so the index cannot overflow on targets where long
     is narrower than ptrdiff_t. */
  ptrdiff_t i = 0;

  /* Unrolled by four; "n - i >= 4" keeps the final complete group of
     four in this loop instead of handing it to the tail. */
  for (; n - i >= 4; i += 4) {
    y[i]     += c * x[i];
    y[i + 1] += c * x[i + 1];
    y[i + 2] += c * x[i + 2];
    y[i + 3] += c * x[i + 3];
  }

  /* Tail: the remaining 0..3 elements. */
  for (; i < n; i++) {
    y[i] += c * x[i];
  }
}