| #ifndef TH_GENERIC_FILE |
| #define TH_GENERIC_FILE "generic/Square.c" |
| #else |
| |
| void THNN_(Square_updateOutput)( |
| THNNState *state, |
| THTensor *input, |
| THTensor *output) |
| { |
| THTensor_(resizeAs)(output, input); |
| |
| if (input->nDimension == 1 || !THTensor_(isContiguous)(input) || !THTensor_(isContiguous)(output)) |
| { |
| TH_TENSOR_APPLY2(real, output, real, input, |
| *output_data = (*input_data) * (*input_data); |
| ); |
| } |
| else |
| { |
| real *output_data = THTensor_(data)(output); |
| real *input_data = THTensor_(data)(input); |
| long i; |
| #pragma omp parallel for private(i) |
| for (i = 0; i < THTensor_(nElement)(input); i++) |
| output_data[i] = input_data[i]*input_data[i]; |
| } |
| } |
| |
| void THNN_(Square_updateGradInput)( |
| THNNState *state, |
| THTensor *input, |
| THTensor *gradOutput, |
| THTensor *gradInput) |
| { |
| THTensor_(resizeAs)(gradInput, input); |
| |
| if (input->nDimension == 1 || |
| !THTensor_(isContiguous)(input) || |
| !THTensor_(isContiguous)(gradOutput) || |
| !THTensor_(isContiguous)(gradInput)) |
| { |
| TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, |
| *gradInput_data = 2.0 * (*gradOutput_data) * (*input_data); |
| ); |
| } |
| else |
| { |
| real *gradOutput_data = THTensor_(data)(gradOutput); |
| real *gradInput_data = THTensor_(data)(gradInput); |
| real *input_data = THTensor_(data)(input); |
| long i; |
| #pragma omp parallel for private(i) |
| for (i = 0; i < THTensor_(nElement)(gradInput); i++) |
| gradInput_data[i] = 2.0 * gradOutput_data[i] * input_data[i]; |
| } |
| } |
| |
| #endif |