blob: 14a60e8c008ef8a417764ae44a89387aca191124 [file] [log] [blame]
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/PReLU.c"
#else
void THNN_(PReLU_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *weight,
THIndex_t nOutputPlane)
{
THTensor_(resizeAs)(output, input);
if (nOutputPlane == 0)
{
// handle shared parameter case
real w = *THTensor_(data)(weight);
TH_TENSOR_APPLY2(real, output, real, input,
*output_data = (*input_data > 0) ? *input_data : w*(*input_data);
);
}
else
{
input = THTensor_(newContiguous)(input);
int64_t bs = 1, ks = 1;
{
int64_t input_ndim = THTensor_(nDimension)(input);
if (input->size[input_ndim > 1] != nOutputPlane)
THError("Wrong number of input planes. Expected %d but got %d.", nOutputPlane, input->size[input_ndim > 1]);
if (input_ndim > 1) {
bs = input->size[0];
for (int d = 2; d < input_ndim; d++) {
ks *= input->size[d];
}
}
}
real *output_data = THTensor_(data)(output);
real *input_data = THTensor_(data)(input);
real *weight_data = THTensor_(data)(weight);
THIndex_t i, j, k;
#pragma omp parallel for private(j,k)
for (i = 0; i < bs; ++i)
{
real* n_input_data = input_data + i*nOutputPlane*ks;
real* n_output_data = output_data + i*nOutputPlane*ks;
for (j = 0; j < nOutputPlane; ++j)
{
for (k = 0; k < ks; ++k)
n_output_data[k] = (n_input_data[k] > 0) ? n_input_data[k] : weight_data[j] * n_input_data[k];
n_input_data += ks;
n_output_data += ks;
}
}
THTensor_(free)(input);
}
}
void THNN_(PReLU_updateGradInput)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *weight,
THIndex_t nOutputPlane)
{
THNN_CHECK_NELEMENT(input, gradOutput);
THTensor_(resizeAs)(gradInput, input);
if (nOutputPlane == 0)
{
real w = THTensor_(data)(weight)[0];
TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
if ((*input_data) > 0)
*gradInput_data = *gradOutput_data;
else
*gradInput_data = w * (*gradOutput_data);
);
}
else
{
input = THTensor_(newContiguous)(input);
gradOutput = THTensor_(newContiguous)(gradOutput);
weight = THTensor_(newContiguous)(weight);
const real *input_data = THTensor_(data)(input);
const real *gradOutput_data = THTensor_(data)(gradOutput);
const real *weight_data = THTensor_(data)(weight);
real *gradInput_data = THTensor_(data)(gradInput);
int64_t bs = 1, ks = 1;
{
int64_t input_ndim = THTensor_(nDimension)(input);
if (input->size[input_ndim > 1] != nOutputPlane)
THError("Wrong number of input planes. Expected %d but got %d.", nOutputPlane, input->size[input_ndim > 1]);
if (input_ndim > 1) {
bs = input->size[0];
for (int d = 2; d < input_ndim; d++) {
ks *= input->size[d];
}
}
}
THIndex_t i, j, k;
#pragma omp parallel for private(j,k)
for (i = 0; i < bs; ++i)
{
const real *n_input_data = input_data + i*nOutputPlane*ks;
const real *n_gradOutput_data = gradOutput_data + i*nOutputPlane*ks;
real *n_gradInput_data = gradInput_data + i*nOutputPlane*ks;
for (j = 0; j < nOutputPlane; ++j)
{
real w = weight_data[j];
for (k = 0; k < ks; ++k)
{
if (n_input_data[k] > 0)
n_gradInput_data[k] = n_gradOutput_data[k];
else
n_gradInput_data[k] = n_gradOutput_data[k] * w;
}
n_input_data += ks;
n_gradInput_data += ks;
n_gradOutput_data += ks;
}
}
THTensor_(free)(input);
THTensor_(free)(gradOutput);
THTensor_(free)(weight);
}
}
void THNN_(PReLU_accGradParameters)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
THTensor *gradInput,
THTensor *weight,
THTensor *gradWeight,
THTensor *gradWeightBuf,
THTensor *gradWeightBuf2,
THIndex_t nOutputPlane,
accreal scale_)
{
real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
THNN_CHECK_NELEMENT(input, gradOutput);
if (nOutputPlane == 0)
{
real *gradWeight_data = THTensor_(data)(gradWeight);
real sum = 0;
TH_TENSOR_APPLY2(real, input, real, gradOutput,
if ((*input_data) <= 0)
sum += (*input_data) * (*gradOutput_data);
);
gradWeight_data[0] += scale * sum;
}
else
{
THArgCheck(THTensor_(isContiguous)(gradWeight), 6, "gradWeight needs to be contiguous");
input = THTensor_(newContiguous)(input);
gradOutput = THTensor_(newContiguous)(gradOutput);
weight = THTensor_(newContiguous)(weight);
int64_t bs = 1, ks = 1;
{
int64_t input_ndim = THTensor_(nDimension)(input);
if (input->size[input_ndim > 1] != nOutputPlane)
THError("Wrong number of input planes. Expected %d but got %d.", nOutputPlane, input->size[input_ndim > 1]);
if (input_ndim > 1) {
bs = input->size[0];
for (int d = 2; d < input_ndim; d++) {
ks *= input->size[d];
}
}
}
const real *input_data = THTensor_(data)(input);
const real *gradOutput_data = THTensor_(data)(gradOutput);
const real *weight_data = THTensor_(data)(weight);
real *gradWeight_data = THTensor_(data)(gradWeight);
THIndex_t i, j, k;
for (i = 0; i < bs; ++i)
{
const real *n_input_data = input_data + i*nOutputPlane*ks;
const real *n_gradOutput_data = gradOutput_data + i*nOutputPlane*ks;
for (j = 0; j < nOutputPlane; ++j)
{
real sum = 0;
for (k = 0; k < ks; ++k)
if (n_input_data[k] <= 0)
sum += n_gradOutput_data[k] * n_input_data[k];
gradWeight_data[j] += scale * sum;
n_input_data += ks;
n_gradOutput_data += ks;
}
}
THTensor_(free)(input);
THTensor_(free)(gradOutput);
THTensor_(free)(weight);
}
}
#endif