#include "caffe2/operators/distance_op.h"
namespace caffe2 {
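// SquaredL2Distance computes, for each row i,
//   Z[i] = 0.5 * sum_j (X[i][j] - Y[i][j])^2,
// expanded below as 0.5 * (dot(x, x) + dot(y, y)) - dot(x, y) so that each
// row needs only three BLAS dot products and no explicit X - Y temporary.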
template<>
bool SquaredL2DistanceOp<float, CPUContext>::RunOnDevice() {
auto& X = Input(0);
auto& Y = Input(1);
auto* distance = Output(0);
CAFFE_ENFORCE_EQ(X.ndim(), Y.ndim());
for (int i = 0; i < X.ndim(); ++i) {
CAFFE_ENFORCE_EQ(X.dim32(i), Y.dim32(i));
}
int N = X.ndim() > 0 ? X.dim32(0) : 1;
distance->Resize(N);
int D = N > 0 ? X.size() / N : 0;
float* distance_data = distance->mutable_data<float>();
const float* X_data = X.data<float>();
const float* Y_data = Y.data<float>();
for (int i = 0; i < N; ++i) {
float Xscale, Yscale, cross;
math::Dot<float, CPUContext>(
D, X_data + i * D, X_data + i * D, &Xscale, &context_);
math::Dot<float, CPUContext>(
D, Y_data + i * D, Y_data + i * D, &Yscale, &context_);
math::Dot<float, CPUContext>(
D, X_data + i * D, Y_data + i * D, &cross, &context_);
distance_data[i] = (Xscale + Yscale) * 0.5 - cross;
}
return true;
}
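// L1Distance computes, for each row i,
//   Z[i] = sum_j |X[i][j] - Y[i][j]|,
// evaluated with an Eigen map over each row instead of an explicit inner loop.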
template <>
bool L1DistanceOp<float, CPUContext>::RunOnDevice() {
auto& X = Input(0);
auto& Y = Input(1);
auto* distance = Output(0);
CAFFE_ENFORCE_EQ(X.ndim(), Y.ndim());
for (int i = 0; i < X.ndim(); ++i) {
CAFFE_ENFORCE_EQ(X.dim32(i), Y.dim32(i));
}
int N = X.ndim() > 0 ? X.dim32(0) : 1;
distance->Resize(N);
int D = N > 0 ? X.size() / N : 0;
const float* X_data = X.data<float>();
const float* Y_data = Y.data<float>();
  float* distance_data = distance->mutable_data<float>();
  for (int i = 0; i < N; ++i) {
    distance_data[i] =
        (ConstEigenVectorMap<float>(X_data + i * D, D).array() -
         ConstEigenVectorMap<float>(Y_data + i * D, D).array())
            .abs()
            .sum();
  }
return true;
}
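// L1DistanceGradient backpropagates through the row-wise L1 distance using
// the subgradient of |x - y|:
//   dX[i][j] = dZ[i] * sign(X[i][j] - Y[i][j]),  dY[i][j] = -dX[i][j],
// with differences within kEps of zero mapped to a zero (sub)gradient at the
// kink of the absolute value.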
template <>
bool L1DistanceGradientOp<float, CPUContext>::RunOnDevice() {
auto& X = Input(0);
auto& Y = Input(1);
auto& dDistance = Input(2);
auto* dX = Output(0);
auto* dY = Output(1);
CAFFE_ENFORCE_EQ(X.ndim(), Y.ndim());
for (int i = 0; i < X.ndim(); ++i) {
CAFFE_ENFORCE_EQ(X.dim32(i), Y.dim32(i));
}
int N = X.ndim() > 0 ? X.dim32(0) : 1;
int D = N > 0 ? X.size() / N : 0;
  CAFFE_ENFORCE_EQ(dDistance.ndim(), 1);
  CAFFE_ENFORCE_EQ(dDistance.dim32(0), N);
dX->ResizeLike(X);
dY->ResizeLike(Y);
  const float* X_data = X.data<float>();
  const float* Y_data = Y.data<float>();
  const float* dDistance_data = dDistance.data<float>();
  float* dX_data = dX->mutable_data<float>();
  float* dY_data = dY->mutable_data<float>();
  const float kEps = 1e-12f;
  for (int i = 0; i < N; ++i) {
    auto offset = i * D;
    for (int j = 0; j < D; ++j) {
      const float temp = X_data[offset + j] - Y_data[offset + j];
      if (temp < -kEps) {
        dX_data[offset + j] = -dDistance_data[i];
        dY_data[offset + j] = dDistance_data[i];
      } else if (temp > kEps) {
        dX_data[offset + j] = dDistance_data[i];
        dY_data[offset + j] = -dDistance_data[i];
      } else {
        dX_data[offset + j] = 0;
        dY_data[offset + j] = 0;
      }
    }
  }
return true;
}
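// CosineSimilarity computes, for each row i,
//   Z[i] = dot(x, y) / (||x|| * ||y||),
// with the squared norms clamped to kEps before the square root so that
// all-zero rows do not divide by zero.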
template <>
bool CosineSimilarityOp<float, CPUContext>::RunOnDevice() {
auto& X = Input(X_IN);
auto& Y = Input(Y_IN);
auto* result = Output(COS_OUT);
CAFFE_ENFORCE_EQ(X.ndim(), Y.ndim());
for (int i = 0; i < X.ndim(); ++i) {
CAFFE_ENFORCE_EQ(X.dim32(i), Y.dim32(i));
}
const int N = X.ndim() > 0 ? X.dim32(0) : 1;
const int D = X.size_from_dim(1);
result->Resize(N);
float* result_data = result->mutable_data<float>();
const float* X_data = X.data<float>();
const float* Y_data = Y.data<float>();
float X2, Y2;
const float kEps = 1e-12f;
for (int i = 0; i < N; ++i) { // TODO: multithreading
auto offset = i * D;
math::Dot<float, CPUContext>(
D, X_data + offset, X_data + offset, &X2, &context_);
math::Dot<float, CPUContext>(
D, Y_data + offset, Y_data + offset, &Y2, &context_);
math::Dot<float, CPUContext>(
D, X_data + offset, Y_data + offset, result_data + i, &context_);
result_data[i] /= std::sqrt(std::max(X2, kEps) * std::max(Y2, kEps));
}
return true;
}
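// CosineSimilarityGradient differentiates f(x, y) = dot(x, y) / (||x|| * ||y||):
//   df/dx = y / (||x|| * ||y||) - dot(x, y) * x / (||x||^3 * ||y||),
// and symmetrically for df/dy. Below, the Scale call writes the first term
// and the Axpy call accumulates the second.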
template <>
bool CosineSimilarityGradientOp<float, CPUContext>::RunOnDevice() {
auto& X = Input(X_IN);
auto& Y = Input(Y_IN);
auto& dCos = Input(DER_COS_IN);
auto* dX = Output(DER_X_OUT);
auto* dY = Output(DER_Y_OUT);
const int N = X.ndim() > 0 ? X.dim32(0) : 1;
const int D = X.size_from_dim(1);
  CAFFE_ENFORCE_EQ(X.ndim(), Y.ndim());
  for (int i = 0; i < X.ndim(); ++i) {
    CAFFE_ENFORCE_EQ(X.dim32(i), Y.dim32(i));
  }
  CAFFE_ENFORCE_EQ(dCos.ndim(), 1);
  CAFFE_ENFORCE_EQ(dCos.dim32(0), N);
dX->ResizeLike(X);
dY->ResizeLike(Y);
const auto* X_data = X.template data<float>();
const auto* Y_data = Y.template data<float>();
const auto* dCos_data = dCos.template data<float>();
auto* dX_data = dX->template mutable_data<float>();
auto* dY_data = dY->template mutable_data<float>();
float XN, YN, XY;
const float kEps = 1e-12f;
for (int i = 0; i < N; ++i) { // TODO: multithreading
auto offset = i * D;
// TODO: cache these result from the forward pass
// ||x||
math::Dot<float, CPUContext>(
D, X_data + offset, X_data + offset, &XN, &context_);
XN = std::sqrt(std::max(XN, kEps));
// ||y||
math::Dot<float, CPUContext>(
D, Y_data + offset, Y_data + offset, &YN, &context_);
YN = std::sqrt(std::max(YN, kEps));
// ||x|| * || y ||
float XYN = XN * YN;
// x^Ty
math::Dot<float, CPUContext>(
D, X_data + offset, Y_data + offset, &XY, &context_);
math::Scale<float, CPUContext>(
D, dCos_data[i] / XYN, Y_data + offset, dX_data + offset, &context_);
  math::Axpy<float, CPUContext>(
D,
-dCos_data[i] * XY / (XN * XN * XYN),
X_data + offset,
dX_data + offset,
&context_);
math::Scale<float, CPUContext>(
D, dCos_data[i] / XYN, X_data + offset, dY_data + offset, &context_);
  math::Axpy<float, CPUContext>(
D,
-dCos_data[i] * XY / (YN * YN * XYN),
Y_data + offset,
dY_data + offset,
&context_);
}
return true;
}
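// DotProduct computes, for each row i,
//   Z[i] = sum_j X[i][j] * Y[i][j].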
template <>
bool DotProductOp<float, CPUContext>::RunOnDevice() {
auto& X = Input(X_IN);
auto& Y = Input(Y_IN);
auto* result = Output(DOT_OUT);
CAFFE_ENFORCE_EQ(X.ndim(), Y.ndim());
for (int i = 0; i < X.ndim(); ++i) {
CAFFE_ENFORCE_EQ(X.dim32(i), Y.dim32(i), "dimension at ", i);
}
int N, D;
if (X.size() > 0) {
N = X.ndim() > 0 ? X.dim32(0) : 1;
D = X.size() / N;
} else {
N = 0;
D = 0;
}
result->Resize(N);
float* result_data = result->template mutable_data<float>();
const float* X_data = X.template data<float>();
const float* Y_data = Y.template data<float>();
for (int i = 0; i < N; ++i) { // TODO: multithreading
auto offset = i * D;
math::Dot<float, CPUContext>(
D, X_data + offset, Y_data + offset, result_data + i, &context_);
}
return true;
}
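// DotProductGradient uses d(dot(x, y))/dx = y and d(dot(x, y))/dy = x, so
// each output row is the opposite input row scaled by the incoming
// gradient dZ[i].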
template <>
bool DotProductGradientOp<float, CPUContext>::RunOnDevice() {
auto& X = Input(X_IN);
auto& Y = Input(Y_IN);
auto& dDot = Input(DER_DOT_IN);
auto* dX = Output(DER_X_OUT);
auto* dY = Output(DER_Y_OUT);
int N, D;
if (X.size() > 0) {
N = X.ndim() > 0 ? X.dim32(0) : 1;
D = X.size() / N;
} else {
N = 0;
D = 0;
}
  CAFFE_ENFORCE_EQ(X.ndim(), Y.ndim());
  for (int i = 0; i < X.ndim(); ++i) {
    CAFFE_ENFORCE_EQ(X.dim32(i), Y.dim32(i));
  }
  CAFFE_ENFORCE_EQ(dDot.ndim(), 1);
  CAFFE_ENFORCE_EQ(dDot.dim32(0), N);
dX->ResizeLike(X);
dY->ResizeLike(Y);
const auto* X_data = X.template data<float>();
const auto* Y_data = Y.template data<float>();
const auto* dDot_data = dDot.template data<float>();
auto* dX_data = dX->template mutable_data<float>();
auto* dY_data = dY->template mutable_data<float>();
for (int i = 0; i < N; ++i) { // TODO: multithreading
auto offset = i * D;
math::Scale<float, CPUContext>(
D, dDot_data[i], X_data + offset, dY_data + offset, &context_);
math::Scale<float, CPUContext>(
D, dDot_data[i], Y_data + offset, dX_data + offset, &context_);
}
return true;
}
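// DotProductWithPadding handles rows of unequal length (DX != DY). With
// replicate = true, the shorter row S (length DS) is tiled across the longer
// row L (length DL):
//   Z[i] = sum_{k=0}^{DL/DS - 1} dot(L[k*DS : (k+1)*DS], S).
// Otherwise the dot product covers the first min(DX, DY) elements, and the
// remainder of the longer row is summed and scaled by pad_value.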
template <>
bool DotProductWithPaddingOp<float, CPUContext>::RunOnDevice() {
auto& X = Input(X_IN);
auto& Y = Input(Y_IN);
auto* result = Output(DOT_OUT);
CAFFE_ENFORCE_EQ(X.ndim(), Y.ndim());
CAFFE_ENFORCE_EQ(X.dim32(0), Y.dim32(0));
int N, D, DX, DY, restD;
if (X.size() > 0) {
N = X.ndim() > 0 ? X.dim32(0) : 1;
DX = X.size() / N;
DY = Y.size() / N;
} else {
N = 0;
DX = 0;
DY = 0;
}
D = std::min(DX, DY);
restD = std::max(DX, DY) - D;
result->Resize(N);
float* result_data = result->mutable_data<float>();
const float* X_data = X.data<float>();
const float* Y_data = Y.data<float>();
for (int i = 0; i < N; ++i) { // TODO: multithreading
auto offsetX = i * DX, offsetY = i * DY;
if (replicate_) {
// L_ for longer vector and S_ for shorter vector
const float *L_data, *S_data;
int DL, DS;
if (DX > DY) {
L_data = X_data + offsetX;
S_data = Y_data + offsetY;
DL = DX;
DS = DY;
} else {
L_data = Y_data + offsetY;
S_data = X_data + offsetX;
DL = DY;
DS = DX;
}
float sum = 0.0;
float tmp = 0.0;
for (int j = 0; j < DL / DS; j++) {
math::Dot<float, CPUContext>(
DS, L_data + j * DS, S_data, &tmp, &context_);
sum += tmp;
}
*(result_data + i) = sum;
} else {
math::Dot<float, CPUContext>(
D, X_data + offsetX, Y_data + offsetY, result_data + i, &context_);
}
if (!replicate_ && DX != DY) {
const float* rest_data;
float rest_sum = 0;
if (DX > DY) {
rest_data = X_data + offsetX + D;
} else {
rest_data = Y_data + offsetY + D;
}
math::Sum<float, CPUContext>(restD, rest_data, &rest_sum, &context_);
result_data[i] += rest_sum * pad_value_;
}
}
return true;
}
// L2
REGISTER_CPU_OPERATOR(SquaredL2Distance,
SquaredL2DistanceOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(SquaredL2DistanceGradient,
SquaredL2DistanceGradientOp<float, CPUContext>);
OPERATOR_SCHEMA(SquaredL2Distance)
.NumInputs(2)
.NumOutputs(1)
.IdenticalTypeAndShapeOfInputDim(0, 0)
.SetDoc(R"DOC(
Given two input float tensors X and Y, produces one output float tensor
of the squared L2 distance between X and Y, computed row-wise as
||X - Y||^2 / 2.
)DOC")
.Input(0, "X", "1D or 2D input tensor")
.Input(1, "Y", "1D or 2D input tensor (must have the same shape as X)")
.Output(0, "Z", "1D output tensor");
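// A small worked example (illustration only, not from the original source):
//   X = [[1, 2], [3, 4]], Y = [[1, 0], [0, 4]]
//   Z = [0.5*((1-1)^2 + (2-0)^2), 0.5*((3-0)^2 + (4-4)^2)] = [2, 4.5]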
OPERATOR_SCHEMA(SquaredL2DistanceGradient).NumInputs(3).NumOutputs(2);
class GetSquaredL2DistanceGradient : public GradientMakerBase {
using GradientMakerBase::GradientMakerBase;
vector<OperatorDef> GetGradientDefs() override {
return SingleGradientDef(
"SquaredL2DistanceGradient", "",
vector<string>{I(0), I(1), GO(0)},
vector<string>{GI(0), GI(1)});
}
};
REGISTER_GRADIENT(SquaredL2Distance, GetSquaredL2DistanceGradient);
// L1
REGISTER_CPU_OPERATOR(L1Distance, L1DistanceOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
L1DistanceGradient,
L1DistanceGradientOp<float, CPUContext>);
OPERATOR_SCHEMA(L1Distance)
.NumInputs(2)
.NumOutputs(1)
.IdenticalTypeAndShapeOfInputDim(0, 0)
.SetDoc(R"DOC(
Given two input float tensors X and Y, produces one output float tensor
of the L1 distance between X and Y, computed row-wise as
L1(x, y) = sum_j |x_j - y_j|.
)DOC")
.Input(0, "X", "1D or 2D input tensor")
.Input(1, "Y", "1D or 2D input tensor (must have the same shape as X)")
.Output(0, "Z", "1D output tensor");
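// A small worked example (illustration only):
//   X = [[1, 2], [3, 4]], Y = [[1, 0], [0, 4]]
//   Z = [|1-1| + |2-0|, |3-0| + |4-4|] = [2, 3]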
OPERATOR_SCHEMA(L1DistanceGradient).NumInputs(3).NumOutputs(2);
class GetL1DistanceGradient : public GradientMakerBase {
using GradientMakerBase::GradientMakerBase;
vector<OperatorDef> GetGradientDefs() override {
return SingleGradientDef(
"L1DistanceGradient",
"",
vector<string>{I(0), I(1), GO(0)},
vector<string>{GI(0), GI(1)});
}
};
REGISTER_GRADIENT(L1Distance, GetL1DistanceGradient);
// Dot Product
REGISTER_CPU_OPERATOR(DotProduct, DotProductOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
DotProductGradient,
DotProductGradientOp<float, CPUContext>);
OPERATOR_SCHEMA(DotProduct)
.NumInputs(2)
.NumOutputs(1)
.IdenticalTypeAndShapeOfInputDim(0, 0)
.SetDoc(R"DOC(
Given two input float tensors X and Y, produces one output float tensor
containing the row-wise dot product of X and Y.
)DOC")
.Input(0, "X", "1D or 2D input tensor")
.Input(1, "Y", "1D or 2D input tensor (must have the same shape as X)")
.Output(0, "Z", "1D output tensor");
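// A small worked example (illustration only):
//   X = [[1, 2], [3, 4]], Y = [[1, 0], [0, 4]]
//   Z = [1*1 + 2*0, 3*0 + 4*4] = [1, 16]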
OPERATOR_SCHEMA(DotProductGradient).NumInputs(3).NumOutputs(2);
class GetDotProductGradient : public GradientMakerBase {
using GradientMakerBase::GradientMakerBase;
vector<OperatorDef> GetGradientDefs() override {
return SingleGradientDef(
"DotProductGradient",
"",
vector<string>{I(0), I(1), GO(0)},
vector<string>{GI(0), GI(1)});
}
};
REGISTER_GRADIENT(DotProduct, GetDotProductGradient);
// Cosine Similarity
REGISTER_CPU_OPERATOR(CosineSimilarity, CosineSimilarityOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
CosineSimilarityGradient,
CosineSimilarityGradientOp<float, CPUContext>);
OPERATOR_SCHEMA(CosineSimilarity)
.NumInputs(2)
.NumOutputs(1)
.IdenticalTypeAndShapeOfInputDim(0, 0)
.SetDoc(R"DOC(
Given two input float tensors X and Y, produces one output float tensor
of the cosine similarity between corresponding rows of X and Y.
)DOC")
.Input(0, "X", "1D or 2D input tensor")
.Input(1, "Y", "1D or 2D input tensor (must have the same shape as X)")
.Output(0, "Z", "1D output tensor");
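// A small worked example (illustration only):
//   X = [[1, 2]], Y = [[1, 0]]
//   Z = [1 / (sqrt(5) * 1)] ~= [0.447]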
OPERATOR_SCHEMA(CosineSimilarityGradient).NumInputs(3).NumOutputs(2);
class GetCosineSimilarityGradient : public GradientMakerBase {
using GradientMakerBase::GradientMakerBase;
vector<OperatorDef> GetGradientDefs() override {
return SingleGradientDef(
"CosineSimilarityGradient",
"",
vector<string>{I(0), I(1), GO(0)},
vector<string>{GI(0), GI(1)});
}
};
REGISTER_GRADIENT(CosineSimilarity, GetCosineSimilarityGradient);
// Dot Product allows padding
REGISTER_CPU_OPERATOR(
DotProductWithPadding,
DotProductWithPaddingOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
DotProductWithPaddingGradient,
DotProductWithPaddingGradientOp<float, CPUContext>);
OPERATOR_SCHEMA(DotProductWithPadding)
.NumInputs(2)
.NumOutputs(1)
.SetDoc(R"DOC(
Given two input float tensors X and Y with different shapes, produces one
output float tensor of the dot product between X and Y. Two strategies are
supported for reconciling the shapes before taking the ordinary dot product:
1) pad the shorter rows (using pad_value) to the length of the longer ones, or
2) replicate the shorter rows until they match the length of the longer ones.
Note that the first dimension of X and Y must be equal; only the second
dimension of X or Y may be padded.
)DOC")
.Input(0, "X", "1D or 2D input tensor")
.Input(1, "Y", "1D or 2D input tensor")
.Output(0, "Z", "1D output tensor")
.IdenticalTypeAndShapeOfInputDim(0, 0)
.Arg("pad_value", "the padding value for tensors with smaller dimension")
.Arg("replicate", "wehther to replicate the smaller tensor or not");
OPERATOR_SCHEMA(DotProductWithPaddingGradient).NumInputs(3).NumOutputs(2);
class GetDotProductWithPaddingGradient : public GradientMakerBase {
using GradientMakerBase::GradientMakerBase;
vector<OperatorDef> GetGradientDefs() override {
float pad_value = 0;
bool replicate = false;
if (ArgumentHelper::HasArgument(Def(), "pad_value")) {
pad_value = GetArgument(Def(), "pad_value").f();
}
if (ArgumentHelper::HasArgument(Def(), "replicate")) {
replicate = GetArgument(Def(), "replicate").i();
}
const auto dot_arg =
vector<Argument>{MakeArgument<float>("pad_value", pad_value),
MakeArgument<bool>("replicate", replicate)};
return SingleGradientDef(
"DotProductWithPaddingGradient",
"",
vector<string>{I(0), I(1), GO(0)},
vector<string>{GI(0), GI(1)},
dot_arg);
}
};
REGISTER_GRADIENT(DotProductWithPadding, GetDotProductWithPaddingGradient);
} // namespace caffe2