Add rank loss option to MLP
Summary: This diff adds an option to use rank loss instead of cross-entropy loss during training. It assumes that the data is loaded in batches that correspond to sessions, as was already implemented for RNN training.
Differential Revision: D4261923
fbshipit-source-id: e92a60cc9f53acc1585ac35d1fdb430c2ebbfa33
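
For reference, the rank loss in question is a pairwise logistic ("sigmoid") loss applied within a session, as suggested by the operator code below. The following is a minimal numpy sketch of that loss and its analytic gradient; the helper name pairwise_rank_loss, the variable names, and the sign conventions are illustrative assumptions, not the operator's actual interface.

# Minimal numpy sketch of a pairwise logistic ("sigmoid") rank loss for a
# single session. For reference only; names and sign conventions are
# illustrative and may differ from the operator's implementation.
import numpy as np

def pairwise_rank_loss(scores, labels):
    """Sum of log(1 + exp(-s * (x_i - x_j))) over pairs with differing labels,
    where s = +1 if labels[i] > labels[j] else -1."""
    loss = 0.0
    grad = np.zeros_like(scores, dtype=float)
    n = len(scores)
    for i in range(n):
        for j in range(i + 1, n):
            if labels[i] == labels[j]:
                continue
            s = 1.0 if labels[i] > labels[j] else -1.0
            diff = scores[i] - scores[j]
            loss += np.log1p(np.exp(-s * diff))
            # d/dx_i log(1 + exp(-s * diff)) = -s / (1 + exp(s * diff))
            g = -s / (1.0 + np.exp(s * diff))
            grad[i] += g
            grad[j] -= g
    return loss, grad
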
diff --git a/caffe2/operators/rank_loss_op.cc b/caffe2/operators/rank_loss_op.cc
index 7b2b259..6c6a616 100644
--- a/caffe2/operators/rank_loss_op.cc
+++ b/caffe2/operators/rank_loss_op.cc
@@ -89,9 +89,9 @@
}
// only use sigmoid loss function at the moment
auto sign = labelData[i] > labelData[j] ? 1 : -1;
- auto grad = -sign * dYdata[0] / (1 + exp(sign * (Xdata[i] - Xdata[j])));
- dXdata[i] += grad;
- dXdata[j] -= grad;
+ auto grad = sign * dYdata[0] / (1 + exp(sign * (Xdata[j] - Xdata[i])));
+ dXdata[i] -= grad;
+ dXdata[j] += grad;
}
}
return true;
diff --git a/caffe2/python/operator_test/rank_loss_operator_test.py b/caffe2/python/operator_test/rank_loss_operator_test.py
index 4fe1712..41ec14d 100644
--- a/caffe2/python/operator_test/rank_loss_operator_test.py
+++ b/caffe2/python/operator_test/rank_loss_operator_test.py
@@ -78,7 +78,7 @@
self.assertAlmostEqual(
np.asscalar(dx[0]),
np.asscalar(-dY[0] * sign / (1 + np.exp(sign * (X[0] - X[1])))),
- delta=1e-4)
+ delta=1e-3)
self.assertEqual(np.asscalar(dx[0]), np.asscalar(-dx[1]))
delta = 1e-3
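
The relaxed tolerance above is consistent with a finite-difference gradient check. As a rough illustration only (reusing the hypothetical pairwise_rank_loss sketch above, not the actual test harness), such a check compares the analytic gradient against central differences:

# Hypothetical finite-difference check against the sketch above; eps and the
# 1e-3 tolerance mirror the relaxed delta in the test, not the real harness.
def numeric_grad(scores, labels, eps=1e-4):
    g = np.zeros_like(scores, dtype=float)
    for k in range(len(scores)):
        bumped = scores.astype(float)
        bumped[k] += eps
        lp, _ = pairwise_rank_loss(bumped, labels)
        bumped[k] -= 2 * eps
        lm, _ = pairwise_rank_loss(bumped, labels)
        g[k] = (lp - lm) / (2 * eps)
    return g

scores = np.array([0.5, -0.2])
labels = np.array([1.0, 0.0])
_, analytic = pairwise_rank_loss(scores, labels)
assert np.allclose(analytic, numeric_grad(scores, labels), atol=1e-3)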