Fixed nits; still need to plumb the forward op's attrs (transpose_a/transpose_b) into the MatMul gradient.
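
For context, here is a minimal sketch of where the MatMul grad attrs are headed (not part of this patch). It assumes ForwardOperation exposes its AttrBuilder as op.attrs and that AttrBuilder::Get can read back the bools recorded via SetAttrBool in the tape op; the four transpose cases follow the commented-out block already in math_grad.cc:

// Sketch only: capture the forward op's attrs alongside its inputs.
class MatMulGradientFunction : public GradientFunction {
 public:
  explicit MatMulGradientFunction(std::vector<AbstractTensorHandle*> f_inputs,
                                  AttrBuilder f_attrs)
      : forward_inputs(f_inputs), attrs(f_attrs) {}

  Status Compute(Context* ctx,
                 absl::Span<AbstractTensorHandle* const> grad_inputs,
                 std::vector<AbstractTensorHandle*>* grad_outputs) override {
    // Recover the transpose flags recorded on the forward MatMul.
    bool t_a = false, t_b = false;
    TF_RETURN_IF_ERROR(attrs.Get("transpose_a", &t_a));
    TF_RETURN_IF_ERROR(attrs.Get("transpose_b", &t_b));

    // With upstream grad U and (conjugated) forward inputs A, B:
    //   !t_a && !t_b :  dA = U   * B^T,  dB = A^T * U     (current code path)
    //   !t_a &&  t_b :  dA = U   * B,    dB = U^T * A
    //    t_a && !t_b :  dA = B   * U^T,  dB = A   * U
    //    t_a &&  t_b :  dA = B^T * U^T,  dB = U^T * A^T
    // Each case is one MatMul call per output, as in the commented-out
    // block further down in math_grad.cc; those calls go here.
    return Status::OK();
  }

 private:
  int64_t counter;
  std::vector<AbstractTensorHandle*> forward_inputs;
  AttrBuilder attrs;
};

The registerer would then become new MatMulGradientFunction(op.inputs, op.attrs), matching the commented hint in the diff below.
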
diff --git a/tensorflow/c/eager/c_api_test_util.cc b/tensorflow/c/eager/c_api_test_util.cc
index 6f3dde0..4c70435 100644
--- a/tensorflow/c/eager/c_api_test_util.cc
+++ b/tensorflow/c/eager/c_api_test_util.cc
@@ -102,7 +102,7 @@
return th;
}
-TFE_TensorHandle* TestMatrixTensorHandleFloat(TFE_Context* ctx, float data[], int64_t dims [], int num_dims){
+TFE_TensorHandle* TestTensorHandleWithDimsFloat(TFE_Context* ctx, float data[], int64_t dims [], int num_dims){
TF_Status* status = TF_NewStatus();
TF_Tensor* t = TFE_AllocateHostTensor(ctx, TF_FLOAT, &dims[0],
num_dims, status);
@@ -114,7 +114,7 @@
return th;
}
-TFE_TensorHandle* TestMatrixTensorHandleInt(TFE_Context* ctx, int data[], int64_t dims [], int num_dims){
+TFE_TensorHandle* TestTensorHandleWithDimsInt(TFE_Context* ctx, int data[], int64_t dims [], int num_dims){
TF_Status* status = TF_NewStatus();
TF_Tensor* t = TFE_AllocateHostTensor(ctx, TF_INT32, &dims[0],
num_dims, status);
diff --git a/tensorflow/c/eager/c_api_test_util.h b/tensorflow/c/eager/c_api_test_util.h
index c998ab2..76d8f5c 100644
--- a/tensorflow/c/eager/c_api_test_util.h
+++ b/tensorflow/c/eager/c_api_test_util.h
@@ -41,10 +41,10 @@
int num_dims);
// Get a Matrix TensorHandle with given float values and dimensions
-TFE_TensorHandle* TestMatrixTensorHandleFloat(TFE_Context* ctx, float data[], int64_t dims [], int num_dims);
+TFE_TensorHandle* TestTensorHandleWithDimsFloat(TFE_Context* ctx, float data[], int64_t dims [], int num_dims);
// Get a Matrix TensorHandle with given int values and dimensions
-TFE_TensorHandle* TestMatrixTensorHandleInt(TFE_Context* ctx, int data[], int64_t dims [], int num_dims);
+TFE_TensorHandle* TestTensorHandleWithDimsInt(TFE_Context* ctx, int data[], int64_t dims [], int num_dims);
// Return a tensor handle containing a 100x100 matrix of floats
diff --git a/tensorflow/c/eager/mnist_gradients_test.cc b/tensorflow/c/eager/mnist_gradients_test.cc
index 7d72c2a..d4dc14b 100644
--- a/tensorflow/c/eager/mnist_gradients_test.cc
+++ b/tensorflow/c/eager/mnist_gradients_test.cc
@@ -51,23 +51,8 @@
}
// ========================= Test Util Functions ==============================
-void printArr(float data[], int n) {
- std::cout << std::endl << "[";
- for (int i = 0; i < n - 1; i++) {
- std::cout << data[i] << ", ";
- }
- std::cout << data[n - 1] << "]" << std::endl;
-}
-float sumArr(float data[], int n) {
- float sum = 0;
- for (int i = 0; i < n; i++) {
- sum += data[i];
- }
- return sum;
-}
-
-// Get a scalar TensorHandle woth given value
+// Get a scalar TensorHandle with given value
Status TestScalarTensorHandle(AbstractContext* ctx, float value,
AbstractTensorHandle** tensor) {
std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
@@ -82,7 +67,7 @@
}
// Get a Matrix TensorHandle with given float values and dimensions
-Status TestMatrixTensorHandleFloat(AbstractContext* ctx, float data[],
+Status TestTensorHandleWithDimsFloat(AbstractContext* ctx, float data[],
int64_t dims[], int num_dims,
AbstractTensorHandle** tensor) {
std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
@@ -91,14 +76,14 @@
TF_ExecutionContextGetTFEContext(wrap(ctx), status.get());
TF_RETURN_IF_ERROR(StatusFromTF_Status(status.get()));
TFE_TensorHandle* input_eager =
- TestMatrixTensorHandleFloat(eager_ctx, data, dims, num_dims);
+ TestTensorHandleWithDimsFloat(eager_ctx, data, dims, num_dims);
*tensor =
unwrap(TF_CreateAbstractTensorFromEagerTensor(input_eager, status.get()));
return Status::OK();
}
// Get a Matrix TensorHandle with given int values and dimensions
-Status TestMatrixTensorHandleInt(AbstractContext* ctx, int data[],
+Status TestTensorHandleWithDimsInt(AbstractContext* ctx, int data[],
int64_t dims[], int num_dims,
AbstractTensorHandle** tensor) {
std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
@@ -107,13 +92,13 @@
TF_ExecutionContextGetTFEContext(wrap(ctx), status.get());
TF_RETURN_IF_ERROR(StatusFromTF_Status(status.get()));
TFE_TensorHandle* input_eager =
- TestMatrixTensorHandleInt(eager_ctx, data, dims, num_dims);
+ TestTensorHandleWithDimsInt(eager_ctx, data, dims, num_dims);
*tensor =
unwrap(TF_CreateAbstractTensorFromEagerTensor(input_eager, status.get()));
return Status::OK();
}
-Status getValue(AbstractTensorHandle* t, TF_Tensor** result_tensor) {
+Status GetValue(AbstractTensorHandle* t, TF_Tensor** result_tensor) {
std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
TF_NewStatus(), TF_DeleteStatus);
TFE_TensorHandle* result_t =
@@ -123,104 +108,29 @@
return Status::OK();
}
-AbstractTensorHandlePtr getMatrixTensorHandleUtilFloat(AbstractContext* ctx,
+AbstractTensorHandlePtr GetTensorHandleUtilFloat(AbstractContext* ctx,
float vals[],
int64_t dims[],
int num_dims) {
AbstractTensorHandlePtr A;
AbstractTensorHandle* a_raw = nullptr;
- Status s = TestMatrixTensorHandleFloat(ctx, vals, dims, num_dims, &a_raw);
+ Status s = TestTensorHandleWithDimsFloat(ctx, vals, dims, num_dims, &a_raw);
A.reset(a_raw);
return A;
}
-AbstractTensorHandlePtr getMatrixTensorHandleUtilInt(AbstractContext* ctx,
+AbstractTensorHandlePtr GetTensorHandleUtilInt(AbstractContext* ctx,
int vals[], int64_t dims[],
int num_dims) {
AbstractTensorHandlePtr A;
AbstractTensorHandle* a_raw = nullptr;
- Status s = TestMatrixTensorHandleInt(ctx, vals, dims, num_dims, &a_raw);
+ Status s = TestTensorHandleWithDimsInt(ctx, vals, dims, num_dims, &a_raw);
A.reset(a_raw);
return A;
}
-void printTensor(AbstractTensorHandle* t, int size) {
- TF_Tensor* tensor;
- Status s = getValue(t, &tensor);
- ASSERT_EQ(errors::OK, s.code()) << s.error_message();
-
- float result_data[size] = {0};
- memcpy(&result_data[0], TF_TensorData(tensor), TF_TensorByteSize(tensor));
- printArr(result_data, size);
-
- TF_DeleteTensor(tensor);
-}
-
// =========================== Start Tests ================================
-TEST_P(CppGradients, TestAddGrad) {
- std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
- TF_NewStatus(), TF_DeleteStatus);
- AbstractContextPtr ctx;
- {
- AbstractContext* ctx_raw = nullptr;
- Status s =
- BuildImmediateExecutionContext(std::get<1>(GetParam()), &ctx_raw);
- ASSERT_EQ(errors::OK, s.code()) << s.error_message();
- ctx.reset(ctx_raw);
- }
-
- AbstractTensorHandlePtr x;
- {
- AbstractTensorHandle* x_raw = nullptr;
- Status s = TestScalarTensorHandle(ctx.get(), 2.0f, &x_raw);
- ASSERT_EQ(errors::OK, s.code()) << s.error_message();
- x.reset(x_raw);
- }
-
- AbstractTensorHandlePtr y;
- {
- AbstractTensorHandle* y_raw = nullptr;
- Status s = TestScalarTensorHandle(ctx.get(), 2.0f, &y_raw);
- ASSERT_EQ(errors::OK, s.code()) << s.error_message();
- y.reset(y_raw);
- }
-
- GradientRegistry registry;
- Status s = RegisterGradients(&registry);
- ASSERT_EQ(errors::OK, s.code()) << s.error_message();
-
- /* Pseudo-code:
- *
- * tape.watch(x)
- * tape.watch(y)
- * y = x + y
- * outputs = tape.gradient(y, [x, y])
- */
-
- std::vector<AbstractTensorHandle*> outputs(2);
- s = RunModel(AddGradModel, ctx.get(), {x.get(), y.get()},
- absl::MakeSpan(outputs),
- /*use_function=*/!std::get<2>(GetParam()), registry);
- ASSERT_EQ(errors::OK, s.code()) << s.error_message();
-
- TF_Tensor* result_tensor;
- s = getValue(outputs[0], &result_tensor);
- ASSERT_EQ(errors::OK, s.code()) << s.error_message();
- auto result_value = static_cast<float*>(TF_TensorData(result_tensor));
- EXPECT_EQ(*result_value, 1.0);
- outputs[0]->Unref();
- TF_DeleteTensor(result_tensor);
- result_tensor = nullptr;
-
- s = getValue(outputs[1], &result_tensor);
- ASSERT_EQ(errors::OK, s.code()) << s.error_message();
- result_value = static_cast<float*>(TF_TensorData(result_tensor));
- EXPECT_EQ(*result_value, 1.0);
- outputs[1]->Unref();
- TF_DeleteTensor(result_tensor);
-}
-
TEST_P(CppGradients, TestMatMulGrad) {
std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
TF_NewStatus(), TF_DeleteStatus);
@@ -240,9 +150,9 @@
int num_dims = 2;
AbstractTensorHandlePtr A =
- getMatrixTensorHandleUtilFloat(ctx.get(), A_vals, A_dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), A_vals, A_dims, num_dims);
AbstractTensorHandlePtr B =
- getMatrixTensorHandleUtilFloat(ctx.get(), B_vals, B_dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), B_vals, B_dims, num_dims);
GradientRegistry registry;
Status s = RegisterGradients(&registry);
@@ -263,7 +173,7 @@
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
TF_Tensor* dA_tensor;
- s = getValue(outputs[0], &dA_tensor);
+ s = GetValue(outputs[0], &dA_tensor);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
float result_data[4] = {0};
@@ -277,7 +187,7 @@
}
TF_Tensor* dB_tensor;
- s = getValue(outputs[1], &dB_tensor);
+ s = GetValue(outputs[1], &dB_tensor);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
memcpy(&result_data[0], TF_TensorData(dB_tensor),
@@ -309,24 +219,24 @@
int64_t dims[] = {2, 2};
int num_dims = 2;
AbstractTensorHandlePtr X =
- getMatrixTensorHandleUtilFloat(ctx.get(), X_vals, dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), X_vals, dims, num_dims);
// W1 = first weights
float W1_vals[] = {-1.0f, 10.0f, .5f, 1.0f};
AbstractTensorHandlePtr W1 =
- getMatrixTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
// W2 = second weights
float W2_vals[] = {.1f, .2f, .3f, -.5f};
AbstractTensorHandlePtr W2 =
- getMatrixTensorHandleUtilFloat(ctx.get(), W2_vals, dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), W2_vals, dims, num_dims);
// y = labels
int y_vals[] = {1, 1};
int64_t dims_y[] = {2};
num_dims = sizeof(dims_y) / sizeof(dims_y[0]);
AbstractTensorHandlePtr y =
- getMatrixTensorHandleUtilInt(ctx.get(), y_vals, dims, num_dims);
+ GetTensorHandleUtilInt(ctx.get(), y_vals, dims, num_dims);
GradientRegistry registry;
@@ -340,7 +250,7 @@
// Verify the Results
TF_Tensor* scores_tensor;
- s = getValue(outputs[0], &scores_tensor);
+ s = GetValue(outputs[0], &scores_tensor);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
float result_data[4] = {0};
@@ -354,7 +264,7 @@
}
TF_Tensor* loss_vals_tensor;
- s = getValue(outputs[1], &loss_vals_tensor);
+ s = GetValue(outputs[1], &loss_vals_tensor);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
memcpy(&result_data[0], TF_TensorData(loss_vals_tensor),
@@ -385,25 +295,25 @@
int64_t X_dims[] = {3, 2};
int num_dims = 2;
AbstractTensorHandlePtr X =
- getMatrixTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
// W1 = first weights
float W1_vals[] = {-1.0f, 10.0f, .5f, 1.0f};
int64_t dims[] = {2, 2};
AbstractTensorHandlePtr W1 =
- getMatrixTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
// W2 = second weights
float W2_vals[] = {.1f, .2f, .3f, -.5f};
AbstractTensorHandlePtr W2 =
- getMatrixTensorHandleUtilFloat(ctx.get(), W2_vals, dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), W2_vals, dims, num_dims);
// y = labels
int y_vals[] = {1, 1, 1};
int64_t y_dims[] = {3};
num_dims = sizeof(y_dims) / sizeof(y_dims[0]);
AbstractTensorHandlePtr y =
- getMatrixTensorHandleUtilInt(ctx.get(), y_vals, y_dims, num_dims);
+ GetTensorHandleUtilInt(ctx.get(), y_vals, y_dims, num_dims);
GradientRegistry registry;
@@ -417,7 +327,7 @@
// Verify the Results
TF_Tensor* scores_tensor;
- s = getValue(outputs[0], &scores_tensor);
+ s = GetValue(outputs[0], &scores_tensor);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
float result_data[6] = {0};
@@ -431,7 +341,7 @@
}
TF_Tensor* loss_vals_tensor;
- s = getValue(outputs[1], &loss_vals_tensor);
+ s = GetValue(outputs[1], &loss_vals_tensor);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
memcpy(&result_data[0], TF_TensorData(loss_vals_tensor),
@@ -465,13 +375,13 @@
int64_t X_dims[] = {2, 3};
int num_dims = 2;
AbstractTensorHandlePtr X =
- getMatrixTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
// W1 = first weights
float W1_vals[] = {1.0f, 2.0f, 3.0f, 4.0f};
int64_t dims[] = {2, 2};
AbstractTensorHandlePtr W1 =
- getMatrixTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
GradientRegistry registry;
@@ -486,7 +396,7 @@
// Verify the Results
TF_Tensor* scores_tensor;
- s = getValue(outputs[0], &scores_tensor);
+ s = GetValue(outputs[0], &scores_tensor);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
float result_data[6] = {0};
@@ -518,7 +428,7 @@
int64_t X_dims[] = {3, 3};
int num_dims = 2;
AbstractTensorHandlePtr X =
- getMatrixTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
GradientRegistry registry;
Status s = RegisterGradients(&registry);
@@ -536,7 +446,7 @@
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
TF_Tensor* dX_tensor;
- s = getValue(outputs[0], &dX_tensor);
+ s = GetValue(outputs[0], &dX_tensor);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
float result_data[9] = {0};
@@ -571,14 +481,14 @@
int64_t X_dims[] = {3, 3};
int num_dims = 2;
AbstractTensorHandlePtr X =
- getMatrixTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
// y = labels
int y_vals[] = {1, 0, 1};
int64_t y_dims[] = {3};
num_dims = sizeof(y_dims) / sizeof(y_dims[0]);
AbstractTensorHandlePtr y =
- getMatrixTensorHandleUtilInt(ctx.get(), y_vals, y_dims, num_dims);
+ GetTensorHandleUtilInt(ctx.get(), y_vals, y_dims, num_dims);
GradientRegistry registry;
Status s = RegisterGradients(&registry);
@@ -602,7 +512,7 @@
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
TF_Tensor* dX_tensor;
- s = getValue(outputs[0], &dX_tensor);
+ s = GetValue(outputs[0], &dX_tensor);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
float result_data[9] = {0};
@@ -638,25 +548,25 @@
int64_t X_dims[] = {2, 2};
int num_dims = 2;
AbstractTensorHandlePtr X =
- getMatrixTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
// W1 = first weights
float W1_vals[] = {-1.0f, 10.0f, .5f, 1.0f};
int64_t dims[] = {2, 2};
AbstractTensorHandlePtr W1 =
- getMatrixTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
// W2 = second weights
float W2_vals[] = {.1f, .2f, .3f, -.5f};
AbstractTensorHandlePtr W2 =
- getMatrixTensorHandleUtilFloat(ctx.get(), W2_vals, dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), W2_vals, dims, num_dims);
// y = labels
int y_vals[] = {1, 1};
int64_t y_dims[] = {2};
num_dims = sizeof(y_dims) / sizeof(y_dims[0]);
AbstractTensorHandlePtr y =
- getMatrixTensorHandleUtilInt(ctx.get(), y_vals, y_dims, num_dims);
+ GetTensorHandleUtilInt(ctx.get(), y_vals, y_dims, num_dims);
// Register Grads
GradientRegistry registry;
@@ -684,7 +594,7 @@
float tolerance = 1e-3;
TF_Tensor* dW1_tensor;
- s = getValue(outputs[0], &dW1_tensor);
+ s = GetValue(outputs[0], &dW1_tensor);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
float result_data[4] = {0};
@@ -698,7 +608,7 @@
}
TF_Tensor* dW2_tensor;
- s = getValue(outputs[1], &dW2_tensor);
+ s = GetValue(outputs[1], &dW2_tensor);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
memcpy(&result_data[0], TF_TensorData(dW2_tensor),
@@ -742,7 +652,7 @@
int num_dims = 2;
AbstractTensorHandlePtr A =
- getMatrixTensorHandleUtilFloat(ctx.get(), A_vals, A_dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), A_vals, A_dims, num_dims);
GradientRegistry registry;
std::vector<AbstractTensorHandle*> outputs(1);
@@ -752,7 +662,7 @@
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
TF_Tensor* dA_tensor;
- s = getValue(outputs[0], &dA_tensor);
+ s = GetValue(outputs[0], &dA_tensor);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
float result_data[4] = {0};
@@ -787,25 +697,25 @@
int64_t X_dims[] = {2, 2};
int num_dims = 2;
AbstractTensorHandlePtr X =
- getMatrixTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
// W1 = first weights
float W1_vals[] = {-.01f, 0.4f, 0.5f, -.2f};
int64_t dims[] = {2, 2};
AbstractTensorHandlePtr W1 =
- getMatrixTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
// W2 = second weights
float W2_vals[] = {.1f, .2f, .3f, -.5f};
AbstractTensorHandlePtr W2 =
- getMatrixTensorHandleUtilFloat(ctx.get(), W2_vals, dims, num_dims);
+ GetTensorHandleUtilFloat(ctx.get(), W2_vals, dims, num_dims);
// y = labels
int y_vals[] = {1, 1};
int64_t y_dims[] = {2};
num_dims = sizeof(y_dims) / sizeof(y_dims[0]);
AbstractTensorHandlePtr y =
- getMatrixTensorHandleUtilInt(ctx.get(), y_vals, y_dims, num_dims);
+ GetTensorHandleUtilInt(ctx.get(), y_vals, y_dims, num_dims);
// Register Grads
GradientRegistry registry;
@@ -817,9 +727,9 @@
weights.push_back(W1.get());
weights.push_back(W2.get());
- // Set learning rate to be 1e-3
+ // Set learning rate to be 1e-1
AbstractTensorHandle* learning_rate = nullptr;
- s = TestScalarTensorHandle(ctx.get(), -1e-2, &learning_rate);
+ s = TestScalarTensorHandle(ctx.get(), 1e-1, &learning_rate);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
// Train
@@ -827,8 +737,7 @@
std::vector<AbstractTensorHandle*> mnist_outputs(3);
std::vector<AbstractTensorHandle*> grads(2);
for (int i = 0; i < num_iters; i++) {
- std::cout << "iter " << i << ": " << std::endl;
-
+
// Run Forward Pass
s = RunModel(MNISTGradModel, ctx.get(),
{X.get(), weights[0], weights[1], y.get()},
@@ -844,23 +753,11 @@
s = UpdateWeights(ctx.get(), grads, weights, learning_rate);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
- // Print Loss
- AbstractTensorHandle* loss_vals = mnist_outputs[2];
- TF_Tensor* loss_tensor;
- s = getValue(loss_vals, &loss_tensor);
- ASSERT_EQ(errors::OK, s.code()) << s.error_message();
-
- float result_data[2] = {0};
- memcpy(&result_data[0], TF_TensorData(loss_tensor),
- TF_TensorByteSize(loss_tensor));
- std::cout << " loss = " << sumArr(result_data, 2) << std::endl;
- std::cout << "-----------------" << std::endl;
- TF_DeleteTensor(loss_tensor);
}
- grads[0]->Unref();
- grads[1]->Unref();
- mnist_outputs[2]->Unref();
+ grads[0]->Unref(); // release W1_grad
+ grads[1]->Unref(); // release W2_grad
+ mnist_outputs[2]->Unref(); // release loss
}
// TODO(b/160888630): Enable this test with mlir after AddInputList is
diff --git a/tensorflow/c/eager/mnist_gradients_util.cc b/tensorflow/c/eager/mnist_gradients_util.cc
index a85ae27..3fb8e9f 100644
--- a/tensorflow/c/eager/mnist_gradients_util.cc
+++ b/tensorflow/c/eager/mnist_gradients_util.cc
@@ -30,6 +30,9 @@
#include "tensorflow/c/tf_tensor.h"
#include "tensorflow/core/lib/llvm_rtti/llvm_rtti.h"
+
+// ========================== Tape Ops ==============================
+
// Computes `inputs[0] + inputs[1]` and records it on the tape.
Status Add(AbstractContext* ctx, Tape* tape,
absl::Span<AbstractTensorHandle* const> inputs,
@@ -71,8 +74,8 @@
TF_RETURN_IF_ERROR(AddInput(matmul_op.get(), inputs[0], &forward_op));
TF_RETURN_IF_ERROR(AddInput(matmul_op.get(), inputs[1], &forward_op));
- matmul_op->SetAttrBool("transpose_a",transpose_a);
- matmul_op->SetAttrBool("transpose_b",transpose_b);
+ TF_RETURN_IF_ERROR(tensorflow::gradients::internal::SetAttrBool(matmul_op.get(), "transpose_a", transpose_a, &forward_op));
+ TF_RETURN_IF_ERROR(tensorflow::gradients::internal::SetAttrBool(matmul_op.get(), "transpose_b", transpose_b, &forward_op));
int num_retvals = 1;
return Execute(matmul_op.get(), ctx, outputs, &num_retvals, &forward_op, tape,
@@ -351,10 +354,6 @@
source_tensors_that_are_targets,
/*output_gradients=*/{}, &out_grads));
- // for (auto sm_output : sm_outputs) {
- // sm_output->Unref();
- // }
-
outputs[0] = out_grads[0];
outputs[1] = out_grads[1];
delete tape;
@@ -451,9 +450,9 @@
AbstractTensorHandle* learning_rate) {
/* Update weights one by one using gradient update rule:
*
- * w += lr*grad[w]
+ * w -= lr*grad[w]
*
- * NOTE: assuming learning rate is already negative
+ * NOTE: assuming learning rate is positive
*/
Status s;
@@ -461,6 +460,11 @@
std::vector<AbstractTensorHandle*> temp_outputs(1);
std::string update_str;
+ // Negate learning rate for gradient descent
+ TF_RETURN_IF_ERROR(ops::Neg(ctx, {learning_rate}, absl::MakeSpan(temp_outputs),
+ "neg_lr")); // Compute -lr
+ learning_rate = temp_outputs[0];
+
for (int i = 0; i < num_grads; i++) {
// Compute dW = -lr * grad(w[i])
update_str = "update_mul_" + std::to_string(i);
@@ -559,3 +563,4 @@
TFE_DeleteContextOptions(opts);
return Status::OK();
}
+
diff --git a/tensorflow/c/eager/mnist_gradients_util.h b/tensorflow/c/eager/mnist_gradients_util.h
index f0975c7..0b705f2 100644
--- a/tensorflow/c/eager/mnist_gradients_util.h
+++ b/tensorflow/c/eager/mnist_gradients_util.h
@@ -32,7 +32,7 @@
using namespace tensorflow::gradients;
using namespace tensorflow::gradients::internal;
-// ========================== tape ==============================
+// ========================== Tape Ops ==============================
// Computes `inputs[0] + inputs[1]` and records it on the tape.
Status Add(AbstractContext* ctx, Tape* tape,
diff --git a/tensorflow/c/experimental/gradients/math_grad.cc b/tensorflow/c/experimental/gradients/math_grad.cc
index b64ac99..558cb70 100644
--- a/tensorflow/c/experimental/gradients/math_grad.cc
+++ b/tensorflow/c/experimental/gradients/math_grad.cc
@@ -12,182 +12,6 @@
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-// #include "tensorflow/c/experimental/gradients/math_grad.h"
-
-// #include "tensorflow/c/eager/abstract_tensor_handle.h"
-// #include "tensorflow/c/experimental/ops/array_ops.h"
-// #include "tensorflow/c/experimental/ops/math_ops.h"
-
-// using std::vector;
-// using tensorflow::ops::Conj;
-// using tensorflow::ops::Identity;
-// using tensorflow::ops::Mul;
-// using tensorflow::ops::MatMul;
-// using tensorflow::ops::ReluGrad;
-// using tensorflow::ops::SparseSoftmaxCrossEntropyLoss;
-
-// namespace tensorflow {
-// namespace gradients {
-// namespace {
-
-// class AddGradientFunction : public GradientFunction {
-// public:
-// Status Compute(Context* ctx,
-// absl::Span<AbstractTensorHandle* const> grad_inputs,
-// vector<AbstractTensorHandle*>* grad_outputs) override {
-// grad_outputs->resize(2);
-// vector<AbstractTensorHandle*> identity_outputs(1);
-// // TODO(b/145674566): Handle name unification in tracing code.
-// // TODO(b/161805092): Support broadcasting.
-
-// std::string name = "Identity_A_" + std::to_string(counter);
-// TF_RETURN_IF_ERROR(ops::Identity(ctx->ctx, {grad_inputs[0]},
-// absl::MakeSpan(identity_outputs),
-// name.c_str()));
-// (*grad_outputs)[0] = identity_outputs[0];
-
-// name = "Identity_B_" + std::to_string(counter);
-// TF_RETURN_IF_ERROR(ops::Identity(ctx->ctx, {grad_inputs[0]},
-// absl::MakeSpan(identity_outputs),
-// name.c_str()));
-// (*grad_outputs)[1] = identity_outputs[0];
-
-// counter += 1;
-// return Status::OK();
-// }
-// ~AddGradientFunction() override {}
-
-// private:
-// long counter;
-// };
-
-
-
-// class MatMulGradientFunction : public GradientFunction {
-// public:
-// explicit MatMulGradientFunction(std::vector<AbstractTensorHandle*> f_inputs)
-// : forward_inputs(f_inputs) {}
-
-// Status Compute(Context* ctx,
-// absl::Span<AbstractTensorHandle* const> grad_inputs,
-// std::vector<AbstractTensorHandle*>* grad_outputs) override {
-// /* Given upstream grad U and a matmul op A*B, the gradients are:
-// *
-// * dA = U * B.T
-// * dB = A.T * U
-// *
-// * where A.T means `transpose(A)`
-// */
-
-// AbstractTensorHandle* upstream_grad = grad_inputs[0];
-// grad_outputs->resize(2);
-// std::vector<AbstractTensorHandle*> matmul_outputs(1);
-
-// // Gradient for A
-// std::string name = "mm_A_" + std::to_string(counter);
-// TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {upstream_grad, forward_inputs[1]},
-// absl::MakeSpan(matmul_outputs), name.c_str(),
-// /*transpose_a = */ false,
-// /*transpose_b = */ true));
-
-// (*grad_outputs)[0] = matmul_outputs[0];
-
-// // Gradient for B
-// name = "mm_B_" + std::to_string(counter);
-// TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {forward_inputs[0], upstream_grad},
-// absl::MakeSpan(matmul_outputs), name.c_str(),
-// /*transpose_a = */ true,
-// /*transpose_b = */ false));
-
-// (*grad_outputs)[1] = matmul_outputs[0];
-
-// counter += 1; // update counter for names
-// return Status::OK();
-// }
-// ~MatMulGradientFunction() override {}
-
-// private:
-// long counter;
-// std::vector<AbstractTensorHandle*> forward_inputs;
-// };
-
-// class ReluGradientFunction : public GradientFunction {
-// public:
-// explicit ReluGradientFunction(std::vector<AbstractTensorHandle*> f_inputs)
-// : forward_inputs(f_inputs) {}
-
-// Status Compute(Context* ctx,
-// absl::Span<AbstractTensorHandle* const> grad_inputs,
-// std::vector<AbstractTensorHandle*>* grad_outputs) override {
-// AbstractTensorHandle* upstream_grad = grad_inputs[0];
-// AbstractTensorHandle* input_features = forward_inputs[0];
-// grad_outputs->resize(1);
-// std::vector<AbstractTensorHandle*> relugrad_outputs(1);
-
-// // Calculate Grad
-// std::string name = "relu_grad" + std::to_string(counter);
-
-// TF_RETURN_IF_ERROR(ReluGrad(ctx->ctx, {upstream_grad, input_features},
-// absl::MakeSpan(relugrad_outputs),
-// name.c_str()));
-
-// (*grad_outputs)[0] = relugrad_outputs[0];
-
-// counter += 1;
-// return Status::OK();
-// }
-// ~ReluGradientFunction() override {}
-
-// private:
-// long counter;
-// std::vector<AbstractTensorHandle*> forward_inputs;
-// };
-
-// class SparseSoftmaxCrossEntropyLossGradientFunction : public GradientFunction {
-// public:
-// explicit SparseSoftmaxCrossEntropyLossGradientFunction(
-// std::vector<AbstractTensorHandle*> f_inputs,
-// std::vector<AbstractTensorHandle*> f_outputs)
-// : forward_inputs(f_inputs), forward_outputs(f_outputs) {}
-
-// Status Compute(Context* ctx,
-// absl::Span<AbstractTensorHandle* const> grad_inputs,
-// std::vector<AbstractTensorHandle*>* grad_outputs) override {
-// // Forward Inputs : [scores, labels]
-
-// grad_outputs->resize(2);
-// std::vector<AbstractTensorHandle*> sm_outputs(2);
-
-// // Calculate Grad
-// std::string name = "sm_loss" + std::to_string(counter);
-
-// TF_RETURN_IF_ERROR(SparseSoftmaxCrossEntropyLoss(
-// ctx->ctx, {forward_inputs[0], forward_inputs[1]},
-// absl::MakeSpan(sm_outputs), name.c_str()));
-
-// // TODO(amturati): fix error where we have to return the softmax loss as the
-// // 2nd grad for the labels to avoid mangled stack trace. Also avoid running
-// // forward operation again, check to see if forward_outputs are being
-// // passed.
-
-// // SparseSoftmaxCrossEntropyLoss returns [loss_vals, grads], so return 2nd
-// // output.
-// (*grad_outputs)[0] = sm_outputs[1]; // return backprop for scores
-// (*grad_outputs)[1] = sm_outputs[0]; // nullptr causes Mangled Stack Trace
-
-// counter += 1;
-// return Status::OK();
-// }
-// ~SparseSoftmaxCrossEntropyLossGradientFunction() override {}
-
-// private:
-// long counter;
-// std::vector<AbstractTensorHandle*> forward_inputs;
-// std::vector<AbstractTensorHandle*> forward_outputs;
-// };
-
-// } // namespace
-
#include "tensorflow/c/experimental/gradients/math_grad.h"
#include "tensorflow/c/eager/abstract_tensor_handle.h"
@@ -236,7 +60,7 @@
~AddGradientFunction() override {}
private:
- long counter;
+ int64_t counter;
};
class ExpGradientFunction : public GradientFunction {
@@ -246,25 +70,29 @@
}
Status Compute(Context* ctx, const IncomingGradients& grad_inputs,
vector<AbstractTensorHandle*>* grad_outputs) override {
- vector<AbstractTensorHandle*> conj_outputs(1);
+ std::vector<AbstractTensorHandle*> conj_outputs(1);
+ std::string name = "Conj_Exp_Grad_" + std::to_string(counter);
TF_RETURN_IF_ERROR(
- Conj(ctx->ctx, {exp_.get()}, absl::MakeSpan(conj_outputs), "ExpConj"));
+ Conj(ctx->ctx, {exp_.get()}, absl::MakeSpan(conj_outputs), name.c_str()));
AbstractTensorHandlePtr conj_output_releaser(conj_outputs[0]);
grad_outputs->resize(1);
+
+ name = "Mul_Exp_Grad_" + std::to_string(counter);
TF_RETURN_IF_ERROR(Mul(ctx->ctx, {conj_outputs[0], grad_inputs[0]},
- absl::MakeSpan(*grad_outputs), "ExpGradMul"));
+ absl::MakeSpan(*grad_outputs), name.c_str()));
return Status::OK();
}
~ExpGradientFunction() override {}
private:
+ int64_t counter;
AbstractTensorHandlePtr exp_;
};
class MatMulGradientFunction : public GradientFunction {
public:
- explicit MatMulGradientFunction(std::vector<AbstractTensorHandle*> f_inputs)
- : forward_inputs(f_inputs) {}
+ explicit MatMulGradientFunction(std::vector<AbstractTensorHandle*> f_inputs/*, AttrBuilder f_attrs*/)
+ : forward_inputs(f_inputs)/*, attrs(f_attrs)*/ {}
Status Compute(Context* ctx,
absl::Span<AbstractTensorHandle* const> grad_inputs,
@@ -279,25 +107,85 @@
AbstractTensorHandle* upstream_grad = grad_inputs[0];
grad_outputs->resize(2);
- std::vector<AbstractTensorHandle*> matmul_outputs(1);
+
+ // // Get transpose attrs
+ // bool t_a;
+ // attrs.Get("transpose_a", &t_a);
- // Gradient for A
- std::string name = "mm_A_" + std::to_string(counter);
- TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {upstream_grad, forward_inputs[1]},
- absl::MakeSpan(matmul_outputs), name.c_str(),
+ // bool t_b;
+ // attrs.Get("transpose_b", &t_b);
+
+ // Conj Inputs
+ std::cout << "c = " << counter << std::endl;
+ std::vector<AbstractTensorHandle*> conj_outputs(1);
+ std::string name = "Conj_A_MatMul_Grad_" + std::to_string(counter);
+ TF_RETURN_IF_ERROR(
+ Conj(ctx->ctx, {forward_inputs[0]}, absl::MakeSpan(conj_outputs), name.c_str()));
+
+ AbstractTensorHandle* A = conj_outputs[0];
+
+ name = "Conj_B_MatMul_Grad_" + std::to_string(counter);
+ TF_RETURN_IF_ERROR(
+ Conj(ctx->ctx, {forward_inputs[1]}, absl::MakeSpan(conj_outputs), name.c_str()));
+
+ AbstractTensorHandle* B = conj_outputs[0];
+
+ // Calc Grad
+ std::vector<AbstractTensorHandle*> matmul_A_outputs(1);
+ std::vector<AbstractTensorHandle*> matmul_B_outputs(1);
+ std::string name_grad_A = "MatMul_Grad_A_" + std::to_string(counter);
+ std::string name_grad_B = "MatMul_Grad_B_" + std::to_string(counter);
+ //if(!t_a && !t_b) {
+ TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {upstream_grad, B},
+ absl::MakeSpan(matmul_A_outputs), name_grad_A.c_str(),
/*transpose_a = */ false,
/*transpose_b = */ true));
-
- (*grad_outputs)[0] = matmul_outputs[0];
-
- // Gradient for B
- name = "mm_B_" + std::to_string(counter);
- TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {forward_inputs[0], upstream_grad},
- absl::MakeSpan(matmul_outputs), name.c_str(),
+
+ TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {A, upstream_grad},
+ absl::MakeSpan(matmul_B_outputs), name_grad_B.c_str(),
/*transpose_a = */ true,
/*transpose_b = */ false));
+ // }
+ // else if(!t_a && t_b) {
+ // TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {upstream_grad, B},
+ // absl::MakeSpan(matmul_A_outputs), name_grad_A.c_str(),
+ // /*transpose_a = */ false,
+ // /*transpose_b = */ false));
+
+ // TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {upstream_grad, A},
+ // absl::MakeSpan(matmul_B_outputs), name_grad_B.c_str(),
+ // /*transpose_a = */ true,
+ // /*transpose_b = */ false));
- (*grad_outputs)[1] = matmul_outputs[0];
+ // }
+ // else if(t_a && !t_b) {
+ // TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {B, upstream_grad},
+ // absl::MakeSpan(matmul_A_outputs), name_grad_A.c_str(),
+ // /*transpose_a = */ false,
+ // /*transpose_b = */ true));
+
+ // TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {A, upstream_grad},
+ // absl::MakeSpan(matmul_B_outputs), name_grad_B.c_str(),
+ // /*transpose_a = */ false,
+ // /*transpose_b = */ false));
+ // }
+ // else { // t_a && t_b
+ // TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {B, upstream_grad},
+ // absl::MakeSpan(matmul_A_outputs), name_grad_A.c_str(),
+ // /*transpose_a = */ true,
+ // /*transpose_b = */ true));
+
+ // TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {upstream_grad, A},
+ // absl::MakeSpan(matmul_B_outputs), name_grad_B.c_str(),
+ // /*transpose_a = */ true,
+ // /*transpose_b = */ true));
+ // }
+
+ // Gradient for A
+ (*grad_outputs)[0] = matmul_A_outputs[0];
+
+ // Gradient for B
+ (*grad_outputs)[1] = matmul_B_outputs[0];
counter += 1; // update counter for names
return Status::OK();
@@ -305,8 +193,9 @@
~MatMulGradientFunction() override {}
private:
- long counter;
+ int64_t counter;
std::vector<AbstractTensorHandle*> forward_inputs;
+ // AttrBuilder attrs;
};
class ReluGradientFunction : public GradientFunction {
@@ -337,12 +226,11 @@
~ReluGradientFunction() override {}
private:
- long counter;
+ int64_t counter;
std::vector<AbstractTensorHandle*> forward_outputs;
};
-// FIX ZEROSLIKE
class SparseSoftmaxCrossEntropyLossGradientFunction : public GradientFunction {
public:
explicit SparseSoftmaxCrossEntropyLossGradientFunction(
@@ -355,19 +243,23 @@
std::vector<AbstractTensorHandle*>* grad_outputs) override {
grad_outputs->resize(2);
- std::string name = "Identity_Softmax_Grad_A_" + std::to_string(counter);
- std::vector<AbstractTensorHandle*> id_outputs(1);
- TF_RETURN_IF_ERROR(ops::Identity(ctx->ctx, {forward_outputs[1]},
- absl::MakeSpan(id_outputs),
- name.c_str()));
- (*grad_outputs)[0] = id_outputs[0];
+ // Grad for Softmax Input
+ std::string name = "Mul_Softmax_Grad_" + std::to_string(counter);
+ std::vector<AbstractTensorHandle*> mul_outputs(1);
+ TF_RETURN_IF_ERROR(ops::Mul(ctx->ctx, {grad_inputs[0], forward_outputs[1]},
+ absl::MakeSpan(mul_outputs),
+ name.c_str())); // upstream_grad * local softmax grad
+ (*grad_outputs)[0] = mul_outputs[0];
+
+ // Grad for labels
// TODO(amturati): check to see if ZerosLike is ok instead of nullptr
name = "Zeros_Softmax_Grad_" + std::to_string(counter);
+ std::vector<AbstractTensorHandle*> z_outputs(1);
TF_RETURN_IF_ERROR(ops::ZerosLike(ctx->ctx, {forward_inputs[1]},
- absl::MakeSpan(id_outputs),
+ absl::MakeSpan(z_outputs),
name.c_str()));
- (*grad_outputs)[1] = id_outputs[0]; // nullptr causes Mangled Stack Trace
+ (*grad_outputs)[1] = z_outputs[0]; // nullptr causes Mangled Stack Trace
counter += 1;
return Status::OK();
@@ -375,7 +267,7 @@
~SparseSoftmaxCrossEntropyLossGradientFunction() override {}
private:
- long counter;
+ int64_t counter;
std::vector<AbstractTensorHandle*> forward_inputs;
std::vector<AbstractTensorHandle*> forward_outputs;
};
@@ -401,7 +293,7 @@
}
GradientFunction* MatMulRegisterer(const ForwardOperation& op) {
- return new MatMulGradientFunction(op.inputs);
+ return new MatMulGradientFunction(op.inputs/*, op.attrs*/);
}
GradientFunction* ReluRegisterer(const ForwardOperation& op) {
diff --git a/tensorflow/c/experimental/ops/math_ops.cc b/tensorflow/c/experimental/ops/math_ops.cc
index cb63db6..4f408ea 100644
--- a/tensorflow/c/experimental/ops/math_ops.cc
+++ b/tensorflow/c/experimental/ops/math_ops.cc
@@ -73,7 +73,7 @@
Status MatMul(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name,
- bool transpose_a, bool transpose_b) {
+ bool transpose_a = false, bool transpose_b = false) {
AbstractOperationPtr matmul_op(ctx->CreateOperation());
TF_RETURN_IF_ERROR(matmul_op->Reset("MatMul", /*raw_device_name=*/nullptr));
@@ -93,5 +93,19 @@
return Status::OK();
}
+Status Neg(AbstractContext* ctx, absl::Span<AbstractTensorHandle* const> inputs,
+ absl::Span<AbstractTensorHandle*> outputs, const char* name) {
+ AbstractOperationPtr neg_op(ctx->CreateOperation());
+ TF_RETURN_IF_ERROR(neg_op->Reset("Neg", /*raw_device_name=*/nullptr));
+ if (isa<TracingOperation>(neg_op.get())) {
+ TF_RETURN_IF_ERROR(
+ dyn_cast<TracingOperation>(neg_op.get())->SetOpName(name));
+ }
+ TF_RETURN_IF_ERROR(neg_op->AddInput(inputs[0]));
+
+ int num_retvals = 1;
+ return neg_op->Execute(outputs, &num_retvals);
+}
+
} // namespace ops
} // namespace tensorflow
diff --git a/tensorflow/c/experimental/ops/math_ops.h b/tensorflow/c/experimental/ops/math_ops.h
index 8f0f9f5..ed1e6c5 100644
--- a/tensorflow/c/experimental/ops/math_ops.h
+++ b/tensorflow/c/experimental/ops/math_ops.h
@@ -31,6 +31,8 @@
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name,
bool transpose_a, bool transpose_b);
+Status Neg(AbstractContext* ctx, absl::Span<AbstractTensorHandle* const> inputs,
+ absl::Span<AbstractTensorHandle*> outputs, const char* name);
} // namespace ops
} // namespace tensorflow