Fix nits in the MNIST gradient tests and utils; MatMul grad attrs still TODO

Renames the test helpers (TestMatrixTensorHandle{Float,Int} ->
TestTensorHandleWithDims{Float,Int}, getValue -> GetValue,
getMatrixTensorHandleUtil* -> GetTensorHandleUtil*), removes debug printing and
the TestAddGrad test, records transpose_a/transpose_b on the forward op via
internal::SetAttrBool, conjugates the forward inputs in the MatMul gradient,
multiplies the upstream gradient into the sparse-softmax gradient, and makes
UpdateWeights negate a positive learning rate internally. Reading the recorded
attrs inside the MatMul gradient is still TODO.
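
Sketch of the remaining attr plumbing (hypothetical; it mirrors the
commented-out code in math_grad.cc and assumes ForwardOperation exposes the
recorded attrs as an AttrBuilder member `op.attrs`):

    // In MatMulRegisterer, forward the recorded attrs:
    return new MatMulGradientFunction(op.inputs, op.attrs);

    // In MatMulGradientFunction::Compute, read them before choosing the
    // transpose combination that is currently stubbed out:
    bool t_a, t_b;
    attrs.Get("transpose_a", &t_a);
    attrs.Get("transpose_b", &t_b);
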
diff --git a/tensorflow/c/eager/c_api_test_util.cc b/tensorflow/c/eager/c_api_test_util.cc
index 6f3dde0..4c70435 100644
--- a/tensorflow/c/eager/c_api_test_util.cc
+++ b/tensorflow/c/eager/c_api_test_util.cc
@@ -102,7 +102,7 @@
   return th;
 }
 
-TFE_TensorHandle* TestMatrixTensorHandleFloat(TFE_Context* ctx, float data[], int64_t dims [], int num_dims){ 
+TFE_TensorHandle* TestTensorHandleWithDimsFloat(TFE_Context* ctx, float data[], int64_t dims[], int num_dims) {
   TF_Status* status = TF_NewStatus();
   TF_Tensor* t = TFE_AllocateHostTensor(ctx, TF_FLOAT, &dims[0],
                                         num_dims, status);
@@ -114,7 +114,7 @@
   return th;
 }
 
-TFE_TensorHandle* TestMatrixTensorHandleInt(TFE_Context* ctx, int data[], int64_t dims [], int num_dims){ 
+TFE_TensorHandle* TestTensorHandleWithDimsInt(TFE_Context* ctx, int data[], int64_t dims[], int num_dims) {
   TF_Status* status = TF_NewStatus();
   TF_Tensor* t = TFE_AllocateHostTensor(ctx, TF_INT32, &dims[0],
                                         num_dims, status);
diff --git a/tensorflow/c/eager/c_api_test_util.h b/tensorflow/c/eager/c_api_test_util.h
index c998ab2..76d8f5c 100644
--- a/tensorflow/c/eager/c_api_test_util.h
+++ b/tensorflow/c/eager/c_api_test_util.h
@@ -41,10 +41,10 @@
                                                   int num_dims);
 
 // Get a Matrix TensorHandle with given float values and dimensions
-TFE_TensorHandle* TestMatrixTensorHandleFloat(TFE_Context* ctx, float data[], int64_t dims [], int num_dims);
+TFE_TensorHandle* TestTensorHandleWithDimsFloat(TFE_Context* ctx, float data[], int64_t dims[], int num_dims);
 
 // Get a Matrix TensorHandle with given int values and dimensions
-TFE_TensorHandle* TestMatrixTensorHandleInt(TFE_Context* ctx, int data[], int64_t dims [], int num_dims);
+TFE_TensorHandle* TestTensorHandleWithDimsInt(TFE_Context* ctx, int data[], int64_t dims[], int num_dims);
 
 
 // Return a tensor handle containing a 100x100 matrix of floats
diff --git a/tensorflow/c/eager/mnist_gradients_test.cc b/tensorflow/c/eager/mnist_gradients_test.cc
index 7d72c2a..d4dc14b 100644
--- a/tensorflow/c/eager/mnist_gradients_test.cc
+++ b/tensorflow/c/eager/mnist_gradients_test.cc
@@ -51,23 +51,8 @@
 }
 
 // ========================= Test Util Functions ==============================
-void printArr(float data[], int n) {
-  std::cout << std::endl << "[";
-  for (int i = 0; i < n - 1; i++) {
-    std::cout << data[i] << ", ";
-  }
-  std::cout << data[n - 1] << "]" << std::endl;
-}
 
-float sumArr(float data[], int n) {
-  float sum = 0;
-  for (int i = 0; i < n; i++) {
-    sum += data[i];
-  }
-  return sum;
-}
-
-// Get a scalar TensorHandle woth given value
+// Get a scalar TensorHandle with given value
 Status TestScalarTensorHandle(AbstractContext* ctx, float value,
                               AbstractTensorHandle** tensor) {
   std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
@@ -82,7 +67,7 @@
 }
 
 // Get a Matrix TensorHandle with given float values and dimensions
-Status TestMatrixTensorHandleFloat(AbstractContext* ctx, float data[],
+Status TestTensorHandleWithDimsFloat(AbstractContext* ctx, float data[],
                                    int64_t dims[], int num_dims,
                                    AbstractTensorHandle** tensor) {
   std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
@@ -91,14 +76,14 @@
       TF_ExecutionContextGetTFEContext(wrap(ctx), status.get());
   TF_RETURN_IF_ERROR(StatusFromTF_Status(status.get()));
   TFE_TensorHandle* input_eager =
-      TestMatrixTensorHandleFloat(eager_ctx, data, dims, num_dims);
+      TestTensorHandleWithDimsFloat(eager_ctx, data, dims, num_dims);
   *tensor =
       unwrap(TF_CreateAbstractTensorFromEagerTensor(input_eager, status.get()));
   return Status::OK();
 }
 
 // Get a Matrix TensorHandle with given int values and dimensions
-Status TestMatrixTensorHandleInt(AbstractContext* ctx, int data[],
+Status TestTensorHandleWithDimsInt(AbstractContext* ctx, int data[],
                                  int64_t dims[], int num_dims,
                                  AbstractTensorHandle** tensor) {
   std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
@@ -107,13 +92,13 @@
       TF_ExecutionContextGetTFEContext(wrap(ctx), status.get());
   TF_RETURN_IF_ERROR(StatusFromTF_Status(status.get()));
   TFE_TensorHandle* input_eager =
-      TestMatrixTensorHandleInt(eager_ctx, data, dims, num_dims);
+      TestTensorHandleWithDimsInt(eager_ctx, data, dims, num_dims);
   *tensor =
       unwrap(TF_CreateAbstractTensorFromEagerTensor(input_eager, status.get()));
   return Status::OK();
 }
 
-Status getValue(AbstractTensorHandle* t, TF_Tensor** result_tensor) {
+Status GetValue(AbstractTensorHandle* t, TF_Tensor** result_tensor) {
   std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
       TF_NewStatus(), TF_DeleteStatus);
   TFE_TensorHandle* result_t =
@@ -123,104 +108,29 @@
   return Status::OK();
 }
 
-AbstractTensorHandlePtr getMatrixTensorHandleUtilFloat(AbstractContext* ctx,
+AbstractTensorHandlePtr GetTensorHandleUtilFloat(AbstractContext* ctx,
                                                        float vals[],
                                                        int64_t dims[],
                                                        int num_dims) {
   AbstractTensorHandlePtr A;
   AbstractTensorHandle* a_raw = nullptr;
-  Status s = TestMatrixTensorHandleFloat(ctx, vals, dims, num_dims, &a_raw);
+  Status s = TestTensorHandleWithDimsFloat(ctx, vals, dims, num_dims, &a_raw);
   A.reset(a_raw);
   return A;
 }
 
-AbstractTensorHandlePtr getMatrixTensorHandleUtilInt(AbstractContext* ctx,
+AbstractTensorHandlePtr GetTensorHandleUtilInt(AbstractContext* ctx,
                                                      int vals[], int64_t dims[],
                                                      int num_dims) {
   AbstractTensorHandlePtr A;
   AbstractTensorHandle* a_raw = nullptr;
-  Status s = TestMatrixTensorHandleInt(ctx, vals, dims, num_dims, &a_raw);
+  Status s = TestTensorHandleWithDimsInt(ctx, vals, dims, num_dims, &a_raw);
   A.reset(a_raw);
   return A;
 }
 
-void printTensor(AbstractTensorHandle* t, int size) {
-  TF_Tensor* tensor;
-  Status s = getValue(t, &tensor);
-  ASSERT_EQ(errors::OK, s.code()) << s.error_message();
-
-  float result_data[size] = {0};
-  memcpy(&result_data[0], TF_TensorData(tensor), TF_TensorByteSize(tensor));
-  printArr(result_data, size);
-
-  TF_DeleteTensor(tensor);
-}
-
 // =========================== Start Tests ================================
 
-TEST_P(CppGradients, TestAddGrad) {
-  std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
-      TF_NewStatus(), TF_DeleteStatus);
-  AbstractContextPtr ctx;
-  {
-    AbstractContext* ctx_raw = nullptr;
-    Status s =
-        BuildImmediateExecutionContext(std::get<1>(GetParam()), &ctx_raw);
-    ASSERT_EQ(errors::OK, s.code()) << s.error_message();
-    ctx.reset(ctx_raw);
-  }
-
-  AbstractTensorHandlePtr x;
-  {
-    AbstractTensorHandle* x_raw = nullptr;
-    Status s = TestScalarTensorHandle(ctx.get(), 2.0f, &x_raw);
-    ASSERT_EQ(errors::OK, s.code()) << s.error_message();
-    x.reset(x_raw);
-  }
-
-  AbstractTensorHandlePtr y;
-  {
-    AbstractTensorHandle* y_raw = nullptr;
-    Status s = TestScalarTensorHandle(ctx.get(), 2.0f, &y_raw);
-    ASSERT_EQ(errors::OK, s.code()) << s.error_message();
-    y.reset(y_raw);
-  }
-
-  GradientRegistry registry;
-  Status s = RegisterGradients(&registry);
-  ASSERT_EQ(errors::OK, s.code()) << s.error_message();
-
-  /* Pseudo-code:
-   *
-   * tape.watch(x)
-   * tape.watch(y)
-   * y = x + y
-   * outputs = tape.gradient(y, [x, y])
-   */
-
-  std::vector<AbstractTensorHandle*> outputs(2);
-  s = RunModel(AddGradModel, ctx.get(), {x.get(), y.get()},
-               absl::MakeSpan(outputs),
-               /*use_function=*/!std::get<2>(GetParam()), registry);
-  ASSERT_EQ(errors::OK, s.code()) << s.error_message();
-
-  TF_Tensor* result_tensor;
-  s = getValue(outputs[0], &result_tensor);
-  ASSERT_EQ(errors::OK, s.code()) << s.error_message();
-  auto result_value = static_cast<float*>(TF_TensorData(result_tensor));
-  EXPECT_EQ(*result_value, 1.0);
-  outputs[0]->Unref();
-  TF_DeleteTensor(result_tensor);
-  result_tensor = nullptr;
-
-  s = getValue(outputs[1], &result_tensor);
-  ASSERT_EQ(errors::OK, s.code()) << s.error_message();
-  result_value = static_cast<float*>(TF_TensorData(result_tensor));
-  EXPECT_EQ(*result_value, 1.0);
-  outputs[1]->Unref();
-  TF_DeleteTensor(result_tensor);
-}
-
 TEST_P(CppGradients, TestMatMulGrad) {
   std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
       TF_NewStatus(), TF_DeleteStatus);
@@ -240,9 +150,9 @@
   int num_dims = 2;
 
   AbstractTensorHandlePtr A =
-      getMatrixTensorHandleUtilFloat(ctx.get(), A_vals, A_dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), A_vals, A_dims, num_dims);
   AbstractTensorHandlePtr B =
-      getMatrixTensorHandleUtilFloat(ctx.get(), B_vals, B_dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), B_vals, B_dims, num_dims);
 
   GradientRegistry registry;
   Status s = RegisterGradients(&registry);
@@ -263,7 +173,7 @@
   ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
   TF_Tensor* dA_tensor;
-  s = getValue(outputs[0], &dA_tensor);
+  s = GetValue(outputs[0], &dA_tensor);
   ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
   float result_data[4] = {0};
@@ -277,7 +187,7 @@
   }
 
   TF_Tensor* dB_tensor;
-  s = getValue(outputs[1], &dB_tensor);
+  s = GetValue(outputs[1], &dB_tensor);
   ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
   memcpy(&result_data[0], TF_TensorData(dB_tensor),
@@ -309,24 +219,24 @@
   int64_t dims[] = {2, 2};
   int num_dims = 2;
   AbstractTensorHandlePtr X =
-      getMatrixTensorHandleUtilFloat(ctx.get(), X_vals, dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), X_vals, dims, num_dims);
 
   // W1 = first weights
   float W1_vals[] = {-1.0f, 10.0f, .5f, 1.0f};
   AbstractTensorHandlePtr W1 =
-      getMatrixTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
 
   // W2 = second weights
   float W2_vals[] = {.1f, .2f, .3f, -.5f};
   AbstractTensorHandlePtr W2 =
-      getMatrixTensorHandleUtilFloat(ctx.get(), W2_vals, dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), W2_vals, dims, num_dims);
 
   // y = labels
   int y_vals[] = {1, 1};
   int64_t dims_y[] = {2};
   num_dims = sizeof(dims_y) / sizeof(dims_y[0]);
   AbstractTensorHandlePtr y =
-      getMatrixTensorHandleUtilInt(ctx.get(), y_vals, dims, num_dims);
+      GetTensorHandleUtilInt(ctx.get(), y_vals, dims_y, num_dims);
 
   GradientRegistry registry;
 
@@ -340,7 +250,7 @@
 
   // Verify the Results
   TF_Tensor* scores_tensor;
-  s = getValue(outputs[0], &scores_tensor);
+  s = GetValue(outputs[0], &scores_tensor);
   ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
   float result_data[4] = {0};
@@ -354,7 +264,7 @@
   }
 
   TF_Tensor* loss_vals_tensor;
-  s = getValue(outputs[1], &loss_vals_tensor);
+  s = GetValue(outputs[1], &loss_vals_tensor);
   ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
   memcpy(&result_data[0], TF_TensorData(loss_vals_tensor),
@@ -385,25 +295,25 @@
   int64_t X_dims[] = {3, 2};
   int num_dims = 2;
   AbstractTensorHandlePtr X =
-      getMatrixTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
 
   // W1 = first weights
   float W1_vals[] = {-1.0f, 10.0f, .5f, 1.0f};
   int64_t dims[] = {2, 2};
   AbstractTensorHandlePtr W1 =
-      getMatrixTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
 
   // W2 = second weights
   float W2_vals[] = {.1f, .2f, .3f, -.5f};
   AbstractTensorHandlePtr W2 =
-      getMatrixTensorHandleUtilFloat(ctx.get(), W2_vals, dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), W2_vals, dims, num_dims);
 
   // y = labels
   int y_vals[] = {1, 1, 1};
   int64_t y_dims[] = {3};
   num_dims = sizeof(y_dims) / sizeof(y_dims[0]);
   AbstractTensorHandlePtr y =
-      getMatrixTensorHandleUtilInt(ctx.get(), y_vals, y_dims, num_dims);
+      GetTensorHandleUtilInt(ctx.get(), y_vals, y_dims, num_dims);
 
   GradientRegistry registry;
 
@@ -417,7 +327,7 @@
 
   // Verify the Results
   TF_Tensor* scores_tensor;
-  s = getValue(outputs[0], &scores_tensor);
+  s = GetValue(outputs[0], &scores_tensor);
   ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
   float result_data[6] = {0};
@@ -431,7 +341,7 @@
   }
 
   TF_Tensor* loss_vals_tensor;
-  s = getValue(outputs[1], &loss_vals_tensor);
+  s = GetValue(outputs[1], &loss_vals_tensor);
   ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
   memcpy(&result_data[0], TF_TensorData(loss_vals_tensor),
@@ -465,13 +375,13 @@
   int64_t X_dims[] = {2, 3};
   int num_dims = 2;
   AbstractTensorHandlePtr X =
-      getMatrixTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
 
   // W1 = first weights
   float W1_vals[] = {1.0f, 2.0f, 3.0f, 4.0f};
   int64_t dims[] = {2, 2};
   AbstractTensorHandlePtr W1 =
-      getMatrixTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
 
   GradientRegistry registry;
 
@@ -486,7 +396,7 @@
 
   // Verify the Results
   TF_Tensor* scores_tensor;
-  s = getValue(outputs[0], &scores_tensor);
+  s = GetValue(outputs[0], &scores_tensor);
   ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
   float result_data[6] = {0};
@@ -518,7 +428,7 @@
   int64_t X_dims[] = {3, 3};
   int num_dims = 2;
   AbstractTensorHandlePtr X =
-      getMatrixTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
 
   GradientRegistry registry;
   Status s = RegisterGradients(&registry);
@@ -536,7 +446,7 @@
   ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
   TF_Tensor* dX_tensor;
-  s = getValue(outputs[0], &dX_tensor);
+  s = GetValue(outputs[0], &dX_tensor);
   ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
   float result_data[9] = {0};
@@ -571,14 +481,14 @@
   int64_t X_dims[] = {3, 3};
   int num_dims = 2;
   AbstractTensorHandlePtr X =
-      getMatrixTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
 
   // y = labels
   int y_vals[] = {1, 0, 1};
   int64_t y_dims[] = {3};
   num_dims = sizeof(y_dims) / sizeof(y_dims[0]);
   AbstractTensorHandlePtr y =
-      getMatrixTensorHandleUtilInt(ctx.get(), y_vals, y_dims, num_dims);
+      GetTensorHandleUtilInt(ctx.get(), y_vals, y_dims, num_dims);
 
   GradientRegistry registry;
   Status s = RegisterGradients(&registry);
@@ -602,7 +512,7 @@
   ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
   TF_Tensor* dX_tensor;
-  s = getValue(outputs[0], &dX_tensor);
+  s = GetValue(outputs[0], &dX_tensor);
   ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
   float result_data[9] = {0};
@@ -638,25 +548,25 @@
   int64_t X_dims[] = {2, 2};
   int num_dims = 2;
   AbstractTensorHandlePtr X =
-      getMatrixTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
 
   // W1 = first weights
   float W1_vals[] = {-1.0f, 10.0f, .5f, 1.0f};
   int64_t dims[] = {2, 2};
   AbstractTensorHandlePtr W1 =
-      getMatrixTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
 
   // W2 = second weights
   float W2_vals[] = {.1f, .2f, .3f, -.5f};
   AbstractTensorHandlePtr W2 =
-      getMatrixTensorHandleUtilFloat(ctx.get(), W2_vals, dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), W2_vals, dims, num_dims);
 
   // y = labels
   int y_vals[] = {1, 1};
   int64_t y_dims[] = {2};
   num_dims = sizeof(y_dims) / sizeof(y_dims[0]);
   AbstractTensorHandlePtr y =
-      getMatrixTensorHandleUtilInt(ctx.get(), y_vals, y_dims, num_dims);
+      GetTensorHandleUtilInt(ctx.get(), y_vals, y_dims, num_dims);
 
   // Register Grads
   GradientRegistry registry;
@@ -684,7 +594,7 @@
 
   float tolerance = 1e-3;
   TF_Tensor* dW1_tensor;
-  s = getValue(outputs[0], &dW1_tensor);
+  s = GetValue(outputs[0], &dW1_tensor);
   ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
   float result_data[4] = {0};
@@ -698,7 +608,7 @@
   }
 
   TF_Tensor* dW2_tensor;
-  s = getValue(outputs[1], &dW2_tensor);
+  s = GetValue(outputs[1], &dW2_tensor);
   ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
   memcpy(&result_data[0], TF_TensorData(dW2_tensor),
@@ -742,7 +652,7 @@
   int num_dims = 2;
 
   AbstractTensorHandlePtr A =
-      getMatrixTensorHandleUtilFloat(ctx.get(), A_vals, A_dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), A_vals, A_dims, num_dims);
 
   GradientRegistry registry;
   std::vector<AbstractTensorHandle*> outputs(1);
@@ -752,7 +662,7 @@
   ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
   TF_Tensor* dA_tensor;
-  s = getValue(outputs[0], &dA_tensor);
+  s = GetValue(outputs[0], &dA_tensor);
   ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
   float result_data[4] = {0};
@@ -787,25 +697,25 @@
   int64_t X_dims[] = {2, 2};
   int num_dims = 2;
   AbstractTensorHandlePtr X =
-      getMatrixTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), X_vals, X_dims, num_dims);
 
   // W1 = first weights
   float W1_vals[] = {-.01f, 0.4f, 0.5f, -.2f};
   int64_t dims[] = {2, 2};
   AbstractTensorHandlePtr W1 =
-      getMatrixTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), W1_vals, dims, num_dims);
 
   // W2 = second weights
   float W2_vals[] = {.1f, .2f, .3f, -.5f};
   AbstractTensorHandlePtr W2 =
-      getMatrixTensorHandleUtilFloat(ctx.get(), W2_vals, dims, num_dims);
+      GetTensorHandleUtilFloat(ctx.get(), W2_vals, dims, num_dims);
 
   // y = labels
   int y_vals[] = {1, 1};
   int64_t y_dims[] = {2};
   num_dims = sizeof(y_dims) / sizeof(y_dims[0]);
   AbstractTensorHandlePtr y =
-      getMatrixTensorHandleUtilInt(ctx.get(), y_vals, y_dims, num_dims);
+      GetTensorHandleUtilInt(ctx.get(), y_vals, y_dims, num_dims);
 
   // Register Grads
   GradientRegistry registry;
@@ -817,9 +727,9 @@
   weights.push_back(W1.get());
   weights.push_back(W2.get());
 
-  // Set learning rate to be 1e-3
+  // Use a positive learning rate; UpdateWeights negates it internally.
   AbstractTensorHandle* learning_rate = nullptr;
-  s = TestScalarTensorHandle(ctx.get(), -1e-2, &learning_rate);
+  s = TestScalarTensorHandle(ctx.get(), 1e-1, &learning_rate);
   ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
   // Train
@@ -827,8 +737,7 @@
   std::vector<AbstractTensorHandle*> mnist_outputs(3);
   std::vector<AbstractTensorHandle*> grads(2);
   for (int i = 0; i < num_iters; i++) {
-    std::cout << "iter " << i << ": " << std::endl;
-
+
     // Run Forward Pass
     s = RunModel(MNISTGradModel, ctx.get(),
                  {X.get(), weights[0], weights[1], y.get()},
@@ -844,23 +753,11 @@
     s = UpdateWeights(ctx.get(), grads, weights, learning_rate);
     ASSERT_EQ(errors::OK, s.code()) << s.error_message();
 
-    // Print Loss
-    AbstractTensorHandle* loss_vals = mnist_outputs[2];
-    TF_Tensor* loss_tensor;
-    s = getValue(loss_vals, &loss_tensor);
-    ASSERT_EQ(errors::OK, s.code()) << s.error_message();
-
-    float result_data[2] = {0};
-    memcpy(&result_data[0], TF_TensorData(loss_tensor),
-           TF_TensorByteSize(loss_tensor));
-    std::cout << "     loss = " << sumArr(result_data, 2) << std::endl;
-    std::cout << "-----------------" << std::endl;
-    TF_DeleteTensor(loss_tensor);
   }
 
-  grads[0]->Unref();
-  grads[1]->Unref();
-  mnist_outputs[2]->Unref();
+  grads[0]->Unref();          // Release W1_grad.
+  grads[1]->Unref();          // Release W2_grad.
+  mnist_outputs[2]->Unref();  // Release loss.
 }
 
 // TODO(b/160888630): Enable this test with mlir after AddInputList is
diff --git a/tensorflow/c/eager/mnist_gradients_util.cc b/tensorflow/c/eager/mnist_gradients_util.cc
index a85ae27..3fb8e9f 100644
--- a/tensorflow/c/eager/mnist_gradients_util.cc
+++ b/tensorflow/c/eager/mnist_gradients_util.cc
@@ -30,6 +30,9 @@
 #include "tensorflow/c/tf_tensor.h"
 #include "tensorflow/core/lib/llvm_rtti/llvm_rtti.h"
 
+
+// ========================== Tape Ops ==============================
+
 // Computes `inputs[0] + inputs[1]` and records it on the tape.
 Status Add(AbstractContext* ctx, Tape* tape,
            absl::Span<AbstractTensorHandle* const> inputs,
@@ -71,8 +74,8 @@
 
   TF_RETURN_IF_ERROR(AddInput(matmul_op.get(), inputs[0], &forward_op));
   TF_RETURN_IF_ERROR(AddInput(matmul_op.get(), inputs[1], &forward_op));
-  matmul_op->SetAttrBool("transpose_a",transpose_a);
-  matmul_op->SetAttrBool("transpose_b",transpose_b);
+  TF_RETURN_IF_ERROR(tensorflow::gradients::internal::SetAttrBool(
+      matmul_op.get(), "transpose_a", transpose_a, &forward_op));
+  TF_RETURN_IF_ERROR(tensorflow::gradients::internal::SetAttrBool(
+      matmul_op.get(), "transpose_b", transpose_b, &forward_op));
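+  // internal::SetAttrBool also records these attrs on forward_op, so the
+  // registered MatMul gradient can read them later (still TODO).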
 
   int num_retvals = 1;
   return Execute(matmul_op.get(), ctx, outputs, &num_retvals, &forward_op, tape,
@@ -351,10 +354,6 @@
       source_tensors_that_are_targets,
       /*output_gradients=*/{}, &out_grads));
 
-  // for (auto sm_output : sm_outputs) {
-  //   sm_output->Unref();
-  // }
-
   outputs[0] = out_grads[0];
   outputs[1] = out_grads[1];
   delete tape;
@@ -451,9 +450,9 @@
                      AbstractTensorHandle* learning_rate) {
   /* Update weights one by one using gradient update rule:
    *
-   *    w += lr*grad[w]
+   *    w -= lr*grad[w]
    *
-   *  NOTE: assuming learning rate is already negative
+   *  NOTE: assuming learning rate is positive
    */
 
   Status s;
@@ -461,6 +460,11 @@
   std::vector<AbstractTensorHandle*> temp_outputs(1);
   std::string update_str;
 
+  // Negate learning rate for gradient descent
+  TF_RETURN_IF_ERROR(ops::Neg(ctx, {learning_rate},
+                              absl::MakeSpan(temp_outputs), "neg_lr"));
+  learning_rate = temp_outputs[0];
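+  // learning_rate now refers to the negated (-lr) handle used for the updates below.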
+
   for (int i = 0; i < num_grads; i++) {
     // Compute dW = -lr * grad(w[i])
     update_str = "update_mul_" + std::to_string(i);
@@ -559,3 +563,4 @@
   TFE_DeleteContextOptions(opts);
   return Status::OK();
 }
+
diff --git a/tensorflow/c/eager/mnist_gradients_util.h b/tensorflow/c/eager/mnist_gradients_util.h
index f0975c7..0b705f2 100644
--- a/tensorflow/c/eager/mnist_gradients_util.h
+++ b/tensorflow/c/eager/mnist_gradients_util.h
@@ -32,7 +32,7 @@
 using namespace tensorflow::gradients;
 using namespace tensorflow::gradients::internal;
 
-// ========================== tape  ==============================
+// ========================== Tape Ops ==============================
 
 // Computes `inputs[0] + inputs[1]` and records it on the tape.
 Status Add(AbstractContext* ctx, Tape* tape,
diff --git a/tensorflow/c/experimental/gradients/math_grad.cc b/tensorflow/c/experimental/gradients/math_grad.cc
index b64ac99..558cb70 100644
--- a/tensorflow/c/experimental/gradients/math_grad.cc
+++ b/tensorflow/c/experimental/gradients/math_grad.cc
@@ -12,182 +12,6 @@
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-// #include "tensorflow/c/experimental/gradients/math_grad.h"
-
-// #include "tensorflow/c/eager/abstract_tensor_handle.h"
-// #include "tensorflow/c/experimental/ops/array_ops.h"
-// #include "tensorflow/c/experimental/ops/math_ops.h"
-
-// using std::vector;
-// using tensorflow::ops::Conj;
-// using tensorflow::ops::Identity;
-// using tensorflow::ops::Mul;
-// using tensorflow::ops::MatMul;
-// using tensorflow::ops::ReluGrad;
-// using tensorflow::ops::SparseSoftmaxCrossEntropyLoss;
-
-// namespace tensorflow {
-// namespace gradients {
-// namespace {
-
-// class AddGradientFunction : public GradientFunction {
-//  public:
-//   Status Compute(Context* ctx,
-//                  absl::Span<AbstractTensorHandle* const> grad_inputs,
-//                  vector<AbstractTensorHandle*>* grad_outputs) override {
-//     grad_outputs->resize(2);
-//     vector<AbstractTensorHandle*> identity_outputs(1);
-//     // TODO(b/145674566): Handle name unification in tracing code.
-//     // TODO(b/161805092): Support broadcasting.
-
-//     std::string name = "Identity_A_" + std::to_string(counter);
-//     TF_RETURN_IF_ERROR(ops::Identity(ctx->ctx, {grad_inputs[0]},
-//                                      absl::MakeSpan(identity_outputs),
-//                                      name.c_str()));
-//     (*grad_outputs)[0] = identity_outputs[0];
-
-//     name = "Identity_B_" + std::to_string(counter);
-//     TF_RETURN_IF_ERROR(ops::Identity(ctx->ctx, {grad_inputs[0]},
-//                                      absl::MakeSpan(identity_outputs),
-//                                      name.c_str()));
-//     (*grad_outputs)[1] = identity_outputs[0];
-
-//     counter += 1;
-//     return Status::OK();
-//   }
-//   ~AddGradientFunction() override {}
-
-//  private:
-//   long counter;
-// };
-
-
-
-// class MatMulGradientFunction : public GradientFunction {
-//  public:
-//   explicit MatMulGradientFunction(std::vector<AbstractTensorHandle*> f_inputs)
-//       : forward_inputs(f_inputs) {}
-
-//   Status Compute(Context* ctx,
-//                  absl::Span<AbstractTensorHandle* const> grad_inputs,
-//                  std::vector<AbstractTensorHandle*>* grad_outputs) override {
-//     /* Given upstream grad U and a matmul op A*B, the gradients are:
-//      *
-//      *    dA = U * B.T
-//      *    dB = A.T * U
-//      *
-//      *    where A.T means `transpose(A)`
-//      */
-
-//     AbstractTensorHandle* upstream_grad = grad_inputs[0];
-//     grad_outputs->resize(2);
-//     std::vector<AbstractTensorHandle*> matmul_outputs(1);
-
-//     // Gradient for A
-//     std::string name = "mm_A_" + std::to_string(counter);
-//     TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {upstream_grad, forward_inputs[1]},
-//                               absl::MakeSpan(matmul_outputs), name.c_str(),
-//                               /*transpose_a = */ false,
-//                               /*transpose_b = */ true));
-
-//     (*grad_outputs)[0] = matmul_outputs[0];
-
-//     // Gradient for B
-//     name = "mm_B_" + std::to_string(counter);
-//     TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {forward_inputs[0], upstream_grad},
-//                               absl::MakeSpan(matmul_outputs), name.c_str(),
-//                               /*transpose_a = */ true,
-//                               /*transpose_b = */ false));
-
-//     (*grad_outputs)[1] = matmul_outputs[0];
-
-//     counter += 1;  // update counter for names
-//     return Status::OK();
-//   }
-//   ~MatMulGradientFunction() override {}
-
-//  private:
-//   long counter;
-//   std::vector<AbstractTensorHandle*> forward_inputs;
-// };
-
-// class ReluGradientFunction : public GradientFunction {
-//  public:
-//   explicit ReluGradientFunction(std::vector<AbstractTensorHandle*> f_inputs)
-//       : forward_inputs(f_inputs) {}
-
-//   Status Compute(Context* ctx,
-//                  absl::Span<AbstractTensorHandle* const> grad_inputs,
-//                  std::vector<AbstractTensorHandle*>* grad_outputs) override {
-//     AbstractTensorHandle* upstream_grad = grad_inputs[0];
-//     AbstractTensorHandle* input_features = forward_inputs[0];
-//     grad_outputs->resize(1);
-//     std::vector<AbstractTensorHandle*> relugrad_outputs(1);
-
-//     // Calculate Grad
-//     std::string name = "relu_grad" + std::to_string(counter);
-
-//     TF_RETURN_IF_ERROR(ReluGrad(ctx->ctx, {upstream_grad, input_features},
-//                                 absl::MakeSpan(relugrad_outputs),
-//                                 name.c_str()));
-
-//     (*grad_outputs)[0] = relugrad_outputs[0];
-
-//     counter += 1;
-//     return Status::OK();
-//   }
-//   ~ReluGradientFunction() override {}
-
-//  private:
-//   long counter;
-//   std::vector<AbstractTensorHandle*> forward_inputs;
-// };
-
-// class SparseSoftmaxCrossEntropyLossGradientFunction : public GradientFunction {
-//  public:
-//   explicit SparseSoftmaxCrossEntropyLossGradientFunction(
-//       std::vector<AbstractTensorHandle*> f_inputs,
-//       std::vector<AbstractTensorHandle*> f_outputs)
-//       : forward_inputs(f_inputs), forward_outputs(f_outputs) {}
-
-//   Status Compute(Context* ctx,
-//                  absl::Span<AbstractTensorHandle* const> grad_inputs,
-//                  std::vector<AbstractTensorHandle*>* grad_outputs) override {
-//     // Forward Inputs : [scores, labels]
-
-//     grad_outputs->resize(2);
-//     std::vector<AbstractTensorHandle*> sm_outputs(2);
-
-//     // Calculate Grad
-//     std::string name = "sm_loss" + std::to_string(counter);
-
-//     TF_RETURN_IF_ERROR(SparseSoftmaxCrossEntropyLoss(
-//         ctx->ctx, {forward_inputs[0], forward_inputs[1]},
-//         absl::MakeSpan(sm_outputs), name.c_str()));
-
-//     // TODO(amturati): fix error where we have to return the softmax loss as the
-//     // 2nd grad for the labels to avoid mangled stack trace. Also avoid running
-//     // forward operation again, check to see if forward_outputs are being
-//     // passed.
-
-//     // SparseSoftmaxCrossEntropyLoss returns [loss_vals, grads], so return 2nd
-//     // output.
-//     (*grad_outputs)[0] = sm_outputs[1];  // return backprop for scores
-//     (*grad_outputs)[1] = sm_outputs[0];  // nullptr causes Mangled Stack Trace
-
-//     counter += 1;
-//     return Status::OK();
-//   }
-//   ~SparseSoftmaxCrossEntropyLossGradientFunction() override {}
-
-//  private:
-//   long counter;
-//   std::vector<AbstractTensorHandle*> forward_inputs;
-//   std::vector<AbstractTensorHandle*> forward_outputs;
-// };
-
-// }  // namespace
-
 #include "tensorflow/c/experimental/gradients/math_grad.h"
 
 #include "tensorflow/c/eager/abstract_tensor_handle.h"
@@ -236,7 +60,7 @@
   ~AddGradientFunction() override {}
 
  private:
-  long counter;
+  int64_t counter = 0;
 };
 
 class ExpGradientFunction : public GradientFunction {
@@ -246,25 +70,29 @@
   }
   Status Compute(Context* ctx, const IncomingGradients& grad_inputs,
                  vector<AbstractTensorHandle*>* grad_outputs) override {
-    vector<AbstractTensorHandle*> conj_outputs(1);
+    std::vector<AbstractTensorHandle*> conj_outputs(1);
+    std::string name = "Conj_Exp_Grad_" + std::to_string(counter);
     TF_RETURN_IF_ERROR(
-        Conj(ctx->ctx, {exp_.get()}, absl::MakeSpan(conj_outputs), "ExpConj"));
+        Conj(ctx->ctx, {exp_.get()}, absl::MakeSpan(conj_outputs), name.c_str()));
     AbstractTensorHandlePtr conj_output_releaser(conj_outputs[0]);
     grad_outputs->resize(1);
+
+    name = "Mul_Exp_Grad_" + std::to_string(counter);
     TF_RETURN_IF_ERROR(Mul(ctx->ctx, {conj_outputs[0], grad_inputs[0]},
-                           absl::MakeSpan(*grad_outputs), "ExpGradMul"));
+                           absl::MakeSpan(*grad_outputs), name.c_str()));
     return Status::OK();
   }
   ~ExpGradientFunction() override {}
 
  private:
+  int64_t counter = 0;
   AbstractTensorHandlePtr exp_;
 };
 
 class MatMulGradientFunction : public GradientFunction {
  public:
-  explicit MatMulGradientFunction(std::vector<AbstractTensorHandle*> f_inputs)
-      : forward_inputs(f_inputs) {}
+  explicit MatMulGradientFunction(std::vector<AbstractTensorHandle*> f_inputs/*, AttrBuilder f_attrs*/)
+      : forward_inputs(f_inputs)/*, attrs(f_attrs)*/ {}
 
   Status Compute(Context* ctx,
                  absl::Span<AbstractTensorHandle* const> grad_inputs,
@@ -279,25 +107,85 @@
 
     AbstractTensorHandle* upstream_grad = grad_inputs[0];
     grad_outputs->resize(2);
-    std::vector<AbstractTensorHandle*> matmul_outputs(1);
+
+    // // Get transpose attrs
+    // bool t_a;
+    // attrs.Get("transpose_a", &t_a);
 
-    // Gradient for A
-    std::string name = "mm_A_" + std::to_string(counter);
-    TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {upstream_grad, forward_inputs[1]},
-                              absl::MakeSpan(matmul_outputs), name.c_str(),
+    // bool t_b;
+    // attrs.Get("transpose_b", &t_b);
+
+    // Conjugate the forward inputs.
+    std::vector<AbstractTensorHandle*> conj_outputs(1);
+    std::string name = "Conj_A_MatMul_Grad_" + std::to_string(counter);
+    TF_RETURN_IF_ERROR(
+        Conj(ctx->ctx, {forward_inputs[0]}, absl::MakeSpan(conj_outputs), name.c_str()));
+
+    AbstractTensorHandle* A = conj_outputs[0];
+
+    name = "Conj_B_MatMul_Grad_" + std::to_string(counter);
+    TF_RETURN_IF_ERROR(
+        Conj(ctx->ctx, {forward_inputs[1]}, absl::MakeSpan(conj_outputs), name.c_str()));
+
+    AbstractTensorHandle* B = conj_outputs[0];
+
+    // Gradients: dA = U * B^T, dB = A^T * U  (U = upstream grad).
+    std::vector<AbstractTensorHandle*> matmul_A_outputs(1);
+    std::vector<AbstractTensorHandle*> matmul_B_outputs(1);
+    std::string name_grad_A = "MatMul_Grad_A_" + std::to_string(counter);
+    std::string name_grad_B = "MatMul_Grad_B_" + std::to_string(counter);
+    //if(!t_a && !t_b) {
+      TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {upstream_grad, B},
+                              absl::MakeSpan(matmul_A_outputs), name_grad_A.c_str(),
                               /*transpose_a = */ false,
                               /*transpose_b = */ true));
-
-    (*grad_outputs)[0] = matmul_outputs[0];
-
-    // Gradient for B
-    name = "mm_B_" + std::to_string(counter);
-    TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {forward_inputs[0], upstream_grad},
-                              absl::MakeSpan(matmul_outputs), name.c_str(),
+
+      TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {A, upstream_grad},
+                              absl::MakeSpan(matmul_B_outputs), name_grad_B.c_str(),
                               /*transpose_a = */ true,
                               /*transpose_b = */ false));
+    // }
+    // else if(!t_a && t_b) {
+    //   TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {upstream_grad, B},
+    //                             absl::MakeSpan(matmul_A_outputs), name_grad_A.c_str(),
+    //                             /*transpose_a = */ false,
+    //                             /*transpose_b = */ false));
+
+    //   TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {upstream_grad, A},
+    //                             absl::MakeSpan(matmul_B_outputs), name_grad_B.c_str(),
+    //                             /*transpose_a = */ true,
+    //                             /*transpose_b = */ false));
 
-    (*grad_outputs)[1] = matmul_outputs[0];
+    // }
+    // else if(t_a && !t_b)  {
+    //   TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {B, upstream_grad},
+    //                             absl::MakeSpan(matmul_A_outputs), name_grad_A.c_str(),
+    //                             /*transpose_a = */ false,
+    //                             /*transpose_b = */ true));
+
+    //   TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {A, upstream_grad},
+    //                             absl::MakeSpan(matmul_B_outputs), name_grad_B.c_str(),
+    //                             /*transpose_a = */ false,
+    //                             /*transpose_b = */ false));
+    // }
+    // else { // t_a && t_b 
+    //   TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {B, upstream_grad},
+    //                             absl::MakeSpan(matmul_A_outputs), name_grad_A.c_str(),
+    //                             /*transpose_a = */ true,
+    //                             /*transpose_b = */ true));
+
+    //   TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {upstream_grad, A},
+    //                             absl::MakeSpan(matmul_B_outputs), name_grad_B.c_str(),
+    //                             /*transpose_a = */ true,
+    //                             /*transpose_b = */ true));
+    // }
+
+    // Gradient for A
+    (*grad_outputs)[0] = matmul_A_outputs[0];
+
+    // Gradient for B
+    (*grad_outputs)[1] = matmul_B_outputs[0];
 
     counter += 1;  // update counter for names
     return Status::OK();
@@ -305,8 +193,9 @@
   ~MatMulGradientFunction() override {}
 
  private:
-  long counter;
+  int64_t counter = 0;
   std::vector<AbstractTensorHandle*> forward_inputs;
+  // AttrBuilder attrs;
 };
 
 class ReluGradientFunction : public GradientFunction {
@@ -337,12 +226,11 @@
   ~ReluGradientFunction() override {}
 
  private:
-  long counter;
+  int64_t counter = 0;
   std::vector<AbstractTensorHandle*> forward_outputs;
 };
 
 
-// FIX ZEROSLIKE
 class SparseSoftmaxCrossEntropyLossGradientFunction : public GradientFunction {
  public:
   explicit SparseSoftmaxCrossEntropyLossGradientFunction(
@@ -355,19 +243,23 @@
                  std::vector<AbstractTensorHandle*>* grad_outputs) override {
   
     grad_outputs->resize(2);
-    std::string name = "Identity_Softmax_Grad_A_" + std::to_string(counter);
-    std::vector<AbstractTensorHandle*> id_outputs(1);
-    TF_RETURN_IF_ERROR(ops::Identity(ctx->ctx, {forward_outputs[1]},
-                                     absl::MakeSpan(id_outputs),
-                                     name.c_str()));
-    (*grad_outputs)[0] = id_outputs[0];
 
+    // Grad for Softmax Input 
+    std::string name = "Mul_Softmax_Grad_" + std::to_string(counter);
+    std::vector<AbstractTensorHandle*> mul_outputs(1);
+    TF_RETURN_IF_ERROR(ops::Mul(ctx->ctx, {grad_inputs[0], forward_outputs[1]},
+                                     absl::MakeSpan(mul_outputs),
+                                     name.c_str())); // upstream_grad * local softmax grad
+    (*grad_outputs)[0] = mul_outputs[0];
+
+    // Grad for labels
     // TODO(amturati): check to see if ZerosLike is ok instead of nullptr
     name = "Zeros_Softmax_Grad_" + std::to_string(counter);
+    std::vector<AbstractTensorHandle*> z_outputs(1);
     TF_RETURN_IF_ERROR(ops::ZerosLike(ctx->ctx, {forward_inputs[1]},
-                                     absl::MakeSpan(id_outputs),
+                                     absl::MakeSpan(z_outputs),
                                      name.c_str()));
-    (*grad_outputs)[1] = id_outputs[0];  // nullptr causes Mangled Stack Trace
+    (*grad_outputs)[1] = z_outputs[0];  // nullptr causes Mangled Stack Trace
 
     counter += 1;
     return Status::OK();
@@ -375,7 +267,7 @@
   ~SparseSoftmaxCrossEntropyLossGradientFunction() override {}
 
  private:
-  long counter;
+  int64_t counter = 0;
   std::vector<AbstractTensorHandle*> forward_inputs;
   std::vector<AbstractTensorHandle*> forward_outputs;
 };
@@ -401,7 +293,7 @@
 }
 
 GradientFunction* MatMulRegisterer(const ForwardOperation& op) {
-  return new MatMulGradientFunction(op.inputs);
+  return new MatMulGradientFunction(op.inputs/*, op.attrs*/);
 }
 
 GradientFunction* ReluRegisterer(const ForwardOperation& op) {
diff --git a/tensorflow/c/experimental/ops/math_ops.cc b/tensorflow/c/experimental/ops/math_ops.cc
index cb63db6..4f408ea 100644
--- a/tensorflow/c/experimental/ops/math_ops.cc
+++ b/tensorflow/c/experimental/ops/math_ops.cc
@@ -73,7 +73,7 @@
 Status MatMul(AbstractContext* ctx,
               absl::Span<AbstractTensorHandle* const> inputs,
               absl::Span<AbstractTensorHandle*> outputs, const char* name,
-              bool transpose_a, bool transpose_b) {
+              bool transpose_a = false, bool transpose_b = false) {
   AbstractOperationPtr matmul_op(ctx->CreateOperation());
   TF_RETURN_IF_ERROR(matmul_op->Reset("MatMul", /*raw_device_name=*/nullptr));
 
@@ -93,5 +93,19 @@
   return Status::OK();
 }
 
+Status Neg(AbstractContext* ctx, absl::Span<AbstractTensorHandle* const> inputs,
+           absl::Span<AbstractTensorHandle*> outputs, const char* name) {
+  AbstractOperationPtr neg_op(ctx->CreateOperation());
+  TF_RETURN_IF_ERROR(neg_op->Reset("Neg", /*raw_device_name=*/nullptr));
+  if (isa<TracingOperation>(neg_op.get())) {
+    TF_RETURN_IF_ERROR(
+        dyn_cast<TracingOperation>(neg_op.get())->SetOpName(name));
+  }
+  TF_RETURN_IF_ERROR(neg_op->AddInput(inputs[0]));
+
+  int num_retvals = 1;
+  return neg_op->Execute(outputs, &num_retvals);
+}
+
 }  // namespace ops
 }  // namespace tensorflow
diff --git a/tensorflow/c/experimental/ops/math_ops.h b/tensorflow/c/experimental/ops/math_ops.h
index 8f0f9f5..ed1e6c5 100644
--- a/tensorflow/c/experimental/ops/math_ops.h
+++ b/tensorflow/c/experimental/ops/math_ops.h
@@ -31,6 +31,8 @@
               absl::Span<AbstractTensorHandle* const> inputs,
               absl::Span<AbstractTensorHandle*> outputs, const char* name,
               bool transpose_a, bool transpose_b);
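+// Computes the element-wise negation of inputs[0].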
+Status Neg(AbstractContext* ctx, absl::Span<AbstractTensorHandle* const> inputs,
+           absl::Span<AbstractTensorHandle*> outputs, const char* name);
 
 }  // namespace ops
 }  // namespace tensorflow