Optimize TF_Output construction when building `tf.Tensor` objects.

Each `tf.Tensor` caches a SWIG-wrapped `TF_Output` object that maps the tensor to the C API's graph. Previously, this object was created and cached lazily in `tf.Tensor` on first use, even though an equivalent `TF_Output` was already being built for every output tensor as a by-product of looking up its type. This change stores that already-built object in each `tf.Tensor` when it is constructed, which decreases the overhead of graph construction.

PiperOrigin-RevId: 265484399
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 5b671cf..ced541c 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -394,6 +394,12 @@
     self._id = uid()
     self._name = None
 
+  @staticmethod
+  def _create_with_tf_output(op, value_index, dtype, tf_output):
+    ret = Tensor(op, value_index, dtype)
+    ret._tf_output = tf_output
+    return ret
+
   @property
   def op(self):
     """The `Operation` that produces this tensor as an output."""
@@ -1774,14 +1780,12 @@
 
     # Initialize self._outputs.
     num_outputs = c_api.TF_OperationNumOutputs(self._c_op)
-    output_types = [
-        c_api.TF_OperationOutputType(c_api_util.tf_output(self._c_op, i))
-        for i in range(num_outputs)
-    ]
-    self._outputs = [
-        Tensor(self, i, output_type)
-        for i, output_type in enumerate(output_types)
-    ]
+    self._outputs = []
+    for i in range(num_outputs):
+      tf_output = c_api_util.tf_output(self._c_op, i)
+      output_type = c_api.TF_OperationOutputType(tf_output)
+      tensor = Tensor._create_with_tf_output(self, i, output_type, tf_output)  # pylint: disable=protected-access
+      self._outputs.append(tensor)
 
     self._graph._add_op(self)  # pylint: disable=protected-access