Add int16 support for tf.math.minimum and tf.math.maximum

I was working on lossless audio data, which is primarily int16 PCM data.
When trying to do a simple minimum, to my surprise I noticed that int16
is not supported:
```
>>> x = tf.constant(1, tf.int16)
>>> y = tf.constant(2, tf.int16)
>>> tf.math.minimum(x, y)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/Library/Python/3.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 5961, in minimum
    _ops.raise_from_not_ok_status(e, name)
  File "/Library/Python/3.7/site-packages/tensorflow/python/framework/ops.py", line 6653, in raise_from_not_ok_status
    six.raise_from(core._status_to_exception(e.code, message), None)
  File "<string>", line 3, in raise_from
tensorflow.python.framework.errors_impl.NotFoundError: Could not find valid device for node.
Node:{{node Minimum}}
All kernels registered for op Minimum :
  device='XLA_CPU_JIT'; T in [DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_BFLOAT16, DT_HALF]
  device='CPU'; T in [DT_FLOAT]
  device='CPU'; T in [DT_HALF]
  device='CPU'; T in [DT_BFLOAT16]
  device='CPU'; T in [DT_DOUBLE]
  device='CPU'; T in [DT_INT32]
  device='CPU'; T in [DT_INT64]
  device='XLA_CPU'; T in [DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_BFLOAT16, DT_HALF]
 [Op:Minimum]
```

As such I used a workaround which looks quite awkward:
```
>>> x = tf.constant(1, tf.int16)
>>> y = tf.constant(2, tf.int16)
>>> z = tf.stack([x, y])
>>> tf.math.reduce_min(z)
<tf.Tensor: shape=(), dtype=int16, numpy=1>
>>>
```

This PR adds int16 support for `tf.math.minimum` and `tf.math.maximum`.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
diff --git a/tensorflow/core/kernels/cwise_op_maximum.cc b/tensorflow/core/kernels/cwise_op_maximum.cc
index 347022c..24d90d1 100644
--- a/tensorflow/core/kernels/cwise_op_maximum.cc
+++ b/tensorflow/core/kernels/cwise_op_maximum.cc
@@ -16,8 +16,8 @@
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER6(BinaryOp, CPU, "Maximum", functor::maximum, float, Eigen::half,
-          bfloat16, double, int32, int64);
+REGISTER7(BinaryOp, CPU, "Maximum", functor::maximum, float, Eigen::half,
+          bfloat16, double, int16, int32, int64);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER4(BinaryOp, GPU, "Maximum", functor::maximum, float, Eigen::half,
           double, int64);
diff --git a/tensorflow/core/kernels/cwise_op_minimum.cc b/tensorflow/core/kernels/cwise_op_minimum.cc
index 15491b8..32315f7 100644
--- a/tensorflow/core/kernels/cwise_op_minimum.cc
+++ b/tensorflow/core/kernels/cwise_op_minimum.cc
@@ -16,8 +16,8 @@
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER6(BinaryOp, CPU, "Minimum", functor::minimum, float, Eigen::half,
-          bfloat16, double, int32, int64);
+REGISTER7(BinaryOp, CPU, "Minimum", functor::minimum, float, Eigen::half,
+          bfloat16, double, int16, int32, int64);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER4(BinaryOp, GPU, "Minimum", functor::minimum, float, Eigen::half,
           double, int64);
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index e441c73..b55772e 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -549,7 +549,7 @@
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
-    .Attr("T: {bfloat16, half, float, double, int32, int64}")
+    .Attr("T: {bfloat16, half, float, double, int16, int32, int64}")
     .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
 // Note: This op is not commutative w.r.t. to all its inputs.
@@ -573,7 +573,7 @@
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
-    .Attr("T: {bfloat16, half, float, double, int32, int64}")
+    .Attr("T: {bfloat16, half, float, double, int16, int32, int64}")
     .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("Mod")