Remove float16 support from Range and Linspace.
PiperOrigin-RevId: 275572836
Change-Id: I7987a3bdfdefe0e29f50bd50e044cdf4815de6d5
diff --git a/tensorflow/core/framework/ops_util.h b/tensorflow/core/framework/ops_util.h
index 7e4f169..feaab10 100644
--- a/tensorflow/core/framework/ops_util.h
+++ b/tensorflow/core/framework/ops_util.h
@@ -111,44 +111,6 @@
return strides;
}
-namespace internal {
-// Overloads to circumvent the problem that the C++ standard library does not
-// support half and Eigen does not support bfloat16.
-template <typename T>
-inline T tceil(const T& x) {
- return std::ceil(x);
-}
-
-template <>
-inline Eigen::half tceil(const Eigen::half& x) {
- return Eigen::numext::ceil(x);
-}
-} // namespace internal
-
-// Returns the number of elements generated by RangeOp with the given
-// start, limit, and delta arguments and type T.
-template <typename T>
-Status RangeSize(T start, T limit, T delta, int64* size) {
- if (start > limit && delta > T(0)) {
- return errors::InvalidArgument(
- "Requires start <= limit when delta > 0: ", start, "/", limit);
- }
- if (start < limit && delta < T(0)) {
- return errors::InvalidArgument(
- "Requires start >= limit when delta < 0: ", start, "/", limit);
- }
- if (delta == T(0)) {
- return errors::InvalidArgument("Requires delta != 0");
- }
- const T abs_size = delta > T(0) ? limit - start : start - limit;
- const T abs_delta = delta > T(0) ? delta : -delta;
- *size = std::is_integral<T>::value
- ? static_cast<int64>((abs_size + abs_delta - T(1)) / abs_delta)
- : static_cast<int64>(internal::tceil<T>(abs_size / abs_delta));
-
- return Status::OK();
-}
-
} // namespace tensorflow
#endif // TENSORFLOW_CORE_FRAMEWORK_OPS_UTIL_H_
diff --git a/tensorflow/core/kernels/sequence_ops.cc b/tensorflow/core/kernels/sequence_ops.cc
index 4d54c9c..02dcc1e 100644
--- a/tensorflow/core/kernels/sequence_ops.cc
+++ b/tensorflow/core/kernels/sequence_ops.cc
@@ -18,7 +18,6 @@
#include <cmath>
#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/ops_util.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
@@ -49,8 +48,23 @@
const T start = start_in.scalar<T>()();
const T limit = limit_in.scalar<T>()();
const T delta = delta_in.scalar<T>()();
- int64 size;
- OP_REQUIRES_OK(context, RangeSize<T>(start, limit, delta, &size));
+ OP_REQUIRES(context, delta != 0,
+ errors::InvalidArgument("Requires delta != 0: ", delta));
+ if (delta > 0) {
+ OP_REQUIRES(
+ context, start <= limit,
+ errors::InvalidArgument(
+ "Requires start <= limit when delta > 0: ", start, "/", limit));
+ } else {
+ OP_REQUIRES(
+ context, start >= limit,
+ errors::InvalidArgument(
+ "Requires start >= limit when delta < 0: ", start, "/", limit));
+ }
+ int64 size = std::is_integral<T>::value  // per-arm cast: no double rounding
+ ? static_cast<int64>((std::abs(limit - start) + std::abs(delta) - 1) /
+ std::abs(delta))
+ : static_cast<int64>(std::ceil(std::abs((limit - start) / delta)));
Tensor* out = nullptr;
OP_REQUIRES_OK(context,
context->allocate_output(0, TensorShape({size}), &out));
@@ -84,7 +98,6 @@
#undef REGISTER_SYCL_KERNEL
#endif // TENSORFLOW_USE_SYCL
-TF_CALL_half(REGISTER_CPU_KERNEL);
TF_CALL_float(REGISTER_CPU_KERNEL);
TF_CALL_double(REGISTER_CPU_KERNEL);
TF_CALL_int32(REGISTER_CPU_KERNEL);
@@ -92,7 +105,6 @@
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-TF_CALL_half(REGISTER_GPU_KERNEL);
TF_CALL_float(REGISTER_GPU_KERNEL);
TF_CALL_double(REGISTER_GPU_KERNEL);
TF_CALL_int32(REGISTER_GPU_KERNEL);
@@ -134,8 +146,7 @@
flat(0) = start;
if (num > 1) {
const T step = (stop - start) / (num - 1);
- for (Tnum i = 1; i < num - 1; ++i)
- flat(i) = start + step * static_cast<T>(i);
+ for (Tnum i = 1; i < num - 1; ++i) flat(i) = start + step * i;
// Ensure final value == stop; float arithmetic won't guarantee this.
flat(num - 1) = stop;
}
@@ -158,14 +169,12 @@
REGISTER_KERNEL(dev, T, int64)
#define REGISTER_CPU_KERNEL(T) REGISTER_KERNEL_ALL_NUMS(DEVICE_CPU, T)
-TF_CALL_half(REGISTER_CPU_KERNEL);
TF_CALL_float(REGISTER_CPU_KERNEL);
TF_CALL_double(REGISTER_CPU_KERNEL);
// NOTE(touts): We register the op on GPU but it still runs on CPU
// because its inputs and outputs are tagged as HostMemory.
#define REGISTER_GPU_KERNEL(T) REGISTER_KERNEL_ALL_NUMS(DEVICE_GPU, T)
-TF_CALL_half(REGISTER_GPU_KERNEL);
TF_CALL_float(REGISTER_GPU_KERNEL);
TF_CALL_double(REGISTER_GPU_KERNEL);
#undef REGISTER_GPU_KERNEL
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 0654f15..7d6aedb 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -16,7 +16,6 @@
#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/ops_util.h"
#include "tensorflow/core/framework/shape_inference.h"
namespace tensorflow {
@@ -1390,14 +1389,27 @@
namespace {
template <typename T>
-Status SetRangeSizeFromTensors(const Tensor& start_t, const Tensor& limit_t,
- const Tensor& delta_t,
- InferenceContext* const c) {
- const T start = start_t.scalar<T>()();
- const T limit = limit_t.scalar<T>()();
- const T delta = delta_t.scalar<T>()();
- int64 size;
- TF_RETURN_IF_ERROR(RangeSize(start, limit, delta, &size));
+Status RangeSize(const Tensor* start_t, const Tensor* limit_t,
+ const Tensor* delta_t, InferenceContext* const c) {
+ T start = start_t->scalar<T>()();
+ T limit = limit_t->scalar<T>()();
+ T delta = delta_t->scalar<T>()();
+ if (start > limit && delta > T(0)) {
+ return errors::InvalidArgument(
+ "Requires start <= limit when delta > 0: ", start, "/", limit);
+ }
+ if (start < limit && delta < T(0)) {
+ return errors::InvalidArgument(
+ "Requires start >= limit when delta < 0: ", start, "/", limit);
+ }
+ if (delta == T(0)) {
+ return errors::InvalidArgument("Requires delta != 0");
+ }
+ // Cast each arm itself; a bare ?: would round integer sizes through double.
+ const int64 size = std::is_integral<T>::value
+ ? static_cast<int64>((std::abs(limit - start) + std::abs(delta) - T(1)) /
+ std::abs(delta))
+ : static_cast<int64>(std::ceil(std::abs((limit - start) / delta)));
c->set_output(0, c->Vector(static_cast<int64>(size)));
return Status::OK();
}
@@ -1409,7 +1421,7 @@
.Input("limit: Tidx")
.Input("delta: Tidx")
.Output("output: Tidx")
- .Attr("Tidx: {bfloat16, half, float, double, int32, int64} = DT_INT32")
+ .Attr("Tidx: {bfloat16, float, double, int32, int64} = DT_INT32")
.SetShapeFn([](InferenceContext* c) {
ShapeHandle unused;
TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithRank(c->input(0), 0, &unused),
@@ -1428,19 +1440,15 @@
return Status::OK();
}
if (dtype == DT_INT32) {
- return SetRangeSizeFromTensors<int32>(*start_t, *limit_t, *delta_t, c);
+ return RangeSize<int32>(start_t, limit_t, delta_t, c);
} else if (dtype == DT_INT64) {
- return SetRangeSizeFromTensors<int64>(*start_t, *limit_t, *delta_t, c);
+ return RangeSize<int64>(start_t, limit_t, delta_t, c);
} else if (dtype == DT_FLOAT) {
- return SetRangeSizeFromTensors<float>(*start_t, *limit_t, *delta_t, c);
+ return RangeSize<float>(start_t, limit_t, delta_t, c);
} else if (dtype == DT_DOUBLE) {
- return SetRangeSizeFromTensors<double>(*start_t, *limit_t, *delta_t, c);
+ return RangeSize<double>(start_t, limit_t, delta_t, c);
} else if (dtype == DT_BFLOAT16) {
- return SetRangeSizeFromTensors<bfloat16>(*start_t, *limit_t, *delta_t,
- c);
- } else if (dtype == DT_HALF) {
- return SetRangeSizeFromTensors<Eigen::half>(*start_t, *limit_t,
- *delta_t, c);
+ return RangeSize<bfloat16>(start_t, limit_t, delta_t, c);
} else {
return errors::InvalidArgument("Unsupported dtype", dtype);
}
@@ -1452,7 +1460,7 @@
.Input("stop: T")
.Input("num: Tidx")
.Output("output: T")
- .Attr("T: {bfloat16, half, float, double}")
+ .Attr("T: {bfloat16, float, double}")
.Attr("Tidx: {int32, int64} = DT_INT32")
.SetShapeFn([](InferenceContext* c) {
ShapeHandle unused;
diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py
index c2d1ce7..3822b4b 100644
--- a/tensorflow/python/kernel_tests/init_ops_test.py
+++ b/tensorflow/python/kernel_tests/init_ops_test.py
@@ -457,61 +457,63 @@
# TODO(vrv): move to sequence_ops_test?
class RangeTest(test.TestCase):
- def _Range(self, start, limit=None, delta=1, expected=None):
- expected = expected or []
- for dtype in [np.int32, np.int64, np.float32, np.float64, np.float16]:
- with self.session(use_gpu=True):
- cast_start = math_ops.cast(start, dtype=dtype)
- if limit is None:
- tf_ans = math_ops.range(cast_start, dtype=dtype, name="range")
- else:
- if dtype in [np.int32, np.int64] and np.floor(delta) != delta:
- continue
- cast_limit = math_ops.cast(limit, dtype=dtype)
- cast_delta = math_ops.cast(delta, dtype=dtype)
- tf_ans = math_ops.range(
- cast_start, cast_limit, cast_delta, name="range")
- tf_val = self.evaluate(tf_ans)
- self.assertAllClose(tf_val, np.array(expected, dtype=dtype))
+ def _Range(self, start, limit, delta):
+ with self.cached_session(use_gpu=True):
+ tf_ans = math_ops.range(start, limit, delta, name="range")
+ self.assertEqual([len(np.arange(start, limit, delta))],
+ tf_ans.get_shape())
+ return self.evaluate(tf_ans)
def testBasic(self):
- self._Range(0, 5, 1, [0, 1, 2, 3, 4])
- self._Range(0, 5, 2, [0, 2, 4])
- self._Range(0, 6, 2, [0, 2, 4])
- self._Range(13, 32, 7, [13, 20, 27])
- self._Range(100, 500, 100, [100, 200, 300, 400])
+ self.assertTrue(
+ np.array_equal(self._Range(0, 5, 1), np.array([0, 1, 2, 3, 4])))
+ self.assertTrue(np.array_equal(self._Range(0, 5, 2), np.array([0, 2, 4])))
+ self.assertTrue(np.array_equal(self._Range(0, 6, 2), np.array([0, 2, 4])))
+ self.assertTrue(
+ np.array_equal(self._Range(13, 32, 7), np.array([13, 20, 27])))
+ self.assertTrue(
+ np.array_equal(
+ self._Range(100, 500, 100), np.array([100, 200, 300, 400])))
+ self.assertEqual(math_ops.range(0, 5, 1).dtype, dtypes.int32)
@test_util.run_deprecated_v1
def testLimitOnly(self):
- self._Range(5, expected=[0, 1, 2, 3, 4])
+ with self.session(use_gpu=True):
+ self.assertAllEqual(np.arange(5), math_ops.range(5).eval())
def testEmpty(self):
for start in 0, 5:
- self._Range(start, start, 1, [])
+ self.assertTrue(np.array_equal(self._Range(start, start, 1), []))
- def testNonIntegerDelta(self):
- self._Range(0, 2, 0.5, [0, 0.5, 1, 1.5])
- self._Range(0, 5, 2.5, [0, 2.5])
- self._Range(0, 3, 0.9, [0, 0.9, 1.8, 2.7])
- self._Range(100., 500., 100., [100, 200, 300, 400])
+ def testNonInteger(self):
+ self.assertTrue(
+ np.allclose(self._Range(0, 2, 0.5), np.array([0, 0.5, 1, 1.5])))
+ self.assertTrue(np.allclose(self._Range(0, 5, 2.5), np.array([0, 2.5])))
+ self.assertTrue(
+ np.allclose(self._Range(0, 3, 0.9), np.array([0, 0.9, 1.8, 2.7])))
+ self.assertTrue(
+ np.allclose(
+ self._Range(100., 500., 100.), np.array([100, 200, 300, 400])))
+ self.assertEqual(math_ops.range(0., 5., 1.).dtype, dtypes.float32)
def testNegativeDelta(self):
- self._Range(5, -1, -1, [5, 4, 3, 2, 1, 0])
- self._Range(2.5, 0, -0.5, [2.5, 2, 1.5, 1, 0.5])
- self._Range(-5, -10, -3, [-5, -8])
+ self.assertTrue(
+ np.array_equal(self._Range(5, -1, -1), np.array([5, 4, 3, 2, 1, 0])))
+ self.assertTrue(
+ np.allclose(self._Range(2.5, 0, -0.5), np.array([2.5, 2, 1.5, 1, 0.5])))
+ self.assertTrue(
+ np.array_equal(self._Range(-5, -10, -3), np.array([-5, -8])))
def testDType(self):
zero_int32 = math_ops.cast(0, dtypes.int32)
zero_int64 = math_ops.cast(0, dtypes.int64)
zero_float32 = math_ops.cast(0, dtypes.float32)
zero_float64 = math_ops.cast(0, dtypes.float64)
- zero_half = math_ops.cast(0, dtypes.half)
self.assertEqual(math_ops.range(zero_int32, 0, 1).dtype, dtypes.int32)
self.assertEqual(math_ops.range(zero_int64, 0, 1).dtype, dtypes.int64)
self.assertEqual(math_ops.range(zero_float32, 0, 1).dtype, dtypes.float32)
self.assertEqual(math_ops.range(zero_float64, 0, 1).dtype, dtypes.float64)
- self.assertEqual(math_ops.range(zero_half, 0, 1).dtype, dtypes.half)
self.assertEqual(
math_ops.range(zero_int32, zero_int64, 1).dtype, dtypes.int64)
@@ -545,52 +547,66 @@
else:
return [False]
- def _test_linspace(self, start, stop, num, expected):
- for idx_type in [np.int32, np.int64]:
- for dtype in [np.float32, np.float64, np.float16]:
- with ops.Graph().as_default() as graph:
- with self.session(graph=graph, force_gpu=self.force_gpu):
- cast_start = math_ops.cast(start, dtype=dtype)
- cast_stop = math_ops.cast(stop, dtype=dtype)
- cast_num = math_ops.cast(num, dtype=idx_type)
- tf_ans = math_ops.linspace(
- cast_start, cast_stop, cast_num, name="linspace")
- self.assertEqual([num], tf_ans.get_shape())
- tf_val = self.evaluate(tf_ans).astype(dtype)
- cast_expected = np.array(expected, dtype=dtype)
- tol = 1e-3 if dtype == np.float16 else 1e-6
- self.assertAllClose(tf_val, cast_expected, rtol=tol)
- # Endpoints should be exact.
- self.assertEqual(tf_val[0], cast_expected[0])
- self.assertEqual(tf_val[-1], cast_expected[-1])
+ def _LinSpace(self, start, stop, num):
+ with ops.Graph().as_default() as graph:
+ with self.session(graph=graph, force_gpu=self.force_gpu):
+ tf_ans = math_ops.linspace(start, stop, num, name="linspace")
+ self.assertEqual([num], tf_ans.get_shape())
+ return self.evaluate(tf_ans)
def testPositive(self):
for self.force_gpu in self._gpu_modes():
- self._test_linspace(1., 5., 1, [1.])
- self._test_linspace(1., 5., 2, [1., 5.])
- self._test_linspace(1., 5., 3, [1., 3., 5.])
- self._test_linspace(1., 5., 4, [1., 7. / 3., 11. / 3., 5.])
+ self.assertArrayNear(self._LinSpace(1., 5., 1), np.array([1.]), 1e-5)
+ self.assertArrayNear(self._LinSpace(1., 5., 2), np.array([1., 5.]), 1e-5)
+ self.assertArrayNear(
+ self._LinSpace(1., 5., 3), np.array([1., 3., 5.]), 1e-5)
+ self.assertArrayNear(
+ self._LinSpace(1., 5., 4), np.array([1., 7. / 3., 11. / 3., 5.]),
+ 1e-5)
def testNegative(self):
for self.force_gpu in self._gpu_modes():
- self._test_linspace(-1., -5., 1, [-1.])
- self._test_linspace(-1., -5., 2, [-1., -5.])
- self._test_linspace(-1., -5., 3, [-1., -3., -5.])
- self._test_linspace(-1., -5., 4, [-1., -7. / 3., -11. / 3., -5.])
+ self.assertArrayNear(self._LinSpace(-1., -5., 1), np.array([-1.]), 1e-5)
+ self.assertArrayNear(
+ self._LinSpace(-1., -5., 2), np.array([-1., -5.]), 1e-5)
+ self.assertArrayNear(
+ self._LinSpace(-1., -5., 3), np.array([-1., -3., -5.]), 1e-5)
+ self.assertArrayNear(
+ self._LinSpace(-1., -5., 4),
+ np.array([-1., -7. / 3., -11. / 3., -5.]), 1e-5)
def testNegativeToPositive(self):
for self.force_gpu in self._gpu_modes():
- self._test_linspace(-1., 5., 1, [-1.])
- self._test_linspace(-1., 5., 2, [-1., 5.])
- self._test_linspace(-1., 5., 3, [-1., 2., 5.])
- self._test_linspace(-1., 5., 4, [-1., 1., 3., 5.])
+ self.assertArrayNear(self._LinSpace(-1., 5., 1), np.array([-1.]), 1e-5)
+ self.assertArrayNear(
+ self._LinSpace(-1., 5., 2), np.array([-1., 5.]), 1e-5)
+ self.assertArrayNear(
+ self._LinSpace(-1., 5., 3), np.array([-1., 2., 5.]), 1e-5)
+ self.assertArrayNear(
+ self._LinSpace(-1., 5., 4), np.array([-1., 1., 3., 5.]), 1e-5)
def testPoint(self):
for self.force_gpu in self._gpu_modes():
- self._test_linspace(5., 5., 1, [5.])
- self._test_linspace(5., 5., 2, [5.] * 2)
- self._test_linspace(5., 5., 3, [5.] * 3)
- self._test_linspace(5., 5., 4, [5.] * 4)
+ self.assertArrayNear(self._LinSpace(5., 5., 1), np.array([5.]), 1e-5)
+ self.assertArrayNear(self._LinSpace(5., 5., 2), np.array([5.] * 2), 1e-5)
+ self.assertArrayNear(self._LinSpace(5., 5., 3), np.array([5.] * 3), 1e-5)
+ self.assertArrayNear(self._LinSpace(5., 5., 4), np.array([5.] * 4), 1e-5)
+
+ def testEndpointsAreExact(self):
+ for self.force_gpu in self._gpu_modes():
+ # Test some cases that produce last values not equal to "stop" when
+ # computed via start + (num - 1) * ((stop - start) / (num - 1)), since
+ # float arithmetic will introduce error through precision loss.
+ self.assertAllEqual(
+ self._LinSpace(0., 1., 42)[[0, -1]], np.array([0., 1.], np.float32))
+ self.assertAllEqual(
+ self._LinSpace(-1., 0., 42)[[0, -1]], np.array([-1., 0.], np.float32))
+ self.assertAllEqual(
+ self._LinSpace(.1, .2, 4)[[0, -1]], np.array([.1, .2], np.float32))
+ # Check a case for float64 error too.
+ self.assertAllEqual(
+ self._LinSpace(np.array(0., np.float64), .1, 12)[[0, -1]],
+ np.array([0., .1], np.float64))
class DeviceTest(test.TestCase):
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index 547e8ba..e80514a 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -1432,8 +1432,7 @@
# infer dtype if not explicitly provided
if dtype is None:
dtype_hierarchy = [
- dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64,
- dtypes.half
+ dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64
]
assert all(arg.dtype in dtype_hierarchy for arg in [start, limit, delta])
inferred_dtype = max([arg.dtype for arg in [start, limit, delta]],