Speed up creation of tensors from compressed TensorProtos by 2-3x.

This should speed up some TF models optimized by Grappler in particular, since Grappler tries to compress all constants in a graph.

Run on XXXXX (72 X 2991 MHz CPUs); 2019-09-13T15:55:01.194485871-07:00
CPU: Intel Skylake Xeon with HyperThreading (36 cores) dL1:32KB dL2:1024KB dL3:24MB

Benchmark                           Base (ns)   New (ns)  Improvement
------------------------------------------------------------------
BM_FromProto/512                          114        116        -1.8%
BM_FromProto/4k                           692        671        +3.0%
BM_FromProto/32k                         8675       8713        -0.4%
BM_FromProto/256k                      183931     184131        -0.1%
BM_FromProto/1M                        640952     638278        +0.4%
BM_FromProtoCompressed/512                215        118       +45.1%
BM_FromProtoCompressed/4k                1283        490       +61.8%
BM_FromProtoCompressed/32k              14115       8324       +41.0%
BM_FromProtoCompressed/256k             76930      32191       +58.2%
BM_FromProtoCompressed/1M              326284     170167       +47.8%
BM_FromProtoCompressedZero/512            215        119       +44.7%
BM_FromProtoCompressedZero/4k            1302        490       +62.4%
BM_FromProtoCompressedZero/32k          14333       8160       +43.1%
BM_FromProtoCompressedZero/256k         77032      32110       +58.3%
BM_FromProtoCompressedZero/1M          329943     171449       +48.0%

PiperOrigin-RevId: 269027674

commit: 4bd8a4270638196debca7a139e748bdd1157560f [log] [tgz]
author: A. Unique TensorFlower <gardener@tensorflow.org> Fri Sep 13 19:39:38 2019 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> Fri Sep 13 22:53:10 2019 -0700
tree: bada99bb4e66cd997810d0573a2db07f21d19f7c
parent: df1b3b396bbf7dbb831f0a9066847eb7c08f3d04 [diff]
diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc
index b91c3f6..799945f 100644
--- a/tensorflow/core/framework/tensor.cc
+++ b/tensorflow/core/framework/tensor.cc

@@ -514,8 +514,13 @@
       std::copy_n(begin, n, data);
     } else {
       std::copy_n(begin, in_n, data);
-      const T& last = *(data + in_n - 1);
-      std::fill_n(data + in_n, n - in_n, last);
+      if (std::is_trivially_copyable<T>::value) {
+        const T last = *(data + in_n - 1);
+        std::fill_n(data + in_n, n - in_n, last);
+      } else {
+        const T& last = *(data + in_n - 1);
+        std::fill_n(data + in_n, n - in_n, last);
+      }
     }
   }
 
@@ -648,14 +653,14 @@
 }
 
 void Tensor::CheckType(DataType expected_dtype) const {
-  CHECK_EQ(dtype(), expected_dtype) << " "
-      << DataTypeString(expected_dtype) << " expected, got "
+  CHECK_EQ(dtype(), expected_dtype)
+      << " " << DataTypeString(expected_dtype) << " expected, got "
       << DataTypeString(dtype());
 }
 
 void Tensor::CheckTypeAndIsAligned(DataType expected_dtype) const {
-  CHECK_EQ(dtype(), expected_dtype) << " "
-      << DataTypeString(expected_dtype) << " expected, got "
+  CHECK_EQ(dtype(), expected_dtype)
+      << " " << DataTypeString(expected_dtype) << " expected, got "
       << DataTypeString(dtype());
   CHECK(IsAligned()) << "ptr = " << base<void>();
 }

diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc
index 01a0971..4658216 100644
--- a/tensorflow/core/framework/tensor_test.cc
+++ b/tensorflow/core/framework/tensor_test.cc

@@ -17,6 +17,7 @@
 
 #include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/framework/tensor_util.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/variant.h"
 #include "tensorflow/core/framework/variant_encode_decode.h"
@@ -1518,5 +1519,59 @@
 }
 BENCHMARK(BM_CreateAndDestroyHostScalarOptimized);
 
+// Benchmarks Tensor::FromProto() on an uncompressed DT_FLOAT proto holding
+// `size` elements that are all 42. The proto is built while the timer is
+// stopped, so only the FromProto() calls are measured.
+//
+// iters: number of FromProto() calls to time.
+// size:  number of elements in the 1-D source tensor.
+static void BM_FromProto(int iters, int size) {
+  testing::StopTiming();
+  TensorShape shape({size});
+  Allocator* allocator = cpu_allocator();
+  Tensor a(allocator, DT_FLOAT, shape);
+  // Use a float literal so the fill value matches the element type.
+  std::fill_n(a.flat<float>().data(), size, 42.0f);
+  TensorProto p;
+  a.AsProtoField(&p);
+  testing::StartTiming();
+  // Count up so that exactly `iters` calls are timed, including when iters is
+  // 0 or 1 (a pre-decrement `while (--iters)` loop runs iters - 1 times).
+  for (int i = 0; i < iters; ++i) {
+    Tensor b;
+    ASSERT_TRUE(b.FromProto(p));
+  }
+  testing::StopTiming();
+}
+BENCHMARK(BM_FromProto)->Range(1, 1 << 20);
+
+// Benchmarks Tensor::FromProto() on a DT_FLOAT proto holding `size` elements
+// that are all 42, after the proto has been passed through
+// tensor::CompressTensorProtoInPlace(). The proto is built and compressed
+// while the timer is stopped, so only the FromProto() calls are measured.
+//
+// iters: number of FromProto() calls to time.
+// size:  number of elements in the 1-D source tensor.
+static void BM_FromProtoCompressed(int iters, int size) {
+  testing::StopTiming();
+  TensorShape shape({size});
+  Allocator* allocator = cpu_allocator();
+  Tensor a(allocator, DT_FLOAT, shape);
+  std::fill_n(a.flat<float>().data(), size, 42.0f);
+  TensorProto p;
+  a.AsProtoField(&p);
+  tensor::CompressTensorProtoInPlace(&p);
+  testing::StartTiming();
+  // Count up so that exactly `iters` calls are timed, including when iters is
+  // 0 or 1 (a pre-decrement `while (--iters)` loop runs iters - 1 times).
+  for (int i = 0; i < iters; ++i) {
+    Tensor b;
+    ASSERT_TRUE(b.FromProto(p));
+  }
+  testing::StopTiming();
+}
+BENCHMARK(BM_FromProtoCompressed)->Range(1, 1 << 20);
+
+// Benchmarks Tensor::FromProto() on a DT_FLOAT proto whose source tensor is
+// all zeros except element 0, which is 1, after the proto has been passed
+// through tensor::CompressTensorProtoInPlace(). The proto is built and
+// compressed while the timer is stopped, so only the FromProto() calls are
+// measured.
+//
+// iters: number of FromProto() calls to time.
+// size:  number of elements in the 1-D source tensor.
+static void BM_FromProtoCompressedZero(int iters, int size) {
+  testing::StopTiming();
+  TensorShape shape({size});
+  Allocator* allocator = cpu_allocator();
+  Tensor a(allocator, DT_FLOAT, shape);
+  std::fill_n(a.flat<float>().data(), size, 0);
+  // Make the first element non-zero so the tensor is not uniformly zero.
+  a.flat<float>()(0) = 1;
+  TensorProto p;
+  a.AsProtoField(&p);
+  tensor::CompressTensorProtoInPlace(&p);
+  testing::StartTiming();
+  // Count up so that exactly `iters` calls are timed, including when iters is
+  // 0 or 1 (a pre-decrement `while (--iters)` loop runs iters - 1 times).
+  for (int i = 0; i < iters; ++i) {
+    Tensor b;
+    ASSERT_TRUE(b.FromProto(p));
+  }
+  testing::StopTiming();
+}
+BENCHMARK(BM_FromProtoCompressedZero)->Range(1, 1 << 20);
+
 }  // namespace
 }  // namespace tensorflow

diff --git a/tensorflow/core/framework/tensor_util.cc b/tensorflow/core/framework/tensor_util.cc
index 896d83f..cff33ac 100644
--- a/tensorflow/core/framework/tensor_util.cc
+++ b/tensorflow/core/framework/tensor_util.cc

@@ -243,6 +243,12 @@
     }
     tensor->clear_tensor_content();
   }
+  if (new_num_values == 1) {
+    const T value = TypeHelper::GetValue(0, *tensor);
+    if (value == T()) {
+      TypeHelper::Truncate(0, tensor);
+    }
+  }
   return true;
 }
 
@@ -287,7 +293,8 @@
       last_index = i + 1;
     }
   }
-  const int64 num_truncated_proto_values = last_index + 1;
+  const int64 num_truncated_proto_values =
+      (last_value == T() && last_index == 0) ? 0 : last_index + 1;
   const int64 num_bytes_as_field =
       num_truncated_proto_values * sizeof(FieldType);
   const int64 num_bytes_as_tensor_content = num_tensor_values * sizeof(T);

diff --git a/tensorflow/core/framework/tensor_util_test.cc b/tensorflow/core/framework/tensor_util_test.cc
index fe98801..afb5b15 100644
--- a/tensorflow/core/framework/tensor_util_test.cc
+++ b/tensorflow/core/framework/tensor_util_test.cc

@@ -455,43 +455,83 @@
   EXPECT_FALSE(tensor::CompressTensorProtoInPlace(&tensor_proto));
 }
 
-TEST(TensorProtoUtil, CompressTensorProtoInPlaceAllEqual) {
+TEST(TensorProtoUtil, CompressTensorProtoInPlaceAllZero) {
   const int kLength = 64;
   TensorProto tensor_proto =
       tensor::CreateTensorProto(std::vector<float>(kLength), {kLength});
   EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
   EXPECT_EQ(tensor::internal::TensorProtoHelper<float>::NumValues(tensor_proto),
-            1);
+            0);
 
   tensor_proto =
       tensor::CreateTensorProto(std::vector<int>(kLength), {kLength});
   EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
   EXPECT_EQ(tensor::internal::TensorProtoHelper<int>::NumValues(tensor_proto),
-            1);
+            0);
 
   tensor_proto =
       tensor::CreateTensorProto(std::vector<uint8>(kLength), {kLength});
   EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
   EXPECT_EQ(tensor::internal::TensorProtoHelper<uint8>::NumValues(tensor_proto),
-            1);
+            0);
   tensor_proto =
       tensor::CreateTensorProto(std::vector<bool>(kLength), {kLength});
   EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
   EXPECT_EQ(tensor::internal::TensorProtoHelper<bool>::NumValues(tensor_proto),
-            1);
+            0);
 
   tensor_proto =
       tensor::CreateTensorProto(std::vector<Eigen::half>(kLength), {kLength});
   EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
   EXPECT_EQ(
       tensor::internal::TensorProtoHelper<Eigen::half>::NumValues(tensor_proto),
-      1);
+      0);
 
   tensor_proto = tensor::CreateTensorProto(
       std::vector<std::complex<float>>(kLength), {kLength});
   EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
   EXPECT_EQ(tensor::internal::TensorProtoHelper<std::complex<float>>::NumValues(
                 tensor_proto),
+            0);
+}
+
+TEST(TensorProtoUtil, CompressTensorProtoInPlaceAllOnes) {
+  const int kLength = 64;
+  TensorProto tensor_proto =
+      tensor::CreateTensorProto(std::vector<float>(kLength, 1), {kLength});
+  EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
+  EXPECT_EQ(tensor::internal::TensorProtoHelper<float>::NumValues(tensor_proto),
+            1);
+
+  tensor_proto =
+      tensor::CreateTensorProto(std::vector<int>(kLength, 1), {kLength});
+  EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
+  EXPECT_EQ(tensor::internal::TensorProtoHelper<int>::NumValues(tensor_proto),
+            1);
+
+  tensor_proto =
+      tensor::CreateTensorProto(std::vector<uint8>(kLength, 1), {kLength});
+  EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
+  EXPECT_EQ(tensor::internal::TensorProtoHelper<uint8>::NumValues(tensor_proto),
+            1);
+  tensor_proto =
+      tensor::CreateTensorProto(std::vector<bool>(kLength, true), {kLength});
+  EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
+  EXPECT_EQ(tensor::internal::TensorProtoHelper<bool>::NumValues(tensor_proto),
+            1);
+
+  tensor_proto = tensor::CreateTensorProto(
+      std::vector<Eigen::half>(kLength, Eigen::half(1.0)), {kLength});
+  EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
+  EXPECT_EQ(
+      tensor::internal::TensorProtoHelper<Eigen::half>::NumValues(tensor_proto),
+      1);
+
+  tensor_proto = tensor::CreateTensorProto(
+      std::vector<std::complex<float>>(kLength, 1), {kLength});
+  EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
+  EXPECT_EQ(tensor::internal::TensorProtoHelper<std::complex<float>>::NumValues(
+                tensor_proto),
             1);
 }
 

diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
index 0b40363..42dd636 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc

@@ -991,8 +991,7 @@
       found_zeros = true;
       EXPECT_EQ(node.op(), "Const");
       const TensorProto& zeroes_t = node.attr().at("value").tensor();
-      EXPECT_EQ(zeroes_t.float_val_size(), 1);
-      EXPECT_EQ(zeroes_t.float_val(0), 0.0f);
+      EXPECT_EQ(zeroes_t.float_val_size(), 0);
     } else if (node.name() == "host_ones") {
       found_host_ones = true;
       EXPECT_EQ(node.op(), "HostConst");
commit	4bd8a4270638196debca7a139e748bdd1157560f	[log] [tgz]
author	A. Unique TensorFlower <gardener@tensorflow.org>	Fri Sep 13 19:39:38 2019 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	Fri Sep 13 22:53:10 2019 -0700
tree	bada99bb4e66cd997810d0573a2db07f21d19f7c
parent	df1b3b396bbf7dbb831f0a9066847eb7c08f3d04 [diff]