Internal change
PiperOrigin-RevId: 346924792
Change-Id: I835b6685484806dc9bebc462659013bcacde508b
diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index c131307..5ff3f31 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -497,6 +497,11 @@
CallFrameInterface* call_frame, ExecutorsAndKeys* executors_and_keys,
RunMetadata* run_metadata,
const thread::ThreadPoolOptions& threadpool_options) {
+ // This is a temporary flag for controlling whether to always track the kernel
+ // execution cost. We will remove this once the feature is validated.
+ if (run_options.experimental().always_track_kernel_execution_cost())
+ EnableAlwaysTrackKernelExecutionCost();
+
const uint64 start_time_usecs = options_.env->NowMicros();
const int64 executor_step_count = executors_and_keys->step_count.fetch_add(1);
RunState run_state(step_id, &devices_);
diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc
index 03c23f3..443d588 100644
--- a/tensorflow/core/common_runtime/executor.cc
+++ b/tensorflow/core/common_runtime/executor.cc
@@ -73,6 +73,13 @@
#include "tensorflow/core/util/tensor_slice_reader_cache.h"
namespace tensorflow {
+
+// Temporary process-wide flag for controlling whether to always track kernel
+// execution costs. Atomic so that enabling it cannot race with readers.
+static std::atomic<bool> always_track_kernel_execution_cost{false};
+void EnableAlwaysTrackKernelExecutionCost() {
+ always_track_kernel_execution_cost.store(true, std::memory_order_relaxed);
+}
namespace {
// 1-D, 0 element tensor.
@@ -179,12 +186,6 @@
// Updates the dynamic cost estimate, which is used to determine whether the
// given node is expensive. The new cost estimate is a weighted average of
// the old cost estimate and the latest cost.
- //
- // NOTE: We currently only expect updates to the cost estimate when
- // `is_expensive_[node.node_id]` is true (or at least, it *was* true, when
- // we started to execute the kernel. As a result, we expect that a kernel
- // can only ever transition from "expensive" to "inexpensive", but not vice
- // versa.
void UpdateCostEstimate(const NodeItem& node, uint64 elapsed_cycles) {
// N.B. Updates to `cost_estimate` are atomic but unlocked. Simultaneous
// updates may result in one or more updates being ignored. This does not
@@ -195,9 +196,10 @@
kCostDecay +
(elapsed_cycles / kCostDecay);
cost_estimate.store(new_estimate, std::memory_order_relaxed);
- if (new_estimate < kOpIsExpensiveThresholdCycles) {
- is_expensive_[node.node_id].store(false, std::memory_order_relaxed);
- }
+
+ bool new_is_expensive = (new_estimate >= kOpIsExpensiveThresholdCycles);
+ is_expensive_[node.node_id].store(new_is_expensive,
+ std::memory_order_relaxed);
}
private:
@@ -573,6 +575,15 @@
KernelTimer timer;
device->Compute(op_kernel, &ctx);
kernel_stats_->UpdateCostEstimate(item, timer.ElapsedCycles());
+ } else if (always_track_kernel_execution_cost) {
+ KernelTimer timer;
+ device->Compute(op_kernel, &ctx);
+ // If always_track_kernel_execution_cost is set, update the cost estimate
+ // for inexpensive kernels with ~1/8 probability. This assumes that the
+ // last 3 bits of the CPU cycle count are uniformly distributed.
+ constexpr int kKernelExecutionTrackingInvocationSkipCount = 8;
+ if (timer.start_cycles % kKernelExecutionTrackingInvocationSkipCount == 0)
+ kernel_stats_->UpdateCostEstimate(item, timer.ElapsedCycles());
} else {
device->Compute(op_kernel, &ctx);
}
diff --git a/tensorflow/core/common_runtime/executor.h b/tensorflow/core/common_runtime/executor.h
index d590ae0..d8ea85f 100644
--- a/tensorflow/core/common_runtime/executor.h
+++ b/tensorflow/core/common_runtime/executor.h
@@ -33,6 +33,15 @@
class StepStatsCollector;
+// If this is called, we will sample execution cost for "inexpensive" kernels
+// and switch them to "expensive" when the estimated cost reaches the
+// expensiveness threshold.
+// This is a temporary flag for validating the performance impact of
+// this feature. For simplicity, a global flag is used and once the flag
+// is turned on, it cannot be turned off. We will remove this flag once this
+// feature is validated.
+void EnableAlwaysTrackKernelExecutionCost();
+
// Executor runs a graph computation.
// Example:
// Graph* graph = ...;
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto
index 9b50d5e..569fe92 100644
--- a/tensorflow/core/protobuf/config.proto
+++ b/tensorflow/core/protobuf/config.proto
@@ -695,6 +695,12 @@
int64 priority = 1;
}
RunHandlerPoolOptions run_handler_pool_options = 3;
+
+ // If true, always track kernel execution cost. This allows the executor to
+ // transition kernels from "inexpensive" to "expensive" during execution.
+ // This is a temporary flag for validating this feature. We will remove this
+ // flag once the feature is validated.
+ bool always_track_kernel_execution_cost = 4;
}
Experimental experimental = 8;
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt
index 913d82f..9b2b7f7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt
@@ -21,6 +21,12 @@
type: TYPE_MESSAGE
type_name: ".tensorflow.RunOptions.Experimental.RunHandlerPoolOptions"
}
+ field {
+ name: "always_track_kernel_execution_cost"
+ number: 4
+ label: LABEL_OPTIONAL
+ type: TYPE_BOOL
+ }
nested_type {
name: "RunHandlerPoolOptions"
field {
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt
index 9020b61..d250ba3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt
@@ -68,6 +68,12 @@
type: TYPE_MESSAGE
type_name: ".tensorflow.RunOptions.Experimental.RunHandlerPoolOptions"
}
+ field {
+ name: "always_track_kernel_execution_cost"
+ number: 4
+ label: LABEL_OPTIONAL
+ type: TYPE_BOOL
+ }
nested_type {
name: "RunHandlerPoolOptions"
field {