// blob: e43711fdcf2f544627ecf57e356f014a17a18660 [file] [log] [blame]
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "tensorflow/core/common_runtime/direct_session.h"
#include "tensorflow/core/common_runtime/step_stats_collector.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_testutil.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/graph/testlib.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/profiler/internal/profiler_interface.h"
#include "tensorflow/core/public/session_options.h"
#include "tensorflow/core/util/device_name_utils.h"
namespace tensorflow {
#if GOOGLE_CUDA
// Only declared here; in CUDA builds the definition has to be provided by
// another translation unit at link time.
std::unique_ptr<profiler::ProfilerInterface> CreateDeviceTracer();
#else
// Non-CUDA builds have no device tracer, so this stub hands back a null
// pointer. The tests in this file return early when the tracer is null.
std::unique_ptr<profiler::ProfilerInterface> CreateDeviceTracer() {
  return {};
}
#endif
namespace {
std::unique_ptr<Session> CreateSession() {
SessionOptions options;
(*options.config.mutable_device_count())["CPU"] = 1;
(*options.config.mutable_device_count())["GPU"] = 1;
options.config.set_allow_soft_placement(true);
return std::unique_ptr<Session>(NewSession(options));
}
// Test fixture that builds a small graph (y = A * x, then Identity, then Neg)
// and remembers the node names the individual tests fetch or target.
class DeviceTracerTest : public ::testing::Test {
 public:
  // Fills def_ with the test graph. `a_values` provides the entries of the
  // 2x2 constant A; x is always a 2x1 constant of ones.
  void Initialize(std::initializer_list<float> a_values) {
    const string cpu_device = "/job:localhost/replica:0/task:0/cpu:0";
    const string gpu_device = "/job:localhost/replica:0/task:0/device:GPU:0";

    Graph graph(OpRegistry::Global());

    // Constant A, assigned to the CPU.
    Tensor a_tensor(DT_FLOAT, TensorShape({2, 2}));
    test::FillValues<float>(&a_tensor, a_values);
    Node* a_node = test::graph::Constant(&graph, a_tensor);
    a_node->set_assigned_device_name(cpu_device);

    // Constant x, assigned to the GPU.
    Tensor x_tensor(DT_FLOAT, TensorShape({2, 1}));
    test::FillValues<float>(&x_tensor, {1, 1});
    Node* x_node = test::graph::Constant(&graph, x_tensor);
    x_node->set_assigned_device_name(gpu_device);
    x_ = x_node->name();

    // y = A * x, assigned to the GPU.
    Node* y_node = test::graph::Matmul(&graph, a_node, x_node, false, false);
    y_node->set_assigned_device_name(gpu_device);
    y_ = y_node->name();

    // The Identity op sits on the CPU between two GPU ops, which forces a
    // memcpy to the CPU and back to the GPU.
    Node* identity_node = test::graph::Identity(&graph, y_node);
    identity_node->set_assigned_device_name(cpu_device);

    // y_neg = Neg(identity), assigned to the GPU.
    Node* neg_node = test::graph::Unary(&graph, "Neg", identity_node);
    neg_node->set_assigned_device_name(gpu_device);
    y_neg_ = neg_node->name();

    test::graph::ToGraphDef(&graph, &def_);
  }

 protected:
  // Expects `status` to be an error and, when it is, expects its code to equal
  // `code`. The error message is logged at INFO level.
  void ExpectFailure(const Status& status, error::Code code) {
    EXPECT_FALSE(status.ok()) << status.ToString();
    if (status.ok()) {
      return;
    }
    LOG(INFO) << "Status message: " << status.error_message();
    EXPECT_EQ(code, status.code()) << status.ToString();
  }

  string x_;      // Name of the constant node x.
  string y_;      // Name of the MatMul node y.
  string y_neg_;  // Name of the Neg node.
  GraphDef def_;  // Serialized graph produced by Initialize().
};
// A plain Start() followed by Stop() must both succeed.
TEST_F(DeviceTracerTest, StartStop) {
  std::unique_ptr<profiler::ProfilerInterface> device_tracer =
      CreateDeviceTracer();
  if (device_tracer == nullptr) {
    // No tracer available in this build; nothing to check.
    return;
  }
  TF_EXPECT_OK(device_tracer->Start());
  TF_EXPECT_OK(device_tracer->Stop());
}
// Stop() on a tracer that was never started is expected to succeed, and so is
// a second Stop() right after it.
TEST_F(DeviceTracerTest, StopBeforeStart) {
  std::unique_ptr<profiler::ProfilerInterface> device_tracer =
      CreateDeviceTracer();
  if (device_tracer == nullptr) {
    // No tracer available in this build; nothing to check.
    return;
  }
  TF_EXPECT_OK(device_tracer->Stop());  // First stop, without any Start().
  TF_EXPECT_OK(device_tracer->Stop());  // Repeated stop.
}
// Collecting from a tracer that was never started is expected to succeed and
// to leave the step stats without any device entries.
TEST_F(DeviceTracerTest, CollectBeforeStart) {
  std::unique_ptr<profiler::ProfilerInterface> device_tracer =
      CreateDeviceTracer();
  if (device_tracer == nullptr) {
    // No tracer available in this build; nothing to check.
    return;
  }
  RunMetadata collected;
  TF_EXPECT_OK(device_tracer->CollectData(&collected));
  EXPECT_EQ(collected.step_stats().dev_stats_size(), 0);
}
// Collecting while the tracer is still running is expected to fail with
// FAILED_PRECONDITION; stopping afterwards must still succeed.
TEST_F(DeviceTracerTest, CollectBeforeStop) {
  std::unique_ptr<profiler::ProfilerInterface> device_tracer =
      CreateDeviceTracer();
  if (device_tracer == nullptr) {
    // No tracer available in this build; nothing to check.
    return;
  }
  TF_EXPECT_OK(device_tracer->Start());

  RunMetadata collected;
  const Status collect_status = device_tracer->CollectData(&collected);
  ExpectFailure(collect_status, error::FAILED_PRECONDITION);

  TF_EXPECT_OK(device_tracer->Stop());
}
// While one tracer is running, starting a second one is expected to fail with
// UNAVAILABLE. Once the first is stopped, the second can be started.
TEST_F(DeviceTracerTest, StartTwoTracers) {
  std::unique_ptr<profiler::ProfilerInterface> first = CreateDeviceTracer();
  std::unique_ptr<profiler::ProfilerInterface> second = CreateDeviceTracer();
  if (first == nullptr || second == nullptr) {
    // No tracer available in this build; nothing to check.
    return;
  }

  TF_EXPECT_OK(first->Start());
  const Status second_start = second->Start();
  ExpectFailure(second_start, error::UNAVAILABLE);

  TF_EXPECT_OK(first->Stop());
  TF_EXPECT_OK(second->Start());
  TF_EXPECT_OK(second->Stop());
}
// Runs the test graph while a device tracer is active and verifies the fetched
// MatMul result.
TEST_F(DeviceTracerTest, RunWithTracer) {
  // On non-GPU platforms, we may not support DeviceTracer.
  auto tracer = CreateDeviceTracer();
  if (!tracer) return;
  Initialize({3, 2, -1, 0});
  auto session = CreateSession();
  ASSERT_TRUE(session != nullptr);
  TF_ASSERT_OK(session->Create(def_));
  std::vector<std::pair<string, Tensor>> inputs;
  // Fetch the output of y, and additionally run y_neg as a target node whose
  // output is not fetched.
  std::vector<string> output_names = {y_ + ":0"};
  std::vector<string> target_nodes = {y_neg_};
  std::vector<Tensor> outputs;
  TF_ASSERT_OK(tracer->Start());
  Status s = session->Run(inputs, output_names, target_nodes, &outputs);
  TF_ASSERT_OK(s);
  TF_ASSERT_OK(tracer->Stop());
  ASSERT_EQ(1u, outputs.size());
  // Check that the output is initialized BEFORE taking a typed view of it, so
  // that an uninitialized tensor shows up as a regular assertion failure
  // instead of being accessed first.
  ASSERT_TRUE(outputs[0].IsInitialized());
  auto mat = outputs[0].matrix<float>();
  // Expected top entry of y = A * x for the A values passed to Initialize()
  // above and x = {1, 1}.
  EXPECT_FLOAT_EQ(5.0, mat(0, 0));
}
// Runs the test graph with a tracer active, then collects the trace into a
// RunMetadata and expects stats for at least one device.
TEST_F(DeviceTracerTest, TraceToStepStatsCollector) {
  std::unique_ptr<profiler::ProfilerInterface> device_tracer =
      CreateDeviceTracer();
  if (device_tracer == nullptr) {
    // No tracer available in this build; nothing to check.
    return;
  }

  Initialize({3, 2, -1, 0});
  std::unique_ptr<Session> session = CreateSession();
  ASSERT_TRUE(session != nullptr);
  TF_ASSERT_OK(session->Create(def_));

  // No feeds. y is fetched; y_neg is only run as a target.
  const std::vector<std::pair<string, Tensor>> feeds;
  const std::vector<string> fetches = {y_ + ":0"};
  const std::vector<string> targets = {y_neg_};
  std::vector<Tensor> fetched;

  TF_ASSERT_OK(device_tracer->Start());
  const Status run_status = session->Run(feeds, fetches, targets, &fetched);
  TF_ASSERT_OK(run_status);
  TF_ASSERT_OK(device_tracer->Stop());

  RunMetadata collected;
  TF_ASSERT_OK(device_tracer->CollectData(&collected));
  // The number of devices differs between CPU and GPU runs, so only a lower
  // bound is checked.
  EXPECT_GE(collected.step_stats().dev_stats_size(), 1)
      << "Saw stats: " << collected.DebugString();
}
// Runs the test graph with FULL_TRACE requested through RunOptions (no
// explicit tracer object) and expects step stats for at least one device.
TEST_F(DeviceTracerTest, RunWithTraceOption) {
  Initialize({3, 2, -1, 0});
  std::unique_ptr<Session> session = CreateSession();
  ASSERT_TRUE(session != nullptr);
  TF_ASSERT_OK(session->Create(def_));

  // No feeds. y is fetched; y_neg is only run as a target.
  const std::vector<std::pair<string, Tensor>> feeds;
  const std::vector<string> fetches = {y_ + ":0"};
  const std::vector<string> targets = {y_neg_};
  std::vector<Tensor> fetched;

  // Ask for a full trace and supply a RunMetadata to receive the results.
  RunOptions trace_options;
  trace_options.set_trace_level(RunOptions::FULL_TRACE);
  RunMetadata collected;

  const Status run_status = session->Run(trace_options, feeds, fetches,
                                         targets, &fetched, &collected);
  TF_ASSERT_OK(run_status);
  ASSERT_TRUE(collected.has_step_stats());
  // The number of devices differs between CPU and GPU runs, so only a lower
  // bound is checked.
  EXPECT_GE(collected.step_stats().dev_stats_size(), 1);
}
} // namespace
} // namespace tensorflow