blob: 0405ac77bfad98f9e8cb630fa99573d89c21d8ad [file] [log] [blame]
/* Copyright 2020 Google LLC. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <chrono>
#include <random>
#include <thread>
#include <vector>
#include "ruy/gtest_wrapper.h"
#include "ruy/profiler/profiler.h"
#include "ruy/profiler/test_instrumented_library.h"
#include "ruy/profiler/treeview.h"
namespace ruy {
namespace profiler {
namespace {
// Fills a buffer of `size` ints with the output of a default-constructed
// std::default_random_engine (no explicit seed), then passes the buffer and
// its length to MergeSort.
void DoSomeMergeSort(int size) {
  std::default_random_engine rng;
  std::vector<int> values(size);
  for (auto it = values.begin(); it != values.end(); ++it) {
    *it = rng();
  }
  MergeSort(size, values.data());
}
// Basic smoke test for the path that the majority of users will take: running
// work under a ScopeProfile without inspecting any treeview, just letting it
// be printed implicitly on stdout. This test is deliberately kept outside of
// the RUY_PROFILER guard so that the non-RUY_PROFILER case has coverage too.
TEST(ProfilerTest, MergeSortSingleThreadBasicTestEvenWithoutProfiler) {
  constexpr int kNumElements = 1 << 20;
  {
    // The profile object only lives for the duration of the sort.
    ScopeProfile scoped_profile;
    DoSomeMergeSort(kNumElements);
  }
}
#ifdef RUY_PROFILER
// Profiles DoSomeMergeSort(1 << 20) into a user-supplied TreeView, then checks
// the depth and the weights of the recorded tree, both as recorded and after
// collapsing the nodes matching "MergeSort (size=%d)" into a second TreeView.
TEST(ProfilerTest, MergeSortSingleThread) {
  TreeView treeview;
  {
    ScopeProfile profile;
    profile.SetUserTreeView(&treeview);
    DoSomeMergeSort(1 << 20);
  }
  Print(treeview);
  // Fatal assertion: the first thread root is dereferenced right below, so an
  // empty thread_roots() must abort the test instead of being merely reported.
  ASSERT_EQ(treeview.thread_roots().size(), 1u);
  const auto& thread_root = *treeview.thread_roots().begin()->second;
  EXPECT_EQ(DepthOfTreeBelow(thread_root), 22);
  EXPECT_GE(
      WeightBelowNodeMatchingUnformatted(thread_root, "Merging sorted halves"),
      0.1 * thread_root.weight);
  EXPECT_GE(WeightBelowNodeMatchingFormatted(
                thread_root, "MergeSortRecurse (level=20, size=1)"),
            0.01 * thread_root.weight);

  TreeView treeview_collapsed;
  CollapseNodesMatchingUnformatted(treeview, 5, "MergeSort (size=%d)",
                                   &treeview_collapsed);
  Print(treeview_collapsed);
  // Same reasoning as above: guard the dereference of the first thread root
  // of the collapsed tree.
  ASSERT_FALSE(treeview_collapsed.thread_roots().empty());
  const auto& collapsed_thread_root =
      *treeview_collapsed.thread_roots().begin()->second;
  EXPECT_EQ(DepthOfTreeBelow(collapsed_thread_root), 6);
  // Collapsing must not change the weight found below the matching nodes.
  EXPECT_EQ(
      WeightBelowNodeMatchingUnformatted(thread_root, "MergeSort (size=%d)"),
      WeightBelowNodeMatchingUnformatted(collapsed_thread_root,
                                         "MergeSort (size=%d)"));
}
// Runs 4 worker threads under a single ScopeProfile. Each worker opens a
// "worker thread #%d" label, sleeps for one second, opens a nested
// "some more work" label, and sleeps for another second. The test then checks
// the shape and the weights of the per-thread trees in the resulting TreeView.
TEST(ProfilerTest, MemcpyFourThreads) {
  TreeView treeview;
  {
    ScopeProfile profile;
    profile.SetUserTreeView(&treeview);
    // Threads are stored by value: no heap-allocated std::thread that could
    // leak if growing the vector throws.
    std::vector<std::thread> threads;
    threads.reserve(4);
    for (int i = 0; i < 4; i++) {
      threads.emplace_back([i]() {
        ScopeLabel thread_label("worker thread #%d", i);
        std::this_thread::sleep_for(std::chrono::milliseconds(1000));
        ScopeLabel some_more_work_label("some more work");
        std::this_thread::sleep_for(std::chrono::milliseconds(1000));
      });
    }
    // Join all workers before `profile` goes out of scope.
    for (std::thread& thread : threads) {
      thread.join();
    }
  }
  Print(treeview);
  // Since we cleared GlobalAllThreadStacks and the current thread hasn't
  // created any ScopeLabel, only the 4 worker threads should be recorded.
  EXPECT_EQ(treeview.thread_roots().size(), 4u);
  for (const auto& thread_root : treeview.thread_roots()) {
    const TreeView::Node& root_node = *thread_root.second;
    // The root node may have 1 or 2 children depending on whether there is
    // an "[other]" child. The lower bound is a fatal assertion because
    // children[0] is dereferenced right below.
    ASSERT_GE(root_node.children.size(), 1u);
    EXPECT_LE(root_node.children.size(), 2u);
    const TreeView::Node& child_node = *root_node.children[0];
    EXPECT_EQ(child_node.label.format(), "worker thread #%d");
    // There must be 2 children, since roughly half the time will be in
    // "some more work" leaving the other half in "[other]". Fatal for the
    // same reason as above: children[0] is dereferenced next.
    ASSERT_EQ(child_node.children.size(), 2u);
    const TreeView::Node& child_child_node = *child_node.children[0];
    // Since we sample every millisecond and the threads run for >= 2000
    // milliseconds, the "worker thread #%d" label should get roughly 2000
    // samples. Not very rigorous, as we're depending on the profiler thread
    // getting scheduled, so to avoid this test being flaky, we use a much more
    // conservative value of 500, one quarter of that normal value 2000.
    EXPECT_GE(child_node.weight, 500);
    // Likewise, allow up to four times more than the normal value 2000.
    EXPECT_LE(child_node.weight, 8000);
    // Roughly half of time should be spent under the "some more work" label.
    float some_more_work_percentage =
        100.f * child_child_node.weight / child_node.weight;
    EXPECT_GE(some_more_work_percentage, 40.0f);
    EXPECT_LE(some_more_work_percentage, 60.0f);
  }
}
// Runs two labeled threads strictly one after the other under a single
// ScopeProfile: the first thread is joined and its std::thread object destroyed
// before the second one is created. The resulting TreeView is expected to
// contain two thread roots.
TEST(ProfilerTest, OneThreadAfterAnother) {
  TreeView treeview;
  {
    ScopeProfile profile;
    profile.SetUserTreeView(&treeview);
    // Shared body of both threads once their label has been opened.
    const auto sleep_one_second = []() {
      std::this_thread::sleep_for(std::chrono::milliseconds(1000));
    };
    {
      std::thread first_thread([sleep_one_second]() {
        ScopeLabel thread_label("thread 0");
        sleep_one_second();
      });
      first_thread.join();
    }
    {
      std::thread second_thread([sleep_one_second]() {
        ScopeLabel thread_label("thread 1");
        sleep_one_second();
      });
      second_thread.join();
    }
  }
  Print(treeview);
  EXPECT_EQ(treeview.thread_roots().size(), 2);
}
#endif // RUY_PROFILER
} // namespace
} // namespace profiler
} // namespace ruy
// Test binary entry point: hands the command line to GoogleTest, then runs all
// registered tests and returns the value of RUN_ALL_TESTS() as the exit status.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_status = RUN_ALL_TESTS();
  return exit_status;
}