diff options
-rw-r--r-- | vespalib/CMakeLists.txt | 1 | ||||
-rw-r--r-- | vespalib/src/tests/execution_profiler/CMakeLists.txt | 9 | ||||
-rw-r--r-- | vespalib/src/tests/execution_profiler/execution_profiler_test.cpp | 151 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/util/CMakeLists.txt | 9 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/util/execution_profiler.cpp | 122 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/util/execution_profiler.h | 81 |
6 files changed, 369 insertions, 4 deletions
diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt index 609c825dafa..b2ce3011296 100644 --- a/vespalib/CMakeLists.txt +++ b/vespalib/CMakeLists.txt @@ -70,6 +70,7 @@ vespa_define_module( src/tests/encoding/base64 src/tests/eventbarrier src/tests/exception_classes + src/tests/execution_profiler src/tests/executor src/tests/executor_idle_tracking src/tests/explore_modern_cpp diff --git a/vespalib/src/tests/execution_profiler/CMakeLists.txt b/vespalib/src/tests/execution_profiler/CMakeLists.txt new file mode 100644 index 00000000000..e303762a110 --- /dev/null +++ b/vespalib/src/tests/execution_profiler/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_execution_profiler_test_app TEST + SOURCES + execution_profiler_test.cpp + DEPENDS + vespalib + GTest::GTest +) +vespa_add_test(NAME vespalib_execution_profiler_test_app COMMAND vespalib_execution_profiler_test_app) diff --git a/vespalib/src/tests/execution_profiler/execution_profiler_test.cpp b/vespalib/src/tests/execution_profiler/execution_profiler_test.cpp new file mode 100644 index 00000000000..565b8572201 --- /dev/null +++ b/vespalib/src/tests/execution_profiler/execution_profiler_test.cpp @@ -0,0 +1,151 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/util/execution_profiler.h> +#include <vespa/vespalib/data/slime/slime.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <thread> + +using Profiler = vespalib::ExecutionProfiler; +using vespalib::Slime; +using vespalib::slime::Cursor; +using vespalib::slime::Inspector; + +void fox(Profiler &profiler) { + profiler.start(profiler.resolve("fox")); + std::this_thread::sleep_for(1ms); + profiler.complete(); +} + +void baz(Profiler &profiler) { + profiler.start(profiler.resolve("baz")); + fox(profiler); + fox(profiler); + fox(profiler); + profiler.complete(); +} + +void bar(Profiler &profiler) { + profiler.start(profiler.resolve("bar")); + baz(profiler); + fox(profiler); + baz(profiler); + fox(profiler); + profiler.complete(); +} + +void foo(Profiler &profiler) { + profiler.start(profiler.resolve("foo")); + bar(profiler); + baz(profiler); + fox(profiler); + profiler.complete(); +} + +const Inspector *find_child(const Inspector &arr, const std::pair<vespalib::string,size_t> &entry) { + struct MyArrayTraverser : public vespalib::slime::ArrayTraverser { + const Inspector *node = nullptr; + const std::pair<vespalib::string,size_t> &needle; + MyArrayTraverser(const std::pair<vespalib::string,size_t> &needle_in) : needle(needle_in) {} + void entry(size_t, const Inspector &obj) override { + if ((obj["name"].asString().make_string() == needle.first) && + (obj["count"].asLong() == int64_t(needle.second))) + { + assert(node == nullptr); + node = &obj; + } + } + }; + MyArrayTraverser traverser(entry); + arr.traverse(traverser); + return traverser.node; +} + +bool verify_path(const Inspector &root, const std::vector<std::pair<vespalib::string,size_t>> &path) { + const Inspector *pos = &root; + bool first = true; + for (const auto &entry: path) { + if (first) { + pos = find_child((*pos)["roots"], entry); + } else { + pos = find_child((*pos)["children"], entry); + } + first = false; + if (pos == nullptr) { + fprintf(stderr, "could not find entry [%s, %zu]\n", entry.first.c_str(), entry.second); + return false; + } + } + if ((*pos)["roots"].valid() || (*pos)["children"].valid()) { + fprintf(stderr, "path too shallow\n"); + return false; + } + return true; +} + +TEST(ExecutionProfilerTest, resolve_names) { + Profiler profiler(64); + EXPECT_EQ(profiler.resolve("foo"), 0); + EXPECT_EQ(profiler.resolve("bar"), 1); + EXPECT_EQ(profiler.resolve("baz"), 2); + EXPECT_EQ(profiler.resolve("foo"), 0); + EXPECT_EQ(profiler.resolve("bar"), 1); + EXPECT_EQ(profiler.resolve("baz"), 2); +} + +TEST(ExecutionProfilerTest, empty_report) { + Profiler profiler(64); + profiler.resolve("foo"); + profiler.resolve("bar"); + profiler.resolve("baz"); + Slime slime; + profiler.report(slime.setObject()); + fprintf(stderr, "%s\n", slime.toString().c_str()); + EXPECT_EQ(slime["roots"].entries(), 0); + EXPECT_TRUE(verify_path(slime.get(), {})); +} + +TEST(ExecutionProfilerTest, perform_dummy_profiling) { + Profiler profiler(64); + for (int i = 0; i < 3; ++i) { + foo(profiler); + bar(profiler); + baz(profiler); + fox(profiler); + } + Slime slime; + profiler.report(slime.setObject()); + fprintf(stderr, "%s\n", slime.toString().c_str()); + EXPECT_EQ(slime["roots"].entries(), 4); + EXPECT_TRUE(verify_path(slime.get(), {{"foo", 3}, {"bar", 3}, {"baz", 6}, {"fox", 18}})); + EXPECT_TRUE(verify_path(slime.get(), {{"foo", 3}, {"bar", 3}, {"fox", 6}})); + EXPECT_TRUE(verify_path(slime.get(), {{"foo", 3}, {"baz", 3}, {"fox", 9}})); + EXPECT_TRUE(verify_path(slime.get(), {{"foo", 3}, {"fox", 3}})); + EXPECT_TRUE(verify_path(slime.get(), {{"bar", 3}, {"baz", 6}, {"fox", 18}})); + EXPECT_TRUE(verify_path(slime.get(), {{"bar", 3}, {"fox", 6}})); + EXPECT_TRUE(verify_path(slime.get(), {{"baz", 3}, {"fox", 9}})); + EXPECT_TRUE(verify_path(slime.get(), {{"fox", 3}})); +} + +TEST(ExecutionProfilerTest, perform_shallow_dummy_profiling) { + Profiler profiler(2); + for (int i = 0; i < 3; ++i) { + foo(profiler); + bar(profiler); + baz(profiler); + fox(profiler); + } + Slime slime; + profiler.report(slime.setObject()); + fprintf(stderr, "%s\n", slime.toString().c_str()); + EXPECT_EQ(slime["roots"].entries(), 4); + EXPECT_TRUE(verify_path(slime.get(), {{"foo", 3}, {"bar", 3}})); + EXPECT_TRUE(verify_path(slime.get(), {{"foo", 3}, {"bar", 3}})); + EXPECT_TRUE(verify_path(slime.get(), {{"foo", 3}, {"baz", 3}})); + EXPECT_TRUE(verify_path(slime.get(), {{"foo", 3}, {"fox", 3}})); + EXPECT_TRUE(verify_path(slime.get(), {{"bar", 3}, {"baz", 6}})); + EXPECT_TRUE(verify_path(slime.get(), {{"bar", 3}, {"fox", 6}})); + EXPECT_TRUE(verify_path(slime.get(), {{"baz", 3}, {"fox", 9}})); + EXPECT_TRUE(verify_path(slime.get(), {{"fox", 3}})); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/vespa/vespalib/util/CMakeLists.txt b/vespalib/src/vespa/vespalib/util/CMakeLists.txt index 59c05dffbc6..05682337982 100644 --- a/vespalib/src/vespa/vespalib/util/CMakeLists.txt +++ b/vespalib/src/vespa/vespalib/util/CMakeLists.txt @@ -25,12 +25,13 @@ vespa_add_library(vespalib_vespalib_util OBJECT cpu_usage.cpp crc.cpp destructor_callbacks.cpp - doom.cpp document_runnable.cpp + doom.cpp dual_merge_director.cpp error.cpp exception.cpp exceptions.cpp + execution_profiler.cpp executor_idle_tracking.cpp file_area_freelist.cpp foregroundtaskexecutor.cpp @@ -41,13 +42,13 @@ vespa_add_library(vespalib_vespalib_util OBJECT growablebytebuffer.cpp hdr_abort.cpp host_name.cpp - jsonexception.cpp - jsonstream.cpp - jsonwriter.cpp invokeserviceimpl.cpp isequencedtaskexecutor.cpp issue.cpp joinable.cpp + jsonexception.cpp + jsonstream.cpp + jsonwriter.cpp latch.cpp left_right_heap.cpp lz4compressor.cpp diff --git a/vespalib/src/vespa/vespalib/util/execution_profiler.cpp b/vespalib/src/vespa/vespalib/util/execution_profiler.cpp new file mode 100644 index 00000000000..3a0e4a77ce3 --- /dev/null +++ b/vespalib/src/vespa/vespalib/util/execution_profiler.cpp @@ -0,0 +1,122 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "execution_profiler.h" +#include <vespa/vespalib/data/slime/slime.h> +#include <cassert> + +namespace vespalib { + +namespace { + +double as_ms(duration d) { + return (count_ns(d) / 1000000.0); +} + +} + +duration +ExecutionProfiler::get_children_time(const Edges &edges) const +{ + duration result = duration::zero(); + for (const auto &entry: edges) { + result += _nodes[entry.second].total_time; + } + return result; +} + +std::vector<uint32_t> +ExecutionProfiler::get_sorted_children(const Edges &edges) const +{ + std::vector<uint32_t> children; + for (const auto &entry: edges) { + children.push_back(entry.second); + } + std::sort(children.begin(), children.end(), + [&](const auto &a, const auto &b) { + return (_nodes[a].total_time > _nodes[b].total_time); + }); + return children; +} + +void +ExecutionProfiler::render_node(slime::Cursor &obj, NodeId node) const +{ + obj.setString("name", _names[_nodes[node].task]); + obj.setLong("count", _nodes[node].count); + obj.setDouble("total_time_ms", as_ms(_nodes[node].total_time)); + if (!_nodes[node].children.empty()) { + auto children_time = get_children_time(_nodes[node].children); + obj.setDouble("self_time_ms", as_ms(_nodes[node].total_time - children_time)); + render_children(obj.setArray("children"), _nodes[node].children); + } +} + +void +ExecutionProfiler::render_children(slime::Cursor &arr, const Edges &edges) const +{ + auto children = get_sorted_children(edges); + for (NodeId child: children) { + render_node(arr.addObject(), child); + } +} + +void +ExecutionProfiler::track_start(TaskId task) +{ + auto &edges = _state.empty() ? _roots : _nodes[_state.back().node].children; + auto [pos, was_new] = edges.insert(std::make_pair(task, _nodes.size())); + NodeId node = pos->second; // extending _nodes might invalidate lookup result + if (was_new) { + assert(node == _nodes.size()); + _nodes.emplace_back(task); + } + assert(node < _nodes.size()); + _state.emplace_back(node); +} + +void +ExecutionProfiler::track_complete() +{ + assert(!_state.empty()); + auto &node = _nodes[_state.back().node]; + auto elapsed = steady_clock::now() - _state.back().start; + ++node.count; + node.total_time += elapsed; + _state.pop_back(); +} + +ExecutionProfiler::ExecutionProfiler(size_t max_depth) + : _max_depth(max_depth), + _names(), + _name_map(), + _nodes(), + _roots(), + _state(), + _level(0) +{ +} + +ExecutionProfiler::~ExecutionProfiler() = default; + +ExecutionProfiler::TaskId +ExecutionProfiler::resolve(const vespalib::string &name) +{ + auto [pos, was_new] = _name_map.insert(std::make_pair(name, _names.size())); + if (was_new) { + assert(pos->second == _names.size()); + _names.push_back(name); + } + assert(pos->second < _names.size()); + return pos->second; +} + +void +ExecutionProfiler::report(slime::Cursor &obj) const +{ + obj.setDouble("total_time_ms", as_ms(get_children_time(_roots))); + if (!_roots.empty()) { + render_children(obj.setArray("roots"), _roots); + } +} + +} diff --git a/vespalib/src/vespa/vespalib/util/execution_profiler.h b/vespalib/src/vespa/vespalib/util/execution_profiler.h new file mode 100644 index 00000000000..89f39631b4c --- /dev/null +++ b/vespalib/src/vespa/vespalib/util/execution_profiler.h @@ -0,0 +1,81 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "time.h" + +#include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/stllike/hash_map.h> + +namespace vespalib { + +namespace slime { class Cursor; } + +/** + * A simple single-threaded profiler used to measure where time is + * spent when executing tasks that may depend on each other (doing one + * task includes doing another task; like one function calls another + * function). Each task is identified by a unique name. Data is + * collected in real-time using signals about when a task is started + * and when it completes. Any sub-task must complete before any parent + * task. Any task may be executed any number of times and may depend + * on any other task. + **/ +class ExecutionProfiler { +public: + using TaskId = uint32_t; +private: + using NodeId = uint32_t; + using Edges = vespalib::hash_map<TaskId, NodeId>; + struct Node { + TaskId task; + size_t count; + duration total_time; + Edges children; + Node(TaskId task_in) noexcept + : task(task_in), + count(0), + total_time(), + children() {} + }; + struct Frame { + NodeId node; + steady_time start; + Frame(NodeId node_in) noexcept + : node(node_in), start(steady_clock::now()) {} + }; + + size_t _max_depth; + std::vector<vespalib::string> _names; + vespalib::hash_map<vespalib::string,size_t> _name_map; + std::vector<Node> _nodes; + Edges _roots; + std::vector<Frame> _state; + size_t _level; + + duration get_children_time(const Edges &edges) const; + std::vector<NodeId> get_sorted_children(const Edges &edges) const; + void render_node(slime::Cursor &obj, NodeId node) const; + void render_children(slime::Cursor &arr, const Edges &edges) const; + + void track_start(TaskId task); + void track_complete(); + +public: + ExecutionProfiler(size_t max_depth); + ~ExecutionProfiler(); + TaskId resolve(const vespalib::string &name); + void start(TaskId task) { + if (++_level <= _max_depth) { + track_start(task); + } + } + void complete() { + if (--_level < _max_depth) { + track_complete(); + } + } + void report(slime::Cursor &obj) const; +}; + +} |