diff options
author | Håvard Pettersen <havardpe@yahooinc.com> | 2023-01-19 12:47:36 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@yahooinc.com> | 2023-01-20 10:17:52 +0000 |
commit | 3e7206c9f0ceb404edf378fb3d31a0ca12b6ad21 (patch) | |
tree | 93207ba5170071d0a1834df01a1bfbb706d4af2a | |
parent | f01c1607b1637eecb40a45c32b851ebeec539fe7 (diff) |
support flat profiling
3 files changed, 275 insertions, 123 deletions
diff --git a/vespalib/src/tests/execution_profiler/execution_profiler_test.cpp b/vespalib/src/tests/execution_profiler/execution_profiler_test.cpp index f41aaffd90d..969521a2a9e 100644 --- a/vespalib/src/tests/execution_profiler/execution_profiler_test.cpp +++ b/vespalib/src/tests/execution_profiler/execution_profiler_test.cpp @@ -73,7 +73,7 @@ TEST(ExecutionProfilerTest, resolve_names) { EXPECT_EQ(profiler.resolve("baz"), 2); } -TEST(ExecutionProfilerTest, empty_report) { +TEST(ExecutionProfilerTest, empty_tree_report) { Profiler profiler(64); profiler.resolve("foo"); profiler.resolve("bar"); @@ -81,6 +81,24 @@ TEST(ExecutionProfilerTest, empty_report) { Slime slime; profiler.report(slime.setObject()); fprintf(stderr, "%s\n", slime.toString().c_str()); + EXPECT_EQ(slime["profiler"].asString().make_string(), "tree"); + EXPECT_EQ(slime["depth"].asLong(), 64); + EXPECT_EQ(slime["total_time_ms"].asDouble(), 0.0); + EXPECT_EQ(slime["roots"].entries(), 0); + EXPECT_TRUE(find_path(slime, {})); +} + +TEST(ExecutionProfilerTest, empty_flat_report) { + Profiler profiler(-64); + profiler.resolve("foo"); + profiler.resolve("bar"); + profiler.resolve("baz"); + Slime slime; + profiler.report(slime.setObject()); + fprintf(stderr, "%s\n", slime.toString().c_str()); + EXPECT_EQ(slime["profiler"].asString().make_string(), "flat"); + EXPECT_EQ(slime["topn"].asLong(), 64); + EXPECT_EQ(slime["total_time_ms"].asDouble(), 0.0); EXPECT_EQ(slime["roots"].entries(), 0); EXPECT_TRUE(find_path(slime, {})); } @@ -96,6 +114,8 @@ TEST(ExecutionProfilerTest, perform_dummy_profiling) { Slime slime; profiler.report(slime.setObject()); fprintf(stderr, "%s\n", slime.toString().c_str()); + EXPECT_EQ(slime["profiler"].asString().make_string(), "tree"); + EXPECT_EQ(slime["depth"].asLong(), 64); EXPECT_EQ(slime["roots"].entries(), 4); EXPECT_TRUE(find_path(slime, {{"foo", 3}, {"bar", 3}, {"baz", 6}, {"fox", 18}})); EXPECT_TRUE(find_path(slime, {{"foo", 3}, {"bar", 3}, {"fox", 6}})); @@ -107,6 +127,42 @@ TEST(ExecutionProfilerTest, perform_dummy_profiling) { EXPECT_TRUE(find_path(slime, {{"fox", 3}})); } +TEST(ExecutionProfilerTest, perform_flat_dummy_profiling) { + Profiler profiler(-64); + for (int i = 0; i < 3; ++i) { + foo(profiler); + bar(profiler); + baz(profiler); + fox(profiler); + } + Slime slime; + profiler.report(slime.setObject()); + fprintf(stderr, "%s\n", slime.toString().c_str()); + EXPECT_EQ(slime["profiler"].asString().make_string(), "flat"); + EXPECT_EQ(slime["topn"].asLong(), 64); + EXPECT_EQ(slime["roots"].entries(), 4); + EXPECT_TRUE(find_path(slime, {{"foo", 3}})); + EXPECT_TRUE(find_path(slime, {{"bar", 6}})); + EXPECT_TRUE(find_path(slime, {{"baz", 18}})); + EXPECT_TRUE(find_path(slime, {{"fox", 72}})); +} + +TEST(ExecutionProfilerTest, perform_limited_flat_dummy_profiling) { + Profiler profiler(-2); + for (int i = 0; i < 3; ++i) { + foo(profiler); + bar(profiler); + baz(profiler); + fox(profiler); + } + Slime slime; + profiler.report(slime.setObject()); + fprintf(stderr, "%s\n", slime.toString().c_str()); + EXPECT_EQ(slime["profiler"].asString().make_string(), "flat"); + EXPECT_EQ(slime["topn"].asLong(), 2); + EXPECT_EQ(slime["roots"].entries(), 2); +} + TEST(ExecutionProfilerTest, perform_shallow_dummy_profiling) { Profiler profiler(2); for (int i = 0; i < 3; ++i) { @@ -118,6 +174,8 @@ TEST(ExecutionProfilerTest, perform_shallow_dummy_profiling) { Slime slime; profiler.report(slime.setObject()); fprintf(stderr, "%s\n", slime.toString().c_str()); + EXPECT_EQ(slime["profiler"].asString().make_string(), "tree"); + EXPECT_EQ(slime["depth"].asLong(), 2); EXPECT_EQ(slime["roots"].entries(), 4); EXPECT_TRUE(find_path(slime, {{"foo", 3}, {"bar", 3}})); EXPECT_TRUE(find_path(slime, {{"foo", 3}, {"bar", 3}})); diff --git a/vespalib/src/vespa/vespalib/util/execution_profiler.cpp b/vespalib/src/vespa/vespalib/util/execution_profiler.cpp index 3c56901adb7..6c5af6cbe41 100644 --- a/vespalib/src/vespa/vespalib/util/execution_profiler.cpp +++ b/vespalib/src/vespa/vespalib/util/execution_profiler.cpp @@ -2,113 +2,231 @@ #include "execution_profiler.h" #include <vespa/vespalib/stllike/hash_map.h> +#include <vespa/vespalib/stllike/hash_map.hpp> #include <vespa/vespalib/data/slime/slime.h> #include <cassert> namespace vespalib { -namespace { - -double as_ms(duration d) { - return (count_ns(d) / 1000000.0); -} - -} - struct ExecutionProfiler::ReportContext { + const ExecutionProfiler &profiler; const ExecutionProfiler::NameMapper &name_mapper; - vespalib::hash_map<vespalib::string,vespalib::string> name_cache; - ReportContext(const ExecutionProfiler::NameMapper &name_mapper_in, size_t num_names) - : name_mapper(name_mapper_in), name_cache(num_names * 2) {} - const vespalib::string &resolve_name(const vespalib::string &name) { - auto pos = name_cache.find(name); + vespalib::hash_map<TaskId,vespalib::string> name_cache; + ReportContext(const ExecutionProfiler &profiler_in, + const ExecutionProfiler::NameMapper &name_mapper_in, size_t num_names) + : profiler(profiler_in), name_mapper(name_mapper_in), name_cache(num_names * 2) {} + size_t get_max_depth() const { return profiler._max_depth; } + const vespalib::string &resolve_name(TaskId task) { + auto pos = name_cache.find(task); if (pos == name_cache.end()) { - pos = name_cache.insert(std::make_pair(name, name_mapper(name))).first; + pos = name_cache.insert(std::make_pair(task, name_mapper(profiler.name_of(task)))).first; } return pos->second; } }; -duration -ExecutionProfiler::get_children_time(const Edges &edges) const -{ - duration result = duration::zero(); - for (const auto &entry: edges) { - result += _nodes[entry.second].total_time; - } - return result; -} +namespace { -std::vector<uint32_t> -ExecutionProfiler::get_sorted_children(const Edges &edges) const -{ - std::vector<uint32_t> children; - for (const auto &entry: edges) { - children.push_back(entry.second); - } - std::sort(children.begin(), children.end(), - [&](const auto &a, const auto &b) { - return (_nodes[a].total_time > _nodes[b].total_time); - }); - return children; +double as_ms(duration d) { + return (count_ns(d) / 1000000.0); } -void -ExecutionProfiler::render_node(slime::Cursor &obj, NodeId node, ReportContext &ctx) const +class TreeProfiler : public ExecutionProfiler::Impl { - obj.setString("name", ctx.resolve_name(_names[_nodes[node].task])); - obj.setLong("count", _nodes[node].count); - obj.setDouble("total_time_ms", as_ms(_nodes[node].total_time)); - if (!_nodes[node].children.empty()) { - auto children_time = get_children_time(_nodes[node].children); - obj.setDouble("self_time_ms", as_ms(_nodes[node].total_time - children_time)); - render_children(obj.setArray("children"), _nodes[node].children, ctx); +private: + using ReportContext = ExecutionProfiler::ReportContext; + using TaskId = ExecutionProfiler::TaskId; + using NodeId = uint32_t; + using Edges = vespalib::hash_map<TaskId, NodeId>; + struct Node { + TaskId task; + size_t count; + duration total_time; + Edges children; + Node(TaskId task_in) noexcept + : task(task_in), + count(0), + total_time(), + children() {} + }; + struct Frame { + NodeId node; + steady_time start; + Frame(NodeId node_in) noexcept + : node(node_in), start(steady_clock::now()) {} + }; + + std::vector<Node> _nodes; + Edges _roots; + std::vector<Frame> _state; + + duration get_children_time(const Edges &edges) const { + duration result = duration::zero(); + for (const auto &entry: edges) { + result += _nodes[entry.second].total_time; + } + return result; } -} - -void -ExecutionProfiler::render_children(slime::Cursor &arr, const Edges &edges, ReportContext &ctx) const -{ - auto children = get_sorted_children(edges); - for (NodeId child: children) { - render_node(arr.addObject(), child, ctx); + std::vector<NodeId> get_sorted_children(const Edges &edges) const { + std::vector<uint32_t> children; + for (const auto &entry: edges) { + children.push_back(entry.second); + } + std::sort(children.begin(), children.end(), + [&](const auto &a, const auto &b) { + return (_nodes[a].total_time > _nodes[b].total_time); + }); + return children; } -} + void render_node(slime::Cursor &obj, NodeId node, ReportContext &ctx) const { + obj.setString("name", ctx.resolve_name(_nodes[node].task)); + obj.setLong("count", _nodes[node].count); + obj.setDouble("total_time_ms", as_ms(_nodes[node].total_time)); + if (!_nodes[node].children.empty()) { + auto children_time = get_children_time(_nodes[node].children); + obj.setDouble("self_time_ms", as_ms(_nodes[node].total_time - children_time)); + render_children(obj.setArray("children"), _nodes[node].children, ctx); + } + } + void render_children(slime::Cursor &arr, const Edges &edges, ReportContext &ctx) const { + auto children = get_sorted_children(edges); + for (NodeId child: children) { + render_node(arr.addObject(), child, ctx); + } + } +public: + TreeProfiler() : _nodes(), _roots(), _state() {} + void track_start(TaskId task) override { + auto &edges = _state.empty() ? _roots : _nodes[_state.back().node].children; + auto [pos, was_new] = edges.insert(std::make_pair(task, _nodes.size())); + NodeId node = pos->second; // extending _nodes might invalidate lookup result + if (was_new) { + assert(node == _nodes.size()); + _nodes.emplace_back(task); + } + assert(node < _nodes.size()); + _state.emplace_back(node); + } + void track_complete() override { + assert(!_state.empty()); + auto &node = _nodes[_state.back().node]; + auto elapsed = steady_clock::now() - _state.back().start; + ++node.count; + node.total_time += elapsed; + _state.pop_back(); + } + void report(slime::Cursor &obj, ReportContext &ctx) const override { + obj.setString("profiler", "tree"); + obj.setLong("depth", ctx.get_max_depth()); + obj.setDouble("total_time_ms", as_ms(get_children_time(_roots))); + if (!_roots.empty()) { + render_children(obj.setArray("roots"), _roots, ctx); + } + } +}; -void -ExecutionProfiler::track_start(TaskId task) +class FlatProfiler : public ExecutionProfiler::Impl { - auto &edges = _state.empty() ? _roots : _nodes[_state.back().node].children; - auto [pos, was_new] = edges.insert(std::make_pair(task, _nodes.size())); - NodeId node = pos->second; // extending _nodes might invalidate lookup result - if (was_new) { - assert(node == _nodes.size()); - _nodes.emplace_back(task); +private: + using ReportContext = ExecutionProfiler::ReportContext; + using TaskId = ExecutionProfiler::TaskId; + struct Node { + size_t count; + duration self_time; + Node() noexcept + : count(0), + self_time() {} + }; + struct Frame { + TaskId task; + steady_time start; + duration overlap; + Frame(TaskId task_in) noexcept + : task(task_in), start(steady_clock::now()), overlap() {} + }; + + size_t _topn; + std::vector<Node> _nodes; + std::vector<Frame> _state; + + duration get_total_time() const { + duration result = duration::zero(); + for (const auto &node: _nodes) { + result += node.self_time; + } + return result; } - assert(node < _nodes.size()); - _state.emplace_back(node); -} + std::vector<uint32_t> get_sorted_nodes() const { + std::vector<uint32_t> nodes; + nodes.reserve(_nodes.size()); + for (uint32_t i = 0; i < _nodes.size(); ++i) { + nodes.push_back(i); + } + std::sort(nodes.begin(), nodes.end(), + [&](const auto &a, const auto &b) { + return (_nodes[a].self_time > _nodes[b].self_time); + }); + return nodes; + } + void render_node(slime::Cursor &obj, uint32_t node, ReportContext &ctx) const { + obj.setString("name", ctx.resolve_name(node)); + obj.setLong("count", _nodes[node].count); + obj.setDouble("self_time_ms", as_ms(_nodes[node].self_time)); + } +public: + FlatProfiler(size_t topn) : _topn(topn), _nodes(), _state() { + _nodes.reserve(256); + _state.reserve(64); + } + void track_start(TaskId task) override { + if (task >= _nodes.size()) { + _nodes.resize(task + 1); + } + _state.emplace_back(task); + } + void track_complete() override { + assert(!_state.empty()); + auto &state = _state.back(); + auto &node = _nodes[state.task]; + auto elapsed = steady_clock::now() - state.start; + ++node.count; + node.self_time += (elapsed - state.overlap); + _state.pop_back(); + if (!_state.empty()) { + _state.back().overlap += elapsed; + } + } + void report(slime::Cursor &obj, ReportContext &ctx) const override { + obj.setString("profiler", "flat"); + obj.setLong("topn", _topn); + obj.setDouble("total_time_ms", as_ms(get_total_time())); + auto list = get_sorted_nodes(); + if (auto limit = std::min(list.size(), _topn); limit > 0) { + auto &arr = obj.setArray("roots"); + for (uint32_t i = 0; i < limit; ++i) { + render_node(arr.addObject(), list[i], ctx); + } + } + } +}; -void -ExecutionProfiler::track_complete() -{ - assert(!_state.empty()); - auto &node = _nodes[_state.back().node]; - auto elapsed = steady_clock::now() - _state.back().start; - ++node.count; - node.total_time += elapsed; - _state.pop_back(); } -ExecutionProfiler::ExecutionProfiler(size_t max_depth) - : _max_depth(max_depth), +ExecutionProfiler::ExecutionProfiler(int32_t profile_depth) + : _level(0), + _max_depth(), _names(), _name_map(), - _nodes(), - _roots(), - _state(), - _level(0) + _impl() { + if (profile_depth < 0) { + _max_depth = -1; + size_t topn = -profile_depth; + _impl = std::make_unique<FlatProfiler>(topn); + } else { + _max_depth = profile_depth; + _impl = std::make_unique<TreeProfiler>(); + } } ExecutionProfiler::~ExecutionProfiler() = default; @@ -128,11 +246,8 @@ ExecutionProfiler::resolve(const vespalib::string &name) void ExecutionProfiler::report(slime::Cursor &obj, const NameMapper &name_mapper) const { - ReportContext ctx(name_mapper, std::min(_names.size(), _nodes.size())); - obj.setDouble("total_time_ms", as_ms(get_children_time(_roots))); - if (!_roots.empty()) { - render_children(obj.setArray("roots"), _roots, ctx); - } + ReportContext ctx(*this, name_mapper, _names.size()); + _impl->report(obj, ctx); } } diff --git a/vespalib/src/vespa/vespalib/util/execution_profiler.h b/vespalib/src/vespa/vespalib/util/execution_profiler.h index b5060e9a3c2..d69bd5ffa1a 100644 --- a/vespalib/src/vespa/vespalib/util/execution_profiler.h +++ b/vespalib/src/vespa/vespalib/util/execution_profiler.h @@ -25,60 +25,39 @@ namespace slime { struct Cursor; } class ExecutionProfiler { public: using TaskId = uint32_t; - using NameMapper = std::function<vespalib::string(const vespalib::string &)>; -private: - using NodeId = uint32_t; - using Edges = vespalib::hash_map<TaskId, NodeId>; - struct Node { - TaskId task; - size_t count; - duration total_time; - Edges children; - Node(TaskId task_in) noexcept - : task(task_in), - count(0), - total_time(), - children() {} - }; - struct Frame { - NodeId node; - steady_time start; - Frame(NodeId node_in) noexcept - : node(node_in), start(steady_clock::now()) {} - }; struct ReportContext; + struct Impl { + virtual ~Impl() = default; + virtual void track_start(TaskId task) = 0; + virtual void track_complete() = 0; + virtual void report(slime::Cursor &obj, ReportContext &ctx) const = 0; + }; + using NameMapper = std::function<vespalib::string(const vespalib::string &)>; +private: + size_t _level; size_t _max_depth; std::vector<vespalib::string> _names; vespalib::hash_map<vespalib::string,size_t> _name_map; - std::vector<Node> _nodes; - Edges _roots; - std::vector<Frame> _state; - size_t _level; - - duration get_children_time(const Edges &edges) const; - std::vector<NodeId> get_sorted_children(const Edges &edges) const; - void render_node(slime::Cursor &obj, NodeId node, ReportContext &ctx) const; - void render_children(slime::Cursor &arr, const Edges &edges, ReportContext &ctx) const; - - void track_start(TaskId task); - void track_complete(); - + std::unique_ptr<Impl> _impl; + public: - ExecutionProfiler(size_t max_depth); + ExecutionProfiler(int32_t profile_depth); ~ExecutionProfiler(); TaskId resolve(const vespalib::string &name); + const vespalib::string &name_of(TaskId task) const { return _names[task]; } void start(TaskId task) { if (++_level <= _max_depth) { - track_start(task); + _impl->track_start(task); } } void complete() { if (--_level < _max_depth) { - track_complete(); + _impl->track_complete(); } } - void report(slime::Cursor &obj, const NameMapper & = [](const vespalib::string &name) noexcept { return name; }) const; + void report(slime::Cursor &obj, const NameMapper &name_mapper = + [](const vespalib::string &name) noexcept { return name; }) const; }; } |