summaryrefslogtreecommitdiffstats
path: root/vespalib
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2023-01-20 14:13:37 +0100
committerGitHub <noreply@github.com>2023-01-20 14:13:37 +0100
commit286b29be89ed6431acca9a74f057af0a412dbd3e (patch)
tree48caa2825c4e6e6022b8279a4fe219e0feeeff59 /vespalib
parent0bd21f594e95e2b3d6123f554aab80fab1f54917 (diff)
parent3e7206c9f0ceb404edf378fb3d31a0ca12b6ad21 (diff)
Merge pull request #25649 from vespa-engine/havardpe/flat-profiling
support flat profiling
Diffstat (limited to 'vespalib')
-rw-r--r--vespalib/src/tests/execution_profiler/execution_profiler_test.cpp60
-rw-r--r--vespalib/src/vespa/vespalib/util/execution_profiler.cpp283
-rw-r--r--vespalib/src/vespa/vespalib/util/execution_profiler.h55
3 files changed, 275 insertions, 123 deletions
diff --git a/vespalib/src/tests/execution_profiler/execution_profiler_test.cpp b/vespalib/src/tests/execution_profiler/execution_profiler_test.cpp
index f41aaffd90d..969521a2a9e 100644
--- a/vespalib/src/tests/execution_profiler/execution_profiler_test.cpp
+++ b/vespalib/src/tests/execution_profiler/execution_profiler_test.cpp
@@ -73,7 +73,7 @@ TEST(ExecutionProfilerTest, resolve_names) {
EXPECT_EQ(profiler.resolve("baz"), 2);
}
-TEST(ExecutionProfilerTest, empty_report) {
+TEST(ExecutionProfilerTest, empty_tree_report) {
Profiler profiler(64);
profiler.resolve("foo");
profiler.resolve("bar");
@@ -81,6 +81,24 @@ TEST(ExecutionProfilerTest, empty_report) {
Slime slime;
profiler.report(slime.setObject());
fprintf(stderr, "%s\n", slime.toString().c_str());
+ EXPECT_EQ(slime["profiler"].asString().make_string(), "tree");
+ EXPECT_EQ(slime["depth"].asLong(), 64);
+ EXPECT_EQ(slime["total_time_ms"].asDouble(), 0.0);
+ EXPECT_EQ(slime["roots"].entries(), 0);
+ EXPECT_TRUE(find_path(slime, {}));
+}
+
+TEST(ExecutionProfilerTest, empty_flat_report) {
+ Profiler profiler(-64);
+ profiler.resolve("foo");
+ profiler.resolve("bar");
+ profiler.resolve("baz");
+ Slime slime;
+ profiler.report(slime.setObject());
+ fprintf(stderr, "%s\n", slime.toString().c_str());
+ EXPECT_EQ(slime["profiler"].asString().make_string(), "flat");
+ EXPECT_EQ(slime["topn"].asLong(), 64);
+ EXPECT_EQ(slime["total_time_ms"].asDouble(), 0.0);
EXPECT_EQ(slime["roots"].entries(), 0);
EXPECT_TRUE(find_path(slime, {}));
}
@@ -96,6 +114,8 @@ TEST(ExecutionProfilerTest, perform_dummy_profiling) {
Slime slime;
profiler.report(slime.setObject());
fprintf(stderr, "%s\n", slime.toString().c_str());
+ EXPECT_EQ(slime["profiler"].asString().make_string(), "tree");
+ EXPECT_EQ(slime["depth"].asLong(), 64);
EXPECT_EQ(slime["roots"].entries(), 4);
EXPECT_TRUE(find_path(slime, {{"foo", 3}, {"bar", 3}, {"baz", 6}, {"fox", 18}}));
EXPECT_TRUE(find_path(slime, {{"foo", 3}, {"bar", 3}, {"fox", 6}}));
@@ -107,6 +127,42 @@ TEST(ExecutionProfilerTest, perform_dummy_profiling) {
EXPECT_TRUE(find_path(slime, {{"fox", 3}}));
}
+TEST(ExecutionProfilerTest, perform_flat_dummy_profiling) {
+ Profiler profiler(-64);
+ for (int i = 0; i < 3; ++i) {
+ foo(profiler);
+ bar(profiler);
+ baz(profiler);
+ fox(profiler);
+ }
+ Slime slime;
+ profiler.report(slime.setObject());
+ fprintf(stderr, "%s\n", slime.toString().c_str());
+ EXPECT_EQ(slime["profiler"].asString().make_string(), "flat");
+ EXPECT_EQ(slime["topn"].asLong(), 64);
+ EXPECT_EQ(slime["roots"].entries(), 4);
+ EXPECT_TRUE(find_path(slime, {{"foo", 3}}));
+ EXPECT_TRUE(find_path(slime, {{"bar", 6}}));
+ EXPECT_TRUE(find_path(slime, {{"baz", 18}}));
+ EXPECT_TRUE(find_path(slime, {{"fox", 72}}));
+}
+
+TEST(ExecutionProfilerTest, perform_limited_flat_dummy_profiling) {
+ Profiler profiler(-2);
+ for (int i = 0; i < 3; ++i) {
+ foo(profiler);
+ bar(profiler);
+ baz(profiler);
+ fox(profiler);
+ }
+ Slime slime;
+ profiler.report(slime.setObject());
+ fprintf(stderr, "%s\n", slime.toString().c_str());
+ EXPECT_EQ(slime["profiler"].asString().make_string(), "flat");
+ EXPECT_EQ(slime["topn"].asLong(), 2);
+ EXPECT_EQ(slime["roots"].entries(), 2);
+}
+
TEST(ExecutionProfilerTest, perform_shallow_dummy_profiling) {
Profiler profiler(2);
for (int i = 0; i < 3; ++i) {
@@ -118,6 +174,8 @@ TEST(ExecutionProfilerTest, perform_shallow_dummy_profiling) {
Slime slime;
profiler.report(slime.setObject());
fprintf(stderr, "%s\n", slime.toString().c_str());
+ EXPECT_EQ(slime["profiler"].asString().make_string(), "tree");
+ EXPECT_EQ(slime["depth"].asLong(), 2);
EXPECT_EQ(slime["roots"].entries(), 4);
EXPECT_TRUE(find_path(slime, {{"foo", 3}, {"bar", 3}}));
EXPECT_TRUE(find_path(slime, {{"foo", 3}, {"bar", 3}}));
diff --git a/vespalib/src/vespa/vespalib/util/execution_profiler.cpp b/vespalib/src/vespa/vespalib/util/execution_profiler.cpp
index 3c56901adb7..6c5af6cbe41 100644
--- a/vespalib/src/vespa/vespalib/util/execution_profiler.cpp
+++ b/vespalib/src/vespa/vespalib/util/execution_profiler.cpp
@@ -2,113 +2,231 @@
#include "execution_profiler.h"
#include <vespa/vespalib/stllike/hash_map.h>
+#include <vespa/vespalib/stllike/hash_map.hpp>
#include <vespa/vespalib/data/slime/slime.h>
#include <cassert>
namespace vespalib {
-namespace {
-
-double as_ms(duration d) {
- return (count_ns(d) / 1000000.0);
-}
-
-}
-
struct ExecutionProfiler::ReportContext {
+ const ExecutionProfiler &profiler;
const ExecutionProfiler::NameMapper &name_mapper;
- vespalib::hash_map<vespalib::string,vespalib::string> name_cache;
- ReportContext(const ExecutionProfiler::NameMapper &name_mapper_in, size_t num_names)
- : name_mapper(name_mapper_in), name_cache(num_names * 2) {}
- const vespalib::string &resolve_name(const vespalib::string &name) {
- auto pos = name_cache.find(name);
+ vespalib::hash_map<TaskId,vespalib::string> name_cache;
+ ReportContext(const ExecutionProfiler &profiler_in,
+ const ExecutionProfiler::NameMapper &name_mapper_in, size_t num_names)
+ : profiler(profiler_in), name_mapper(name_mapper_in), name_cache(num_names * 2) {}
+ size_t get_max_depth() const { return profiler._max_depth; }
+ const vespalib::string &resolve_name(TaskId task) {
+ auto pos = name_cache.find(task);
if (pos == name_cache.end()) {
- pos = name_cache.insert(std::make_pair(name, name_mapper(name))).first;
+ pos = name_cache.insert(std::make_pair(task, name_mapper(profiler.name_of(task)))).first;
}
return pos->second;
}
};
-duration
-ExecutionProfiler::get_children_time(const Edges &edges) const
-{
- duration result = duration::zero();
- for (const auto &entry: edges) {
- result += _nodes[entry.second].total_time;
- }
- return result;
-}
+namespace {
-std::vector<uint32_t>
-ExecutionProfiler::get_sorted_children(const Edges &edges) const
-{
- std::vector<uint32_t> children;
- for (const auto &entry: edges) {
- children.push_back(entry.second);
- }
- std::sort(children.begin(), children.end(),
- [&](const auto &a, const auto &b) {
- return (_nodes[a].total_time > _nodes[b].total_time);
- });
- return children;
+double as_ms(duration d) {
+ return (count_ns(d) / 1000000.0);
}
-void
-ExecutionProfiler::render_node(slime::Cursor &obj, NodeId node, ReportContext &ctx) const
+class TreeProfiler : public ExecutionProfiler::Impl
{
- obj.setString("name", ctx.resolve_name(_names[_nodes[node].task]));
- obj.setLong("count", _nodes[node].count);
- obj.setDouble("total_time_ms", as_ms(_nodes[node].total_time));
- if (!_nodes[node].children.empty()) {
- auto children_time = get_children_time(_nodes[node].children);
- obj.setDouble("self_time_ms", as_ms(_nodes[node].total_time - children_time));
- render_children(obj.setArray("children"), _nodes[node].children, ctx);
+private:
+ using ReportContext = ExecutionProfiler::ReportContext;
+ using TaskId = ExecutionProfiler::TaskId;
+ using NodeId = uint32_t;
+ using Edges = vespalib::hash_map<TaskId, NodeId>;
+ struct Node {
+ TaskId task;
+ size_t count;
+ duration total_time;
+ Edges children;
+ Node(TaskId task_in) noexcept
+ : task(task_in),
+ count(0),
+ total_time(),
+ children() {}
+ };
+ struct Frame {
+ NodeId node;
+ steady_time start;
+ Frame(NodeId node_in) noexcept
+ : node(node_in), start(steady_clock::now()) {}
+ };
+
+ std::vector<Node> _nodes;
+ Edges _roots;
+ std::vector<Frame> _state;
+
+ duration get_children_time(const Edges &edges) const {
+ duration result = duration::zero();
+ for (const auto &entry: edges) {
+ result += _nodes[entry.second].total_time;
+ }
+ return result;
}
-}
-
-void
-ExecutionProfiler::render_children(slime::Cursor &arr, const Edges &edges, ReportContext &ctx) const
-{
- auto children = get_sorted_children(edges);
- for (NodeId child: children) {
- render_node(arr.addObject(), child, ctx);
+ std::vector<NodeId> get_sorted_children(const Edges &edges) const {
+ std::vector<uint32_t> children;
+ for (const auto &entry: edges) {
+ children.push_back(entry.second);
+ }
+ std::sort(children.begin(), children.end(),
+ [&](const auto &a, const auto &b) {
+ return (_nodes[a].total_time > _nodes[b].total_time);
+ });
+ return children;
}
-}
+ void render_node(slime::Cursor &obj, NodeId node, ReportContext &ctx) const {
+ obj.setString("name", ctx.resolve_name(_nodes[node].task));
+ obj.setLong("count", _nodes[node].count);
+ obj.setDouble("total_time_ms", as_ms(_nodes[node].total_time));
+ if (!_nodes[node].children.empty()) {
+ auto children_time = get_children_time(_nodes[node].children);
+ obj.setDouble("self_time_ms", as_ms(_nodes[node].total_time - children_time));
+ render_children(obj.setArray("children"), _nodes[node].children, ctx);
+ }
+ }
+ void render_children(slime::Cursor &arr, const Edges &edges, ReportContext &ctx) const {
+ auto children = get_sorted_children(edges);
+ for (NodeId child: children) {
+ render_node(arr.addObject(), child, ctx);
+ }
+ }
+public:
+ TreeProfiler() : _nodes(), _roots(), _state() {}
+ void track_start(TaskId task) override {
+ auto &edges = _state.empty() ? _roots : _nodes[_state.back().node].children;
+ auto [pos, was_new] = edges.insert(std::make_pair(task, _nodes.size()));
+ NodeId node = pos->second; // extending _nodes might invalidate lookup result
+ if (was_new) {
+ assert(node == _nodes.size());
+ _nodes.emplace_back(task);
+ }
+ assert(node < _nodes.size());
+ _state.emplace_back(node);
+ }
+ void track_complete() override {
+ assert(!_state.empty());
+ auto &node = _nodes[_state.back().node];
+ auto elapsed = steady_clock::now() - _state.back().start;
+ ++node.count;
+ node.total_time += elapsed;
+ _state.pop_back();
+ }
+ void report(slime::Cursor &obj, ReportContext &ctx) const override {
+ obj.setString("profiler", "tree");
+ obj.setLong("depth", ctx.get_max_depth());
+ obj.setDouble("total_time_ms", as_ms(get_children_time(_roots)));
+ if (!_roots.empty()) {
+ render_children(obj.setArray("roots"), _roots, ctx);
+ }
+ }
+};
-void
-ExecutionProfiler::track_start(TaskId task)
+class FlatProfiler : public ExecutionProfiler::Impl
{
- auto &edges = _state.empty() ? _roots : _nodes[_state.back().node].children;
- auto [pos, was_new] = edges.insert(std::make_pair(task, _nodes.size()));
- NodeId node = pos->second; // extending _nodes might invalidate lookup result
- if (was_new) {
- assert(node == _nodes.size());
- _nodes.emplace_back(task);
+private:
+ using ReportContext = ExecutionProfiler::ReportContext;
+ using TaskId = ExecutionProfiler::TaskId;
+ struct Node {
+ size_t count;
+ duration self_time;
+ Node() noexcept
+ : count(0),
+ self_time() {}
+ };
+ struct Frame {
+ TaskId task;
+ steady_time start;
+ duration overlap;
+ Frame(TaskId task_in) noexcept
+ : task(task_in), start(steady_clock::now()), overlap() {}
+ };
+
+ size_t _topn;
+ std::vector<Node> _nodes;
+ std::vector<Frame> _state;
+
+ duration get_total_time() const {
+ duration result = duration::zero();
+ for (const auto &node: _nodes) {
+ result += node.self_time;
+ }
+ return result;
}
- assert(node < _nodes.size());
- _state.emplace_back(node);
-}
+ std::vector<uint32_t> get_sorted_nodes() const {
+ std::vector<uint32_t> nodes;
+ nodes.reserve(_nodes.size());
+ for (uint32_t i = 0; i < _nodes.size(); ++i) {
+ nodes.push_back(i);
+ }
+ std::sort(nodes.begin(), nodes.end(),
+ [&](const auto &a, const auto &b) {
+ return (_nodes[a].self_time > _nodes[b].self_time);
+ });
+ return nodes;
+ }
+ void render_node(slime::Cursor &obj, uint32_t node, ReportContext &ctx) const {
+ obj.setString("name", ctx.resolve_name(node));
+ obj.setLong("count", _nodes[node].count);
+ obj.setDouble("self_time_ms", as_ms(_nodes[node].self_time));
+ }
+public:
+ FlatProfiler(size_t topn) : _topn(topn), _nodes(), _state() {
+ _nodes.reserve(256);
+ _state.reserve(64);
+ }
+ void track_start(TaskId task) override {
+ if (task >= _nodes.size()) {
+ _nodes.resize(task + 1);
+ }
+ _state.emplace_back(task);
+ }
+ void track_complete() override {
+ assert(!_state.empty());
+ auto &state = _state.back();
+ auto &node = _nodes[state.task];
+ auto elapsed = steady_clock::now() - state.start;
+ ++node.count;
+ node.self_time += (elapsed - state.overlap);
+ _state.pop_back();
+ if (!_state.empty()) {
+ _state.back().overlap += elapsed;
+ }
+ }
+ void report(slime::Cursor &obj, ReportContext &ctx) const override {
+ obj.setString("profiler", "flat");
+ obj.setLong("topn", _topn);
+ obj.setDouble("total_time_ms", as_ms(get_total_time()));
+ auto list = get_sorted_nodes();
+ if (auto limit = std::min(list.size(), _topn); limit > 0) {
+ auto &arr = obj.setArray("roots");
+ for (uint32_t i = 0; i < limit; ++i) {
+ render_node(arr.addObject(), list[i], ctx);
+ }
+ }
+ }
+};
-void
-ExecutionProfiler::track_complete()
-{
- assert(!_state.empty());
- auto &node = _nodes[_state.back().node];
- auto elapsed = steady_clock::now() - _state.back().start;
- ++node.count;
- node.total_time += elapsed;
- _state.pop_back();
}
-ExecutionProfiler::ExecutionProfiler(size_t max_depth)
- : _max_depth(max_depth),
+ExecutionProfiler::ExecutionProfiler(int32_t profile_depth)
+ : _level(0),
+ _max_depth(),
_names(),
_name_map(),
- _nodes(),
- _roots(),
- _state(),
- _level(0)
+ _impl()
{
+ if (profile_depth < 0) {
+ _max_depth = -1;
+ size_t topn = -profile_depth;
+ _impl = std::make_unique<FlatProfiler>(topn);
+ } else {
+ _max_depth = profile_depth;
+ _impl = std::make_unique<TreeProfiler>();
+ }
}
ExecutionProfiler::~ExecutionProfiler() = default;
@@ -128,11 +246,8 @@ ExecutionProfiler::resolve(const vespalib::string &name)
void
ExecutionProfiler::report(slime::Cursor &obj, const NameMapper &name_mapper) const
{
- ReportContext ctx(name_mapper, std::min(_names.size(), _nodes.size()));
- obj.setDouble("total_time_ms", as_ms(get_children_time(_roots)));
- if (!_roots.empty()) {
- render_children(obj.setArray("roots"), _roots, ctx);
- }
+ ReportContext ctx(*this, name_mapper, _names.size());
+ _impl->report(obj, ctx);
}
}
diff --git a/vespalib/src/vespa/vespalib/util/execution_profiler.h b/vespalib/src/vespa/vespalib/util/execution_profiler.h
index b5060e9a3c2..d69bd5ffa1a 100644
--- a/vespalib/src/vespa/vespalib/util/execution_profiler.h
+++ b/vespalib/src/vespa/vespalib/util/execution_profiler.h
@@ -25,60 +25,39 @@ namespace slime { struct Cursor; }
class ExecutionProfiler {
public:
using TaskId = uint32_t;
- using NameMapper = std::function<vespalib::string(const vespalib::string &)>;
-private:
- using NodeId = uint32_t;
- using Edges = vespalib::hash_map<TaskId, NodeId>;
- struct Node {
- TaskId task;
- size_t count;
- duration total_time;
- Edges children;
- Node(TaskId task_in) noexcept
- : task(task_in),
- count(0),
- total_time(),
- children() {}
- };
- struct Frame {
- NodeId node;
- steady_time start;
- Frame(NodeId node_in) noexcept
- : node(node_in), start(steady_clock::now()) {}
- };
struct ReportContext;
+ struct Impl {
+ virtual ~Impl() = default;
+ virtual void track_start(TaskId task) = 0;
+ virtual void track_complete() = 0;
+ virtual void report(slime::Cursor &obj, ReportContext &ctx) const = 0;
+ };
+ using NameMapper = std::function<vespalib::string(const vespalib::string &)>;
+private:
+ size_t _level;
size_t _max_depth;
std::vector<vespalib::string> _names;
vespalib::hash_map<vespalib::string,size_t> _name_map;
- std::vector<Node> _nodes;
- Edges _roots;
- std::vector<Frame> _state;
- size_t _level;
-
- duration get_children_time(const Edges &edges) const;
- std::vector<NodeId> get_sorted_children(const Edges &edges) const;
- void render_node(slime::Cursor &obj, NodeId node, ReportContext &ctx) const;
- void render_children(slime::Cursor &arr, const Edges &edges, ReportContext &ctx) const;
-
- void track_start(TaskId task);
- void track_complete();
-
+ std::unique_ptr<Impl> _impl;
+
public:
- ExecutionProfiler(size_t max_depth);
+ ExecutionProfiler(int32_t profile_depth);
~ExecutionProfiler();
TaskId resolve(const vespalib::string &name);
+ const vespalib::string &name_of(TaskId task) const { return _names[task]; }
void start(TaskId task) {
if (++_level <= _max_depth) {
- track_start(task);
+ _impl->track_start(task);
}
}
void complete() {
if (--_level < _max_depth) {
- track_complete();
+ _impl->track_complete();
}
}
- void report(slime::Cursor &obj, const NameMapper & = [](const vespalib::string &name) noexcept { return name; }) const;
+ void report(slime::Cursor &obj, const NameMapper &name_mapper =
+ [](const vespalib::string &name) noexcept { return name; }) const;
};
}