diff options
author | Geir Storli <geirst@vespa.ai> | 2024-05-30 16:23:12 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-30 16:23:12 +0200 |
commit | aab83078477914bcbb2ee374a0d334c3bc79c8f9 (patch) | |
tree | 26b2a3e889d92533c5377716515be751259460df | |
parent | def19e2e37ed229212a649ba200d8c72ca75874f (diff) | |
parent | c55f26a12b1889172b827e9814a5b86772693414 (diff) |
Merge pull request #31343 from vespa-engine/havardpe/analyze-query-trace
initial version of program to analyze query trace
-rw-r--r-- | searchlib/CMakeLists.txt | 1 | ||||
-rw-r--r-- | searchlib/src/apps/vespa-query-analyzer/.gitignore | 3 | ||||
-rw-r--r-- | searchlib/src/apps/vespa-query-analyzer/CMakeLists.txt | 9 | ||||
-rw-r--r-- | searchlib/src/apps/vespa-query-analyzer/vespa-query-analyzer.cpp | 361 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/data/slime/type.h | 1 |
5 files changed, 375 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 570bffa59c2..43e17417c51 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -60,6 +60,7 @@ vespa_define_module( src/apps/vespa-attribute-inspect src/apps/vespa-fileheader-inspect src/apps/vespa-index-inspect + src/apps/vespa-query-analyzer src/apps/vespa-ranking-expression-analyzer TESTS diff --git a/searchlib/src/apps/vespa-query-analyzer/.gitignore b/searchlib/src/apps/vespa-query-analyzer/.gitignore new file mode 100644 index 00000000000..e5a31caab09 --- /dev/null +++ b/searchlib/src/apps/vespa-query-analyzer/.gitignore @@ -0,0 +1,3 @@ +/.depend +/Makefile +/vespa-query-analyzer diff --git a/searchlib/src/apps/vespa-query-analyzer/CMakeLists.txt b/searchlib/src/apps/vespa-query-analyzer/CMakeLists.txt new file mode 100644 index 00000000000..f84a413ee70 --- /dev/null +++ b/searchlib/src/apps/vespa-query-analyzer/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_vespa-query-analyzer_app + SOURCES + vespa-query-analyzer.cpp + OUTPUT_NAME vespa-query-analyzer + INSTALL bin + DEPENDS + searchlib +) diff --git a/searchlib/src/apps/vespa-query-analyzer/vespa-query-analyzer.cpp b/searchlib/src/apps/vespa-query-analyzer/vespa-query-analyzer.cpp new file mode 100644 index 00000000000..178c09c02ac --- /dev/null +++ b/searchlib/src/apps/vespa-query-analyzer/vespa-query-analyzer.cpp @@ -0,0 +1,361 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/data/simple_buffer.h> +#include <vespa/vespalib/data/slime/json_format.h> +#include <vespa/vespalib/data/slime/slime.h> +#include <vespa/vespalib/io/mapped_file_input.h> +#include <vespa/vespalib/util/overload.h> +#include <vespa/vespalib/util/signalhandler.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/searchlib/queryeval/flow.h> +#include <variant> +#include <vector> +#include <map> + +using namespace vespalib::slime::convenience; +using vespalib::make_string_short::fmt; +using vespalib::slime::JsonFormat; +using vespalib::slime::ARRAY; +using vespalib::slime::OBJECT; +using vespalib::slime::STRING; +using vespalib::slime::DOUBLE; +using vespalib::slime::BOOL; +using search::queryeval::FlowStats; +using search::queryeval::InFlow; + +//----------------------------------------------------------------------------- + +using Path = std::vector<std::variant<size_t,vespalib::stringref>>; +using Paths = std::vector<Path>; + +template <typename F> +struct Matcher : vespalib::slime::ObjectTraverser { + Path path; + Paths result; + F match; + ~Matcher(); + Matcher(F match_in) noexcept : path(), result(), match(match_in) {} + void search(const Inspector &node) { + if (path.empty() && match(path, node)) { + result.push_back(path); + } + if (node.type() == OBJECT()) { + node.traverse(*this); + } + if (node.type() == ARRAY()) { + size_t size = node.entries(); + for (size_t i = 0; i < size; ++i) { + path.emplace_back(i); + if (match(path, node[i])) { + result.push_back(path); + } + search(node[i]); + path.pop_back(); + } + } + } + void field(const Memory &symbol, const Inspector &inspector) final { + path.emplace_back(symbol.make_stringref()); + if (match(path, inspector)) { + result.push_back(path); + } + search(inspector); + path.pop_back(); + } +}; +template <typename F> Matcher<F>::~Matcher() = default; + +std::vector<Path> find_field(const Inspector &root, const vespalib::string &name) { + auto matcher = Matcher([&](const Path &path, const Inspector &){ + return ((path.size() > 0) && + (std::holds_alternative<vespalib::stringref>(path.back())) && + (std::get<vespalib::stringref>(path.back()) == name)); + }); + matcher.search(root); + return matcher.result; +} + +std::vector<Path> find_tag(const Inspector &root, const vespalib::string &name) { + auto matcher = Matcher([&](const Path &path, const Inspector &value){ + return ((path.size() > 0) && + (std::holds_alternative<vespalib::stringref>(path.back())) && + (std::get<vespalib::stringref>(path.back()) == "tag") && + (value.asString().make_stringref() == name)); + }); + matcher.search(root); + return matcher.result; +} + +vespalib::string path_to_str(const Path &path) { + size_t cnt = 0; + vespalib::string str("["); + for (const auto &item: path) { + if (cnt++ > 0) { + str.append(","); + } + std::visit(vespalib::overload{ + [&str](size_t value)noexcept{ str.append(fmt("%zu", value)); }, + [&str](vespalib::stringref value)noexcept{ str.append(value); }}, item); + } + str.append("]"); + return str; +} + +vespalib::string strip_name(vespalib::stringref name) { + auto end = name.find("<"); + auto ns = name.rfind("::", end); + size_t begin = (ns > name.size()) ? 0 : ns + 2; + return name.substr(begin, end - begin); +} + +const Inspector &apply_path(const Inspector &node, const Path &path, size_t max = -1) { + size_t cnt = 0; + const Inspector *ptr = &node; + for (const auto &elem: path) { + if (cnt++ >= max) { + return *ptr; + } + if (std::holds_alternative<size_t>(elem)) { + ptr = &((*ptr)[std::get<size_t>(elem)]); + } + if (std::holds_alternative<vespalib::stringref>(elem)) { + auto ref = std::get<vespalib::stringref>(elem); + ptr = &((*ptr)[Memory(ref.data(), ref.size())]); + } + } + return *ptr; +} + +void extract(vespalib::string &value, const Inspector &data) { + if (data.valid() && data.type() == STRING()) { + value = data.asString().make_stringref(); + } +} + +struct Sample { + enum class Type { INVALID, INIT, SEEK, UNPACK, TERMWISE }; + Type type = Type::INVALID; + std::vector<size_t> path; + double self_time_ms = 0.0; + double total_time_ms = 0.0; + size_t count = 0; + Sample(const Inspector &sample) { + auto name = sample["name"].asString().make_stringref(); + if (ends_with(name, "/init")) { + type = Type::INIT; + } + if (ends_with(name, "/seek")) { + type = Type::SEEK; + } + if (ends_with(name, "/unpack")) { + type = Type::UNPACK; + } + if (ends_with(name, "/termwise")) { + type = Type::TERMWISE; + } + if (starts_with(name, "/")) { + size_t child = 0; + for (size_t pos = 1; pos < name.size(); ++pos) { + char c = name[pos]; + if (c == '/') { + path.push_back(child); + child = 0; + } else { + if (c < '0' || c > '9') { + break; + } + child = child * 10 + (c - '0'); + } + } + } + self_time_ms = sample["self_time_ms"].asDouble(); + total_time_ms = sample["total_time_ms"].asDouble(); + count = sample["count"].asLong(); + } + static vespalib::string type_to_str(Type type) { + switch(type) { + case Type::INVALID: return "<invalid>"; + case Type::INIT: return "init"; + case Type::SEEK: return "seek"; + case Type::UNPACK: return "unpack"; + case Type::TERMWISE: return "termwise"; + } + abort(); + } + static vespalib::string path_to_str(const std::vector<size_t> &path) { + vespalib::string result("/"); + for (size_t elem: path) { + result += fmt("%zu/", elem); + } + return result; + } + vespalib::string to_string() const { + return fmt("type: %s, path: %s, count: %zu, total_time_ms: %g\n", + type_to_str(type).c_str(), path_to_str(path).c_str(), count, total_time_ms); + } +}; + +struct Node { + vespalib::string type = "unknown"; + bool strict = false; + FlowStats flow_stats = FlowStats(0.0, 0.0, 0.0); + InFlow in_flow = InFlow(0.0); + size_t count = 0; + double self_time_ms = 0.0; + double total_time_ms = 0.0; + std::vector<Node> children; + Node(const Inspector &obj) { + extract(type, obj["[type]"]); + type = strip_name(type); + strict = obj["strict"].asBool(); + flow_stats.estimate = obj["relative_estimate"].asDouble(); + flow_stats.cost = obj["cost"].asDouble(); + flow_stats.strict_cost = obj["strict_cost"].asDouble(); + const Inspector &list = obj["children"]; + for (size_t i = 0; true; ++i) { + const Inspector &child = list[fmt("[%zu]", i)]; + if (child.valid()) { + children.emplace_back(child); + } else { + break; + } + } + } + ~Node(); + void add_sample(const Sample &sample) { + Node *node = this; + for (size_t child: sample.path) { + if (child < node->children.size()) { + node = &node->children[child]; + } else { + fprintf(stderr, "... ignoring bad sample: %s\n", sample.to_string().c_str()); + return; + } + } + node->count += sample.count; + node->self_time_ms += sample.self_time_ms; + node->total_time_ms += sample.total_time_ms; + } + void dump_line(size_t indent) const { + fprintf(stderr, "|%10zu ", count); + fprintf(stderr, "|%11.3f ", total_time_ms); + fprintf(stderr, "|%10.3f | ", self_time_ms); + for (size_t i = 0; i < indent; ++i) { + fprintf(stderr, " "); + } + fprintf(stderr, "%s\n", type.c_str()); + for (const Node &child: children) { + child.dump_line(indent + 1); + } + } + void dump() const { + fprintf(stderr, "| count | total_time | self_time | structure\n"); + fprintf(stderr, "+-----------+------------+-----------+-------------------------------\n"); + dump_line(0); + fprintf(stderr, "+-----------+------------+-----------+-------------------------------\n"); + } +}; +Node::~Node() = default; + +void each_sample_list(const Inspector &list, auto f) { + for (size_t i = 0; i < list.entries(); ++i) { + f(Sample(list[i])); + each_sample_list(list[i]["children"], f); + } +} + +void each_sample(const Inspector &prof, auto f) { + each_sample_list(prof["roots"], f); +} + +struct State { + void analyze(const Inspector &root) { + auto bp_list = find_field(root, "optimized"); + for (const Path &path: bp_list) { + const Inspector &node = apply_path(root, path, path.size()-3); + const Inspector &key_field = node["distribution-key"]; + if (key_field.valid()) { + int key = key_field.asLong(); + Node data(apply_path(root, path)); + auto prof_list = find_tag(node, "match_profiling"); + double total_ms = 0.0; + std::map<Sample::Type,double> time_map; + for (const Path &prof_path: prof_list) { + const Inspector &prof = apply_path(node, prof_path, prof_path.size()-1); + if (prof["profiler"].asString().make_stringref() == "tree") { + total_ms += prof["total_time_ms"].asDouble(); + each_sample(prof, [&](const Sample &sample) { + if (sample.type == Sample::Type::SEEK) { + data.add_sample(sample); + } + if (sample.path.empty()) { + time_map[sample.type] += sample.total_time_ms; + } + }); + } + } + data.dump(); + fprintf(stderr, "distribution key: %d, total_time_ms: %g\n", key, total_ms); + for (auto [type, time]: time_map) { + fprintf(stderr, "sample type %s used %g ms total\n", Sample::type_to_str(type).c_str(), time); + } + } + } + } +}; + +//----------------------------------------------------------------------------- + +void usage(const char *self) { + fprintf(stderr, "usage: %s <json query result file>\n", self); + fprintf(stderr, " analyze query cost (planning vs profiling)\n"); + fprintf(stderr, " query result must contain optimized blueprint dump\n"); + fprintf(stderr, " query result must contain match phase tree profiling\n\n"); +} + +struct MyApp { + vespalib::string file_name; + bool parse_params(int argc, char **argv); + int main(); +}; + +bool +MyApp::parse_params(int argc, char **argv) { + if (argc != 2) { + return false; + } + file_name = argv[1]; + return true; +} + +int +MyApp::main() +{ + vespalib::MappedFileInput file(file_name); + if (!file.valid()) { + fprintf(stderr, "could not read input file: '%s'\n", + file_name.c_str()); + return 1; + } + Slime slime; + if(JsonFormat::decode(file, slime) == 0) { + fprintf(stderr, "file contains invalid json: '%s'\n", + file_name.c_str()); + return 1; + } + State state; + state.analyze(slime.get()); + return 0; +} + +int main(int argc, char **argv) { + MyApp my_app; + vespalib::SignalHandler::PIPE.ignore(); + if (!my_app.parse_params(argc, argv)) { + usage(argv[0]); + return 1; + } + return my_app.main(); +} + +//----------------------------------------------------------------------------- diff --git a/vespalib/src/vespa/vespalib/data/slime/type.h b/vespalib/src/vespa/vespalib/data/slime/type.h index d3d15526f12..90e79acccd0 100644 --- a/vespalib/src/vespa/vespalib/data/slime/type.h +++ b/vespalib/src/vespa/vespalib/data/slime/type.h @@ -19,6 +19,7 @@ protected: public: uint32_t getId() const noexcept { return _id; } + bool operator==(const Type &rhs) const noexcept { return _id == rhs._id; } }; /** |