aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/CMakeLists.txt2
-rw-r--r--searchlib/src/apps/vespa-query-analyzer/.gitignore3
-rw-r--r--searchlib/src/apps/vespa-query-analyzer/CMakeLists.txt9
-rw-r--r--searchlib/src/apps/vespa-query-analyzer/vespa-query-analyzer.cpp361
-rw-r--r--searchlib/src/tests/attribute/bitvector_search_cache/bitvector_search_cache_test.cpp13
-rw-r--r--searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp4
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp77
-rw-r--r--searchlib/src/tests/features/first_phase_rank/CMakeLists.txt11
-rw-r--r--searchlib/src/tests/features/first_phase_rank/first_phase_rank_test.cpp143
-rw-r--r--searchlib/src/tests/features/prod_features_test.cpp27
-rw-r--r--searchlib/src/tests/hitcollector/CMakeLists.txt2
-rw-r--r--searchlib/src/tests/hitcollector/hitcollector_test.cpp269
-rw-r--r--searchlib/src/tests/hitcollector/sorted_hit_sequence_test.cpp14
-rw-r--r--searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp22
-rw-r--r--searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp12
-rw-r--r--searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp2
-rw-r--r--searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp30
-rw-r--r--searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp42
-rw-r--r--searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h25
-rw-r--r--searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp178
-rw-r--r--searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp50
-rw-r--r--searchlib/src/tests/queryeval/sourceblender/sourceblender_test.cpp35
-rw-r--r--searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp8
-rw-r--r--searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp112
-rw-r--r--searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp23
-rw-r--r--searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp9
-rw-r--r--searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp23
-rw-r--r--searchlib/src/tests/ranksetup/ranksetup_test.cpp15
-rw-r--r--searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp1
-rw-r--r--searchlib/src/tests/tensor/distance_functions/CMakeLists.txt7
-rw-r--r--searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp129
-rw-r--r--searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp38
-rw-r--r--searchlib/src/tests/util/token_extractor/token_extractor_test.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp34
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h10
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp27
-rw-r--r--searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h3
-rw-r--r--searchlib/src/vespa/searchlib/features/CMakeLists.txt3
-rw-r--r--searchlib/src/vespa/searchlib/features/bm25_feature.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/features/bm25_feature.h2
-rw-r--r--searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp71
-rw-r--r--searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h40
-rw-r--r--searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp67
-rw-r--r--searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h32
-rw-r--r--searchlib/src/vespa/searchlib/features/second_phase_feature.cpp57
-rw-r--r--searchlib/src/vespa/searchlib/features/second_phase_feature.h35
-rw-r--r--searchlib/src/vespa/searchlib/features/setup.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.cpp15
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.h12
-rw-r--r--searchlib/src/vespa/searchlib/fef/objectstore.cpp7
-rw-r--r--searchlib/src/vespa/searchlib/fef/objectstore.h3
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.cpp6
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.h3
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp38
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h10
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/field_index.cpp15
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/field_index.h8
-rw-r--r--searchlib/src/vespa/searchlib/query/query_term_simple.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.cpp79
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.h17
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h1
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/first_phase_rescorer.cpp38
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/first_phase_rescorer.h25
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/flow.h10
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/flow_tuning.h8
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp288
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/hitcollector.h16
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp43
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h14
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp10
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp44
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h32
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp9
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h44
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h72
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp36
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h33
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp66
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h15
-rw-r--r--searchlib/src/vespa/searchlib/tensor/angular_distance.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/tensor/angular_distance.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/bound_distance_function.h5
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp13
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_function_factory.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp21
-rw-r--r--searchlib/src/vespa/searchlib/tensor/euclidean_distance.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hamming_distance.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp39
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.h6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp7
-rw-r--r--searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp10
-rw-r--r--searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp21
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp23
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp14
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h1
-rw-r--r--searchlib/src/vespa/searchlib/test/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.cpp71
-rw-r--r--searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.h60
-rw-r--r--searchlib/src/vespa/searchlib/util/token_extractor.cpp2
111 files changed, 2636 insertions, 847 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index a5453ac5273..43e17417c51 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -60,6 +60,7 @@ vespa_define_module(
src/apps/vespa-attribute-inspect
src/apps/vespa-fileheader-inspect
src/apps/vespa-index-inspect
+ src/apps/vespa-query-analyzer
src/apps/vespa-ranking-expression-analyzer
TESTS
@@ -140,6 +141,7 @@ vespa_define_module(
src/tests/features/element_completeness
src/tests/features/element_similarity_feature
src/tests/features/euclidean_distance
+ src/tests/features/first_phase_rank
src/tests/features/imported_dot_product
src/tests/features/internal_max_reduce_prod_join_feature
src/tests/features/item_raw_score
diff --git a/searchlib/src/apps/vespa-query-analyzer/.gitignore b/searchlib/src/apps/vespa-query-analyzer/.gitignore
new file mode 100644
index 00000000000..e5a31caab09
--- /dev/null
+++ b/searchlib/src/apps/vespa-query-analyzer/.gitignore
@@ -0,0 +1,3 @@
+/.depend
+/Makefile
+/vespa-query-analyzer
diff --git a/searchlib/src/apps/vespa-query-analyzer/CMakeLists.txt b/searchlib/src/apps/vespa-query-analyzer/CMakeLists.txt
new file mode 100644
index 00000000000..f84a413ee70
--- /dev/null
+++ b/searchlib/src/apps/vespa-query-analyzer/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_vespa-query-analyzer_app
+ SOURCES
+ vespa-query-analyzer.cpp
+ OUTPUT_NAME vespa-query-analyzer
+ INSTALL bin
+ DEPENDS
+ searchlib
+)
diff --git a/searchlib/src/apps/vespa-query-analyzer/vespa-query-analyzer.cpp b/searchlib/src/apps/vespa-query-analyzer/vespa-query-analyzer.cpp
new file mode 100644
index 00000000000..178c09c02ac
--- /dev/null
+++ b/searchlib/src/apps/vespa-query-analyzer/vespa-query-analyzer.cpp
@@ -0,0 +1,361 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/data/simple_buffer.h>
+#include <vespa/vespalib/data/slime/json_format.h>
+#include <vespa/vespalib/data/slime/slime.h>
+#include <vespa/vespalib/io/mapped_file_input.h>
+#include <vespa/vespalib/util/overload.h>
+#include <vespa/vespalib/util/signalhandler.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/searchlib/queryeval/flow.h>
+#include <variant>
+#include <vector>
+#include <map>
+
+using namespace vespalib::slime::convenience;
+using vespalib::make_string_short::fmt;
+using vespalib::slime::JsonFormat;
+using vespalib::slime::ARRAY;
+using vespalib::slime::OBJECT;
+using vespalib::slime::STRING;
+using vespalib::slime::DOUBLE;
+using vespalib::slime::BOOL;
+using search::queryeval::FlowStats;
+using search::queryeval::InFlow;
+
+//-----------------------------------------------------------------------------
+
+using Path = std::vector<std::variant<size_t,vespalib::stringref>>;
+using Paths = std::vector<Path>;
+
+// Recursive walker over a slime structure. While descending it maintains
+// 'path' (array indexes and object field names leading from the search root
+// to the current value) and stores a copy of that path in 'result' for every
+// value where match(path, value) returns true.
+template <typename F>
+struct Matcher : vespalib::slime::ObjectTraverser {
+ Path path;
+ Paths result;
+ F match;
+ ~Matcher();
+ Matcher(F match_in) noexcept : path(), result(), match(match_in) {}
+ // Visit 'node' and all values nested below it.
+ void search(const Inspector &node) {
+ // nested values are matched by the array loop below or by field()
+ // before recursing, so only the root (empty path) is matched here
+ if (path.empty() && match(path, node)) {
+ result.push_back(path);
+ }
+ if (node.type() == OBJECT()) {
+ // object fields are handled by field() below
+ node.traverse(*this);
+ }
+ if (node.type() == ARRAY()) {
+ size_t size = node.entries();
+ for (size_t i = 0; i < size; ++i) {
+ // extend the path with the array index while visiting the entry
+ path.emplace_back(i);
+ if (match(path, node[i])) {
+ result.push_back(path);
+ }
+ search(node[i]);
+ path.pop_back();
+ }
+ }
+ }
+ // ObjectTraverser callback (presumably invoked once per object field by
+ // node.traverse()); extends the path with the field name while the field
+ // value is matched and searched.
+ void field(const Memory &symbol, const Inspector &inspector) final {
+ path.emplace_back(symbol.make_stringref());
+ if (match(path, inspector)) {
+ result.push_back(path);
+ }
+ search(inspector);
+ path.pop_back();
+ }
+};
+// destructor is defined out of line
+template <typename F> Matcher<F>::~Matcher() = default;
+
+// Collect the paths of all values below 'root' that are reached through an
+// object field called 'name'.
+std::vector<Path> find_field(const Inspector &root, const vespalib::string &name) {
+    auto is_named_field = [&](const Path &path, const Inspector &) {
+        if (path.empty()) {
+            return false;
+        }
+        const auto &last = path.back();
+        if (!std::holds_alternative<vespalib::stringref>(last)) {
+            return false; // last step is an array index, not a field name
+        }
+        return (std::get<vespalib::stringref>(last) == name);
+    };
+    auto finder = Matcher(is_named_field);
+    finder.search(root);
+    return finder.result;
+}
+
+// Collect the paths of all "tag" fields below 'root' whose string value
+// equals 'name'.
+std::vector<Path> find_tag(const Inspector &root, const vespalib::string &name) {
+    auto is_wanted_tag = [&](const Path &path, const Inspector &value) {
+        if (path.empty()) {
+            return false;
+        }
+        const auto &last = path.back();
+        if (!std::holds_alternative<vespalib::stringref>(last)) {
+            return false; // last step is an array index, not a field name
+        }
+        if (!(std::get<vespalib::stringref>(last) == "tag")) {
+            return false;
+        }
+        return (value.asString().make_stringref() == name);
+    };
+    auto finder = Matcher(is_wanted_tag);
+    finder.search(root);
+    return finder.result;
+}
+
+// Render a path as "[a,b,c]"; array indexes are printed as numbers and field
+// names are appended as-is.
+vespalib::string path_to_str(const Path &path) {
+    vespalib::string str("[");
+    auto append_index = [&str](size_t value)noexcept{ str.append(fmt("%zu", value)); };
+    auto append_name = [&str](vespalib::stringref value)noexcept{ str.append(value); };
+    bool first = true;
+    for (const auto &item: path) {
+        if (!first) {
+            str.append(",");
+        }
+        first = false;
+        std::visit(vespalib::overload{append_index, append_name}, item);
+    }
+    str.append("]");
+    return str;
+}
+
+// Reduce a type name to its last "::"-separated component, dropping
+// everything from the first '<' onwards.
+vespalib::string strip_name(vespalib::stringref name) {
+    const auto tmpl_pos = name.find("<");
+    // last "::" starting at or before the '<' (or anywhere if there is no '<')
+    const auto sep_pos = name.rfind("::", tmpl_pos);
+    size_t first = 0;
+    if (sep_pos <= name.size()) {
+        // a separator was found; skip past it
+        first = sep_pos + 2;
+    }
+    return name.substr(first, tmpl_pos - first);
+}
+
+// Follow at most 'max' leading steps of 'path' starting at 'node' and return
+// the value reached. Index steps select array entries, name steps select
+// object fields. With the default 'max' the whole path is followed.
+const Inspector &apply_path(const Inspector &node, const Path &path, size_t max = -1) {
+    const size_t steps = (max < path.size()) ? max : path.size();
+    const Inspector *current = &node;
+    for (size_t i = 0; i < steps; ++i) {
+        const auto &step = path[i];
+        if (const auto *index = std::get_if<size_t>(&step)) {
+            current = &((*current)[*index]);
+        } else if (const auto *key = std::get_if<vespalib::stringref>(&step)) {
+            current = &((*current)[Memory(key->data(), key->size())]);
+        }
+    }
+    return *current;
+}
+
+// Overwrite 'value' with the content of 'data', but only if 'data' is a valid
+// string value; otherwise 'value' is left untouched.
+void extract(vespalib::string &value, const Inspector &data) {
+    if (!data.valid()) {
+        return;
+    }
+    if (!(data.type() == STRING())) {
+        return;
+    }
+    value = data.asString().make_stringref();
+}
+
+// A single profiling sample, built from a slime object with the fields
+// "name", "self_time_ms", "total_time_ms" and "count".
+struct Sample {
+ enum class Type { INVALID, INIT, SEEK, UNPACK, TERMWISE };
+ // derived from the suffix of the sample name; INVALID if no suffix matches
+ Type type = Type::INVALID;
+ // child indexes parsed from the leading "/<n>/<n>/..." part of the name
+ std::vector<size_t> path;
+ double self_time_ms = 0.0;
+ double total_time_ms = 0.0;
+ size_t count = 0;
+ Sample(const Inspector &sample) {
+ auto name = sample["name"].asString().make_stringref();
+ if (ends_with(name, "/init")) {
+ type = Type::INIT;
+ }
+ if (ends_with(name, "/seek")) {
+ type = Type::SEEK;
+ }
+ if (ends_with(name, "/unpack")) {
+ type = Type::UNPACK;
+ }
+ if (ends_with(name, "/termwise")) {
+ type = Type::TERMWISE;
+ }
+ if (starts_with(name, "/")) {
+ // parse decimal numbers separated by '/'; a number is only added to
+ // the path when it is terminated by '/', and parsing stops at the
+ // first character that is neither a digit nor '/'
+ size_t child = 0;
+ for (size_t pos = 1; pos < name.size(); ++pos) {
+ char c = name[pos];
+ if (c == '/') {
+ path.push_back(child);
+ child = 0;
+ } else {
+ if (c < '0' || c > '9') {
+ break;
+ }
+ child = child * 10 + (c - '0');
+ }
+ }
+ }
+ self_time_ms = sample["self_time_ms"].asDouble();
+ total_time_ms = sample["total_time_ms"].asDouble();
+ count = sample["count"].asLong();
+ }
+ // Human-readable name of a sample type.
+ static vespalib::string type_to_str(Type type) {
+ switch(type) {
+ case Type::INVALID: return "<invalid>";
+ case Type::INIT: return "init";
+ case Type::SEEK: return "seek";
+ case Type::UNPACK: return "unpack";
+ case Type::TERMWISE: return "termwise";
+ }
+ // only reached if 'type' holds a value outside the enum
+ abort();
+ }
+ // Render a child index path as "/", "/0/", "/0/1/", ...
+ static vespalib::string path_to_str(const std::vector<size_t> &path) {
+ vespalib::string result("/");
+ for (size_t elem: path) {
+ result += fmt("%zu/", elem);
+ }
+ return result;
+ }
+ // One-line description of the sample; note that the returned string
+ // already ends with a newline.
+ vespalib::string to_string() const {
+ return fmt("type: %s, path: %s, count: %zu, total_time_ms: %g\n",
+ type_to_str(type).c_str(), path_to_str(path).c_str(), count, total_time_ms);
+ }
+};
+
+// One node in a tree built from a slime object dump. Holds the values read
+// from the dump plus counters accumulated from profiling samples.
+struct Node {
+ vespalib::string type = "unknown";
+ bool strict = false;
+ FlowStats flow_stats = FlowStats(0.0, 0.0, 0.0);
+ InFlow in_flow = InFlow(0.0);
+ size_t count = 0;
+ double self_time_ms = 0.0;
+ double total_time_ms = 0.0;
+ std::vector<Node> children;
+ // Reads "[type]", "strict", "relative_estimate", "cost" and "strict_cost"
+ // from 'obj' and recursively builds child nodes from obj["children"].
+ Node(const Inspector &obj) {
+ extract(type, obj["[type]"]);
+ type = strip_name(type);
+ strict = obj["strict"].asBool();
+ flow_stats.estimate = obj["relative_estimate"].asDouble();
+ flow_stats.cost = obj["cost"].asDouble();
+ flow_stats.strict_cost = obj["strict_cost"].asDouble();
+ const Inspector &list = obj["children"];
+ // children are looked up as fields named "[0]", "[1]", ...; the first
+ // missing index ends the list
+ for (size_t i = 0; true; ++i) {
+ const Inspector &child = list[fmt("[%zu]", i)];
+ if (child.valid()) {
+ children.emplace_back(child);
+ } else {
+ break;
+ }
+ }
+ }
+ ~Node();
+ // Add the counters of 'sample' to the node addressed by sample.path
+ // (child indexes starting at this node). A sample whose path does not
+ // resolve is reported on stderr and ignored.
+ void add_sample(const Sample &sample) {
+ Node *node = this;
+ for (size_t child: sample.path) {
+ if (child < node->children.size()) {
+ node = &node->children[child];
+ } else {
+ // NOTE(review): Sample::to_string() already ends with '\n', so this
+ // prints an empty line after the message -- confirm this is intended
+ fprintf(stderr, "... ignoring bad sample: %s\n", sample.to_string().c_str());
+ return;
+ }
+ }
+ node->count += sample.count;
+ node->self_time_ms += sample.self_time_ms;
+ node->total_time_ms += sample.total_time_ms;
+ }
+ // Print one table row for this node to stderr, followed by the rows of all
+ // children with the type name indented one level deeper.
+ void dump_line(size_t indent) const {
+ fprintf(stderr, "|%10zu ", count);
+ fprintf(stderr, "|%11.3f ", total_time_ms);
+ fprintf(stderr, "|%10.3f | ", self_time_ms);
+ for (size_t i = 0; i < indent; ++i) {
+ fprintf(stderr, " ");
+ }
+ fprintf(stderr, "%s\n", type.c_str());
+ for (const Node &child: children) {
+ child.dump_line(indent + 1);
+ }
+ }
+ // Print the whole tree to stderr as a table with header and footer lines.
+ void dump() const {
+ fprintf(stderr, "| count | total_time | self_time | structure\n");
+ fprintf(stderr, "+-----------+------------+-----------+-------------------------------\n");
+ dump_line(0);
+ fprintf(stderr, "+-----------+------------+-----------+-------------------------------\n");
+ }
+};
+// destructor is defined out of line
+Node::~Node() = default;
+
+// Call 'f' with a Sample built from every entry of 'list', descending into
+// the "children" list of each entry right after the entry itself.
+void each_sample_list(const Inspector &list, auto f) {
+    size_t pos = 0;
+    while (pos < list.entries()) {
+        const Inspector &entry = list[pos];
+        f(Sample(entry));
+        each_sample_list(entry["children"], f);
+        ++pos;
+    }
+}
+
+// Call 'f' for every sample found below the "roots" list of the profiler
+// output object 'prof'.
+void each_sample(const Inspector &prof, auto f) {
+ each_sample_list(prof["roots"], f);
+}
+
+// Top-level analysis of a decoded query result.
+struct State {
+ // For every "optimized" field found below 'root': build a Node tree from
+ // its value, add matching profiling samples to it and print the result to
+ // stderr.
+ void analyze(const Inspector &root) {
+ auto bp_list = find_field(root, "optimized");
+ for (const Path &path: bp_list) {
+ // 'node' is the value three path steps above the "optimized" field.
+ // NOTE(review): if the path has fewer than 3 steps the unsigned
+ // subtraction wraps around and apply_path follows the whole path.
+ const Inspector &node = apply_path(root, path, path.size()-3);
+ const Inspector &key_field = node["distribution-key"];
+ if (key_field.valid()) {
+ int key = key_field.asLong();
+ Node data(apply_path(root, path));
+ auto prof_list = find_tag(node, "match_profiling");
+ double total_ms = 0.0;
+ std::map<Sample::Type,double> time_map;
+ for (const Path &prof_path: prof_list) {
+ // dropping the last step ("tag") gives the object containing the tag
+ const Inspector &prof = apply_path(node, prof_path, prof_path.size()-1);
+ if (prof["profiler"].asString().make_stringref() == "tree") {
+ total_ms += prof["total_time_ms"].asDouble();
+ each_sample(prof, [&](const Sample &sample) {
+ // only seek samples are added to the node tree
+ if (sample.type == Sample::Type::SEEK) {
+ data.add_sample(sample);
+ }
+ // samples with an empty path are summed up per sample type
+ if (sample.path.empty()) {
+ time_map[sample.type] += sample.total_time_ms;
+ }
+ });
+ }
+ }
+ data.dump();
+ fprintf(stderr, "distribution key: %d, total_time_ms: %g\n", key, total_ms);
+ for (auto [type, time]: time_map) {
+ fprintf(stderr, "sample type %s used %g ms total\n", Sample::type_to_str(type).c_str(), time);
+ }
+ }
+ }
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+// Print a short usage text to stderr; 'self' is the program name.
+void usage(const char *self) {
+    fprintf(stderr,
+            "usage: %s <json query result file>\n"
+            " analyze query cost (planning vs profiling)\n"
+            " query result must contain optimized blueprint dump\n"
+            " query result must contain match phase tree profiling\n\n",
+            self);
+}
+
+// Command line application state.
+struct MyApp {
+ // name of the input file, taken from the command line by parse_params
+ vespalib::string file_name;
+ // returns false if the command line arguments are not as expected
+ bool parse_params(int argc, char **argv);
+ // runs the analysis; the return value is used as process exit code
+ int main();
+};
+
+// Accept exactly one command line argument: the name of the input file.
+bool
+MyApp::parse_params(int argc, char **argv) {
+    const bool have_file = (argc == 2);
+    if (have_file) {
+        file_name = argv[1];
+    }
+    return have_file;
+}
+
+// Map the input file, decode it as json and analyze the result.
+// Returns 0 on success and 1 if the file cannot be read or decoded.
+int
+MyApp::main()
+{
+    const char *name = file_name.c_str();
+    vespalib::MappedFileInput input(file_name);
+    if (!input.valid()) {
+        fprintf(stderr, "could not read input file: '%s'\n", name);
+        return 1;
+    }
+    Slime doc;
+    const auto decoded = JsonFormat::decode(input, doc);
+    if (decoded == 0) {
+        fprintf(stderr, "file contains invalid json: '%s'\n", name);
+        return 1;
+    }
+    State().analyze(doc.get());
+    return 0;
+}
+
+// Program entry point: ignore SIGPIPE, check the command line and run the app.
+int main(int argc, char **argv) {
+    MyApp app;
+    vespalib::SignalHandler::PIPE.ignore();
+    if (app.parse_params(argc, argv)) {
+        return app.main();
+    }
+    usage(argv[0]);
+    return 1;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/searchlib/src/tests/attribute/bitvector_search_cache/bitvector_search_cache_test.cpp b/searchlib/src/tests/attribute/bitvector_search_cache/bitvector_search_cache_test.cpp
index d51ec22a54a..1d66eefaff7 100644
--- a/searchlib/src/tests/attribute/bitvector_search_cache/bitvector_search_cache_test.cpp
+++ b/searchlib/src/tests/attribute/bitvector_search_cache/bitvector_search_cache_test.cpp
@@ -3,6 +3,7 @@
#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/searchlib/attribute/bitvector_search_cache.h>
#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/vespalib/util/memoryusage.h>
using namespace search;
using namespace search::attribute;
@@ -31,9 +32,13 @@ struct Fixture {
TEST_F("require that bit vectors can be inserted and retrieved", Fixture)
{
EXPECT_EQUAL(0u, f.cache.size());
+ auto old_mem_usage = f.cache.get_memory_usage();
f.cache.insert("foo", f.entry1);
f.cache.insert("bar", f.entry2);
EXPECT_EQUAL(2u, f.cache.size());
+ auto new_mem_usage = f.cache.get_memory_usage();
+ EXPECT_LESS(old_mem_usage.usedBytes(), new_mem_usage.usedBytes());
+ EXPECT_LESS(old_mem_usage.allocatedBytes(), new_mem_usage.allocatedBytes());
EXPECT_EQUAL(f.entry1, f.cache.find("foo"));
EXPECT_EQUAL(f.entry2, f.cache.find("bar"));
@@ -43,9 +48,13 @@ TEST_F("require that bit vectors can be inserted and retrieved", Fixture)
TEST_F("require that insert() doesn't replace existing bit vector", Fixture)
{
f.cache.insert("foo", f.entry1);
+ auto old_mem_usage = f.cache.get_memory_usage();
f.cache.insert("foo", f.entry2);
+ auto new_mem_usage = f.cache.get_memory_usage();
EXPECT_EQUAL(1u, f.cache.size());
EXPECT_EQUAL(f.entry1, f.cache.find("foo"));
+ EXPECT_EQUAL(old_mem_usage.usedBytes(), new_mem_usage.usedBytes());
+ EXPECT_EQUAL(old_mem_usage.allocatedBytes(), new_mem_usage.allocatedBytes());
}
TEST_F("require that cache can be cleared", Fixture)
@@ -53,11 +62,15 @@ TEST_F("require that cache can be cleared", Fixture)
f.cache.insert("foo", f.entry1);
f.cache.insert("bar", f.entry2);
EXPECT_EQUAL(2u, f.cache.size());
+ auto old_mem_usage = f.cache.get_memory_usage();
f.cache.clear();
+ auto new_mem_usage = f.cache.get_memory_usage();
EXPECT_EQUAL(0u, f.cache.size());
EXPECT_TRUE(f.cache.find("foo").get() == nullptr);
EXPECT_TRUE(f.cache.find("bar").get() == nullptr);
+ EXPECT_GREATER(old_mem_usage.usedBytes(), new_mem_usage.usedBytes());
+ EXPECT_GREATER(old_mem_usage.allocatedBytes(), new_mem_usage.allocatedBytes());
}
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp b/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp
index 41ec377dece..7c38c322bc8 100644
--- a/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp
+++ b/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp
@@ -508,6 +508,7 @@ assertBitVector(const std::vector<uint32_t> &expDocIds, const BitVector &bitVect
TEST_F("Entry is inserted into search cache if bit vector posting list is used", SearchCacheFixture)
{
EXPECT_EQUAL(0u, f.imported_attr->getSearchCache()->size());
+ auto old_mem_usage = f.imported_attr->get_memory_usage();
auto ctx = f.create_context(word_term("5678"));
ctx->fetchPostings(queryeval::ExecuteInfo::FULL, true);
TermFieldMatchData match;
@@ -515,6 +516,9 @@ TEST_F("Entry is inserted into search cache if bit vector posting list is used",
TEST_DO(f.assertSearch({3, 5}, *iter));
EXPECT_EQUAL(1u, f.imported_attr->getSearchCache()->size());
+ auto new_mem_usage = f.imported_attr->get_memory_usage();
+ EXPECT_LESS(old_mem_usage.usedBytes(), new_mem_usage.usedBytes());
+ EXPECT_LESS(old_mem_usage.allocatedBytes(), new_mem_usage.allocatedBytes());
auto cacheEntry = f.imported_attr->getSearchCache()->find("5678");
EXPECT_EQUAL(cacheEntry->docIdLimit, f.get_imported_attr()->getNumDocs());
TEST_DO(assertBitVector({3, 5}, *cacheEntry->bitVector));
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index b1b2235165f..cce72837dad 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -210,8 +210,10 @@ public:
}
void expect_entry(uint32_t exp_docid, const DoubleVector& exp_vector, const EntryVector& entries) const {
EXPECT_EQUAL(1u, entries.size());
- EXPECT_EQUAL(exp_docid, entries.back().first);
- EXPECT_EQUAL(exp_vector, entries.back().second);
+ if (entries.size() >= 1u) {
+ EXPECT_EQUAL(exp_docid, entries.back().first);
+ EXPECT_EQUAL(exp_vector, entries.back().second);
+ }
}
void expect_add(uint32_t exp_docid, const DoubleVector& exp_vector) const {
expect_entry(exp_docid, exp_vector, _adds);
@@ -329,6 +331,10 @@ public:
static search::tensor::DistanceFunctionFactory::UP my_dist_fun = search::tensor::make_distance_function_factory(search::attribute::DistanceMetric::Euclidean, vespalib::eval::CellType::DOUBLE);
return *my_dist_fun;
}
+
+ uint32_t check_consistency(uint32_t) const noexcept override {
+ return 0;
+ }
};
class MockNearestNeighborIndexFactory : public NearestNeighborIndexFactory {
@@ -1077,8 +1083,22 @@ TEST_F("Populates address space usage in mixed tensor attribute with hnsw index"
class DenseTensorAttributeMockIndex : public Fixture {
public:
DenseTensorAttributeMockIndex() : Fixture(vec_2d_spec, FixtureTraits().mock_hnsw()) {}
+ void add_vec_a();
};
+// Shared test setup: set the 2d vector (3, 5) for docid 1 through the
+// prepare/complete sequence, verify the calls seen by the mock index after
+// each step, and finally clear the mock index (presumably resetting its
+// recorded calls) so that later expectations start from a clean state.
+void
+DenseTensorAttributeMockIndex::add_vec_a()
+{
+ auto& index = mock_index();
+ auto vec_a = vec_2d(3, 5);
+ auto prepare_result = prepare_set_tensor(1, vec_a);
+ index.expect_prepare_add(1, {3, 5});
+ complete_set_tensor(1, vec_a, std::move(prepare_result));
+ assertGetTensor(vec_a, 1);
+ index.expect_complete_add(1, {3, 5});
+ index.clear();
+}
+
TEST_F("setTensor() updates nearest neighbor index", DenseTensorAttributeMockIndex)
{
auto& index = f.mock_index();
@@ -1097,15 +1117,7 @@ TEST_F("setTensor() updates nearest neighbor index", DenseTensorAttributeMockInd
TEST_F("nearest neighbor index can be updated in two phases", DenseTensorAttributeMockIndex)
{
auto& index = f.mock_index();
- {
- auto vec_a = vec_2d(3, 5);
- auto prepare_result = f.prepare_set_tensor(1, vec_a);
- index.expect_prepare_add(1, {3, 5});
- f.complete_set_tensor(1, vec_a, std::move(prepare_result));
- f.assertGetTensor(vec_a, 1);
- index.expect_complete_add(1, {3, 5});
- }
- index.clear();
+ f.add_vec_a();
{
// Replaces previous value.
auto vec_b = vec_2d(7, 9);
@@ -1121,15 +1133,7 @@ TEST_F("nearest neighbor index can be updated in two phases", DenseTensorAttribu
TEST_F("nearest neighbor index is NOT updated when tensor value is unchanged", DenseTensorAttributeMockIndex)
{
auto& index = f.mock_index();
- {
- auto vec_a = vec_2d(3, 5);
- auto prepare_result = f.prepare_set_tensor(1, vec_a);
- index.expect_prepare_add(1, {3, 5});
- f.complete_set_tensor(1, vec_a, std::move(prepare_result));
- f.assertGetTensor(vec_a, 1);
- index.expect_complete_add(1, {3, 5});
- }
- index.clear();
+ f.add_vec_a();
{
// Replaces previous value with the same value
auto vec_b = vec_2d(3, 5);
@@ -1139,6 +1143,39 @@ TEST_F("nearest neighbor index is NOT updated when tensor value is unchanged", D
f.complete_set_tensor(1, vec_b, std::move(prepare_result));
f.assertGetTensor(vec_b, 1);
index.expect_empty_complete_add();
+ index.expect_empty_add();
+ }
+}
+
+// Interleaves two updates of docid 1: both prepare steps run before either
+// complete step, so the prepare for going back to A still sees A as the
+// current value and yields no prepare result. The completes are then checked
+// to remove the previous vector and add the new one in both cases.
+TEST_F("nearest neighbor index is updated when value changes from A to B to A", DenseTensorAttributeMockIndex)
+{
+ auto& index = f.mock_index();
+ f.add_vec_a();
+ {
+ // Prepare replace of A with B
+ auto vec_b = vec_2d(7, 9);
+ auto prepare_result_b = f.prepare_set_tensor(1, vec_b);
+ index.expect_prepare_add(1, {7, 9});
+ index.clear();
+ // Prepare replace of B with A, but prepare sees original A
+ auto vec_a = vec_2d(3, 5);
+ auto prepare_result_a = f.prepare_set_tensor(1, vec_a);
+ EXPECT_TRUE(prepare_result_a.get() == nullptr);
+ index.expect_empty_prepare_add();
+ index.clear();
+ // Complete set B
+ f.complete_set_tensor(1, vec_b, std::move(prepare_result_b));
+ index.expect_remove(1, {3, 5});
+ f.assertGetTensor(vec_b, 1);
+ index.expect_complete_add(1, {7, 9});
+ index.expect_empty_add();
+ index.clear();
+ // Complete set A, no prepare result but tensor cells changed
+ f.complete_set_tensor(1, vec_a, std::move(prepare_result_a));
+ index.expect_remove(1, {7, 9});
+ index.expect_empty_complete_add();
+ index.expect_add(1, {3, 5});
+ f.assertGetTensor(vec_a, 1);
+ }
+}
diff --git a/searchlib/src/tests/features/first_phase_rank/CMakeLists.txt b/searchlib/src/tests/features/first_phase_rank/CMakeLists.txt
new file mode 100644
index 00000000000..5aa83399d3d
--- /dev/null
+++ b/searchlib/src/tests/features/first_phase_rank/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+vespa_add_executable(searchlib_features_first_phase_rank_test_app TEST
+ SOURCES
+ first_phase_rank_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+ GTest::GTest
+)
+vespa_add_test(NAME searchlib_features_first_phase_rank_test_app COMMAND searchlib_features_first_phase_rank_test_app)
diff --git a/searchlib/src/tests/features/first_phase_rank/first_phase_rank_test.cpp b/searchlib/src/tests/features/first_phase_rank/first_phase_rank_test.cpp
new file mode 100644
index 00000000000..01ba6c36124
--- /dev/null
+++ b/searchlib/src/tests/features/first_phase_rank/first_phase_rank_test.cpp
@@ -0,0 +1,143 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/features/first_phase_rank_feature.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/blueprintfactory.h>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+#define ENABLE_GTEST_MIGRATION
+#include <vespa/searchlib/test/ft_test_app_base.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using search::features::FirstPhaseRankBlueprint;
+using search::features::FirstPhaseRankLookup;
+using search::features::setup_search_features;
+using search::fef::Blueprint;
+using search::fef::BlueprintFactory;
+using search::fef::ObjectStore;
+using search::fef::test::IndexEnvironment;
+using search::fef::test::DummyDependencyHandler;
+using StringVector = std::vector<vespalib::string>;
+
+constexpr feature_t unranked = std::numeric_limits<feature_t>::max(); // score the tests below expect for docids that have no registered first phase rank
+
+/**
+ * Fixture for testing setup of the "firstPhaseRank" blueprint.
+ *
+ * Owns a blueprint factory that is populated by setup_search_features() and
+ * the index environment that is passed to Blueprint::setup().
+ */
+struct FirstPhaseRankBlueprintTest : public ::testing::Test {
+    BlueprintFactory factory;
+    IndexEnvironment index_env;
+
+    FirstPhaseRankBlueprintTest()
+        : ::testing::Test(),
+          factory(),
+          index_env()
+    {
+        setup_search_features(factory);
+    }
+
+    ~FirstPhaseRankBlueprintTest() override;
+
+    // Creates a new "firstPhaseRank" blueprint instance from the factory.
+    std::shared_ptr<Blueprint> make_blueprint() const {
+        return factory.createBlueprint("firstPhaseRank");
+    }
+
+    // Expects setup with 'params' to fail, and the failure message captured by
+    // the dependency handler to equal 'exp_fail_msg'.
+    void expect_setup_fail(const StringVector& params, const vespalib::string& exp_fail_msg) {
+        auto blueprint = make_blueprint();
+        DummyDependencyHandler deps(*blueprint);
+        EXPECT_FALSE(blueprint->setup(index_env, params));
+        EXPECT_EQ(exp_fail_msg, deps.fail_msg);
+    }
+
+    // Expects setup with 'params' to succeed with no inputs and the single
+    // output "score". Returns the blueprint that was set up.
+    std::shared_ptr<Blueprint> expect_setup_succeed(const StringVector& params) {
+        auto blueprint = make_blueprint();
+        DummyDependencyHandler deps(*blueprint);
+        EXPECT_TRUE(blueprint->setup(index_env, params));
+        // Unsigned literal: size() is unsigned, so this avoids a signed/unsigned comparison.
+        EXPECT_EQ(0u, deps.input.size());
+        EXPECT_EQ(StringVector({"score"}), deps.output);
+        return blueprint;
+    }
+};
+
+FirstPhaseRankBlueprintTest::~FirstPhaseRankBlueprintTest() = default;
+
+TEST_F(FirstPhaseRankBlueprintTest, blueprint_can_be_created_from_factory)
+{
+    // The factory must return a non-null blueprint whose dynamic type is FirstPhaseRankBlueprint.
+    const auto created = make_blueprint();
+    EXPECT_TRUE(created);
+    const auto typed = dynamic_pointer_cast<FirstPhaseRankBlueprint>(created);
+    EXPECT_TRUE(typed);
+}
+
+TEST_F(FirstPhaseRankBlueprintTest, blueprint_setup_fails_when_parameter_list_is_not_empty)
+{
+    // A single parameter must be rejected with the message below (0 parameters expected).
+    const StringVector one_param = {"is"};
+    const vespalib::string exp_msg =
+        "The parameter list used for setting up rank feature firstPhaseRank is not valid: "
+        "Expected 0 parameter(s), but got 1";
+    expect_setup_fail(one_param, exp_msg);
+}
+
+TEST_F(FirstPhaseRankBlueprintTest, blueprint_setup_succeeds)
+{
+    // Setup is expected to succeed when no parameters are given.
+    const StringVector no_params;
+    expect_setup_succeed(no_params);
+}
+
+TEST_F(FirstPhaseRankBlueprintTest, blueprint_can_prepare_shared_state)
+{
+    auto bp = expect_setup_succeed({});
+    search::fef::test::QueryEnvironment qenv;
+    ObjectStore shared_objects;
+    // Before prepareSharedState() neither accessor finds a lookup object in the store.
+    EXPECT_EQ(nullptr, FirstPhaseRankLookup::get_mutable_shared_state(shared_objects));
+    EXPECT_EQ(nullptr, FirstPhaseRankLookup::get_shared_state(shared_objects));
+    bp->prepareSharedState(qenv, shared_objects);
+    // Afterwards both accessors must return a non-null lookup object.
+    EXPECT_NE(nullptr, FirstPhaseRankLookup::get_mutable_shared_state(shared_objects));
+    EXPECT_NE(nullptr, FirstPhaseRankLookup::get_shared_state(shared_objects));
+}
+
+TEST_F(FirstPhaseRankBlueprintTest, dump_features)
+{
+ FtTestAppBase::FT_DUMP_EMPTY(factory, "firstPhaseRank", index_env); // presumably asserts that firstPhaseRank contributes no dump features - confirm in FtTestAppBase
+}
+
+/**
+ * Fixture for running the "firstPhaseRank" executor through FtFeatureTest.
+ */
+struct FirstPhaseRankExecutorTest : public ::testing::Test {
+    BlueprintFactory factory;
+    FtFeatureTest test;
+
+    FirstPhaseRankExecutorTest()
+        : ::testing::Test(),
+          factory(),
+          test(factory, "firstPhaseRank")
+    {
+        setup_search_features(factory);
+    }
+    ~FirstPhaseRankExecutorTest() override;
+
+    // Sets up the feature test, then registers every (docid, rank) pair in the
+    // shared lookup object found in the query environment's object store.
+    void setup(std::vector<std::pair<uint32_t,uint32_t>> ranks) {
+        EXPECT_TRUE(test.setup());
+        auto* shared_lookup = FirstPhaseRankLookup::get_mutable_shared_state(test.getQueryEnv().getObjectStore());
+        ASSERT_NE(nullptr, shared_lookup);
+        for (auto& [docid, rank] : ranks) {
+            shared_lookup->add(docid, rank);
+        }
+    }
+
+    // Executes the feature for 'docid'; the 0.000001 argument is passed on to
+    // FtFeatureTest::execute() together with the expected score.
+    bool execute(feature_t exp_score, uint32_t docid) {
+        constexpr double epsilon = 0.000001;
+        return test.execute(exp_score, epsilon, docid);
+    }
+};
+
+FirstPhaseRankExecutorTest::~FirstPhaseRankExecutorTest() = default;
+
+TEST_F(FirstPhaseRankExecutorTest, unranked_docid_gives_huge_output)
+{
+ setup({}); // no (docid, rank) pairs are registered in the lookup
+ EXPECT_TRUE(execute(unranked, 1)); // docid 1 is expected to yield 'unranked' (the maximum feature_t value)
+}
+
+TEST_F(FirstPhaseRankExecutorTest, ranked_docid_gives_expected_output)
+{
+ setup({{3, 5}, {7, 4}}); // register rank 5 for docid 3 and rank 4 for docid 7
+ EXPECT_TRUE(execute(unranked, 2)); // docid below the first registered one
+ EXPECT_TRUE(execute(5, 3)); // registered: score equals the registered rank
+ EXPECT_TRUE(execute(unranked, 4)); // docids 4-6 lie between the registered ones
+ EXPECT_TRUE(execute(unranked, 5));
+ EXPECT_TRUE(execute(unranked, 6));
+ EXPECT_TRUE(execute(4, 7)); // registered: score equals the registered rank
+ EXPECT_TRUE(execute(unranked, 8)); // docid above the last registered one
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/features/prod_features_test.cpp b/searchlib/src/tests/features/prod_features_test.cpp
index 22105533895..fb00b4ff5e6 100644
--- a/searchlib/src/tests/features/prod_features_test.cpp
+++ b/searchlib/src/tests/features/prod_features_test.cpp
@@ -33,6 +33,7 @@
#include <vespa/searchlib/features/random_normal_stable_feature.h>
#include <vespa/searchlib/features/randomfeature.h>
#include <vespa/searchlib/features/rankingexpressionfeature.h>
+#include <vespa/searchlib/features/second_phase_feature.h>
#include <vespa/searchlib/features/setup.h>
#include <vespa/searchlib/features/termfeature.h>
#include <vespa/searchlib/features/utils.h>
@@ -614,6 +615,32 @@ TEST_F(ProdFeaturesTest, test_first_phase)
}
}
+TEST_F(ProdFeaturesTest, test_second_phase) // Covers both the blueprint and the executor of the secondPhase feature.
+{
+ { // Test blueprint: instance creation, setup with and without parameters, and feature dump.
+ SecondPhaseBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "secondPhase"));
+
+ FtIndexEnvironment ie;
+ ie.getProperties().add(indexproperties::rank::SecondPhase::NAME, "random"); // set the second phase rank property to "random"
+
+ StringList params, in, out;
+ FT_SETUP_OK(pt, ie, params, in.add("random"), out.add("score")); // no parameters: expected input is "random", expected output is "score"
+ FT_SETUP_FAIL(pt, params.add("foo")); // setup must fail once a "foo" parameter is given
+ params.clear(); // drop the "foo" parameter added for the failure case
+
+ FT_DUMP_EMPTY(_factory, "secondPhase", ie);
+ }
+
+ { // Test executor: with the second phase rank property set to "value(11)" the feature is expected to output 11.
+ FtFeatureTest ft(_factory, "secondPhase");
+ ft.getIndexEnv().getProperties().add(indexproperties::rank::SecondPhase::NAME, "value(11)");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(11.0f));
+ }
+}
+
TEST_F(ProdFeaturesTest, test_foreach)
{
{ // Test blueprint.
diff --git a/searchlib/src/tests/hitcollector/CMakeLists.txt b/searchlib/src/tests/hitcollector/CMakeLists.txt
index 5cedbcbd7e6..cc62dd82af4 100644
--- a/searchlib/src/tests/hitcollector/CMakeLists.txt
+++ b/searchlib/src/tests/hitcollector/CMakeLists.txt
@@ -4,6 +4,7 @@ vespa_add_executable(searchlib_hitcollector_test_app TEST
hitcollector_test.cpp
DEPENDS
searchlib
+ GTest::gtest
)
vespa_add_test(NAME searchlib_hitcollector_test_app COMMAND searchlib_hitcollector_test_app)
vespa_add_executable(searchlib_sorted_hit_sequence_test_app TEST
@@ -11,5 +12,6 @@ vespa_add_executable(searchlib_sorted_hit_sequence_test_app TEST
sorted_hit_sequence_test.cpp
DEPENDS
searchlib
+ GTest::gtest
)
vespa_add_test(NAME searchlib_sorted_hit_sequence_test_app COMMAND searchlib_sorted_hit_sequence_test_app)
diff --git a/searchlib/src/tests/hitcollector/hitcollector_test.cpp b/searchlib/src/tests/hitcollector/hitcollector_test.cpp
index e6e38181412..60daa571f1d 100644
--- a/searchlib/src/tests/hitcollector/hitcollector_test.cpp
+++ b/searchlib/src/tests/hitcollector/hitcollector_test.cpp
@@ -1,9 +1,9 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/fef/fef.h>
#include <vespa/searchlib/queryeval/hitcollector.h>
+#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/log/log.h>
LOG_SETUP("hitcollector_test");
@@ -13,6 +13,8 @@ using namespace search::fef;
using namespace search::queryeval;
using ScoreMap = std::map<uint32_t, feature_t>;
+using DocidVector = std::vector<uint32_t>;
+using RankedHitVector = std::vector<RankedHit>;
using Ranges = std::pair<Scores, Scores>;
@@ -67,11 +69,11 @@ void checkResult(const ResultSet & rs, const std::vector<RankedHit> & exp)
if ( ! exp.empty()) {
const RankedHit * rh = rs.getArray();
ASSERT_TRUE(rh != nullptr);
- ASSERT_EQUAL(rs.getArrayUsed(), exp.size());
+ ASSERT_EQ(rs.getArrayUsed(), exp.size());
for (uint32_t i = 0; i < exp.size(); ++i) {
- EXPECT_EQUAL(rh[i].getDocId(), exp[i].getDocId());
- EXPECT_EQUAL(rh[i].getRank() + 1.0, exp[i].getRank() + 1.0);
+ EXPECT_EQ(rh[i].getDocId(), exp[i].getDocId());
+ EXPECT_DOUBLE_EQ(rh[i].getRank() + 64.0, exp[i].getRank() + 64.0);
}
} else {
ASSERT_TRUE(rs.getArray() == nullptr);
@@ -93,21 +95,24 @@ void checkResult(ResultSet & rs, BitVector * exp)
}
}
-void testAddHit(uint32_t numDocs, uint32_t maxHitsSize)
+void testAddHit(uint32_t numDocs, uint32_t maxHitsSize, const vespalib::string& label)
{
+ SCOPED_TRACE(label);
LOG(info, "testAddHit: no hits");
- { // no hits
+ {
+ SCOPED_TRACE("no hits");
HitCollector hc(numDocs, maxHitsSize);
std::vector<RankedHit> expRh;
std::unique_ptr<ResultSet> rs = hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, nullptr));
+ checkResult(*rs, expRh);
+ checkResult(*rs, nullptr);
}
LOG(info, "testAddHit: only ranked hits");
- { // only ranked hits
+ {
+ SCOPED_TRACE("only ranked hits");
HitCollector hc(numDocs, maxHitsSize);
std::vector<RankedHit> expRh;
@@ -121,12 +126,13 @@ void testAddHit(uint32_t numDocs, uint32_t maxHitsSize)
}
std::unique_ptr<ResultSet> rs = hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, nullptr));
+ checkResult(*rs, expRh);
+ checkResult(*rs, nullptr);
}
LOG(info, "testAddHit: both ranked hits and bit vector hits");
- { // both ranked hits and bit vector hits
+ {
+ SCOPED_TRACE("both ranked hits and bitvector hits");
HitCollector hc(numDocs, maxHitsSize);
std::vector<RankedHit> expRh;
BitVector::UP expBv(BitVector::create(numDocs));
@@ -144,14 +150,15 @@ void testAddHit(uint32_t numDocs, uint32_t maxHitsSize)
}
std::unique_ptr<ResultSet> rs = hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, expBv.get()));
+ checkResult(*rs, expRh);
+ checkResult(*rs, expBv.get());
}
}
-TEST("testAddHit") {
- TEST_DO(testAddHit(30, 10));
- TEST_DO(testAddHit(400, 10)); // 400/32 = 12 which is bigger than 10.
+TEST(HitCollectorTest, testAddHit)
+{
+ testAddHit(30, 10, "numDocs==30");
+ testAddHit(400, 10, "numDocs==400"); // 400/32 = 12 which is bigger than 10.
}
struct Fixture {
@@ -197,14 +204,17 @@ struct DescendingScoreFixture : Fixture {
DescendingScoreFixture::~DescendingScoreFixture() = default;
-TEST_F("testReRank - empty", Fixture) {
- EXPECT_EQUAL(0u, f.reRank());
+TEST(HitCollectorTest, rerank_empty)
+{
+ Fixture f;
+ EXPECT_EQ(0u, f.reRank());
}
-TEST_F("testReRank - ascending", AscendingScoreFixture)
+TEST(HitCollectorTest, rerank_ascending)
{
+ AscendingScoreFixture f;
f.addHits();
- EXPECT_EQUAL(5u, f.reRank());
+ EXPECT_EQ(5u, f.reRank());
std::vector<RankedHit> expRh;
for (uint32_t i = 10; i < 20; ++i) { // 10 last are the best
@@ -213,17 +223,18 @@ TEST_F("testReRank - ascending", AscendingScoreFixture)
expRh.back()._rankValue = i + 200; // after reranking
}
}
- EXPECT_EQUAL(expRh.size(), 10u);
+ EXPECT_EQ(expRh.size(), 10u);
std::unique_ptr<ResultSet> rs = f.hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, f.expBv.get()));
+ checkResult(*rs, expRh);
+ checkResult(*rs, f.expBv.get());
}
-TEST_F("testReRank - descending", DescendingScoreFixture)
+TEST(HitCollectorTest, rerank_descending)
{
+ DescendingScoreFixture f;
f.addHits();
- EXPECT_EQUAL(5u, f.reRank());
+ EXPECT_EQ(5u, f.reRank());
std::vector<RankedHit> expRh;
for (uint32_t i = 0; i < 10; ++i) { // 10 first are the best
@@ -232,17 +243,18 @@ TEST_F("testReRank - descending", DescendingScoreFixture)
expRh.back()._rankValue = i + 200; // after reranking
}
}
- EXPECT_EQUAL(expRh.size(), 10u);
+ EXPECT_EQ(expRh.size(), 10u);
std::unique_ptr<ResultSet> rs = f.hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, f.expBv.get()));
+ checkResult(*rs, expRh);
+ checkResult(*rs, f.expBv.get());
}
-TEST_F("testReRank - partial", AscendingScoreFixture)
+TEST(HitCollectorTest, rerank_partial)
{
+ AscendingScoreFixture f;
f.addHits();
- EXPECT_EQUAL(3u, f.reRank(3));
+ EXPECT_EQ(3u, f.reRank(3));
std::vector<RankedHit> expRh;
for (uint32_t i = 10; i < 20; ++i) { // 10 last are the best
@@ -251,36 +263,39 @@ TEST_F("testReRank - partial", AscendingScoreFixture)
expRh.back()._rankValue = i + 200; // after reranking
}
}
- EXPECT_EQUAL(expRh.size(), 10u);
+ EXPECT_EQ(expRh.size(), 10u);
std::unique_ptr<ResultSet> rs = f.hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, f.expBv.get()));
+ checkResult(*rs, expRh);
+ checkResult(*rs, f.expBv.get());
}
-TEST_F("require that hits for 2nd phase candidates can be retrieved", DescendingScoreFixture)
+TEST(HitCollectorTest, require_that_hits_for_2nd_phase_candidates_can_be_retrieved)
{
+ DescendingScoreFixture f;
f.addHits();
std::vector<HitCollector::Hit> scores = extract(f.hc.getSortedHitSequence(5));
- ASSERT_EQUAL(5u, scores.size());
- EXPECT_EQUAL(100, scores[0].second);
- EXPECT_EQUAL(99, scores[1].second);
- EXPECT_EQUAL(98, scores[2].second);
- EXPECT_EQUAL(97, scores[3].second);
- EXPECT_EQUAL(96, scores[4].second);
+ ASSERT_EQ(5u, scores.size());
+ EXPECT_EQ(100, scores[0].second);
+ EXPECT_EQ(99, scores[1].second);
+ EXPECT_EQ(98, scores[2].second);
+ EXPECT_EQ(97, scores[3].second);
+ EXPECT_EQ(96, scores[4].second);
}
-TEST("require that score ranges can be read and set.") {
+TEST(HitCollectorTest, require_that_score_ranges_can_be_read_and_set)
+{
std::pair<Scores, Scores> ranges = std::make_pair(Scores(1.0, 2.0), Scores(3.0, 4.0));
HitCollector hc(20, 10);
hc.setRanges(ranges);
- EXPECT_EQUAL(ranges.first.low, hc.getRanges().first.low);
- EXPECT_EQUAL(ranges.first.high, hc.getRanges().first.high);
- EXPECT_EQUAL(ranges.second.low, hc.getRanges().second.low);
- EXPECT_EQUAL(ranges.second.high, hc.getRanges().second.high);
+ EXPECT_EQ(ranges.first.low, hc.getRanges().first.low);
+ EXPECT_EQ(ranges.first.high, hc.getRanges().first.high);
+ EXPECT_EQ(ranges.second.low, hc.getRanges().second.low);
+ EXPECT_EQ(ranges.second.high, hc.getRanges().second.high);
}
-TEST("testNoHitsToReRank") {
+TEST(HitCollectorTest, no_hits_to_rerank)
+{
uint32_t numDocs = 20;
uint32_t maxHitsSize = 10;
@@ -299,8 +314,8 @@ TEST("testNoHitsToReRank") {
}
std::unique_ptr<ResultSet> rs = hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, nullptr));
+ checkResult(*rs, expRh);
+ checkResult(*rs, nullptr);
}
}
@@ -317,14 +332,15 @@ void testScaling(const std::vector<feature_t> &initScores,
PredefinedScorer scorer(std::move(finalScores));
// perform second phase ranking
- EXPECT_EQUAL(2u, do_reRank(scorer, hc, 2));
+ EXPECT_EQ(2u, do_reRank(scorer, hc, 2));
// check results
std::unique_ptr<ResultSet> rs = hc.getResultSet();
- TEST_DO(checkResult(*rs, expected));
+ checkResult(*rs, expected);
}
-TEST("testScaling") {
+TEST(HitCollectorTest, scaling)
+{
std::vector<feature_t> initScores(5);
initScores[0] = 1000;
initScores[1] = 2000;
@@ -338,7 +354,8 @@ TEST("testScaling") {
exp[i]._docId = i;
}
- { // scale down and adjust down
+ {
+ SCOPED_TRACE("scale down and adjust down");
exp[0]._rankValue = 0; // scaled
exp[1]._rankValue = 100; // scaled
exp[2]._rankValue = 200; // scaled
@@ -350,9 +367,10 @@ TEST("testScaling") {
finalScores[3] = 300;
finalScores[4] = 400;
- TEST_DO(testScaling(initScores, std::move(finalScores), exp));
+ testScaling(initScores, std::move(finalScores), exp);
}
- { // scale down and adjust up
+ {
+ SCOPED_TRACE("scale down and adjust up");
exp[0]._rankValue = 200; // scaled
exp[1]._rankValue = 300; // scaled
exp[2]._rankValue = 400; // scaled
@@ -364,10 +382,10 @@ TEST("testScaling") {
finalScores[3] = 500;
finalScores[4] = 600;
- TEST_DO(testScaling(initScores, std::move(finalScores), exp));
+ testScaling(initScores, std::move(finalScores), exp);
}
- { // scale up and adjust down
-
+ {
+ SCOPED_TRACE("scale up and adjust down");
exp[0]._rankValue = -500; // scaled (-500)
exp[1]._rankValue = 750; // scaled
exp[2]._rankValue = 2000; // scaled
@@ -379,9 +397,10 @@ TEST("testScaling") {
finalScores[3] = 3250;
finalScores[4] = 4500;
- TEST_DO(testScaling(initScores, std::move(finalScores), exp));
+ testScaling(initScores, std::move(finalScores), exp);
}
- { // minimal scale (second phase range = 0 (4 - 4) -> 1)
+ {
+ SCOPED_TRACE("minimal scale (second phase range = 0 (4 - 4) -> 1)");
exp[0]._rankValue = 1; // scaled
exp[1]._rankValue = 2; // scaled
exp[2]._rankValue = 3; // scaled
@@ -393,9 +412,10 @@ TEST("testScaling") {
finalScores[3] = 4;
finalScores[4] = 4;
- TEST_DO(testScaling(initScores, std::move(finalScores), exp));
+ testScaling(initScores, std::move(finalScores), exp);
}
- { // minimal scale (first phase range = 0 (4000 - 4000) -> 1)
+ {
+ SCOPED_TRACE("minimal scale (first phase range = 0 (4000 - 4000) -> 1)");
std::vector<feature_t> is(initScores);
is[4] = 4000;
exp[0]._rankValue = -299600; // scaled
@@ -409,11 +429,12 @@ TEST("testScaling") {
finalScores[3] = 400;
finalScores[4] = 500;
- TEST_DO(testScaling(is, std::move(finalScores), exp));
+ testScaling(is, std::move(finalScores), exp);
}
}
-TEST("testOnlyBitVector") {
+TEST(HitCollectorTest, only_bitvector)
+{
uint32_t numDocs = 20;
LOG(info, "testOnlyBitVector: test it");
{
@@ -428,8 +449,8 @@ TEST("testOnlyBitVector") {
std::unique_ptr<ResultSet> rs = hc.getResultSet();
std::vector<RankedHit> expRh;
- TEST_DO(checkResult(*rs, expRh)); // no ranked hits
- TEST_DO(checkResult(*rs, expBv.get())); // only bit vector
+ checkResult(*rs, expRh); // no ranked hits
+ checkResult(*rs, expBv.get()); // only bit vector
}
}
@@ -443,9 +464,9 @@ struct MergeResultSetFixture {
{}
};
-TEST_F("require that result set is merged correctly with first phase ranking",
- MergeResultSetFixture)
+TEST(HitCollectorTest, require_that_result_set_is_merged_correctly_with_first_phase_ranking)
{
+ MergeResultSetFixture f;
std::vector<RankedHit> expRh;
for (uint32_t i = 0; i < f.numDocs; ++i) {
f.hc.addHit(i, i + 1000);
@@ -457,7 +478,7 @@ TEST_F("require that result set is merged correctly with first phase ranking",
expRh.back()._rankValue = (i < f.numDocs - f.maxHitsSize) ? default_rank_value : i + 1000;
}
std::unique_ptr<ResultSet> rs = f.hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
+ checkResult(*rs, expRh);
}
void
@@ -474,9 +495,9 @@ addExpectedHitForMergeTest(const MergeResultSetFixture &f, std::vector<RankedHit
}
}
-TEST_F("require that result set is merged correctly with second phase ranking (document scorer)",
- MergeResultSetFixture)
+TEST(HitCollectorTest, require_that_result_set_is_merged_correctly_with_second_phase_ranking_using_document_scorer)
{
+ MergeResultSetFixture f;
// with second phase ranking that triggers rescoring / scaling
BasicScorer scorer(500); // second phase ranking setting score to docId + 500
std::vector<RankedHit> expRh;
@@ -484,12 +505,13 @@ TEST_F("require that result set is merged correctly with second phase ranking (d
f.hc.addHit(i, i + 1000);
addExpectedHitForMergeTest(f, expRh, i);
}
- EXPECT_EQUAL(f.maxHeapSize, do_reRank(scorer, f.hc, f.maxHeapSize));
+ EXPECT_EQ(f.maxHeapSize, do_reRank(scorer, f.hc, f.maxHeapSize));
std::unique_ptr<ResultSet> rs = f.hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
+ checkResult(*rs, expRh);
}
-TEST("require that hits can be added out of order") {
+TEST(HitCollectorTest, require_that_hits_can_be_added_out_of_order)
+{
HitCollector hc(1000, 100);
std::vector<RankedHit> expRh;
// produce expected result in normal order
@@ -503,11 +525,12 @@ TEST("require that hits can be added out of order") {
hc.addHit(i, i + 100);
}
std::unique_ptr<ResultSet> rs = hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, nullptr));
+ checkResult(*rs, expRh);
+ checkResult(*rs, nullptr);
}
-TEST("require that hits can be added out of order when passing array limit") {
+TEST(HitCollectorTest, require_that_hits_can_be_added_out_of_order_when_passing_array_limit)
+{
HitCollector hc(10000, 100);
std::vector<RankedHit> expRh;
// produce expected result in normal order
@@ -525,11 +548,12 @@ TEST("require that hits can be added out of order when passing array limit") {
hc.addHit(i, i + 100);
}
std::unique_ptr<ResultSet> rs = hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, nullptr));
+ checkResult(*rs, expRh);
+ checkResult(*rs, nullptr);
}
-TEST("require that hits can be added out of order only after passing array limit") {
+TEST(HitCollectorTest, require_that_hits_can_be_added_out_of_order_only_after_passing_array_limit)
+{
HitCollector hc(10000, 100);
std::vector<RankedHit> expRh;
// produce expected result in normal order
@@ -548,8 +572,87 @@ TEST("require that hits can be added out of order only after passing array limit
hc.addHit(i, i + 100);
}
std::unique_ptr<ResultSet> rs = hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, nullptr));
+ checkResult(*rs, expRh);
+ checkResult(*rs, nullptr);
+}
+
+/**
+ * Fixture for testing how HitCollector::get_result_set() handles a second
+ * phase rank drop limit.
+ *
+ * Docids reported as dropped by get_result_set() are collected in _dropped.
+ */
+struct RankDropFixture {
+    uint32_t _docid_limit;
+    HitCollector _hc;
+    std::vector<uint32_t> _dropped;
+    RankDropFixture(uint32_t docid_limit, uint32_t max_hits_size)
+        : _docid_limit(docid_limit),
+          _hc(docid_limit, max_hits_size)
+    {
+    }
+    // Adds each hit (docid and rank) to the hit collector.
+    void add(const std::vector<RankedHit>& hits) {
+        for (const auto& hit : hits) {
+            _hc.addHit(hit.getDocId(), hit.getRank());
+        }
+    }
+    // Reranks 'count' hits with the predefined docid -> score mapping and
+    // expects exactly 'count' hits to have been reranked.
+    void rerank(ScoreMap score_map, size_t count) {
+        // The map is a sink parameter; move it into the scorer instead of copying it again.
+        PredefinedScorer scorer(std::move(score_map));
+        EXPECT_EQ(count, do_reRank(scorer, _hc, count));
+    }
+    // Creates a bitvector of size _docid_limit with a bit set for each docid.
+    std::unique_ptr<BitVector> make_bv(const DocidVector& docids) {
+        auto bv = BitVector::create(_docid_limit);
+        for (uint32_t docid : docids) {
+            bv->setBit(docid);
+        }
+        return bv;
+    }
+
+    void setup() {
+        // Initial 7 hits from first phase
+        add({{5, 1100},{10, 1200},{11, 1300},{12, 1400},{14, 500},{15, 900},{16,1000}});
+        // Rerank two best hits, calculate old and new ranges for reranked
+        // hits that will cause hits not reranked to later be rescored by
+        // dividing by 100.
+        rerank({{11,14},{12,13}}, 2);
+    }
+    // Fetches the result set using 'rank_drop_limit' and verifies the ranked
+    // hit array, the bitvector (nullptr means none expected) and the dropped docids.
+    void check_result(std::optional<double> rank_drop_limit, const RankedHitVector& exp_array,
+                      std::unique_ptr<BitVector> exp_bv, const DocidVector& exp_dropped) {
+        auto rs = _hc.get_result_set(rank_drop_limit, &_dropped);
+        checkResult(*rs, exp_array);
+        checkResult(*rs, exp_bv.get());
+        EXPECT_EQ(exp_dropped, _dropped);
+    }
+};
+
+TEST(HitCollectorTest, require_that_second_phase_rank_drop_limit_is_enforced)
+{
+    // Track rank score for all 7 hits from first phase
+    RankDropFixture fixture(10000, 10);
+    fixture.setup();
+    const double rank_drop_limit = 9.0;
+    const RankedHitVector exp_hits = {{5,11},{10,12},{11,14},{12,13},{16,10}};
+    const DocidVector exp_dropped = {14, 15};
+    // No bitvector is expected in the result set.
+    fixture.check_result(rank_drop_limit, exp_hits, {}, exp_dropped);
+}
+
+TEST(HitCollectorTest, require_that_second_phase_rank_drop_limit_is_enforced_when_docid_vector_is_used)
+{
+    // Track rank score for 4 best hits from first phase, overflow to docid vector
+    RankDropFixture fixture(10000, 4);
+    fixture.setup();
+    const double rank_drop_limit = 13.0;
+    const RankedHitVector exp_hits = {{11,14}};
+    const DocidVector exp_dropped = {5,10,12,14,15,16};
+    // No bitvector is expected in the result set.
+    fixture.check_result(rank_drop_limit, exp_hits, {}, exp_dropped);
+}
+
+TEST(HitCollectorTest, require_that_bitvector_is_not_dropped_without_second_phase_rank_drop_limit)
+{
+    // Track rank score for 4 best hits from first phase, overflow to bitvector
+    RankDropFixture fixture(20, 4);
+    fixture.setup();
+    const RankedHitVector exp_hits = {{5,11},{10,12},{11,14},{12,13}};
+    // Without a drop limit the bitvector is expected to hold all 7 first phase hits.
+    auto exp_bv = fixture.make_bv({5,10,11,12,14,15,16});
+    const DocidVector none_dropped;
+    fixture.check_result(std::nullopt, exp_hits, std::move(exp_bv), none_dropped);
+}
+
+TEST(HitCollectorTest, require_that_bitvector_is_dropped_with_second_phase_rank_drop_limit)
+{
+    // Track rank for 4 best hits from first phase, overflow to bitvector
+    RankDropFixture fixture(20, 4);
+    fixture.setup();
+    const double rank_drop_limit = 9.0;
+    const RankedHitVector exp_hits = {{5,11},{10,12},{11,14},{12,13}};
+    const DocidVector exp_dropped = {14,15,16};
+    // With a drop limit no bitvector is expected; the overflowed hits are reported as dropped.
+    fixture.check_result(rank_drop_limit, exp_hits, {}, exp_dropped);
}
-TEST_MAIN() { TEST_RUN_ALL(); }
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/hitcollector/sorted_hit_sequence_test.cpp b/searchlib/src/tests/hitcollector/sorted_hit_sequence_test.cpp
index c1c3a550d9b..4eefa5b5dfa 100644
--- a/searchlib/src/tests/hitcollector/sorted_hit_sequence_test.cpp
+++ b/searchlib/src/tests/hitcollector/sorted_hit_sequence_test.cpp
@@ -1,7 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/searchlib/queryeval/sorted_hit_sequence.h>
+#include <vespa/vespalib/gtest/gtest.h>
using search::queryeval::SortedHitSequence;
using Hits = std::vector<SortedHitSequence::Hit>;
@@ -10,20 +10,22 @@ using Refs = std::vector<SortedHitSequence::Ref>;
Hits hits({{1,10.0},{2,30.0},{3,20.0}});
Refs refs({1,2,0});
-TEST("require that empty hit sequence is empty") {
+// A sequence created with zero refs is never valid, with or without backing arrays.
+TEST(SortedHitSequenceTest, require_that_empty_hit_sequence_is_empty)
+{
EXPECT_TRUE(!SortedHitSequence(nullptr, nullptr, 0).valid());
EXPECT_TRUE(!SortedHitSequence(&hits[0], &refs[0], 0).valid());
}
-TEST("require that sorted hit sequence can be iterated") {
+// Iteration follows the order given by refs ({1,2,0}), not the storage order of hits.
+TEST(SortedHitSequenceTest, require_that_sorted_hit_sequence_can_be_iterated)
+{
SortedHitSequence seq(&hits[0], &refs[0], refs.size());
for (const auto &expect: Hits({{2,30.0},{3,20.0},{1,10.0}})) {
ASSERT_TRUE(seq.valid());
- EXPECT_EQUAL(expect.first, seq.get().first);
- EXPECT_EQUAL(expect.second, seq.get().second);
+ EXPECT_EQ(expect.first, seq.get().first);
+ EXPECT_EQ(expect.second, seq.get().second);
seq.next();
}
EXPECT_TRUE(!seq.valid());
}
-TEST_MAIN() { TEST_RUN_ALL(); }
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
index 485410e0eba..b0234010f77 100644
--- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
@@ -13,7 +13,7 @@
LOG_SETUP("blueprint_test");
using namespace search::queryeval;
-using namespace search::fef;
+using MatchData = search::fef::MatchData;
namespace {
@@ -44,9 +44,7 @@ public:
}
SearchIterator::UP
- createIntermediateSearch(MultiSearch::Children subSearches,
- MatchData &md) const override
- {
+ createIntermediateSearch(MultiSearch::Children subSearches, MatchData &md) const override {
return std::make_unique<MySearch>("or", std::move(subSearches), &md, strict());
}
SearchIteratorUP createFilterSearch(FilterConstraint constraint) const override {
@@ -63,9 +61,7 @@ class OtherOr : public OrBlueprint
private:
public:
SearchIterator::UP
- createIntermediateSearch(MultiSearch::Children subSearches,
- MatchData &md) const override
- {
+ createIntermediateSearch(MultiSearch::Children subSearches, MatchData &md) const override {
return std::make_unique<MySearch>("or", std::move(subSearches), &md, strict());
}
@@ -89,9 +85,7 @@ public:
}
SearchIterator::UP
- createIntermediateSearch(MultiSearch::Children subSearches,
- MatchData &md) const override
- {
+ createIntermediateSearch(MultiSearch::Children subSearches, MatchData &md) const override {
return std::make_unique<MySearch>("and", std::move(subSearches), &md, strict());
}
@@ -106,9 +100,7 @@ class OtherAnd : public AndBlueprint
private:
public:
SearchIterator::UP
- createIntermediateSearch(MultiSearch::Children subSearches,
- MatchData &md) const override
- {
+ createIntermediateSearch(MultiSearch::Children subSearches, MatchData &md) const override {
return std::make_unique<MySearch>("and", std::move(subSearches), &md, strict());
}
@@ -121,9 +113,7 @@ class OtherAndNot : public AndNotBlueprint
{
public:
SearchIterator::UP
- createIntermediateSearch(MultiSearch::Children subSearches,
- MatchData &md) const override
- {
+ createIntermediateSearch(MultiSearch::Children subSearches, MatchData &md) const override {
return std::make_unique<MySearch>("andnot", std::move(subSearches), &md, strict());
}
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
index bddc9f92111..490f221d1d8 100644
--- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -27,8 +27,9 @@
LOG_SETUP("blueprint_test");
using namespace search::queryeval;
-using namespace search::fef;
using namespace search::query;
+using search::fef::MatchData;
+using search::queryeval::Blueprint;
using search::BitVector;
using BlueprintVector = std::vector<std::unique_ptr<Blueprint>>;
using vespalib::Slime;
@@ -575,7 +576,9 @@ void compare(const Blueprint &bp1, const Blueprint &bp2, bool expect_eq) {
bp1.asSlime(SlimeInserter(a));
bp2.asSlime(SlimeInserter(b));
if (expect_eq) {
- EXPECT_TRUE(vespalib::slime::are_equal(a.get(), b.get(), cmp_hook));
+ if(!EXPECT_TRUE(vespalib::slime::are_equal(a.get(), b.get(), cmp_hook))) {
+ fprintf(stderr, "a: %s\n\nb: %s\n\n", bp1.asString().c_str(), bp2.asString().c_str());
+ }
} else {
EXPECT_FALSE(vespalib::slime::are_equal(a.get(), b.get(), cmp_hook));
}
@@ -613,7 +616,6 @@ TEST_F("test SourceBlender below AND partial optimization", SourceBlenderTestFix
auto expect = std::make_unique<AndBlueprint>();
addLeafs(*expect, {1,2,3});
- expect->addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(f.selector_2), {{10, 1}, {20, 2}}));
auto blender = std::make_unique<SourceBlenderBlueprint>(f.selector_1);
blender->addChild(addLeafsWithSourceId(3, std::make_unique<AndBlueprint>(), {{30, 3}, {300, 3}}));
@@ -621,6 +623,8 @@ TEST_F("test SourceBlender below AND partial optimization", SourceBlenderTestFix
blender->addChild(addLeafsWithSourceId(1, std::make_unique<AndBlueprint>(), {{10, 1}, {100, 1}, {1000, 1}}));
expect->addChild(std::move(blender));
+ expect->addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(f.selector_2), {{10, 1}, {20, 2}}));
+
optimize_and_compare(std::move(top), std::move(expect));
}
@@ -1401,7 +1405,7 @@ TEST("cost for ANDNOT") {
TEST("cost for SB") {
InvalidSelector sel;
- verify_cost(make::SB(sel), 1.3, 1.3); // max
+ verify_cost(make::SB(sel), 1.3+1.0, 1.3+(1.0-0.8*0.7*0.5)); // max, non_strict+1.0, strict+est
}
TEST("cost for NEAR") {
diff --git a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp
index 16e78f77eec..9fdf1417a92 100644
--- a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp
+++ b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp
@@ -356,7 +356,7 @@ DotProductAdapter::~DotProductAdapter() = default;
struct ParallelWeakAndAdapter {
FieldSpec field;
ParallelWeakAndBlueprint blueprint;
- ParallelWeakAndAdapter() : field("foo", 3, 7), blueprint(field, 100, 0.0, 1.0) {}
+ ParallelWeakAndAdapter() : field("foo", 3, 7), blueprint(field, 100, 0.0, 1.0, true) {}
void addChild(std::unique_ptr<Blueprint> child) {
auto child_field = blueprint.getNextChildField(field);
auto term = std::make_unique<LeafProxy>(child_field, std::move(child));
diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
index 57fddb0a819..d6008136d73 100644
--- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
+++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
@@ -1,6 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/searchlib/queryeval/flow.h>
+#include <vespa/searchlib/queryeval/flow_tuning.h>
#include <vespa/vespalib/gtest/gtest.h>
#include <vector>
#include <random>
@@ -349,6 +350,35 @@ TEST(FlowTest, blender_flow_cost_accumulation_is_max) {
}
}
+// Takes the forced-strict cost of FlowStats(est, 0.0, est) at in-flow
+// 'adjust' and scales it by 1/adjust.
+double my_non_strict_cost(double est, double adjust) {
+    const FlowStats stats(est, 0.0, est);
+    const double forced = flow::forced_strict_cost(stats, adjust);
+    return (1.0/adjust) * forced;
+}
+
+// For a range of hit estimates, verifies that using my_non_strict_cost(est, x)
+// as the non-strict cost makes strict_crossover() land on x (within 1e-6),
+// and prints the crossover/cost of each variant to stderr for comparison.
+TEST(FlowTest, non_strict_btree_cost) {
+ for (double est: {0.001, 0.01, 0.1, 0.2, 0.3, 0.5, 0.75, 1.0}) {
+ // 'prev' uses a fixed non-strict cost of 1.0 (presumably the earlier cost model - confirm)
+ auto prev = FlowStats(est, 1.0, est);
+ // 'base' uses the cost reported by flow::non_strict_cost_of_strict_iterator
+ auto base = FlowStats(est, flow::non_strict_cost_of_strict_iterator(est, est), est);
+ // 'optNN' variants target a strict crossover at 0.NN
+ auto opt05 = FlowStats(est, my_non_strict_cost(est, 0.5), est);
+ auto opt02 = FlowStats(est, my_non_strict_cost(est, 0.2), est);
+ auto opt01 = FlowStats(est, my_non_strict_cost(est, 0.1), est);
+ auto opt005 = FlowStats(est, my_non_strict_cost(est, 0.05), est);
+ auto opt003 = FlowStats(est, my_non_strict_cost(est, 0.03), est);
+ EXPECT_NEAR(strict_crossover(opt05), 0.5, 1e-6);
+ EXPECT_NEAR(strict_crossover(opt02), 0.2, 1e-6);
+ EXPECT_NEAR(strict_crossover(opt01), 0.1, 1e-6);
+ EXPECT_NEAR(strict_crossover(opt005), 0.05, 1e-6);
+ EXPECT_NEAR(strict_crossover(opt003), 0.03, 1e-6);
+ // informational output only; nothing below is asserted
+ fprintf(stderr, "est: %5.3f\n", est);
+ fprintf(stderr, " prev crossover: %6.4f (cost: %6.4f)\n", strict_crossover(prev), prev.cost);
+ fprintf(stderr, " base crossover: %6.4f (cost: %6.4f)\n", strict_crossover(base), base.cost);
+ fprintf(stderr, " 0.5 crossover: %6.4f (cost: %6.4f)\n", strict_crossover(opt05), opt05.cost);
+ fprintf(stderr, " 0.2 crossover: %6.4f (cost: %6.4f)\n", strict_crossover(opt02), opt02.cost);
+ fprintf(stderr, " 0.1 crossover: %6.4f (cost: %6.4f)\n", strict_crossover(opt01), opt01.cost);
+ fprintf(stderr, " 0.05 crossover: %6.4f (cost: %6.4f)\n", strict_crossover(opt005), opt005.cost);
+ fprintf(stderr, " 0.03 crossover: %6.4f (cost: %6.4f)\n", strict_crossover(opt003), opt003.cost);
+ }
+}
+
TEST(FlowTest, optimal_and_flow) {
for (size_t i = 0; i < loop_cnt; ++i) {
for (bool strict: {false, true}) {
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp
index 8591ec1415d..51177850155 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp
@@ -2,14 +2,14 @@
#include "intermediate_blueprint_factory.h"
#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
#include <iomanip>
#include <sstream>
namespace search::queryeval::test {
-template <typename BlueprintType>
char
-IntermediateBlueprintFactory<BlueprintType>::child_name(void* blueprint) const
+IntermediateBlueprintFactory::child_name(void* blueprint) const
{
auto itr = _child_names.find(blueprint);
if (itr != _child_names.end()) {
@@ -18,35 +18,33 @@ IntermediateBlueprintFactory<BlueprintType>::child_name(void* blueprint) const
return '?';
}
-template <typename BlueprintType>
-IntermediateBlueprintFactory<BlueprintType>::IntermediateBlueprintFactory(vespalib::stringref name)
+IntermediateBlueprintFactory::IntermediateBlueprintFactory(vespalib::stringref name)
: _name(name),
_children(),
_child_names()
{
}
-template <typename BlueprintType>
-IntermediateBlueprintFactory<BlueprintType>::~IntermediateBlueprintFactory() = default;
+IntermediateBlueprintFactory::~IntermediateBlueprintFactory() = default;
-template <typename BlueprintType>
std::unique_ptr<Blueprint>
-IntermediateBlueprintFactory<BlueprintType>::make_blueprint()
+IntermediateBlueprintFactory::make_blueprint()
{
- auto res = std::make_unique<BlueprintType>();
+ auto res = make_self();
_child_names.clear();
char name = 'A';
+ uint32_t source = 1;
for (const auto& factory : _children) {
auto child = factory->make_blueprint();
_child_names[child.get()] = name++;
+ child->setSourceId(source++); // ignored by non-source-blender blueprints
res->addChild(std::move(child));
}
return res;
}
-template <typename BlueprintType>
vespalib::string
-IntermediateBlueprintFactory<BlueprintType>::get_name(Blueprint& blueprint) const
+IntermediateBlueprintFactory::get_name(Blueprint& blueprint) const
{
auto* intermediate = blueprint.asIntermediate();
if (intermediate != nullptr) {
@@ -69,11 +67,29 @@ IntermediateBlueprintFactory<BlueprintType>::get_name(Blueprint& blueprint) cons
return get_class_name(blueprint);
}
-template class IntermediateBlueprintFactory<AndBlueprint>;
+//-----------------------------------------------------------------------------
AndBlueprintFactory::AndBlueprintFactory()
- : IntermediateBlueprintFactory<AndBlueprint>("AND")
+ : IntermediateBlueprintFactory("AND")
{}
+// Supplies the intermediate blueprint for this factory: a fresh AndBlueprint.
+std::unique_ptr<IntermediateBlueprint>
+AndBlueprintFactory::make_self() const
+{
+    auto and_blueprint = std::make_unique<AndBlueprint>();
+    return and_blueprint;
+}
+
+//-----------------------------------------------------------------------------
+
+// Names the factory "SB" and sets up the selector that make_self() hands to
+// each SourceBlenderBlueprint.
+// NOTE(review): the selector arguments are presumably (default source, base
+// name, initial doc count) - confirm against FixedSourceSelector.
+SourceBlenderBlueprintFactory::SourceBlenderBlueprintFactory()
+ : IntermediateBlueprintFactory("SB"),
+ _selector(250, "my_source_blender", 1000)
+{}
+
+// Supplies the intermediate blueprint for this factory: a fresh
+// SourceBlenderBlueprint constructed from the factory's selector.
+std::unique_ptr<IntermediateBlueprint>
+SourceBlenderBlueprintFactory::make_self() const
+{
+    auto blender = std::make_unique<SourceBlenderBlueprint>(_selector);
+    return blender;
}
+}
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h b/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h
index 6f7fe4f9ee7..c791d866612 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h
@@ -4,6 +4,7 @@
#include "benchmark_blueprint_factory.h"
#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
+#include <vespa/searchlib/attribute/fixedsourceselector.h>
#include <unordered_map>
namespace search::queryeval::test {
@@ -11,7 +12,6 @@ namespace search::queryeval::test {
/**
* Factory that creates an IntermediateBlueprint (of the type returned by the subclass' make_self()) with children created by the given factories.
*/
-template <typename BlueprintType>
class IntermediateBlueprintFactory : public BenchmarkBlueprintFactory {
private:
vespalib::string _name;
@@ -19,7 +19,8 @@ private:
std::unordered_map<void*, char> _child_names;
char child_name(void* blueprint) const;
-
+protected:
+ virtual std::unique_ptr<IntermediateBlueprint> make_self() const = 0;
public:
IntermediateBlueprintFactory(vespalib::stringref name);
~IntermediateBlueprintFactory();
@@ -30,10 +31,26 @@ public:
vespalib::string get_name(Blueprint& blueprint) const override;
};
-class AndBlueprintFactory : public IntermediateBlueprintFactory<AndBlueprint> {
+class AndBlueprintFactory : public IntermediateBlueprintFactory {
+protected:
+ std::unique_ptr<IntermediateBlueprint> make_self() const override;
public:
AndBlueprintFactory();
};
-}
+/**
+ * Factory that creates a SourceBlenderBlueprint using a selector owned by the factory.
+ */
+class SourceBlenderBlueprintFactory : public IntermediateBlueprintFactory
+{
+private:
+    FixedSourceSelector _selector;
+protected:
+    std::unique_ptr<IntermediateBlueprint> make_self() const override;
+public:
+    SourceBlenderBlueprintFactory();
+    // Calls _selector.setSource(idx, f(idx)) for every idx in [0, limit).
+    void init_selector(auto f, uint32_t limit) {
+        uint32_t idx = 0;
+        while (idx < limit) {
+            _selector.setSource(idx, f(idx));
+            ++idx;
+        }
+    }
+};
+}
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
index f4a1ade8a66..e74fefac70e 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
@@ -13,19 +13,31 @@
#include <vector>
using namespace search::attribute;
-using namespace search::fef;
using namespace search::queryeval::test;
using namespace search::queryeval;
using namespace search;
using namespace vespalib;
using search::index::Schema;
+using search::fef::MatchData;
using vespalib::make_string_short::fmt;
const vespalib::string field_name = "myfield";
double budget_sec = 1.0;
+// Picks the cost figure matching how the blueprint is actually evaluated:
+//  - strict in-flow:                    bp.strict_cost() (bp must be strict)
+//  - non-strict in-flow, non-strict bp: bp.cost() scaled by the in-flow rate
+//  - non-strict in-flow, strict bp:     flow::forced_strict_cost() at the in-flow rate
+double estimate_actual_cost(Blueprint &bp, InFlow in_flow) {
+    if (in_flow.strict()) {
+        assert(bp.strict());
+        return bp.strict_cost();
+    }
+    if (!bp.strict()) {
+        return bp.cost() * in_flow.rate();
+    }
+    auto forced_stats = FlowStats::from(flow::DefaultAdapter(), &bp);
+    return flow::forced_strict_cost(forced_stats, in_flow.rate());
+}
+
enum class PlanningAlgo {
Order,
Estimate,
@@ -236,7 +248,8 @@ strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, Planning
timer.after();
}
FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost());
- return {timer.min_time() * 1000.0, hits + 1, hits, flow, flow.strict_cost, get_class_name(*ctx.iterator), factory.get_name(*ctx.blueprint)};
+ double actual_cost = estimate_actual_cost(*ctx.blueprint, InFlow(true));
+ return {timer.min_time() * 1000.0, hits + 1, hits, flow, actual_cost, get_class_name(*ctx.iterator), factory.get_name(*ctx.blueprint)};
}
template <bool do_unpack>
@@ -269,7 +282,7 @@ non_strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, doub
timer.after();
}
FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost());
- double actual_cost = flow.cost * filter_hit_ratio;
+ double actual_cost = estimate_actual_cost(*ctx.blueprint, InFlow(filter_hit_ratio));
return {timer.min_time() * 1000.0, seeks, hits, flow, actual_cost, get_class_name(*ctx.iterator), factory.get_name(*ctx.blueprint)};
}
@@ -291,10 +304,6 @@ benchmark_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, bool
}
}
-
-
-
-
//-----------------------------------------------------------------------------
double est_forced_strict_cost(double estimate, double strict_cost, double rate) {
@@ -317,26 +326,26 @@ struct Sample {
}
};
-double find_crossover(const char *type, const auto &calculate_at, double delta) {
+double find_crossover(const char *type, const char *a, const char *b, const auto &calculate_at, double delta) {
double min = delta;
double max = 1.0;
fprintf(stderr, "looking for %s crossover in the range [%g, %g]...\n", type, min, max);
auto at_min = calculate_at(min);
auto at_max = calculate_at(max);
- fprintf(stderr, " before: [%s, %s], after: [%s, %s]\n",
- at_min.first.str().c_str(), at_max.first.str().c_str(),
- at_min.second.str().c_str(), at_max.second.str().c_str());
- auto best_before = [](auto values) { return (values.first < values.second); };
- if (best_before(at_min) == best_before(at_max)) {
+ fprintf(stderr, " %s: [%s, %s], %s: [%s, %s]\n",
+ a, at_min.first.str().c_str(), at_max.first.str().c_str(),
+ b, at_min.second.str().c_str(), at_max.second.str().c_str());
+ auto a_best = [](auto values) { return (values.first < values.second); };
+ if (a_best(at_min) == a_best(at_max)) {
fprintf(stderr, " NO %s CROSSOVER FOUND\n", type);
return 0.0;
}
while (max > (min + delta)) {
double x = (min + max) / 2.0;
auto at_x = calculate_at(x);
- fprintf(stderr, " best@%g: %s (%s vs %s)\n", x, best_before(at_x) ? "before" : "after",
+ fprintf(stderr, " best@%g: %s (%s vs %s)\n", x, a_best(at_x) ? a : b,
at_x.first.str().c_str(), at_x.second.str().c_str());
- if (best_before(at_min) == best_before(at_x)) {
+ if (a_best(at_min) == a_best(at_x)) {
min = x;
at_min = at_x;
} else {
@@ -409,11 +418,11 @@ void analyze_crossover(BenchmarkBlueprintFactory &fixed, std::function<std::uniq
std::vector<double> results;
std::vector<const char *> names;
names.push_back("time crossover");
- results.push_back(find_crossover("TIME", combine(estimate_AND_time_ms), delta));
+ results.push_back(find_crossover("TIME", "before", "after", combine(estimate_AND_time_ms), delta));
names.push_back("cost crossover");
- results.push_back(find_crossover("COST", combine(calculate_AND_cost), delta));
+ results.push_back(find_crossover("COST", "before", "after", combine(calculate_AND_cost), delta));
names.push_back("abs_est crossover");
- results.push_back(find_crossover("ABS_EST", combine(first_abs_est), delta));
+ results.push_back(find_crossover("ABS_EST", "before", "after", combine(first_abs_est), delta));
sample_at("COST", combine(calculate_AND_cost), results, names);
sample_at("TIME", combine(estimate_AND_time_ms), results, names);
}
@@ -429,21 +438,37 @@ to_string(bool val)
void
print_result_header()
{
- std::cout << "| chn | f_ratio | o_ratio | a_ratio | f.est | f.cost | f.scost | hits | seeks | time_ms | act_cost | ns_per_seek | ms_per_act_cost | iterator | blueprint |" << std::endl;
+ std::cout << "| in_flow | chn | o_ratio | a_ratio | f.est | f.cost | f.act_cost | f.scost | f.act_scost | hits | seeks | time_ms | act_cost | ns_per_seek | ms_per_act_cost | iterator | blueprint |" << std::endl;
+}
+
+// Writes an InFlow using width 7 and precision 5: the text " STRICT" when the
+// flow is strict, otherwise its rate. The stream's previous width and
+// precision settings are put back before returning.
+std::ostream &operator<<(std::ostream &dst, InFlow in_flow) {
+    const auto saved_width = dst.width();
+    const auto saved_precision = dst.precision();
+    dst.width(7);
+    dst.precision(5);
+    if (in_flow.strict()) {
+        dst << " STRICT";
+    } else {
+        dst << in_flow.rate();
+    }
+    dst.width(saved_width);
+    dst.precision(saved_precision);
+    return dst;
}
void
-print_result(const BenchmarkResult& res, uint32_t children, double op_hit_ratio, double filter_hit_ratio, uint32_t num_docs)
+print_result(const BenchmarkResult& res, uint32_t children, double op_hit_ratio, InFlow in_flow, uint32_t num_docs)
{
std::cout << std::fixed << std::setprecision(5)
- << "| " << std::setw(5) << children
- << " | " << std::setw(7) << filter_hit_ratio
+ << "| " << in_flow
+ << " | " << std::setw(5) << children
<< " | " << std::setw(7) << op_hit_ratio
<< " | " << std::setw(7) << ((double) res.hits / (double) num_docs)
<< " | " << std::setw(6) << res.flow.estimate
<< std::setprecision(4)
<< " | " << std::setw(9) << res.flow.cost
+ << " | " << std::setw(10) << (res.flow.cost * in_flow.rate())
<< " | " << std::setw(7) << res.flow.strict_cost
+ << " | " << std::setw(11) << (in_flow.strict() ? res.flow.strict_cost : flow::forced_strict_cost(res.flow, in_flow.rate()))
<< " | " << std::setw(8) << res.hits
<< " | " << std::setw(8) << res.seeks
<< std::setprecision(3)
@@ -640,7 +665,7 @@ run_benchmark_case(const BenchmarkCaseSetup& setup)
if (filter_hit_ratio * setup.filter_crossover_factor <= op_hit_ratio) {
auto res = benchmark_search(*factory, setup.num_docs + 1,
setup.bcase.strict_context, setup.bcase.force_strict, setup.bcase.unpack_iterator, filter_hit_ratio, PlanningAlgo::Cost);
- print_result(res, children, op_hit_ratio, filter_hit_ratio, setup.num_docs);
+ print_result(res, children, op_hit_ratio, InFlow(setup.bcase.strict_context, filter_hit_ratio), setup.num_docs);
result.add(res);
}
}
@@ -681,23 +706,25 @@ run_benchmarks(const BenchmarkSetup& setup)
void
print_intermediate_blueprint_result_header(size_t children)
{
+ std::cout << "| in_flow";
// This matches the naming scheme in IntermediateBlueprintFactory.
char name = 'A';
for (size_t i = 0; i < children; ++i) {
- std::cout << "| " << name++ << ".ratio ";
+ std::cout << " | " << name++ << ".ratio";
}
- std::cout << "| flow.cost | flow.scost | flow.est | ratio | hits | seeks | ms_per_cost | time_ms | algo | blueprint |" << std::endl;
+ std::cout << " | flow.cost | flow.scost | flow.est | ratio | hits | seeks | ms_per_cost | time_ms | algo | blueprint |" << std::endl;
}
void
-print_intermediate_blueprint_result(const BenchmarkResult& res, const std::vector<double>& children_ratios, PlanningAlgo algo, uint32_t num_docs)
+print_intermediate_blueprint_result(const BenchmarkResult& res, const std::vector<double>& children_ratios, PlanningAlgo algo, InFlow in_flow, uint32_t num_docs)
{
- std::cout << std::fixed << std::setprecision(5);
+ std::cout << std::fixed << std::setprecision(5)
+ << "| " << in_flow;
for (auto ratio : children_ratios) {
- std::cout << "| " << std::setw(7) << ratio << " ";
+ std::cout << " | " << std::setw(7) << ratio;
}
std::cout << std::setprecision(5)
- << "| " << std::setw(10) << res.flow.cost
+ << " | " << std::setw(10) << res.flow.cost
<< " | " << std::setw(10) << res.flow.strict_cost
<< " | " << std::setw(8) << res.flow.estimate
<< " | " << std::setw(7) << ((double) res.hits / (double) num_docs)
@@ -745,9 +772,8 @@ struct BlueprintFactorySetup {
BlueprintFactorySetup::~BlueprintFactorySetup() = default;
-template <typename IntermediateBlueprintFactoryType>
void
-run_intermediate_blueprint_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
+run_intermediate_blueprint_benchmark(auto factory_factory, std::vector<InFlow> in_flows, const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
{
print_intermediate_blueprint_result_header(2);
double max_speedup = 0.0;
@@ -755,26 +781,28 @@ run_intermediate_blueprint_benchmark(const BlueprintFactorySetup& a, const Bluep
for (double b_hit_ratio: b.op_hit_ratios) {
auto b_factory = b.make_factory_shared(num_docs, b_hit_ratio);
for (double a_hit_ratio : a.op_hit_ratios) {
- IntermediateBlueprintFactoryType factory;
- factory.add_child(a.make_factory(num_docs, a_hit_ratio));
- factory.add_child(b_factory);
+ auto factory = factory_factory();
+ factory->add_child(a.make_factory(num_docs, a_hit_ratio));
+ factory->add_child(b_factory);
double time_ms_esti = 0.0;
- for (auto algo: {PlanningAlgo::Order, PlanningAlgo::Estimate, PlanningAlgo::Cost,
- PlanningAlgo::CostForceStrict}) {
- auto res = benchmark_search(factory, num_docs + 1, true, false, false, 1.0, algo);
- print_intermediate_blueprint_result(res, {a_hit_ratio, b_hit_ratio}, algo, num_docs);
- if (algo == PlanningAlgo::Estimate) {
- time_ms_esti = res.time_ms;
- }
- if (algo == PlanningAlgo::CostForceStrict) {
- double speedup = time_ms_esti / res.time_ms;
- if (speedup > max_speedup) {
- max_speedup = speedup;
+ for (InFlow in_flow: in_flows) {
+ for (auto algo: {PlanningAlgo::Order, PlanningAlgo::Estimate, PlanningAlgo::Cost,
+ PlanningAlgo::CostForceStrict}) {
+ auto res = benchmark_search(*factory, num_docs + 1, in_flow.strict(), false, false, in_flow.rate(), algo);
+ print_intermediate_blueprint_result(res, {a_hit_ratio, b_hit_ratio}, algo, in_flow, num_docs);
+ if (algo == PlanningAlgo::Estimate) {
+ time_ms_esti = res.time_ms;
}
- if (speedup < min_speedup) {
- min_speedup = speedup;
+ if (algo == PlanningAlgo::CostForceStrict) {
+ double speedup = time_ms_esti / res.time_ms;
+ if (speedup > max_speedup) {
+ max_speedup = speedup;
+ }
+ if (speedup < min_speedup) {
+ min_speedup = speedup;
+ }
+ std::cout << "speedup (esti/forc)=" << std::setprecision(4) << speedup << std::endl;
}
- std::cout << "speedup (esti/forc)=" << std::setprecision(4) << speedup << std::endl;
}
}
}
@@ -786,7 +814,19 @@ void
run_and_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
{
std::cout << "AND[A={" << a.to_string() << "},B={" << b.to_string() << "}]" << std::endl;
- run_intermediate_blueprint_benchmark<AndBlueprintFactory>(a, b, num_docs);
+ run_intermediate_blueprint_benchmark([](){ return std::make_unique<AndBlueprintFactory>(); }, {true}, a, b, num_docs);
+}
+
+// Prints an "SB[...]" banner and runs the intermediate blueprint benchmark
+// with SourceBlenderBlueprintFactory instances, first with a strict in-flow
+// and then with a series of decreasing non-strict in-flow rates.
+void
+run_source_blender_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
+{
+    std::cout << "SB[A={" << a.to_string() << "},B={" << b.to_string() << "}]" << std::endl;
+    // every 10th index maps to source 1, all others to source 2
+    auto pick_source = [](uint32_t i){ return (i%10 == 0) ? 1 : 2; };
+    auto make_sb_factory = [&](){
+        auto sb_factory = std::make_unique<SourceBlenderBlueprintFactory>();
+        sb_factory->init_selector(pick_source, num_docs + 1);
+        return sb_factory;
+    };
+    run_intermediate_blueprint_benchmark(make_sb_factory, {true, 0.75, 0.5, 0.25, 0.1, 0.01, 0.001}, a, b, num_docs);
}
//-------------------------------------------------------------------------------------
@@ -970,16 +1010,40 @@ TEST(IteratorBenchmark, analyze_AND_bitvector_vs_IN)
}
}
+// Source blender benchmark with a str_fs term as child A and a str_index term
+// (at 10x the hit ratio) as child B.
+// NOTE(review): presumably str_fs is the in-memory and str_index the disk
+// side referred to by the test name - confirm.
+TEST(IteratorBenchmark, analyze_strict_SOURCEBLENDER_memory_and_disk)
+{
+    for (double small_ratio: {0.001, 0.005, 0.01, 0.05}) {
+        const double large_ratio = small_ratio * 10;
+        run_source_blender_benchmark({str_fs, QueryOperator::Term, {small_ratio}},
+                                     {str_index, QueryOperator::Term, {large_ratio}},
+                                     num_docs);
+    }
+}
+
TEST(IteratorBenchmark, analyze_OR_non_strict_fs)
{
for (auto or_hit_ratio : {0.01, 0.1, 0.5}) {
BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::Or}, {false}, {or_hit_ratio},
{2, 4, 6, 8, 10, 100, 1000});
+ //setup.force_strict = true;
setup.filter_hit_ratios = gen_ratios(or_hit_ratio, 10.0, 13);
run_benchmarks(setup);
}
}
+// Non-strict OR benchmark over int32_fs, run separately for each child count.
+// The filter hit ratios are generated around the per-child estimate
+// (or_hit_ratio / children) rather than around or_hit_ratio itself.
+TEST(IteratorBenchmark, analyze_OR_non_strict_fs_child_est_adjust)
+{
+ for (auto or_hit_ratio : {0.01, 0.1, 0.5}) {
+ for (uint32_t children : {2, 4, 6, 8, 10, 100, 1000}) {
+ // share of the OR hit ratio attributed to each child
+ double child_est = or_hit_ratio / children;
+ BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::Or}, {false}, {or_hit_ratio},
+ {children});
+ //setup.force_strict = true;
+ // NOTE(review): gen_ratios(child_est, 10.0, 13) presumably yields 13 ratios spread around child_est - confirm
+ setup.filter_hit_ratios = gen_ratios(child_est, 10.0, 13);
+ run_benchmarks(setup);
+ }
+ }
+}
+
TEST(IteratorBenchmark, analyze_OR_non_strict_non_fs)
{
BenchmarkSetup setup(num_docs, {int32}, {QueryOperator::Or}, {false}, {0.1}, {2, 4, 6, 8, 10});
@@ -1008,6 +1072,22 @@ TEST(IteratorBenchmark, analyze_btree_vs_bitvector_iterators_strict)
run_benchmarks(setup);
}
+// For each hit ratio, searches for the in-flow rate at which the measured
+// non-strict search time of the int32_array_fs term ("btree") and the
+// int32_array term ("array") cross over, and prints the result to stderr.
+TEST(IteratorBenchmark, btree_vs_array_nonstrict_crossover) {
+ for (double hit_ratio: { 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009,
+ 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09,
+ 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
+ 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.0})
+ {
+ // NOTE(review): meaning of the trailing (0, hit_ratio, 1, false) arguments is not visible here - confirm against make_blueprint_factory
+ auto btree = make_blueprint_factory(int32_array_fs, QueryOperator::Term, num_docs, 0, hit_ratio, 1, false);
+ auto array = make_blueprint_factory( int32_array, QueryOperator::Term, num_docs, 0, hit_ratio, 1, false);
+ // measured time of a non-strict search planned with PlanningAlgo::Cost at the given in-flow
+ auto time_ms = [&](auto &bpf, double in_flow) {
+ return Sample(benchmark_search(bpf, num_docs + 1, false, false, false, in_flow, PlanningAlgo::Cost).time_ms);
+ };
+ // pair is (btree, array), matching the "btree"/"array" labels passed to find_crossover
+ auto calculate_at = [&](double in_flow) { return std::make_pair(time_ms(*btree, in_flow), time_ms(*array, in_flow)); };
+ fprintf(stderr, "btree/array crossover@%5.3f: %8.6f\n", hit_ratio, find_crossover("TIME", "btree", "array", calculate_at, 0.0001));
+ }
+}
+
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
int res = RUN_ALL_TESTS();
diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp
index 2bd560637d2..996cd448f44 100644
--- a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp
+++ b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp
@@ -68,8 +68,8 @@ struct TestHeap : public WeakAndHeap
{
ScoresHistory history;
- TestHeap(uint32_t scoresToTrack_) : WeakAndHeap(scoresToTrack_), history() {}
- virtual void adjust(score_t *begin, score_t *end) override {
+ explicit TestHeap(uint32_t scoresToTrack_) : WeakAndHeap(scoresToTrack_), history() {}
+ void adjust(score_t *begin, score_t *end) override {
Scores scores;
for (score_t *itr = begin; itr != end; ++itr) {
scores.add(*itr);
@@ -87,8 +87,8 @@ struct WandTestSpec : public WandSpec
TermFieldMatchData rootMatchData;
MatchParams matchParams;
- WandTestSpec(uint32_t scoresToTrack, uint32_t scoresAdjustFrequency = 1,
- score_t scoreThreshold = 0, double thresholdBoostFactor = 1);
+ explicit WandTestSpec(uint32_t scoresToTrack, uint32_t scoresAdjustFrequency = 1,
+ score_t scoreThreshold = 0, double thresholdBoostFactor = 1);
~WandTestSpec();
SearchIterator::UP create() {
MatchData::UP childrenMatchData = createMatchData();
@@ -114,7 +114,7 @@ WandTestSpec<HeapType>::WandTestSpec(uint32_t scoresToTrack, uint32_t scoresAdju
{}
template <typename HeapType>
-WandTestSpec<HeapType>::~WandTestSpec() {}
+WandTestSpec<HeapType>::~WandTestSpec() = default;
using WandSpecWithTestHeap = WandTestSpec<TestHeap>;
using WandSpecWithRealHeap = WandTestSpec<SharedWeakAndPriorityQueue>;
@@ -137,8 +137,8 @@ SimpleResult
asSimpleResult(const FakeResult &result)
{
SimpleResult retval;
- for (size_t i = 0; i < result.inspect().size(); ++i) {
- retval.addHit(result.inspect()[i].docId);
+ for (const auto & doc : result.inspect()) {
+ retval.addHit(doc.docId);
}
return retval;
}
@@ -152,26 +152,26 @@ struct WandBlueprintSpec
FakeRequestContext requestContext;
WandBlueprintSpec &add(const std::string &token, int32_t weight) {
- tokens.push_back(std::make_pair(token, weight));
+ tokens.emplace_back(token, weight);
return *this;
}
Node::UP createNode(uint32_t scoresToTrack = 100,
score_t scoreThreshold = 0,
double thresholdBoostFactor = 1) const {
- SimpleWandTerm *node = new SimpleWandTerm(tokens.size(), "view", 0, Weight(0),
- scoresToTrack, scoreThreshold, thresholdBoostFactor);
- for (size_t i = 0; i < tokens.size(); ++i) {
- node->addTerm(tokens[i].first, Weight(tokens[i].second));
+ auto node = std::make_unique<SimpleWandTerm>(tokens.size(), "view", 0, Weight(0),
+ scoresToTrack, scoreThreshold, thresholdBoostFactor);
+ for (const auto & token : tokens) {
+ node->addTerm(token.first, Weight(token.second));
}
- return Node::UP(node);
+ return node;
}
Blueprint::UP blueprint(Searchable &searchable, const std::string &field, const search::query::Node &term) const {
FieldSpecList fields;
fields.add(FieldSpec(field, fieldId, handle));
Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, term);
- EXPECT_TRUE(dynamic_cast<ParallelWeakAndBlueprint*>(bp.get()) != 0);
+ EXPECT_TRUE(dynamic_cast<ParallelWeakAndBlueprint*>(bp.get()) != nullptr);
return bp;
}
@@ -182,7 +182,7 @@ struct WandBlueprintSpec
bp->basic_plan(true, docIdLimit);
bp->fetchPostings(ExecuteInfo::FULL);
SearchIterator::UP sb = bp->createSearch(*md);
- EXPECT_TRUE(dynamic_cast<ParallelWeakAndSearch*>(sb.get()) != 0);
+ EXPECT_TRUE(dynamic_cast<ParallelWeakAndSearch*>(sb.get()) != nullptr);
return sb;
}
@@ -197,7 +197,7 @@ struct WandBlueprintSpec
bp->basic_plan(true, docIdLimit);
bp->fetchPostings(ExecuteInfo::FULL);
SearchIterator::UP sb = bp->createSearch(*md);
- EXPECT_TRUE(dynamic_cast<ParallelWeakAndSearch*>(sb.get()) != 0);
+ EXPECT_TRUE(dynamic_cast<ParallelWeakAndSearch*>(sb.get()) != nullptr);
return doSearch(*sb, *md->resolveTermField(handle));
}
};
@@ -258,7 +258,7 @@ struct AlgoSameScoreFixture : public FixtureBase
struct AlgoScoreThresholdFixture : public FixtureBase
{
- AlgoScoreThresholdFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) {
+ explicit AlgoScoreThresholdFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) {
spec.leaf(LeafSpec("A", 1).doc(1, 10).doc(2, 30));
spec.leaf(LeafSpec("B", 2).doc(1, 20).doc(3, 40));
prepare();
@@ -267,7 +267,7 @@ struct AlgoScoreThresholdFixture : public FixtureBase
struct AlgoLargeScoresFixture : public FixtureBase
{
- AlgoLargeScoresFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) {
+ explicit AlgoLargeScoresFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) {
spec.leaf(LeafSpec("A", 60000).doc(1, 60000).doc(2, 70000));
spec.leaf(LeafSpec("B", 70000).doc(1, 80000).doc(3, 90000));
prepare();
@@ -276,7 +276,7 @@ struct AlgoLargeScoresFixture : public FixtureBase
struct AlgoExhaustPastFixture : public FixtureBase
{
- AlgoExhaustPastFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) {
+ explicit AlgoExhaustPastFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) {
spec.leaf(LeafSpec("A", 1).doc(1, 20).doc(3, 40).doc(5, 10));
spec.leaf(LeafSpec("B", 1).doc(5, 10));
spec.leaf(LeafSpec("C", 1).doc(5, 10));
@@ -449,11 +449,11 @@ struct BlueprintFixtureBase
};
BlueprintFixtureBase::BlueprintFixtureBase() : spec(), searchable() {}
-BlueprintFixtureBase::~BlueprintFixtureBase() {}
+BlueprintFixtureBase::~BlueprintFixtureBase() = default;
struct BlueprintHitsFixture : public BlueprintFixtureBase
{
- FakeResult createResult(size_t hits) {
+ static FakeResult createResult(size_t hits) {
FakeResult result;
for (size_t i = 0; i < hits; ++i) {
result.doc(i + 1);
@@ -479,7 +479,7 @@ struct BlueprintHitsFixture : public BlueprintFixtureBase
struct ThresholdBoostFixture : public FixtureBase
{
FakeResult result;
- ThresholdBoostFixture(double boost) : FixtureBase(1, 1, 800, boost) {
+ explicit ThresholdBoostFixture(double boost) : FixtureBase(1, 1, 800, boost) {
spec.leaf(LeafSpec("A").doc(1, 10));
spec.leaf(LeafSpec("B").doc(2, 20));
spec.leaf(LeafSpec("C").doc(3, 30));
@@ -532,7 +532,7 @@ TEST(ParallelWeakAndTest, require_that_blueprint_picks_up_docid_limit)
BlueprintFixture f;
Node::UP term = f.spec.createNode(57, 67, 77.7);
Blueprint::UP bp = f.blueprint(*term);
- const ParallelWeakAndBlueprint * pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get());
+ const auto * pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get());
EXPECT_EQ(0u, pbp->get_docid_limit());
bp->setDocIdLimit(1000);
EXPECT_EQ(1000u, pbp->get_docid_limit());
@@ -543,7 +543,7 @@ TEST(ParallelWeakAndTest, require_that_scores_to_track_score_threshold_and_thres
BlueprintFixture f;
Node::UP term = f.spec.createNode(57, 67, 77.7);
Blueprint::UP bp = f.blueprint(*term);
- const ParallelWeakAndBlueprint * pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get());
+ const auto * pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get());
EXPECT_EQ(57u, pbp->getScores().getScoresToTrack());
EXPECT_EQ(67u, pbp->getScoreThreshold());
EXPECT_EQ(77.7, pbp->getThresholdBoostFactor());
@@ -708,7 +708,7 @@ SearchIterator::UP create_wand(bool use_dww,
class Verifier : public search::test::DwwIteratorChildrenVerifier {
public:
- Verifier(bool use_dww) : _use_dww(use_dww) { }
+ explicit Verifier(bool use_dww) : _use_dww(use_dww) { }
private:
SearchIterator::UP create(bool strict) const override {
MatchParams match_params(_dummy_heap, _dummy_heap.getMinScore(), 1.0, 1);
diff --git a/searchlib/src/tests/queryeval/sourceblender/sourceblender_test.cpp b/searchlib/src/tests/queryeval/sourceblender/sourceblender_test.cpp
index b84cb02a357..b2a1f6a645a 100644
--- a/searchlib/src/tests/queryeval/sourceblender/sourceblender_test.cpp
+++ b/searchlib/src/tests/queryeval/sourceblender/sourceblender_test.cpp
@@ -7,15 +7,14 @@
#include <vespa/searchlib/queryeval/leaf_blueprints.h>
#define ENABLE_GTEST_MIGRATION
#include <vespa/searchlib/test/searchiteratorverifier.h>
-#include <vespa/searchlib/common/bitvectoriterator.h>
#include <vespa/searchlib/attribute/fixedsourceselector.h>
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/vespalib/gtest/gtest.h>
using namespace search::queryeval;
-using namespace search::fef;
using namespace search;
using std::make_unique;
+using search::fef::MatchData;
/**
* Proxy search used to verify unpack pattern
@@ -27,24 +26,24 @@ private:
SimpleResult _unpacked;
protected:
- virtual void doSeek(uint32_t docid) override {
+ void doSeek(uint32_t docid) override {
_search->seek(docid);
setDocId(_search->getDocId());
}
- virtual void doUnpack(uint32_t docid) override {
+ void doUnpack(uint32_t docid) override {
_unpacked.addHit(docid);
_search->unpack(docid);
}
public:
- UnpackChecker(SearchIterator *search) : _search(search), _unpacked() {}
+ explicit UnpackChecker(SearchIterator *search) : _search(search), _unpacked() {}
const SimpleResult &getUnpacked() const { return _unpacked; }
};
class MySelector : public search::FixedSourceSelector
{
public:
- MySelector(int defaultSource) : search::FixedSourceSelector(defaultSource, "fs") { }
+ explicit MySelector(int defaultSource) : search::FixedSourceSelector(defaultSource, "fs") { }
MySelector & set(Source s, uint32_t docId) {
setSource(s, docId);
return *this;
@@ -65,12 +64,12 @@ TEST(SourceBlenderTest, test_strictness)
a.addHit(2).addHit(5).addHit(6).addHit(8);
b.addHit(3).addHit(5).addHit(6).addHit(7);
- MySelector *sel = new MySelector(5);
+ auto *sel = new MySelector(5);
sel->set(2, 1).set(3, 2).set(5, 2).set(7, 1);
- SourceBlenderBlueprint *blend_b = new SourceBlenderBlueprint(*sel);
- Blueprint::UP a_b(new SimpleBlueprint(a));
- Blueprint::UP b_b(new SimpleBlueprint(b));
+ auto *blend_b = new SourceBlenderBlueprint(*sel);
+ auto a_b = std::make_unique<SimpleBlueprint>(a);
+ auto b_b = std::make_unique<SimpleBlueprint>(b);
a_b->setSourceId(1);
b_b->setSourceId(2);
blend_b->addChild(std::move(a_b));
@@ -111,16 +110,16 @@ TEST(SourceBlenderTest, test_full_sourceblender_search)
c.addHit(4).addHit(11).addHit(21).addHit(32);
// these are all handed over to the blender
- UnpackChecker *ua = new UnpackChecker(new SimpleSearch(a));
- UnpackChecker *ub = new UnpackChecker(new SimpleSearch(b));
- UnpackChecker *uc = new UnpackChecker(new SimpleSearch(c));
+ auto *ua = new UnpackChecker(new SimpleSearch(a));
+ auto *ub = new UnpackChecker(new SimpleSearch(b));
+ auto *uc = new UnpackChecker(new SimpleSearch(c));
auto sel = make_unique<MySelector>(5);
sel->set(2, 1).set(3, 2).set(11, 2).set(21, 3).set(34, 1);
SourceBlenderSearch::Children abc;
- abc.push_back(SourceBlenderSearch::Child(ua, 1));
- abc.push_back(SourceBlenderSearch::Child(ub, 2));
- abc.push_back(SourceBlenderSearch::Child(uc, 3));
+ abc.emplace_back(ua, 1);
+ abc.emplace_back(ub, 2);
+ abc.emplace_back(uc, 3);
SearchIterator::UP blend(SourceBlenderSearch::create(sel->createIterator(), abc, true));
SimpleResult result;
@@ -149,7 +148,7 @@ using search::test::SearchIteratorVerifier;
class Verifier : public SearchIteratorVerifier {
public:
Verifier();
- ~Verifier();
+ ~Verifier() override;
SearchIterator::UP create(bool strict) const override {
return SearchIterator::UP(SourceBlenderSearch::create(_selector.createIterator(),
createChildren(strict),
@@ -178,7 +177,7 @@ Verifier::Verifier() :
_indexes[indexId].push_back(docId);
}
}
-Verifier::~Verifier() {}
+Verifier::~Verifier() = default;
TEST(SourceBlenderTest, test_that_source_blender_iterator_adheres_to_search_terator_requirements)
{
diff --git a/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp b/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp
index 94ecd8fa539..a7516226daf 100644
--- a/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp
+++ b/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp
@@ -13,6 +13,7 @@
#include <vespa/searchlib/queryeval/simpleresult.h>
#include <vespa/searchlib/queryeval/wand/weak_and_search.h>
#include <vespa/searchlib/queryeval/weighted_set_term_search.h>
+#include <vespa/searchlib/queryeval/wand/weak_and_heap.h>
#include <vespa/vespalib/util/box.h>
#include <vespa/vespalib/util/stringfmt.h>
@@ -135,7 +136,7 @@ constexpr vespalib::duration max_time = 1000s;
//-----------------------------------------------------------------------------
struct ChildFactory {
- ChildFactory() {}
+ ChildFactory() = default;
virtual std::string name() const = 0;
virtual SearchIterator::UP createChild(uint32_t idx, uint32_t limit) const = 0;
virtual ~ChildFactory() = default;
@@ -190,8 +191,9 @@ struct ModSearchFactory : ChildFactory {
//-----------------------------------------------------------------------------
struct VespaWandFactory : SparseVectorFactory {
+ mutable SharedWeakAndPriorityQueue _scores;
uint32_t n;
- explicit VespaWandFactory(uint32_t n_in) noexcept : n(n_in) {}
+ explicit VespaWandFactory(uint32_t n_in) : _scores(n_in), n(n_in) {}
std::string name() const override {
return vespalib::make_string("VespaWand(%u)", n);
}
@@ -200,7 +202,7 @@ struct VespaWandFactory : SparseVectorFactory {
for (size_t i = 0; i < childCnt; ++i) {
terms.emplace_back(childFactory.createChild(i, limit), default_weight, limit / (i + 1));
}
- return WeakAndSearch::create(terms, n, true);
+ return WeakAndSearch::create(terms, wand::MatchParams(_scores), n, true);
}
};
diff --git a/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp b/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp
index 5e056eb6c0e..457f7133dc1 100644
--- a/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp
+++ b/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp
@@ -29,20 +29,20 @@ struct Stats {
size_t unpackCnt;
size_t skippedDocs;
size_t skippedHits;
- Stats() : hitCnt(0), seekCnt(0), unpackCnt(0),
+ Stats() noexcept : hitCnt(0), seekCnt(0), unpackCnt(0),
skippedDocs(0), skippedHits(0) {}
- void hit() {
+ void hit() noexcept {
++hitCnt;
}
- void seek(size_t docs, size_t hits) {
+ void seek(size_t docs, size_t hits) noexcept {
++seekCnt;
skippedDocs += docs;
skippedHits += hits;
}
- void unpack() {
+ void unpack() noexcept {
++unpackCnt;
}
- void print() {
+ void print() const {
fprintf(stderr, "Stats: hits=%zu, seeks=%zu, unpacks=%zu, skippedDocs=%zu, skippedHits=%zu\n",
hitCnt, seekCnt, unpackCnt, skippedDocs, skippedHits);
}
@@ -77,7 +77,7 @@ struct ModSearch : SearchIterator {
}
}
void doUnpack(uint32_t docid) override {
- if (tfmd != NULL) {
+ if (tfmd != nullptr) {
tfmd->reset(docid);
search::fef::TermFieldMatchDataPosition pos;
pos.setElementWeight(info.getMaxWeight());
@@ -96,40 +96,52 @@ ModSearch::~ModSearch() = default;
struct WandFactory {
virtual std::string name() const = 0;
virtual SearchIterator::UP create(const wand::Terms &terms) = 0;
- virtual ~WandFactory() {}
+ virtual ~WandFactory() = default;
};
struct VespaWandFactory : WandFactory {
+ mutable SharedWeakAndPriorityQueue _scores;
uint32_t n;
- VespaWandFactory(uint32_t n_in) : n(n_in) {}
+ explicit VespaWandFactory(uint32_t n_in) noexcept
+ : _scores(n_in),
+ n(n_in)
+ {}
~VespaWandFactory() override;
- virtual std::string name() const override { return make_string("VESPA WAND (n=%u)", n); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
- return SearchIterator::UP(WeakAndSearch::create(terms, n, true));
+ std::string name() const override { return make_string("VESPA WAND (n=%u)", n); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
+ return WeakAndSearch::create(terms, wand::MatchParams(_scores, 1, 1), n, true);
}
};
VespaWandFactory::~VespaWandFactory() = default;
struct VespaArrayWandFactory : WandFactory {
+ mutable SharedWeakAndPriorityQueue _scores;
uint32_t n;
- VespaArrayWandFactory(uint32_t n_in) : n(n_in) {}
+ explicit VespaArrayWandFactory(uint32_t n_in)
+ : _scores(n_in),
+ n(n_in)
+ {}
~VespaArrayWandFactory() override;
- virtual std::string name() const override { return make_string("VESPA ARRAY WAND (n=%u)", n); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
- return SearchIterator::UP(WeakAndSearch::createArrayWand(terms, n, true));
+ std::string name() const override { return make_string("VESPA ARRAY WAND (n=%u)", n); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
+ return WeakAndSearch::createArrayWand(terms, wand::MatchParams(_scores, 1, 1), wand::TermFrequencyScorer(), n, true);
}
};
VespaArrayWandFactory::~VespaArrayWandFactory() = default;
struct VespaHeapWandFactory : WandFactory {
+ mutable SharedWeakAndPriorityQueue _scores;
uint32_t n;
- VespaHeapWandFactory(uint32_t n_in) : n(n_in) {}
+ explicit VespaHeapWandFactory(uint32_t n_in)
+ : _scores(n_in),
+ n(n_in)
+ {}
~VespaHeapWandFactory() override;
- virtual std::string name() const override { return make_string("VESPA HEAP WAND (n=%u)", n); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
- return SearchIterator::UP(WeakAndSearch::createHeapWand(terms, n, true));
+ std::string name() const override { return make_string("VESPA HEAP WAND (n=%u)", n); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
+ return WeakAndSearch::createHeapWand(terms, wand::MatchParams(_scores, 1, 1), wand::TermFrequencyScorer(), n, true);
}
};
@@ -138,39 +150,39 @@ VespaHeapWandFactory::~VespaHeapWandFactory() = default;
struct VespaParallelWandFactory : public WandFactory {
SharedWeakAndPriorityQueue scores;
TermFieldMatchData rootMatchData;
- VespaParallelWandFactory(uint32_t n) : scores(n), rootMatchData() {}
+ explicit VespaParallelWandFactory(uint32_t n) noexcept : scores(n), rootMatchData() {}
~VespaParallelWandFactory() override;
- virtual std::string name() const override { return make_string("VESPA PWAND (n=%u)", scores.getScoresToTrack()); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
- return SearchIterator::UP(ParallelWeakAndSearch::create(terms,
+ std::string name() const override { return make_string("VESPA PWAND (n=%u)", scores.getScoresToTrack()); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
+ return ParallelWeakAndSearch::create(terms,
PWMatchParams(scores, 0, 1, 1),
- PWRankParams(rootMatchData, MatchData::UP()), true));
+ PWRankParams(rootMatchData, {}), true);
}
};
VespaParallelWandFactory::~VespaParallelWandFactory() = default;
struct VespaParallelArrayWandFactory : public VespaParallelWandFactory {
- VespaParallelArrayWandFactory(uint32_t n) : VespaParallelWandFactory(n) {}
+ explicit VespaParallelArrayWandFactory(uint32_t n) noexcept : VespaParallelWandFactory(n) {}
~VespaParallelArrayWandFactory() override;
- virtual std::string name() const override { return make_string("VESPA ARRAY PWAND (n=%u)", scores.getScoresToTrack()); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
- return SearchIterator::UP(ParallelWeakAndSearch::createArrayWand(terms,
+ std::string name() const override { return make_string("VESPA ARRAY PWAND (n=%u)", scores.getScoresToTrack()); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
+ return ParallelWeakAndSearch::createArrayWand(terms,
PWMatchParams(scores, 0, 1, 1),
- PWRankParams(rootMatchData, MatchData::UP()), true));
+ PWRankParams(rootMatchData, {}), true);
}
};
VespaParallelArrayWandFactory::~VespaParallelArrayWandFactory() = default;
struct VespaParallelHeapWandFactory : public VespaParallelWandFactory {
- VespaParallelHeapWandFactory(uint32_t n) : VespaParallelWandFactory(n) {}
+ explicit VespaParallelHeapWandFactory(uint32_t n) noexcept : VespaParallelWandFactory(n) {}
~VespaParallelHeapWandFactory() override;
- virtual std::string name() const override { return make_string("VESPA HEAP PWAND (n=%u)", scores.getScoresToTrack()); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
- return SearchIterator::UP(ParallelWeakAndSearch::createHeapWand(terms,
+ std::string name() const override { return make_string("VESPA HEAP PWAND (n=%u)", scores.getScoresToTrack()); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
+ return ParallelWeakAndSearch::createHeapWand(terms,
PWMatchParams(scores, 0, 1, 1),
- PWRankParams(rootMatchData, MatchData::UP()), true));
+ PWRankParams(rootMatchData, {}), true);
}
};
@@ -178,11 +190,11 @@ VespaParallelHeapWandFactory::~VespaParallelHeapWandFactory() = default;
struct TermFrequencyRiseWandFactory : WandFactory {
uint32_t n;
- TermFrequencyRiseWandFactory(uint32_t n_in) : n(n_in) {}
+ explicit TermFrequencyRiseWandFactory(uint32_t n_in) noexcept : n(n_in) {}
~TermFrequencyRiseWandFactory() override;
- virtual std::string name() const override { return make_string("RISE WAND TF (n=%u)", n); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
- return SearchIterator::UP(new rise::TermFrequencyRiseWand(terms, n));
+ std::string name() const override { return make_string("RISE WAND TF (n=%u)", n); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
+ return std::make_unique<rise::TermFrequencyRiseWand>(terms, n);
}
};
@@ -190,11 +202,11 @@ TermFrequencyRiseWandFactory::~TermFrequencyRiseWandFactory() = default;
struct DotProductRiseWandFactory : WandFactory {
uint32_t n;
- DotProductRiseWandFactory(uint32_t n_in) : n(n_in) {}
+ explicit DotProductRiseWandFactory(uint32_t n_in) noexcept : n(n_in) {}
~DotProductRiseWandFactory() override;
- virtual std::string name() const override { return make_string("RISE WAND DP (n=%u)", n); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
- return SearchIterator::UP(new rise::DotProductRiseWand(terms, n));
+ std::string name() const override { return make_string("RISE WAND DP (n=%u)", n); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
+ return std::make_unique<rise::DotProductRiseWand>(terms, n);
}
};
@@ -204,13 +216,13 @@ struct FilterFactory : WandFactory {
WandFactory &factory;
Stats stats;
uint32_t n;
- FilterFactory(WandFactory &f, uint32_t n_in) : factory(f), n(n_in) {}
+ FilterFactory(WandFactory &f, uint32_t n_in) noexcept : factory(f), n(n_in) {}
~FilterFactory() override;
- virtual std::string name() const override { return make_string("Filter (mod=%u) [%s]", n, factory.name().c_str()); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
+ std::string name() const override { return make_string("Filter (mod=%u) [%s]", n, factory.name().c_str()); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
AndNotSearch::Children children;
children.push_back(factory.create(terms));
- children.emplace_back(new ModSearch(stats, n, search::endDocId, n, NULL));
+ children.emplace_back(new ModSearch(stats, n, search::endDocId, n, nullptr));
return AndNotSearch::create(std::move(children), true);
}
};
@@ -220,8 +232,8 @@ FilterFactory::~FilterFactory() = default;
struct Setup {
Stats stats;
vespalib::duration minTime;
- Setup() : stats(), minTime(10000s) {}
- virtual ~Setup() {}
+ Setup() noexcept : stats(), minTime(10000s) {}
+ virtual ~Setup() = default;
virtual std::string name() const = 0;
virtual SearchIterator::UP create() = 0;
void perform() {
@@ -256,10 +268,10 @@ struct WandSetup : Setup {
MatchData::UP matchData;
WandSetup(WandFactory &f, uint32_t c, uint32_t l) : Setup(), factory(f), childCnt(c), limit(l), weight(100), matchData() {}
~WandSetup() override;
- virtual std::string name() const override {
+ std::string name() const override {
return make_string("Wand Setup (terms=%u,docs=%u) [%s]", childCnt, limit, factory.name().c_str());
}
- virtual SearchIterator::UP create() override {
+ SearchIterator::UP create() override {
MatchDataLayout layout;
std::vector<TermFieldHandle> handles;
for (size_t i = 0; i < childCnt; ++i) {
diff --git a/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp b/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp
index 689f9f085d0..4aab66f3cc9 100644
--- a/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp
+++ b/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp
@@ -1,8 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/searchlib/queryeval/fake_search.h>
#include <vespa/searchlib/queryeval/wand/weak_and_search.h>
+#include <vespa/searchlib/queryeval/wand/weak_and_heap.h>
#include <vespa/searchlib/queryeval/simpleresult.h>
-#include <vespa/searchlib/queryeval/simplesearch.h>
#include <vespa/searchlib/queryeval/test/eagerchild.h>
#include <vespa/searchlib/queryeval/test/leafspec.h>
#include <vespa/searchlib/queryeval/test/wandspec.h>
@@ -20,11 +19,13 @@ namespace {
struct MyWandSpec : public WandSpec
{
+ SharedWeakAndPriorityQueue scores;
uint32_t n;
- MyWandSpec(uint32_t n_) : WandSpec(), n(n_) {}
+ explicit MyWandSpec(uint32_t n_in) : WandSpec(), scores(n_in), n(n_in) {}
SearchIterator *create() {
- return new TrackedSearch("WAND", getHistory(), WeakAndSearch::create(getTerms(), n, true));
+ return new TrackedSearch("WAND", getHistory(),
+ WeakAndSearch::create(getTerms(), wand::MatchParams(scores, 1, 1), n, true));
}
};
@@ -104,7 +105,8 @@ TEST(WeakAndTest, require_that_initial_docid_for_subsearches_are_taken_into_acco
wand::Terms terms;
terms.push_back(wand::Term(new TrackedSearch("foo", history, new EagerChild(search::endDocId)), 100, 1));
terms.push_back(wand::Term(new TrackedSearch("bar", history, new EagerChild(10)), 100, 2));
- SearchIterator::UP search(new TrackedSearch("WAND", history, WeakAndSearch::create(terms, 2, true)));
+ SharedWeakAndPriorityQueue scores(2);
+ auto search = std::make_unique<TrackedSearch>("WAND", history, WeakAndSearch::create(terms, wand::MatchParams(scores), 2, true));
SimpleResult hits;
hits.search(*search);
EXPECT_EQ(SimpleResult().addHit(10), hits);
@@ -114,17 +116,26 @@ TEST(WeakAndTest, require_that_initial_docid_for_subsearches_are_taken_into_acco
}
class IteratorChildrenVerifier : public search::test::IteratorChildrenVerifier {
+public:
+ IteratorChildrenVerifier();
+ ~IteratorChildrenVerifier() override;
private:
+ mutable std::vector<std::unique_ptr<SharedWeakAndPriorityQueue>> _scores;
SearchIterator::UP create(bool strict) const override {
wand::Terms terms;
for (size_t i = 0; i < _num_children; ++i) {
terms.emplace_back(createIterator(_split_lists[i], strict).release(),
100, _split_lists[i].size());
}
- return SearchIterator::UP(WeakAndSearch::create(terms, -1, strict));
+ static constexpr size_t LARGE_ENOUGH_HEAP_FOR_ALL = 10000;
+ _scores.push_back(std::make_unique<SharedWeakAndPriorityQueue>(LARGE_ENOUGH_HEAP_FOR_ALL));
+ return WeakAndSearch::create(terms, wand::MatchParams(*_scores.back(), 1, 1), -1, strict);
}
};
+IteratorChildrenVerifier::IteratorChildrenVerifier() : _scores() {}
+IteratorChildrenVerifier::~IteratorChildrenVerifier() = default;
+
TEST(WeakAndTest, verify_search_iterator_conformance)
{
IteratorChildrenVerifier verifier;
diff --git a/searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp b/searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp
index 54bf1e92037..0573404a3b4 100644
--- a/searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp
+++ b/searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp
@@ -16,15 +16,16 @@ void checkWandHits(WandFactory &vespa, WandFactory &rise, uint32_t step, uint32_
s1->initFullRange();
SearchIterator::UP s2 = riseSetup.create();
s2->initFullRange();
- ASSERT_TRUE(dynamic_cast<WeakAndType*>(s1.get()) != 0);
- ASSERT_TRUE(dynamic_cast<WeakAndType*>(s2.get()) == 0);
- ASSERT_TRUE(dynamic_cast<RiseType*>(s2.get()) != 0);
- ASSERT_TRUE(dynamic_cast<RiseType*>(s1.get()) == 0);
+ ASSERT_TRUE(dynamic_cast<WeakAndType*>(s1.get()) != nullptr);
+ ASSERT_TRUE(dynamic_cast<WeakAndType*>(s2.get()) == nullptr);
+ ASSERT_TRUE(dynamic_cast<RiseType*>(s2.get()) != nullptr);
+ ASSERT_TRUE(dynamic_cast<RiseType*>(s1.get()) == nullptr);
s1->seek(1);
s2->seek(1);
while (!s1->isAtEnd() &&
!s2->isAtEnd())
{
+ if (s1->getDocId() != s2->getDocId()) assert(true);
ASSERT_EQUAL(s1->getDocId(), s2->getDocId());
if ((filter == 0) || ((s1->getDocId() % filter) != 0)) {
s1->unpack(s1->getDocId());
diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp
index e1f3f0805d9..8a0bc28f4dd 100644
--- a/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp
+++ b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp
@@ -63,4 +63,27 @@ TEST("require that DotProductScorer calculates term score")
EXPECT_EQUAL(11u, itr->_unpackDocId);
}
+// Pins the values returned by wand::Bm25TermFrequencyScorer::calculateMaxScore for a
+// scorer constructed with (1000000, 1.0). The first argument of calculateMaxScore is
+// varied from 1 to 990000 while the second stays 1; the expected score shrinks as the
+// first argument grows.
+// NOTE(review): the expected constants agree (to within rounding) with
+// 1e6 * ln(1 + (N - n + 0.5) / (n + 0.5)) for N = 1000000 and n = first argument,
+// i.e. presumably a scaled BM25 idf with N = document count and n = document
+// frequency - confirm against the scorer implementation.
+TEST("test bm25 idf scorer for wand")
+{
+ wand::Bm25TermFrequencyScorer scorer(1000000, 1.0);
+ EXPECT_EQUAL(13410046, scorer.calculateMaxScore(1, 1));
+ EXPECT_EQUAL(11464136, scorer.calculateMaxScore(10, 1));
+ EXPECT_EQUAL(6907256, scorer.calculateMaxScore(1000, 1));
+ EXPECT_EQUAL(4605121, scorer.calculateMaxScore(10000, 1));
+ EXPECT_EQUAL(2302581, scorer.calculateMaxScore(100000, 1));
+ EXPECT_EQUAL(693147, scorer.calculateMaxScore(500000, 1)); // ~ 1e6 * ln(2)
+ EXPECT_EQUAL(105360, scorer.calculateMaxScore(900000, 1));
+ EXPECT_EQUAL(10050, scorer.calculateMaxScore(990000, 1));
+}
+
+// Compares two Bm25TermFrequencyScorer instances that differ only in the second
+// constructor argument (0.8 vs 1.0) on the same two calculateMaxScore inputs.
+// With 0.8 the two results lie closer together (8207814 / 2690049) than with
+// 1.0 (6907256 / 10050).
+// NOTE(review): the 0.8 values agree with (1 - r) * max + r * score, where r = 0.8,
+// score is the r = 1.0 result and max = 13410046 (the r = 1.0 result for input 1 in
+// the test above) - presumably the second constructor argument compresses the score
+// range towards the maximum; confirm against the scorer implementation.
+TEST("test limited range of bm25 idf scorer for wand")
+{
+ wand::Bm25TermFrequencyScorer scorer08(1000000, 0.8);
+ wand::Bm25TermFrequencyScorer scorer10(1000000, 1.0);
+ EXPECT_EQUAL(8207814, scorer08.calculateMaxScore(1000, 1));
+ EXPECT_EQUAL(2690049, scorer08.calculateMaxScore(990000, 1));
+ EXPECT_EQUAL(6907256, scorer10.calculateMaxScore(1000, 1));
+ EXPECT_EQUAL(10050, scorer10.calculateMaxScore(990000, 1));
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/ranksetup/ranksetup_test.cpp b/searchlib/src/tests/ranksetup/ranksetup_test.cpp
index 53224425a04..348326c3936 100644
--- a/searchlib/src/tests/ranksetup/ranksetup_test.cpp
+++ b/searchlib/src/tests/ranksetup/ranksetup_test.cpp
@@ -26,6 +26,7 @@
#include <vespa/searchlib/fef/test/rankresult.h>
#include <vespa/searchlib/features/rankingexpressionfeature.h>
+#include <vespa/searchlib/features/second_phase_feature.h>
#include <vespa/searchlib/features/setup.h>
#include <vespa/searchlib/features/valuefeature.h>
#include <vespa/searchlib/fef/test/plugin/chain.h>
@@ -787,6 +788,19 @@ RankSetupTest::testFeatureDump()
exp.addScore("test_cfgvalue(foo)", 1.0);
EXPECT_EQUAL(exp, dumper.dump());
}
+ { // Dump secondPhase feature
+ IndexEnvironment indexEnv;
+ indexEnv.getProperties().add(indexproperties::rank::FirstPhase::NAME, "value(2)");
+ indexEnv.getProperties().add(indexproperties::rank::SecondPhase::NAME, "value(4)");
+ RankEnvironment rankEnv(_factory, indexEnv, _queryEnv);
+ FeatureDumper dumper(rankEnv);
+ dumper.configure();
+ dumper.addDumpFeature("secondPhase");
+ EXPECT_TRUE(dumper.setup());
+ RankResult exp;
+ exp.addScore("secondPhase", 4.0);
+ EXPECT_EQUAL(exp, dumper.dump());
+ }
}
void
@@ -939,6 +953,7 @@ RankSetupTest::RankSetupTest() :
setup_fef_test_plugin(_factory);
_factory.addPrototype(Blueprint::SP(new ValueBlueprint()));
_factory.addPrototype(Blueprint::SP(new RankingExpressionBlueprint()));
+ _factory.addPrototype(std::make_shared<SecondPhaseBlueprint>());
// setup an original attribute manager with two attributes
search::attribute::Config cfg(search::attribute::BasicType::INT32,
diff --git a/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp b/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp
index 4ffc1fe366e..136878f0ea5 100644
--- a/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp
+++ b/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp
@@ -9,7 +9,6 @@
#include <vespa/searchlib/test/attribute_builder.h>
#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/vespalib/util/exceptions.h>
-#include <iostream>
using namespace search::attribute::test;
using namespace search::attribute;
diff --git a/searchlib/src/tests/tensor/distance_functions/CMakeLists.txt b/searchlib/src/tests/tensor/distance_functions/CMakeLists.txt
index e1a54f7883a..92ad9ae2648 100644
--- a/searchlib/src/tests/tensor/distance_functions/CMakeLists.txt
+++ b/searchlib/src/tests/tensor/distance_functions/CMakeLists.txt
@@ -7,3 +7,10 @@ vespa_add_executable(searchlib_distance_functions_test_app TEST
GTest::GTest
)
vespa_add_test(NAME searchlib_distance_functions_test_app COMMAND searchlib_distance_functions_test_app)
+
+vespa_add_executable(searchlib_distance_functions_benchmark_app TEST
+ SOURCES
+ distance_functions_benchmark.cpp
+ DEPENDS
+ searchlib
+)
diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp
new file mode 100644
index 00000000000..14a0adac651
--- /dev/null
+++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp
@@ -0,0 +1,129 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/typed_cells.h>
+#include <vespa/searchlib/common/geo_gcd.h>
+#include <vespa/searchlib/tensor/distance_functions.h>
+#include <vespa/searchlib/tensor/distance_function_factory.h>
+#include <vespa/searchlib/tensor/mips_distance_transform.h>
+#include <vespa/vespalib/util/benchmark_timer.h>
+#include <vespa/vespalib/util/classname.h>
+
+using namespace search::tensor;
+using vespalib::eval::Int8Float;
+using vespalib::BFloat16;
+using vespalib::eval::TypedCells;
+using search::attribute::DistanceMetric;
+
+size_t npos = std::string::npos;
+
+double run_calc(size_t iterations, TypedCells b, const BoundDistanceFunction & df) __attribute__((noinline));
+double run_calc_with_limit(size_t iterations, TypedCells b, const BoundDistanceFunction & df) __attribute__((noinline));
+
+/**
+ * Times df.calc(b) in rounds of `iterations` calls for as long as the
+ * benchmark timer reports remaining budget.
+ *
+ * Prints the class name of df together with the timer's minimum round time
+ * and the smallest distance seen, and returns that smallest distance.
+ */
+double
+run_calc(size_t iterations, TypedCells b, const BoundDistanceFunction & df) {
+    vespalib::BenchmarkTimer timer(1.0);
+    double smallest = std::numeric_limits<double>::max();
+    for (; timer.has_budget(); ) {
+        timer.before();
+        size_t remaining = iterations;
+        while (remaining-- > 0) {
+            smallest = std::min(df.calc(b), smallest);
+        }
+        timer.after();
+    }
+    const auto class_name = vespalib::getClassName(df);
+    printf("%s::calc: Time used = %1.3f, min_result=%3.3f\n",
+           class_name.c_str(), timer.min_time(), smallest);
+    return smallest;
+}
+
+/**
+ * Same measurement as run_calc, but exercises df.calc_with_limit(b, limit)
+ * with the limit set to the largest finite double.
+ *
+ * Prints the class name of df together with the timer's minimum round time
+ * and the smallest distance seen, and returns that smallest distance.
+ */
+double
+run_calc_with_limit(size_t iterations, TypedCells b, const BoundDistanceFunction & df) {
+    vespalib::BenchmarkTimer timer(1.0);
+    const double unlimited = std::numeric_limits<double>::max();
+    double smallest = unlimited;
+    for (; timer.has_budget(); ) {
+        timer.before();
+        size_t remaining = iterations;
+        while (remaining-- > 0) {
+            smallest = std::min(df.calc_with_limit(b, unlimited), smallest);
+        }
+        timer.after();
+    }
+    const auto class_name = vespalib::getClassName(df);
+    printf("%s::calc_with_limit: Time used = %1.3f, min_result=%3.3f\n",
+           class_name.c_str(), timer.min_time(), smallest);
+    return smallest;
+}
+
+/**
+ * Benchmarks one distance function factory for cell type T.
+ *
+ * Fills two vectors of `elems` cells with pseudo-random values in [0, 128),
+ * binds the first as query vector and measures both calc() and
+ * calc_with_limit() against the second.
+ *
+ * The forward declaration must repeat the full parameter list: a declaration
+ * with a different signature introduces a separate (never defined) overload,
+ * and the noinline attribute would then not apply to this function.
+ */
+template<typename T>
+void benchmark(size_t iterations, size_t elems, const DistanceFunctionFactory & df) __attribute__((noinline));
+
+template<typename T>
+void benchmark(size_t iterations, size_t elems, const DistanceFunctionFactory & df) {
+    std::vector<T> av, bv;
+    srandom(7); // fixed seed: every invocation sees the same random() sequence
+    av.reserve(elems);
+    bv.reserve(elems);
+    for (size_t i(0); i < elems; i++) {
+        av.push_back(random()%128);
+        bv.push_back(random()%128);
+    }
+    TypedCells a_cells(av), b_cells(bv);
+
+    double calc_result = run_calc(iterations, b_cells, *df.for_query_vector(a_cells));
+    double calc_with_limit_result = run_calc_with_limit(iterations, b_cells, *df.for_query_vector(a_cells));
+    // The limit passed by run_calc_with_limit is the largest double, so both
+    // variants are expected to report exactly the same minimum.
+    assert(calc_result == calc_with_limit_result);
+}
+
+/**
+ * Runs the benchmark for cell type T on every distance function whose tag
+ * ("euclid", "angular", "prenorm", "mips") occurs as a substring of
+ * dist_functions. For BFloat16 only the euclidean factory is benchmarked.
+ */
+template<typename T>
+void benchmark(size_t iterations, size_t elems, const std::string & dist_functions) {
+    const auto wanted = [&dist_functions](const char *tag) {
+        return dist_functions.find(tag) != npos;
+    };
+    if (wanted("euclid")) {
+        benchmark<T>(iterations, elems, EuclideanDistanceFunctionFactory<T>());
+    }
+    if constexpr ( ! std::is_same<T, BFloat16>()) {
+        if (wanted("angular")) {
+            benchmark<T>(iterations, elems, AngularDistanceFunctionFactory<T>());
+        }
+        if (wanted("prenorm")) {
+            benchmark<T>(iterations, elems, PrenormalizedAngularDistanceFunctionFactory<T>());
+        }
+        if (wanted("mips")) {
+            benchmark<T>(iterations, elems, MipsDistanceFunctionFactory<T>());
+        }
+    }
+}
+
+/**
+ * Dispatches the benchmark to every cell type whose tag occurs as a substring
+ * of data_types: "double", "float32" (float), "bfloat16" (BFloat16) and
+ * "float8" (Int8Float).
+ */
+void
+benchmark(size_t iterations, size_t elems, const std::string & dist_functions, const std::string & data_types) {
+    const auto selected = [&data_types](const char *type_tag) {
+        return data_types.find(type_tag) != npos;
+    };
+    if (selected("double")) {
+        benchmark<double>(iterations, elems, dist_functions);
+    }
+    if (selected("float32")) {
+        benchmark<float>(iterations, elems, dist_functions);
+    }
+    if (selected("bfloat16")) {
+        benchmark<BFloat16>(iterations, elems, dist_functions);
+    }
+    if (selected("float8")) {
+        benchmark<Int8Float>(iterations, elems, dist_functions);
+    }
+}
+
+/**
+ * Entry point of the distance function benchmark.
+ *
+ * Optional positional arguments, in order:
+ *   1. number of iterations per timed round (default 10000000)
+ *   2. vector length                        (default 1024)
+ *   3. distance functions to run            (default "angular euclid prenorm mips")
+ *   4. cell data types to run               (default "double float32 bfloat16 float8")
+ * Arguments 3 and 4 are matched by substring search.
+ */
+int
+main(int argc, char *argv[]) {
+    size_t num_iterations = 10000000;
+    size_t num_elems = 1024;
+    std::string dist_functions = "angular euclid prenorm mips";
+    std::string data_types = "double float32 bfloat16 float8";
+    // Numeric arguments are converted with atol() and not validated.
+    if (argc > 1) { num_iterations = atol(argv[1]); }
+    if (argc > 2) { num_elems = atol(argv[2]); }
+    if (argc > 3) { dist_functions = argv[3]; }
+    if (argc > 4) { data_types = argv[4]; }
+
+    // size_t must be printed with %zu; %ld expects a (signed) long.
+    printf("Benchmarking %zu iterations with vector length %zu with distance functions '%s' for data types '%s'\n",
+           num_iterations, num_elems, dist_functions.c_str(), data_types.c_str());
+    benchmark(num_iterations, num_elems, dist_functions, data_types);
+
+    return 0;
+}
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
index d50677314df..97b88bc787a 100644
--- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -111,7 +111,7 @@ class MyBoundDistanceFunction : public BoundDistanceFunction {
std::unique_ptr<BoundDistanceFunction> _real;
public:
- MyBoundDistanceFunction(std::unique_ptr<BoundDistanceFunction> real)
+ explicit MyBoundDistanceFunction(std::unique_ptr<BoundDistanceFunction> real)
: _real(std::move(real))
{
}
@@ -147,19 +147,19 @@ class MyDistanceFunctionFactory : public DistanceFunctionFactory
{
std::unique_ptr<DistanceFunctionFactory> _real;
public:
- MyDistanceFunctionFactory(std::unique_ptr<DistanceFunctionFactory> real)
+ explicit MyDistanceFunctionFactory(std::unique_ptr<DistanceFunctionFactory> real)
: _real(std::move(real))
{
}
~MyDistanceFunctionFactory() override;
- std::unique_ptr<BoundDistanceFunction> for_query_vector(TypedCells lhs) override {
+ std::unique_ptr<BoundDistanceFunction> for_query_vector(TypedCells lhs) const override {
EXPECT_FALSE(lhs.non_existing_attribute_value());
return std::make_unique<MyBoundDistanceFunction>(_real->for_query_vector(lhs));
}
- std::unique_ptr<BoundDistanceFunction> for_insertion_vector(TypedCells lhs) override {
+ std::unique_ptr<BoundDistanceFunction> for_insertion_vector(TypedCells lhs) const override {
EXPECT_FALSE(lhs.non_existing_attribute_value());
return std::make_unique<MyBoundDistanceFunction>(_real->for_insertion_vector(lhs));
}
@@ -936,6 +936,36 @@ TYPED_TEST(HnswIndexTest, search_during_remove)
this->expect_top_3_by_docid("{0, 0}", {0, 0}, {7});
}
+/*
+ * Exercises the index when a document's vector disappears without the index
+ * being updated. Five documents are added and the resulting level-0 links are
+ * pinned; check_consistency(6) then reports 0. After the vector for docid 5 is
+ * cleared directly in the vector store, check_consistency(6) reports 1.
+ * Finally document 2 is removed and node 5 is expected to keep only its link
+ * to node 4.
+ */
+TYPED_TEST(HnswIndexTest, inconsistent_index)
+{
+ this->init(false);
+ this->vectors.clear();
+ this->vectors.set(1, {1, 3}).set(2, {7, 1}).set(3, {6, 5}).set(4, {8, 3}).set(5, {10, 3});
+ this->add_document(1);
+ this->add_document(2);
+ this->add_document(3);
+ this->add_document(4);
+ this->add_document(5);
+ this->expect_entry_point(1, 0);
+ this->expect_level_0(1, {2, 3});
+ this->expect_level_0(2, {1, 3, 4, 5});
+ this->expect_level_0(3, {1, 2, 4});
+ this->expect_level_0(4, {2, 3, 5});
+ this->expect_level_0(5, {2, 4});
+ EXPECT_EQ(0, this->index->check_consistency(6));
+ // Remove vector for docid 5 but don't update index.
+ this->vectors.clear(5);
+ EXPECT_EQ(1, this->index->check_consistency(6));
+ /*
+ * Removing document 2 causes mutual reconnect for nodes [1, 3, 4, 5]
+ * where nodes 1 and 5 are not previously connected. Distance from
+ * node 1 to node 5 cannot be calculated due to missing vector.
+ */
+ this->remove_document(2);
+ // No reconnect for node without vector
+ this->expect_level_0(5, {4});
+}
+
using HnswMultiIndexTest = HnswIndexTest<HnswIndex<HnswIndexType::MULTI>>;
namespace {
diff --git a/searchlib/src/tests/util/token_extractor/token_extractor_test.cpp b/searchlib/src/tests/util/token_extractor/token_extractor_test.cpp
index e6944e257e9..5eb42bb8ac4 100644
--- a/searchlib/src/tests/util/token_extractor/token_extractor_test.cpp
+++ b/searchlib/src/tests/util/token_extractor/token_extractor_test.cpp
@@ -118,7 +118,7 @@ TEST_F(TokenExtractorTest, empty_string)
TEST_F(TokenExtractorTest, plain_string)
{
- EXPECT_EQ((Words{"Plain string"}), process(StringFieldValue("Plain string")));
+ EXPECT_EQ((Words{}), process(StringFieldValue("Plain string")));
}
TEST_F(TokenExtractorTest, normal_string)
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 5b17b491a20..635851f9f1d 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -11,7 +11,6 @@
#include "in_term_search.h"
#include "multi_term_or_filter_search.h"
#include "predicate_attribute.h"
-#include <vespa/eval/eval/value.h>
#include <vespa/searchcommon/attribute/config.h>
#include <vespa/searchcommon/attribute/hit_estimate_flow_stats_adapter.h>
#include <vespa/searchlib/common/location.h>
@@ -94,6 +93,7 @@ using search::queryeval::StrictHeapOrSearch;
using search::queryeval::WeightedSetTermBlueprint;
using search::queryeval::flow::btree_cost;
using search::queryeval::flow::btree_strict_cost;
+using search::queryeval::flow::estimate_when_unknown;
using search::queryeval::flow::get_num_indirections;
using search::queryeval::flow::lookup_cost;
using search::queryeval::flow::lookup_strict_cost;
@@ -150,10 +150,9 @@ public:
search::queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override {
if (_hit_estimate.is_unknown()) {
// E.g. attributes without fast-search are not able to provide a hit estimate.
- // In this case we just assume matching half of the document corpus.
// In addition, matching is lookup based, and we are not able to skip documents efficiently when being strict.
size_t indirections = get_num_indirections(_attr.getBasicType(), _attr.getCollectionType());
- return {0.5, lookup_cost(indirections), lookup_strict_cost(indirections)};
+ return {estimate_when_unknown(), lookup_cost(indirections), lookup_strict_cost(indirections)};
} else {
double rel_est = abs_to_rel_est(_hit_estimate.est_hits(), docid_limit);
return {rel_est, btree_cost(rel_est), btree_strict_cost(rel_est)};
@@ -443,7 +442,8 @@ private:
class DirectWandBlueprint : public queryeval::ComplexLeafBlueprint
{
private:
- mutable queryeval::SharedWeakAndPriorityQueue _scores;
+ using WeakAndPriorityQueue = queryeval::WeakAndPriorityQueue;
+ std::unique_ptr<WeakAndPriorityQueue> _scores;
const queryeval::wand::score_t _scoreThreshold;
double _thresholdBoostFactor;
const uint32_t _scoresAdjustFrequency;
@@ -452,14 +452,16 @@ private:
const IDocidWithWeightPostingStore &_attr;
vespalib::datastore::EntryRef _dictionary_snapshot;
+
public:
DirectWandBlueprint(const FieldSpec &field, const IDocidWithWeightPostingStore &attr, uint32_t scoresToTrack,
- queryeval::wand::score_t scoreThreshold, double thresholdBoostFactor, size_t size_hint)
+ queryeval::wand::score_t scoreThreshold, double thresholdBoostFactor, size_t size_hint,
+ bool thread_safe)
: ComplexLeafBlueprint(field),
- _scores(scoresToTrack),
+ _scores(WeakAndPriorityQueue::createHeap(scoresToTrack, thread_safe)),
_scoreThreshold(scoreThreshold),
_thresholdBoostFactor(thresholdBoostFactor),
- _scoresAdjustFrequency(queryeval::DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY),
+ _scoresAdjustFrequency(queryeval::wand::DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY),
_weights(),
_terms(),
_attr(attr),
@@ -496,7 +498,7 @@ public:
using OrFlow = search::queryeval::OrFlow;
using MyAdapter = attribute::DirectPostingStoreFlowStatsAdapter;
double child_est = OrFlow::estimate_of(MyAdapter(docid_limit), _terms);
- double my_est = abs_to_rel_est(_scores.getScoresToTrack(), docid_limit);
+ double my_est = abs_to_rel_est(_scores->getScoresToTrack(), docid_limit);
double est = (child_est + my_est) / 2.0;
return {est, OrFlow::cost_of(MyAdapter(docid_limit), _terms, false),
OrFlow::cost_of(MyAdapter(docid_limit), _terms, true) + queryeval::flow::heap_cost(est, _terms.size())};
@@ -508,9 +510,8 @@ public:
return std::make_unique<queryeval::EmptySearch>();
}
return queryeval::ParallelWeakAndSearch::create(*tfmda[0],
- queryeval::ParallelWeakAndSearch::MatchParams(_scores, _scoreThreshold,
- _thresholdBoostFactor, _scoresAdjustFrequency)
- .setDocIdLimit(get_docid_limit()),
+ queryeval::ParallelWeakAndSearch::MatchParams(*_scores, _scoreThreshold, _thresholdBoostFactor,
+ _scoresAdjustFrequency, get_docid_limit()),
_weights, _terms, _attr, strict());
}
std::unique_ptr<SearchIterator> createFilterSearch(FilterConstraint constraint) const override;
@@ -712,15 +713,12 @@ public:
void visit(query::WandTerm &n) override {
if (has_always_btree_iterators_with_docid_and_weight()) {
- auto *bp = new DirectWandBlueprint(_field, *_dwwps,
- n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(),
- n.getNumTerms());
+ auto *bp = new DirectWandBlueprint(_field, *_dwwps, n.getTargetNumHits(), n.getScoreThreshold(),
+ n.getThresholdBoostFactor(), n.getNumTerms(), is_search_multi_threaded());
createDirectMultiTerm(bp, n);
} else {
- auto *bp = new ParallelWeakAndBlueprint(_field,
- n.getTargetNumHits(),
- n.getScoreThreshold(),
- n.getThresholdBoostFactor());
+ auto *bp = new ParallelWeakAndBlueprint(_field, n.getTargetNumHits(), n.getScoreThreshold(),
+ n.getThresholdBoostFactor(), is_search_multi_threaded());
createShallowWeightedSet(bp, n, _field, _attr.isIntegerType());
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
index e2928710a32..ac6fc6f603a 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
@@ -16,15 +16,18 @@ struct AttributeBlueprintParams
double global_filter_upper_limit;
double target_hits_max_adjustment_factor;
vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm;
+ double weakand_range;
AttributeBlueprintParams(double global_filter_lower_limit_in,
double global_filter_upper_limit_in,
double target_hits_max_adjustment_factor_in,
- vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm_in)
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm_in,
+ double weakand_range_in)
: global_filter_lower_limit(global_filter_lower_limit_in),
global_filter_upper_limit(global_filter_upper_limit_in),
target_hits_max_adjustment_factor(target_hits_max_adjustment_factor_in),
- fuzzy_matching_algorithm(fuzzy_matching_algorithm_in)
+ fuzzy_matching_algorithm(fuzzy_matching_algorithm_in),
+ weakand_range(weakand_range_in)
{
}
@@ -32,7 +35,8 @@ struct AttributeBlueprintParams
: AttributeBlueprintParams(fef::indexproperties::matching::GlobalFilterLowerLimit::DEFAULT_VALUE,
fef::indexproperties::matching::GlobalFilterUpperLimit::DEFAULT_VALUE,
fef::indexproperties::matching::TargetHitsMaxAdjustmentFactor::DEFAULT_VALUE,
- fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE)
+ fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE,
+ fef::indexproperties::temporary::WeakAndRange::DEFAULT_VALUE)
{
}
};
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
index 928023c0f94..e2d7f0fe312 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
@@ -132,7 +132,7 @@ AttributeWeightedSetBlueprint::calculate_flow_stats(uint32_t docid_limit) const
using MyAdapter = attribute::HitEstimateFlowStatsAdapter;
size_t num_indirections = queryeval::flow::get_num_indirections(_attr.getBasicType(), _attr.getCollectionType());
double est = OrFlow::estimate_of(MyAdapter(docid_limit, num_indirections), _estimates);
- return {est, OrFlow::cost_of(MyAdapter(docid_limit, num_indirections), _estimates, false),
+ return {est, queryeval::flow::reverse_hash_lookup(),
OrFlow::cost_of(MyAdapter(docid_limit, num_indirections), _estimates, true) + queryeval::flow::heap_cost(est, _estimates.size())};
}
diff --git a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp
index e20d02afe50..6762c0516b2 100644
--- a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp
@@ -3,6 +3,7 @@
#include "bitvector_search_cache.h"
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/vespalib/stllike/hash_map.hpp>
+#include <vespa/vespalib/util/memoryusage.h>
#include <mutex>
namespace search::attribute {
@@ -10,6 +11,7 @@ namespace search::attribute {
BitVectorSearchCache::BitVectorSearchCache()
: _mutex(),
_size(0),
+ _entries_extra_memory_usage(0),
_cache()
{}
@@ -18,9 +20,19 @@ BitVectorSearchCache::~BitVectorSearchCache() = default;
void
BitVectorSearchCache::insert(const vespalib::string &term, std::shared_ptr<Entry> entry)
{
+ size_t entry_extra_memory_usage = 0;
+ if (entry) {
+ entry_extra_memory_usage = sizeof(Entry);
+ if (entry->bitVector) {
+ entry_extra_memory_usage += entry->bitVector->getFileBytes();
+ }
+ }
std::unique_lock guard(_mutex);
- _cache.insert(std::make_pair(term, std::move(entry)));
+ auto ins_res = _cache.insert(std::make_pair(term, std::move(entry)));
_size.store(_cache.size());
+ if (ins_res.second) {
+ _entries_extra_memory_usage += entry_extra_memory_usage;
+ }
}
std::shared_ptr<BitVectorSearchCache::Entry>
@@ -36,12 +48,25 @@ BitVectorSearchCache::find(const vespalib::string &term) const
return {};
}
+vespalib::MemoryUsage
+BitVectorSearchCache::get_memory_usage() const
+{
+ std::shared_lock guard(_mutex); // read-only: nothing is modified here, so a shared lock is sufficient
+ size_t cache_memory_consumption = _cache.getMemoryConsumption();
+ size_t cache_memory_used = _cache.getMemoryUsed();
+ size_t self_memory_used = sizeof(BitVectorSearchCache) - sizeof(_cache); // _cache is added back via its own numbers below
+ size_t allocated = self_memory_used + cache_memory_consumption + _entries_extra_memory_usage;
+ size_t used = self_memory_used + cache_memory_used + _entries_extra_memory_usage;
+ return vespalib::MemoryUsage(allocated, used, 0, 0);
+}
+
void
BitVectorSearchCache::clear()
{
std::unique_lock guard(_mutex);
_cache.clear();
_size.store(0ul, std::memory_order_relaxed);
+ _entries_extra_memory_usage = 0;
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h
index 233f8315aaf..3a38cdcea26 100644
--- a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h
+++ b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h
@@ -10,6 +10,8 @@
#include <atomic>
namespace search { class BitVector; }
+namespace vespalib { class MemoryUsage; }
+
namespace search::attribute {
/**
@@ -37,6 +39,7 @@ private:
mutable std::shared_mutex _mutex;
std::atomic<uint64_t> _size;
+ size_t _entries_extra_memory_usage;
Cache _cache;
public:
@@ -45,6 +48,7 @@ public:
void insert(const vespalib::string &term, std::shared_ptr<Entry> entry);
std::shared_ptr<Entry> find(const vespalib::string &term) const;
size_t size() const { return _size.load(std::memory_order_relaxed); }
+ vespalib::MemoryUsage get_memory_usage() const;
void clear();
};
diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp
index 029dc155785..f6a33165f0c 100644
--- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp
@@ -3,6 +3,7 @@
#include "imported_attribute_vector.h"
#include "imported_attribute_vector_read_guard.h"
#include "imported_search_context.h"
+#include <vespa/vespalib/util/memoryusage.h>
namespace search::attribute {
@@ -58,4 +59,15 @@ void ImportedAttributeVector::clearSearchCache() {
}
}
+vespalib::MemoryUsage
+ImportedAttributeVector::get_memory_usage() const
+{
+ constexpr auto self_memory_usage = sizeof(ImportedAttributeVector);
+ vespalib::MemoryUsage result(self_memory_usage, self_memory_usage, 0, 0);
+ if (_search_cache) {
+ result.merge(_search_cache->get_memory_usage());
+ }
+ return result;
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h
index bd018df5273..5b68957b7f5 100644
--- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h
+++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h
@@ -6,6 +6,8 @@
#include <vespa/searchcommon/attribute/i_document_meta_store_context.h>
#include <vespa/vespalib/stllike/string.h>
+namespace vespalib { class MemoryUsage; }
+
namespace search::attribute {
class BitVectorSearchCache;
@@ -62,6 +64,7 @@ public:
std::unique_ptr<AttributeReadGuard> makeReadGuard(bool stableEnumGuard) const override;
virtual std::unique_ptr<AttributeReadGuard> makeReadGuard(std::shared_ptr<MetaStoreReadGuard> targetMetaStoreReadGuard, bool stableEnumGuard) const;
+ vespalib::MemoryUsage get_memory_usage() const;
protected:
vespalib::string _name;
diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
index 0690801ee61..27c2b6d5e41 100644
--- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
@@ -26,6 +26,8 @@ vespa_add_library(searchlib_features OBJECT
fieldmatchfeature.cpp
fieldtermmatchfeature.cpp
firstphasefeature.cpp
+ first_phase_rank_feature.cpp
+ first_phase_rank_lookup.cpp
flow_completeness_feature.cpp
foreachfeature.cpp
freshnessfeature.cpp
@@ -57,6 +59,7 @@ vespa_add_library(searchlib_features OBJECT
rankingexpressionfeature.cpp
raw_score_feature.cpp
reverseproximityfeature.cpp
+ second_phase_feature.cpp
setup.cpp
subqueries_feature.cpp
tensor_attribute_executor.cpp
diff --git a/searchlib/src/vespa/searchlib/features/bm25_feature.cpp b/searchlib/src/vespa/searchlib/features/bm25_feature.cpp
index 505b8166ee7..03d2e94b5d0 100644
--- a/searchlib/src/vespa/searchlib/features/bm25_feature.cpp
+++ b/searchlib/src/vespa/searchlib/features/bm25_feature.cpp
@@ -68,7 +68,7 @@ Bm25Executor::Bm25Executor(const fef::FieldInfo& field,
}
double
-Bm25Executor::calculate_inverse_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count)
+Bm25Executor::calculate_inverse_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count) noexcept
{
return std::log(1 + (static_cast<double>(total_doc_count - matching_doc_count + 0.5) /
static_cast<double>(matching_doc_count + 0.5)));
diff --git a/searchlib/src/vespa/searchlib/features/bm25_feature.h b/searchlib/src/vespa/searchlib/features/bm25_feature.h
index a1b45375285..637d656990b 100644
--- a/searchlib/src/vespa/searchlib/features/bm25_feature.h
+++ b/searchlib/src/vespa/searchlib/features/bm25_feature.h
@@ -39,7 +39,7 @@ public:
double k1_param,
double b_param);
- double static calculate_inverse_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count);
+ double static calculate_inverse_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count) noexcept;
void handle_bind_match_data(const fef::MatchData& match_data) override;
void execute(uint32_t docId) override;
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp
new file mode 100644
index 00000000000..5c8a9a391ff
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp
@@ -0,0 +1,71 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "first_phase_rank_feature.h"
+#include "valuefeature.h"
+#include <vespa/vespalib/util/stash.h>
+
+namespace search::features {
+
+FirstPhaseRankExecutor::FirstPhaseRankExecutor(const FirstPhaseRankLookup& lookup)
+ : FeatureExecutor(),
+ _lookup(lookup)
+{
+}
+FirstPhaseRankExecutor::~FirstPhaseRankExecutor() = default;
+
+void
+FirstPhaseRankExecutor::execute(uint32_t docid)
+{
+ outputs().set_number(0, _lookup.lookup(docid));
+}
+
+FirstPhaseRankBlueprint::FirstPhaseRankBlueprint()
+ : Blueprint("firstPhaseRank")
+{
+}
+
+FirstPhaseRankBlueprint::~FirstPhaseRankBlueprint() = default;
+
+void
+FirstPhaseRankBlueprint::visitDumpFeatures(const fef::IIndexEnvironment&, fef::IDumpFeatureVisitor&) const
+{
+}
+
+std::unique_ptr<fef::Blueprint>
+FirstPhaseRankBlueprint::createInstance() const
+{
+ return std::make_unique<FirstPhaseRankBlueprint>();
+}
+
+fef::ParameterDescriptions
+FirstPhaseRankBlueprint::getDescriptions() const
+{
+ return fef::ParameterDescriptions().desc();
+}
+
+bool
+FirstPhaseRankBlueprint::setup(const fef::IIndexEnvironment&, const fef::ParameterList&)
+{
+ describeOutput("score", "The first phase rank.");
+ return true;
+}
+
+void
+FirstPhaseRankBlueprint::prepareSharedState(const fef::IQueryEnvironment&, fef::IObjectStore& store) const
+{
+ FirstPhaseRankLookup::make_shared_state(store);
+}
+
+fef::FeatureExecutor&
+FirstPhaseRankBlueprint::createExecutor(const fef::IQueryEnvironment& env, vespalib::Stash& stash) const
+{
+ const auto* lookup = FirstPhaseRankLookup::get_shared_state(env.getObjectStore());
+ if (lookup != nullptr) {
+ return stash.create<FirstPhaseRankExecutor>(*lookup);
+ } else {
+ std::vector<feature_t> values{std::numeric_limits<feature_t>::max()};
+ return stash.create<ValueExecutor>(values);
+ }
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h
new file mode 100644
index 00000000000..f90ea26f859
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h
@@ -0,0 +1,40 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "first_phase_rank_lookup.h"
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search::features {
+
+class FirstPhaseRankLookup;
+
+/*
+ * Executor for first phase rank feature that outputs the first phase rank
+ * for the given docid on this search node (1.0, 2.0, 3.0, etc.).
+ */
+class FirstPhaseRankExecutor : public fef::FeatureExecutor {
+ const FirstPhaseRankLookup& _lookup; // held by reference, not owned by the executor
+public:
+ explicit FirstPhaseRankExecutor(const FirstPhaseRankLookup& lookup); // explicit: no implicit lookup -> executor conversion
+ ~FirstPhaseRankExecutor() override;
+ void execute(uint32_t docid) override; // sets output 0 to the lookup result for docid
+};
+
+/*
+ * Blueprint for first phase rank feature.
+ */
+class FirstPhaseRankBlueprint : public fef::Blueprint {
+public:
+ FirstPhaseRankBlueprint();
+ ~FirstPhaseRankBlueprint() override;
+ void visitDumpFeatures(const fef::IIndexEnvironment& env, fef::IDumpFeatureVisitor& visitor) const override;
+ std::unique_ptr<fef::Blueprint> createInstance() const override;
+ fef::ParameterDescriptions getDescriptions() const override;
+ bool setup(const fef::IIndexEnvironment& env, const fef::ParameterList& params) override;
+ void prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const override;
+ fef::FeatureExecutor& createExecutor(const fef::IQueryEnvironment& env, vespalib::Stash& stash) const override;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp
new file mode 100644
index 00000000000..2dfaabb8326
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp
@@ -0,0 +1,67 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "first_phase_rank_lookup.h"
+#include <vespa/searchlib/fef/objectstore.h>
+#include <cassert>
+#include <limits>
+
+using search::fef::AnyWrapper;
+
+namespace search::features {
+
+namespace {
+
+const vespalib::string key = "firstPhaseRankLookup";
+
+}
+
+FirstPhaseRankLookup::FirstPhaseRankLookup()
+ : _map()
+{
+}
+
+FirstPhaseRankLookup::FirstPhaseRankLookup(FirstPhaseRankLookup&&) = default;
+
+FirstPhaseRankLookup::~FirstPhaseRankLookup() = default;
+
+feature_t
+FirstPhaseRankLookup::lookup(uint32_t docid) const noexcept
+{
+ auto itr = _map.find(docid);
+ if (itr != _map.end()) [[likely]] {
+ return itr->second;
+ } else {
+ return std::numeric_limits<feature_t>::max();
+ }
+}
+
+void
+FirstPhaseRankLookup::add(uint32_t docid, uint32_t rank)
+{
+ [[maybe_unused]] auto insres = _map.insert(std::make_pair(docid, rank)); // only read by the assert below
+ assert(insres.second); // a docid must not be added more than once
+}
+
+void
+FirstPhaseRankLookup::make_shared_state(fef::IObjectStore& store)
+{
+ if (store.get(key) == nullptr) {
+ store.add(key, std::make_unique<AnyWrapper<FirstPhaseRankLookup>>(FirstPhaseRankLookup()));
+ }
+}
+
+FirstPhaseRankLookup*
+FirstPhaseRankLookup::get_mutable_shared_state(fef::IObjectStore& store)
+{
+ auto* wrapper = dynamic_cast<AnyWrapper<FirstPhaseRankLookup>*>(store.get_mutable(key));
+ return (wrapper == nullptr) ? nullptr : &wrapper->getValue();
+}
+
+const FirstPhaseRankLookup*
+FirstPhaseRankLookup::get_shared_state(const fef::IObjectStore& store)
+{
+ const auto* wrapper = dynamic_cast<const AnyWrapper<FirstPhaseRankLookup>*>(store.get(key));
+ return (wrapper == nullptr) ? nullptr : &wrapper->getValue();
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h
new file mode 100644
index 00000000000..83d89ed2dd1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h
@@ -0,0 +1,32 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/common/feature.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+
+namespace search::fef { class IObjectStore; }
+
+namespace search::features {
+
+/*
+ * This class contains a mapping from docids used by second phase to
+ * first phase rank.
+ */
+class FirstPhaseRankLookup {
+ vespalib::hash_map<uint32_t, uint32_t> _map;
+public:
+ FirstPhaseRankLookup();
+ FirstPhaseRankLookup(const FirstPhaseRankLookup&) = delete;
+ FirstPhaseRankLookup(FirstPhaseRankLookup&&);
+ ~FirstPhaseRankLookup();
+ FirstPhaseRankLookup& operator=(const FirstPhaseRankLookup&) = delete;
+ FirstPhaseRankLookup& operator=(FirstPhaseRankLookup&&) = delete;
+ feature_t lookup(uint32_t docid) const noexcept;
+ void add(uint32_t docid, uint32_t rank);
+ static void make_shared_state(fef::IObjectStore& store);
+ static FirstPhaseRankLookup* get_mutable_shared_state(fef::IObjectStore& store);
+ static const FirstPhaseRankLookup* get_shared_state(const fef::IObjectStore& store);
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/second_phase_feature.cpp b/searchlib/src/vespa/searchlib/features/second_phase_feature.cpp
new file mode 100644
index 00000000000..82ce36be859
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/second_phase_feature.cpp
@@ -0,0 +1,57 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "second_phase_feature.h"
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/vespalib/util/stash.h>
+
+using namespace search::fef;
+
+namespace search::features {
+
+void
+SecondPhaseExecutor::execute(uint32_t)
+{
+ outputs().set_number(0, inputs().get_number(0));
+}
+
+
+SecondPhaseBlueprint::SecondPhaseBlueprint()
+ : Blueprint("secondPhase")
+{
+}
+
+void
+SecondPhaseBlueprint::visitDumpFeatures(const IIndexEnvironment&,
+ IDumpFeatureVisitor&) const
+{
+}
+
+Blueprint::UP
+SecondPhaseBlueprint::createInstance() const
+{
+ return std::make_unique<SecondPhaseBlueprint>();
+}
+
+bool
+SecondPhaseBlueprint::setup(const IIndexEnvironment& env,
+ const ParameterList&)
+{
+ if (auto maybe_input = defineInput(indexproperties::rank::SecondPhase::lookup(env.getProperties()),
+ AcceptInput::ANY))
+ {
+ describeOutput("score", "The ranking score for second phase.", maybe_input.value());
+ return true;
+ } else {
+ return false;
+ }
+}
+
+FeatureExecutor &
+SecondPhaseBlueprint::createExecutor(const IQueryEnvironment&, vespalib::Stash& stash) const
+{
+ return stash.create<SecondPhaseExecutor>();
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/second_phase_feature.h b/searchlib/src/vespa/searchlib/features/second_phase_feature.h
new file mode 100644
index 00000000000..61805186453
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/second_phase_feature.h
@@ -0,0 +1,35 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+
+namespace search::features {
+
+/**
+ * Implements the executor outputting the second phase ranking.
+ */
+class SecondPhaseExecutor : public fef::FeatureExecutor {
+public:
+ bool isPure() override { return true; }
+ void execute(uint32_t docId) override;
+};
+
+/**
+ * Implements the blueprint for the second phase feature.
+ */
+class SecondPhaseBlueprint : public fef::Blueprint {
+public:
+ SecondPhaseBlueprint();
+ void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override;
+ fef::Blueprint::UP createInstance() const override;
+
+ fef::ParameterDescriptions getDescriptions() const override {
+ return fef::ParameterDescriptions().desc();
+ }
+ bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override;
+
+ fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp
index 71e083e2326..d65459817f0 100644
--- a/searchlib/src/vespa/searchlib/features/setup.cpp
+++ b/searchlib/src/vespa/searchlib/features/setup.cpp
@@ -22,6 +22,7 @@
#include "fieldmatchfeature.h"
#include "fieldtermmatchfeature.h"
#include "firstphasefeature.h"
+#include "first_phase_rank_feature.h"
#include "flow_completeness_feature.h"
#include "foreachfeature.h"
#include "freshnessfeature.h"
@@ -48,6 +49,7 @@
#include "rankingexpressionfeature.h"
#include "raw_score_feature.h"
#include "reverseproximityfeature.h"
+#include "second_phase_feature.h"
#include "subqueries_feature.h"
#include "tensor_from_labels_feature.h"
#include "tensor_from_weighted_set_feature.h"
@@ -90,6 +92,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry)
registry.addPrototype(std::make_shared<FieldMatchBlueprint>());
registry.addPrototype(std::make_shared<FieldTermMatchBlueprint>());
registry.addPrototype(std::make_shared<FirstPhaseBlueprint>());
+ registry.addPrototype(std::make_shared<FirstPhaseRankBlueprint>());
registry.addPrototype(std::make_shared<FlowCompletenessBlueprint>());
registry.addPrototype(std::make_shared<ForeachBlueprint>());
registry.addPrototype(std::make_shared<FreshnessBlueprint>());
@@ -109,6 +112,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry)
registry.addPrototype(std::make_shared<RandomNormalBlueprint>());
registry.addPrototype(std::make_shared<RandomNormalStableBlueprint>());
registry.addPrototype(std::make_shared<RawScoreBlueprint>());
+ registry.addPrototype(std::make_shared<SecondPhaseBlueprint>());
registry.addPrototype(std::make_shared<SubqueriesBlueprint>());
registry.addPrototype(std::make_shared<TensorFromLabelsBlueprint>());
registry.addPrototype(std::make_shared<TensorFromWeightedSetBlueprint>());
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
index 4637ad5a4e8..1f88c34bef3 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
@@ -179,6 +179,21 @@ namespace onsummary {
namespace temporary {
+const vespalib::string WeakAndRange::NAME("vespa.weakand.range");
+const double WeakAndRange::DEFAULT_VALUE(0.0);
+
+double
+WeakAndRange::lookup(const Properties &props)
+{
+ return lookup(props, DEFAULT_VALUE);
+}
+
+double
+WeakAndRange::lookup(const Properties &props, double defaultValue)
+{
+ return lookupDouble(props, NAME, defaultValue);
+}
+
}
namespace mutate {
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h
index db8de8209a9..d047eb13347 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.h
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h
@@ -178,6 +178,18 @@ namespace mutate {
// Add temporary flags used for safe rollout of new features here
namespace temporary {
+/**
+ * A number in the range [0,1] that sets the effective idf range for WeakAndOperator.
+ * 1.0 gives the complete range, as used by default by bm25.
+ * scaled_idf = (1.0 - range) * max_idf + (range * idf)
+ * 0.0, which is the default, gives the legacy behavior.
+ **/
+struct WeakAndRange {
+ static const vespalib::string NAME;
+ static const double DEFAULT_VALUE;
+ static double lookup(const Properties &props);
+ static double lookup(const Properties &props, double defaultValue);
+};
}
namespace mutate::on_match {
diff --git a/searchlib/src/vespa/searchlib/fef/objectstore.cpp b/searchlib/src/vespa/searchlib/fef/objectstore.cpp
index 3e5baf49116..a90702a88a6 100644
--- a/searchlib/src/vespa/searchlib/fef/objectstore.cpp
+++ b/searchlib/src/vespa/searchlib/fef/objectstore.cpp
@@ -35,4 +35,11 @@ ObjectStore::get(const vespalib::string & key) const
return (found != _objectMap.end()) ? found->second : NULL;
}
+Anything *
+ObjectStore::get_mutable(const vespalib::string& key)
+{
+ auto found = _objectMap.find(key);
+ return (found != _objectMap.end()) ? found->second : nullptr;
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/fef/objectstore.h b/searchlib/src/vespa/searchlib/fef/objectstore.h
index 9d1671e521c..d2d768ee338 100644
--- a/searchlib/src/vespa/searchlib/fef/objectstore.h
+++ b/searchlib/src/vespa/searchlib/fef/objectstore.h
@@ -24,6 +24,7 @@ class AnyWrapper : public Anything
public:
explicit AnyWrapper(T value) : _value(std::move(value)) { }
const T & getValue() const { return _value; }
+ T& getValue() { return _value; }
static const T & getValue(const Anything & any) { return static_cast<const AnyWrapper &>(any).getValue(); }
private:
T _value;
@@ -38,6 +39,7 @@ public:
virtual ~IObjectStore() = default;
virtual void add(const vespalib::string & key, Anything::UP value) = 0;
virtual const Anything * get(const vespalib::string & key) const = 0;
+ virtual Anything* get_mutable(const vespalib::string& key) = 0;
};
/**
@@ -50,6 +52,7 @@ public:
~ObjectStore() override;
void add(const vespalib::string & key, Anything::UP value) override;
const Anything * get(const vespalib::string & key) const override;
+ Anything* get_mutable(const vespalib::string & key) override;
private:
using ObjectMap = vespalib::hash_map<vespalib::string, Anything *>;
ObjectMap _objectMap;
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
index aadc5300ede..ba5abb35141 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
@@ -71,6 +71,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i
_global_filter_lower_limit(0.0),
_global_filter_upper_limit(1.0),
_target_hits_max_adjustment_factor(20.0),
+ _weakand_range(0.0),
_fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm::DfaTable),
_mutateOnMatch(),
_mutateOnFirstPhase(),
@@ -126,6 +127,7 @@ RankSetup::configure()
set_global_filter_upper_limit(matching::GlobalFilterUpperLimit::lookup(_indexEnv.getProperties()));
set_target_hits_max_adjustment_factor(matching::TargetHitsMaxAdjustmentFactor::lookup(_indexEnv.getProperties()));
set_fuzzy_matching_algorithm(matching::FuzzyAlgorithm::lookup(_indexEnv.getProperties()));
+ set_weakand_range(temporary::WeakAndRange::lookup(_indexEnv.getProperties()));
_mutateOnMatch._attribute = mutate::on_match::Attribute::lookup(_indexEnv.getProperties());
_mutateOnMatch._operation = mutate::on_match::Operation::lookup(_indexEnv.getProperties());
_mutateOnFirstPhase._attribute = mutate::on_first_phase::Attribute::lookup(_indexEnv.getProperties());
@@ -193,7 +195,7 @@ RankSetup::compile()
_firstPhaseRankFeature = parser.featureName();
_first_phase_resolver->addSeed(_firstPhaseRankFeature);
} else {
- vespalib::string e = fmt("invalid feature name for initial rank: '%s'", _firstPhaseRankFeature.c_str());
+ vespalib::string e = fmt("invalid feature name for first phase rank: '%s'", _firstPhaseRankFeature.c_str());
_warnings.emplace_back(e);
_compileError = true;
}
@@ -204,7 +206,7 @@ RankSetup::compile()
_secondPhaseRankFeature = parser.featureName();
_second_phase_resolver->addSeed(_secondPhaseRankFeature);
} else {
- vespalib::string e = fmt("invalid feature name for final rank: '%s'", _secondPhaseRankFeature.c_str());
+ vespalib::string e = fmt("invalid feature name for second phase rank: '%s'", _secondPhaseRankFeature.c_str());
_warnings.emplace_back(e);
_compileError = true;
}
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h
index d8b977a0331..f20ecd4b42b 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.h
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h
@@ -80,6 +80,7 @@ private:
double _global_filter_lower_limit;
double _global_filter_upper_limit;
double _target_hits_max_adjustment_factor;
+ double _weakand_range;
vespalib::FuzzyMatchingAlgorithm _fuzzy_matching_algorithm;
MutateOperation _mutateOnMatch;
MutateOperation _mutateOnFirstPhase;
@@ -402,6 +403,8 @@ public:
double get_target_hits_max_adjustment_factor() const { return _target_hits_max_adjustment_factor; }
void set_fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm v) { _fuzzy_matching_algorithm = v; }
vespalib::FuzzyMatchingAlgorithm get_fuzzy_matching_algorithm() const { return _fuzzy_matching_algorithm; }
+ void set_weakand_range(double v) { _weakand_range = v; }
+ double get_weakand_range() const { return _weakand_range; }
/**
* This method may be used to indicate that certain features
diff --git a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp
index 3645496e4fb..41551ac1062 100644
--- a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp
+++ b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp
@@ -10,11 +10,11 @@ LOG_SETUP(".fef.matchdatabuilder");
namespace search::fef::test {
-MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data) :
- _queryEnv(queryEnv),
- _data(data),
- _index(),
- _match()
+MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data)
+ : _queryEnv(queryEnv),
+ _data(data),
+ _index(),
+ _match()
{
// reset all match data objects.
for (TermFieldHandle handle = 0; handle < _data.getNumTermFields(); ++handle) {
@@ -22,7 +22,7 @@ MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data)
}
}
-MatchDataBuilder::~MatchDataBuilder() {}
+MatchDataBuilder::~MatchDataBuilder() = default;
TermFieldMatchData *
MatchDataBuilder::getTermFieldMatchData(uint32_t termId, uint32_t fieldId)
@@ -59,7 +59,7 @@ MatchDataBuilder::addElement(const vespalib::string &fieldName, int32_t weight,
LOG(error, "Field '%s' does not exist.", fieldName.c_str());
return false;
}
- _index[info->id()].elements.push_back(MyElement(weight, length));
+ _index[info->id()].elements.emplace_back(weight, length);
return true;
}
@@ -77,8 +77,7 @@ MatchDataBuilder::addOccurence(const vespalib::string &fieldName, uint32_t termI
}
const ITermFieldData *tfd = _queryEnv.getTerm(termId)->lookupField(info->id());
if (tfd == nullptr) {
- LOG(error, "Field '%s' is not searched by the given term.",
- fieldName.c_str());
+ LOG(error, "Field '%s' is not searched by the given term.", fieldName.c_str());
return false;
}
_match[termId][info->id()].insert(Position(pos, element));
@@ -99,14 +98,13 @@ MatchDataBuilder::setWeight(const vespalib::string &fieldName, uint32_t termId,
}
const ITermFieldData *tfd = _queryEnv.getTerm(termId)->lookupField(info->id());
if (tfd == nullptr) {
- LOG(error, "Field '%s' is not searched by the given term.",
- fieldName.c_str());
+ LOG(error, "Field '%s' is not searched by the given term.", fieldName.c_str());
return false;
}
uint32_t eid = _index[info->id()].elements.size();
_match[termId][info->id()].clear();
_match[termId][info->id()].insert(Position(0, eid));
- _index[info->id()].elements.push_back(MyElement(weight, 1));
+ _index[info->id()].elements.emplace_back(weight, 1);
return true;
}
@@ -142,19 +140,13 @@ MatchDataBuilder::apply(uint32_t docId)
// For each occurence of that term, in that field, do
for (const auto& occ : field_elem.second) {
// Append a term match position to the term match data.
- match->appendPosition(TermFieldMatchDataPosition(
- occ.eid,
- occ.pos,
- field.getWeight(occ.eid),
- field.getLength(occ.eid)));
- LOG(debug,
- "Added occurence of term '%u' in field '%s'"
- " at position '%u'.",
+ match->appendPosition(TermFieldMatchDataPosition(occ.eid, occ.pos,
+ field.getWeight(occ.eid),
+ field.getLength(occ.eid)));
+ LOG(debug, "Added occurence of term '%u' in field '%s' at position '%u'.",
termId, name.c_str(), occ.pos);
if (occ.pos >= field.getLength(occ.eid)) {
- LOG(warning,
- "Added occurence of term '%u' in field '%s'"
- " at position '%u' >= fieldLen '%u'.",
+ LOG(warning, "Added occurence of term '%u' in field '%s' at position '%u' >= fieldLen '%u'.",
termId, name.c_str(), occ.pos, field.getLength(occ.eid));
}
}
diff --git a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h
index 0e5025efd37..753e1596520 100644
--- a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h
+++ b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h
@@ -13,7 +13,7 @@ public:
struct MyElement {
int32_t weight;
uint32_t length;
- MyElement(int32_t w, uint32_t l) : weight(w), length(l) {}
+ MyElement(int32_t w, uint32_t l) noexcept : weight(w), length(l) {}
};
struct MyField {
uint32_t fieldLength;
@@ -21,7 +21,7 @@ public:
MyField() : fieldLength(0), elements() {}
MyElement &getElement(uint32_t eid) {
while (elements.size() <= eid) {
- elements.push_back(MyElement(0, 0));
+ elements.emplace_back(0, 0);
}
return elements[eid];
}
@@ -68,6 +68,8 @@ public:
* @param data The match data to build in.
*/
MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data);
+ MatchDataBuilder(const MatchDataBuilder &) = delete;
+ MatchDataBuilder & operator=(const MatchDataBuilder &) = delete;
~MatchDataBuilder();
/**
@@ -133,10 +135,6 @@ public:
bool apply(uint32_t docId);
private:
- MatchDataBuilder(const MatchDataBuilder &); // hide
- MatchDataBuilder & operator=(const MatchDataBuilder &); // hide
-
-private:
QueryEnvironment &_queryEnv;
MatchData &_data;
IndexData _index;
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
index 2bc94073c92..49a0f0621d2 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
@@ -213,6 +213,17 @@ FieldIndex<interleaved_features>::getMemoryUsage() const
}
template <bool interleaved_features>
+void
+FieldIndex<interleaved_features>::commit()
+{
+ _remover.flush();
+ freeze();
+ assign_generation();
+ incGeneration();
+ reclaim_memory();
+}
+
+template <bool interleaved_features>
queryeval::SearchIterator::UP
FieldIndex<interleaved_features>::make_search_iterator(const vespalib::string& term,
uint32_t field_id,
@@ -248,7 +259,7 @@ public:
: SimpleLeafBlueprint(field),
_guard(),
_field(field),
- _posting_itr(posting_itr),
+ _posting_itr(std::move(posting_itr)),
_feature_store(feature_store),
_field_id(field_id),
_query_term(query_term),
@@ -302,7 +313,7 @@ FieldIndex<interleaved_features>::make_term_blueprint(const vespalib::string& te
auto posting_itr = findFrozen(term);
bool use_bit_vector = field.isFilter();
return std::make_unique<MemoryTermBlueprint<interleaved_features>>
- (std::move(guard), posting_itr, getFeatureStore(), field, field_id, term, use_bit_vector);
+ (std::move(guard), std::move(posting_itr), getFeatureStore(), field, field_id, term, use_bit_vector);
}
template class FieldIndex<false>;
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
index 0b245300a7b..18e60cf2194 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
@@ -87,13 +87,7 @@ public:
vespalib::MemoryUsage getMemoryUsage() const override;
PostingListStore &getPostingListStore() { return _postingListStore; }
- void commit() override {
- _remover.flush();
- freeze();
- assign_generation();
- incGeneration();
- reclaim_memory();
- }
+ void commit() override;
/**
* Should only be used by unit tests.
diff --git a/searchlib/src/vespa/searchlib/query/query_term_simple.cpp b/searchlib/src/vespa/searchlib/query/query_term_simple.cpp
index 09fc443cf0e..9e46df57416 100644
--- a/searchlib/src/vespa/searchlib/query/query_term_simple.cpp
+++ b/searchlib/src/vespa/searchlib/query/query_term_simple.cpp
@@ -49,7 +49,7 @@ template <typename T>
struct FloatDecoder {
static T fromstr(const char * q, const char * qend, const char ** end) noexcept {
T v(0);
-#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 180000
+#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 190000
vespalib::string tmp(q, qend - q);
char* tmp_end = nullptr;
const char *tmp_cstring = tmp.c_str();
diff --git a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
index 51fe2d12637..126ecd56dc2 100644
--- a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
@@ -21,6 +21,7 @@ vespa_add_library(searchlib_queryeval OBJECT
fake_searchable.cpp
field_spec.cpp
filter_wrapper.cpp
+ first_phase_rescorer.cpp
flow.cpp
full_search.cpp
get_weight_from_node.cpp
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
index 7334db4b716..c02990c5921 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
@@ -1,14 +1,15 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "blueprint.h"
-#include "leaf_blueprints.h"
+#include "andnotsearch.h"
+#include "andsearch.h"
#include "emptysearch.h"
-#include "full_search.h"
#include "field_spec.hpp"
-#include "andsearch.h"
-#include "orsearch.h"
-#include "andnotsearch.h"
+#include "flow_tuning.h"
+#include "full_search.h"
+#include "leaf_blueprints.h"
#include "matching_elements_search.h"
+#include "orsearch.h"
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/vespalib/objects/visit.hpp>
#include <vespa/vespalib/objects/objectdumper.h>
@@ -168,31 +169,6 @@ Blueprint::null_plan(InFlow in_flow, uint32_t docid_limit)
sort(in_flow);
}
-double
-Blueprint::estimate_actual_cost(InFlow in_flow) const noexcept
-{
- double res = estimate_strict_cost_diff(in_flow);
- if (in_flow.strict()) {
- res += strict_cost();
- } else {
- res += in_flow.rate() * cost();
- }
- return res;
-}
-
-double
-Blueprint::estimate_strict_cost_diff(InFlow &in_flow) const noexcept
-{
- if (in_flow.strict()) {
- REQUIRE(strict());
- } else if (strict()) {
- double rate = in_flow.rate();
- in_flow.force_strict();
- return flow::strict_cost_diff(estimate(), rate);
- }
- return 0.0;
-}
-
Blueprint::UP
Blueprint::optimize(Blueprint::UP bp) {
Blueprint *root = bp.release();
@@ -238,7 +214,7 @@ Blueprint::default_flow_stats(uint32_t docid_limit, uint32_t abs_est, size_t chi
FlowStats
Blueprint::default_flow_stats(size_t child_cnt)
{
- return {0.5, 1.0 + child_cnt, 1.0 + child_cnt};
+ return {flow::estimate_when_unknown(), 1.0 + child_cnt, 1.0 + child_cnt};
}
std::unique_ptr<MatchingElementsSearch>
@@ -278,8 +254,8 @@ create_op_filter(const Blueprint::Children &children, bool strict, Blueprint::Fi
MultiSearch::Children list;
std::unique_ptr<SearchIterator> spare;
list.reserve(children.size());
- for (size_t i = 0; i < children.size(); ++i) {
- auto filter = children[i]->createFilterSearch(constraint);
+ for (const auto & child : children) {
+ auto filter = child->createFilterSearch(constraint);
auto matches_any = filter->matches_any();
if (should_short_circuit<Op>(matches_any)) {
return filter;
@@ -623,24 +599,6 @@ IntermediateBlueprint::should_do_termwise_eval(const UnpackInfo &unpack, double
return (count_termwise_nodes(unpack) > 1);
}
-double
-IntermediateBlueprint::estimate_self_cost(InFlow) const noexcept
-{
- return 0.0;
-}
-
-double
-IntermediateBlueprint::estimate_actual_cost(InFlow in_flow) const noexcept
-{
- double res = estimate_strict_cost_diff(in_flow);
- auto cost_of = [](const auto &child, InFlow child_flow)noexcept{
- return child->estimate_actual_cost(child_flow);
- };
- res += flow::actual_cost_of(flow::DefaultAdapter(), _children, my_flow(in_flow), cost_of);
- res += estimate_self_cost(in_flow);
- return res;
-}
-
void
IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass)
{
@@ -665,9 +623,9 @@ IntermediateBlueprint::sort(InFlow in_flow)
sort(_children, in_flow);
}
auto flow = my_flow(in_flow);
- for (size_t i = 0; i < _children.size(); ++i) {
- _children[i]->sort(InFlow(flow.strict(), flow.flow()));
- flow.add(_children[i]->estimate());
+ for (const auto & child : _children) {
+ child->sort(InFlow(flow.strict(), flow.flow()));
+ flow.add(child->estimate());
}
}
@@ -686,8 +644,8 @@ IntermediateBlueprint::createSearch(fef::MatchData &md) const
{
MultiSearch::Children subSearches;
subSearches.reserve(_children.size());
- for (size_t i = 0; i < _children.size(); ++i) {
- subSearches.push_back(_children[i]->createSearch(md));
+ for (const auto & child : _children) {
+ subSearches.push_back(child->createSearch(md));
}
return createIntermediateSearch(std::move(subSearches), md);
}
@@ -735,18 +693,17 @@ void
IntermediateBlueprint::fetchPostings(const ExecuteInfo &execInfo)
{
auto flow = my_flow(InFlow(strict(), execInfo.hit_rate()));
- for (size_t i = 0; i < _children.size(); ++i) {
+ for (const auto & child : _children) {
double nextHitRate = flow.flow();
- Blueprint & child = *_children[i];
- child.fetchPostings(ExecuteInfo::create(nextHitRate, execInfo));
- flow.add(child.estimate());
+ child->fetchPostings(ExecuteInfo::create(nextHitRate, execInfo));
+ flow.add(child->estimate());
}
}
void
IntermediateBlueprint::freeze()
{
- for (Blueprint::UP &child: _children) {
+ for (auto &child: _children) {
child->freeze();
}
freeze_self();
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
index a493c725407..a443f34f856 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
@@ -313,20 +313,6 @@ public:
// optimal ordering. Used for testing.
void null_plan(InFlow in_flow, uint32_t docid_limit);
- // Estimate the actual cost of evaluating the (sub-)query
- // represented by this blueprint with the given in-flow. This
- // function should be called after query planning has been
- // performed. This function could be useful to predict very
- // expensive queries, but the initial use-case is to understand
- // query cost better in micro-benchmarks to improve low-level cost
- // tuning.
- virtual double estimate_actual_cost(InFlow in_flow) const noexcept;
- // Estimate the change in cost caused by having a strict iterator
- // with a non-strict in-flow. Note that this function might force
- // the in_flow to be strict in order to align it with the
- // strictness of this blueprint.
- double estimate_strict_cost_diff(InFlow &in_flow) const noexcept;
-
static Blueprint::UP optimize(Blueprint::UP bp);
virtual void sort(InFlow in_flow) = 0;
static Blueprint::UP optimize_and_sort(Blueprint::UP bp, InFlow in_flow, const Options &opts) {
@@ -496,9 +482,6 @@ public:
void setDocIdLimit(uint32_t limit) noexcept final;
void each_node_post_order(const std::function<void(Blueprint&)> &f) override;
- // additional cost not attributed to the children flow (heap merge/unpack/etc)
- virtual double estimate_self_cost(InFlow in_flow) const noexcept;
- double estimate_actual_cost(InFlow in_flow) const noexcept override;
void optimize(Blueprint* &self, OptimizePass pass) final;
void sort(InFlow in_flow) override;
void set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) override;
diff --git a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp
index 27ff0d235a3..c4aea7deae8 100644
--- a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp
@@ -22,6 +22,11 @@ CreateBlueprintVisitorHelper::CreateBlueprintVisitorHelper(Searchable &searchabl
CreateBlueprintVisitorHelper::~CreateBlueprintVisitorHelper() = default;
+// True when the thread bundle of the request context reports a size above one.
+bool
+CreateBlueprintVisitorHelper::is_search_multi_threaded() const noexcept {
+    const auto bundle_size = getRequestContext().thread_bundle().size();
+    return (bundle_size > 1);
+}
+
attribute::SearchContextParams
CreateBlueprintVisitorHelper::createContextParams() const {
return attribute::SearchContextParams().metaStoreReadGuard(_requestContext.getMetaStoreReadGuard());
@@ -104,7 +109,8 @@ void
CreateBlueprintVisitorHelper::visitWandTerm(query::WandTerm &n)
{
createWeightedSet(std::make_unique<ParallelWeakAndBlueprint>(_field, n.getTargetNumHits(),
- n.getScoreThreshold(), n.getThresholdBoostFactor()),
+ n.getScoreThreshold(), n.getThresholdBoostFactor(),
+ is_search_multi_threaded()),
n);
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h
index 98f62fa3249..ec163260dc3 100644
--- a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h
+++ b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h
@@ -29,6 +29,7 @@ protected:
const IRequestContext & getRequestContext() const { return _requestContext; }
attribute::SearchContextParams createContextParams() const;
attribute::SearchContextParams createContextParams(bool isFilter) const;
+ bool is_search_multi_threaded() const noexcept;
public:
CreateBlueprintVisitorHelper(Searchable &searchable, const FieldSpec &field, const IRequestContext & requestContext);
~CreateBlueprintVisitorHelper() override;
diff --git a/searchlib/src/vespa/searchlib/queryeval/first_phase_rescorer.cpp b/searchlib/src/vespa/searchlib/queryeval/first_phase_rescorer.cpp
new file mode 100644
index 00000000000..a7b1e3a7c92
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/first_phase_rescorer.cpp
@@ -0,0 +1,38 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "first_phase_rescorer.h"
+
+namespace search::queryeval {
+
+/*
+ * Starts out with scale 1.0 and adjust 0.0. When need_rescore() says that
+ * rescoring is required, scale and adjust are derived from the first phase
+ * and second phase score ranges instead.
+ */
+FirstPhaseRescorer::FirstPhaseRescorer(const std::pair<Scores,Scores>& ranges)
+    : _scale(1.0),
+      _adjust(0.0)
+{
+    if (!need_rescore(ranges)) {
+        return;
+    }
+    const auto& first_phase = ranges.first;
+    const auto& second_phase = ranges.second;
+    // Each range is raised to 1.0 when it is smaller than that.
+    const double first_phase_span = first_phase.high - first_phase.low;
+    const double second_phase_span = second_phase.high - second_phase.low;
+    const double first_phase_range = (first_phase_span < 1.0) ? 1.0 : first_phase_span;
+    const double second_phase_range = (second_phase_span < 1.0) ? 1.0 : second_phase_span;
+    // Scale and adjust the first phase score according to the first phase
+    // and second phase heap score values, to avoid that a score from the
+    // first phase is larger than second_phase.low.
+    _scale = second_phase_range / first_phase_range;
+    _adjust = first_phase.low * _scale - second_phase.low;
+}
+
+// Rescoring is needed when the low score of the first range (first phase)
+// is above the low score of the second range (second phase).
+bool
+FirstPhaseRescorer::need_rescore(const std::pair<Scores,Scores>& ranges)
+{
+    return (ranges.first.low > ranges.second.low);
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/first_phase_rescorer.h b/searchlib/src/vespa/searchlib/queryeval/first_phase_rescorer.h
new file mode 100644
index 00000000000..301e2aa78d0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/first_phase_rescorer.h
@@ -0,0 +1,25 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "scores.h"
+#include <cstdint>
+
+namespace search::queryeval {
+
+/*
+ * Rescores hits that were not selected for second phase ranking, to
+ * prevent them from getting a better score than the hits that were
+ * selected for second phase ranking.
+ *
+ * The new score is computed as (score * scale) - adjust.
+ */
+class FirstPhaseRescorer {
+public:
+    // The pair holds the first phase score range followed by the second phase score range.
+    FirstPhaseRescorer(const std::pair<Scores,Scores>& ranges);
+    // Tells whether the given ranges require rescoring at all.
+    static bool need_rescore(const std::pair<Scores,Scores>& ranges);
+    // The first argument (unnamed) is not used by the calculation.
+    double rescore(uint32_t, double score) const noexcept {
+        return ((score * _scale) - _adjust);
+    }
+private:
+    double _scale;
+    double _adjust;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
index be7b9031c00..b7841dc2017 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -204,16 +204,6 @@ double ordered_cost_of(ADAPTER adapter, const T &children, F flow, bool allow_fo
return total_cost;
}
-static double actual_cost_of(auto adapter, const auto &children, auto flow, auto cost_of) noexcept {
- double total_cost = 0.0;
- for (const auto &child: children) {
- double child_cost = cost_of(child, InFlow(flow.strict(), flow.flow()));
- flow.update_cost(total_cost, child_cost);
- flow.add(adapter.estimate(child));
- }
- return total_cost;
-}
-
auto select_strict_and_child(auto adapter, const auto &children, size_t first, double est, bool native_strict) {
double cost = 0.0;
size_t best_idx = first;
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
index cf1d1a8c09f..5ed61ef9fc8 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
@@ -60,6 +60,12 @@ inline size_t get_num_indirections(const attribute::BasicType& basic_type,
return res;
}
+// Fallback hit estimate for blueprints that are not able to provide one themselves
+// (e.g. attributes without fast-search). In most cases this value is an overestimate.
+inline double estimate_when_unknown() {
+    constexpr double fallback_estimate = 0.1;
+    return fallback_estimate;
+}
+
// Non-strict cost of lookup based matching in an attribute (not fast-search).
// Test used: IteratorBenchmark::analyze_term_search_in_attributes_non_strict
inline double lookup_cost(size_t num_indirections) {
@@ -90,7 +96,7 @@ inline double lookup_strict_cost(size_t num_indirections) {
* as the latency (time) penalty is higher if choosing wrong.
*/
inline double non_strict_cost_of_strict_iterator(double estimate, double strict_cost) {
- return strict_cost + strict_cost_diff(estimate, 1.0);
+ return 2.0 * (strict_cost + strict_cost_diff(estimate, 0.5));
}
// Strict cost of matching in a btree posting list (e.g. fast-search attribute or memory index field).
diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
index bf7f44f0e7a..01587ef485a 100644
--- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
@@ -1,6 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "hitcollector.h"
+#include "first_phase_rescorer.h"
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/common/sort.h>
#include <cassert>
@@ -43,9 +44,7 @@ HitCollector::HitCollector(uint32_t numDocs, uint32_t maxHitsSize)
_unordered(false),
_docIdVector(),
_bitVector(),
- _reRankedHits(),
- _scale(1.0),
- _adjust(0)
+ _reRankedHits()
{
if (_maxHitsSize > 0) {
_collector = std::make_unique<RankedHitCollector>(*this);
@@ -71,7 +70,7 @@ HitCollector::RankedHitCollector::collect(uint32_t docId, feature_t score)
}
hc._hits.emplace_back(docId, score);
} else {
- collectAndChangeCollector(docId, score);
+ collectAndChangeCollector(docId, score); // note - self-destruct.
}
}
@@ -101,11 +100,10 @@ HitCollector::RankedHitCollector::collectAndChangeCollector(uint32_t docId, feat
if (hc._maxDocIdVectorSize > hc._maxHitsSize) {
// start using docid vector
hc._docIdVector.reserve(hc._maxDocIdVectorSize);
- uint32_t iSize = hc._hits.size();
- for (uint32_t i = 0; i < iSize; ++i) {
- hc._docIdVector.push_back(hc._hits[i].first);
+ for (const auto& hit : hc._hits) {
+ hc._docIdVector.push_back(hit.first);
}
- if ((iSize > 0) && (docId < hc._docIdVector.back())) {
+ if (!hc._docIdVector.empty() && (docId < hc._docIdVector.back())) {
hc._unordered = true;
}
hc._docIdVector.push_back(docId);
@@ -114,9 +112,8 @@ HitCollector::RankedHitCollector::collectAndChangeCollector(uint32_t docId, feat
// start using bit vector
hc._bitVector = BitVector::create(hc._numDocs);
hc._bitVector->invalidateCachedCount();
- uint32_t iSize = hc._hits.size();
- for (uint32_t i = 0; i < iSize; ++i) {
- hc._bitVector->setBit(hc._hits[i].first);
+ for (const auto& hit : _hc._hits) {
+ hc._bitVector->setBit(hit.first);
}
hc._bitVector->setBit(docId);
newCollector = std::make_unique<BitVectorCollector<true>>(hc);
@@ -125,7 +122,7 @@ HitCollector::RankedHitCollector::collectAndChangeCollector(uint32_t docId, feat
std::make_heap(hc._hits.begin(), hc._hits.end(), ScoreComparator());
hc._hitsSortOrder = SortOrder::HEAP;
this->considerForHitVector(docId, score);
- hc._collector = std::move(newCollector);
+ hc._collector = std::move(newCollector); // note - self-destruct.
}
template<bool CollectRankedHit>
@@ -145,7 +142,7 @@ HitCollector::DocIdCollector<CollectRankedHit>::collect(uint32_t docId, feature_
}
hc._docIdVector.push_back(docId);
} else {
- collectAndChangeCollector(docId);
+ collectAndChangeCollector(docId); // note - self-destruct.
}
}
@@ -157,9 +154,8 @@ HitCollector::DocIdCollector<CollectRankedHit>::collectAndChangeCollector(uint32
// start using bit vector instead of docid array.
hc._bitVector = BitVector::create(hc._numDocs);
hc._bitVector->invalidateCachedCount();
- uint32_t iSize = static_cast<uint32_t>(hc._docIdVector.size());
- for (uint32_t i = 0; i < iSize; ++i) {
- hc._bitVector->setBit(hc._docIdVector[i]);
+ for (auto docid : hc._docIdVector) {
+ hc._bitVector->setBit(docid);
}
std::vector<uint32_t> emptyVector;
emptyVector.swap(hc._docIdVector);
@@ -191,91 +187,231 @@ HitCollector::setRanges(const std::pair<Scores, Scores> &ranges)
namespace {
+// Rescorer that hands back the score it was given; the docid argument is ignored.
+struct NoRescorer {
+    static double rescore(uint32_t, double score) noexcept {
+        return score;
+    }
+};
+
+/*
+ * Rescorer wrapper. A docid that equals the docid at the current position
+ * in the reranked hits gets the score stored there, and the position is
+ * advanced. Any other docid is passed on to the wrapped rescorer.
+ *
+ * The position only moves forward, so this presumably expects docids to
+ * arrive in the same order as they appear in the reranked hits.
+ */
+template <typename Rescorer>
+class RerankRescorer {
+    using HitVector = std::vector<HitCollector::Hit>;
+    using Iterator = typename HitVector::const_iterator;
+    Rescorer _rescorer;
+    Iterator _reranked_cur;
+    Iterator _reranked_end;
+public:
+    RerankRescorer(const Rescorer& rescorer, const HitVector& reranked_hits)
+        : _rescorer(rescorer),
+          _reranked_cur(reranked_hits.begin()),
+          _reranked_end(reranked_hits.end())
+    {
+    }
+
+    double rescore(uint32_t docid, double score) noexcept {
+        const bool is_reranked = (_reranked_cur != _reranked_end) && (_reranked_cur->first == docid);
+        if (!is_reranked) {
+            return _rescorer.rescore(docid, score);
+        }
+        const double reranked_score = _reranked_cur->second;
+        ++_reranked_cur;
+        return reranked_score;
+    }
+};
+
+/*
+ * Hit adder that appends every hit it is given to the result set.
+ */
+class SimpleHitAdder {
+protected:
+    ResultSet& _rs;
+public:
+    // explicit: a ResultSet should never convert silently into a hit adder.
+    explicit SimpleHitAdder(ResultSet& rs)
+        : _rs(rs)
+    {
+    }
+    // Appends (docid, rank_value) to the result set.
+    void add(uint32_t docid, double rank_value) {
+        _rs.push_back({docid, rank_value});
+    }
+};
+
+/*
+ * Hit adder that only appends hits whose rank value is strictly above
+ * the second phase rank drop limit. Other hits are ignored.
+ */
+class ConditionalHitAdder : public SimpleHitAdder {
+protected:
+    double _second_phase_rank_drop_limit;
+public:
+    ConditionalHitAdder(ResultSet& rs, double second_phase_rank_drop_limit)
+        : SimpleHitAdder(rs),
+          _second_phase_rank_drop_limit(second_phase_rank_drop_limit)
+    {
+    }
+    void add(uint32_t docid, double rank_value) {
+        const bool above_limit = (rank_value > _second_phase_rank_drop_limit);
+        if (above_limit) {
+            SimpleHitAdder::add(docid, rank_value);
+        }
+    }
+};
+
+/*
+ * Hit adder that appends hits whose rank value is strictly above the
+ * second phase rank drop limit, and records the docid of every other
+ * hit in the dropped vector.
+ */
+class TrackingConditionalHitAdder : public ConditionalHitAdder {
+    std::vector<uint32_t>& _dropped;
+public:
+    TrackingConditionalHitAdder(ResultSet& rs, double second_phase_rank_drop_limit, std::vector<uint32_t>& dropped)
+        : ConditionalHitAdder(rs, second_phase_rank_drop_limit),
+          _dropped(dropped)
+    {
+    }
+    void add(uint32_t docid, double rank_value) {
+        if (rank_value > _second_phase_rank_drop_limit) {
+            SimpleHitAdder::add(docid, rank_value);
+            return;
+        }
+        _dropped.emplace_back(docid);
+    }
+};
+
+template <typename HitAdder, typename Rescorer>
void
-mergeHitsIntoResultSet(const std::vector<HitCollector::Hit> &hits, ResultSet &result)
+add_rescored_hits(HitAdder hit_adder, const std::vector<HitCollector::Hit>& hits, Rescorer rescorer)
{
- uint32_t rhCur(0);
- uint32_t rhEnd(result.getArrayUsed());
- for (const auto &hit : hits) {
- while (rhCur != rhEnd && result[rhCur].getDocId() != hit.first) {
- // just set the iterators right
- ++rhCur;
+ for (auto& hit : hits) {
+ hit_adder.add(hit.first, rescorer.rescore(hit.first, hit.second));
+ }
+}
+
+// Adds all hits through the hit adder. When reranked hits exist, the
+// rescorer is wrapped in a RerankRescorer; otherwise it is used as is.
+template <typename HitAdder, typename Rescorer>
+void
+add_rescored_hits(HitAdder hit_adder, const std::vector<HitCollector::Hit>& hits, const std::vector<HitCollector::Hit>& reranked_hits, Rescorer rescorer)
+{
+    if (!reranked_hits.empty()) {
+        add_rescored_hits(hit_adder, hits, RerankRescorer(rescorer, reranked_hits));
+        return;
+    }
+    add_rescored_hits(hit_adder, hits, rescorer);
+}
+
+template <typename Rescorer>
+void
+add_rescored_hits(ResultSet& rs, const std::vector<HitCollector::Hit>& hits, const std::vector<HitCollector::Hit>& reranked_hits, std::optional<double> second_phase_rank_drop_limit, std::vector<uint32_t>* dropped, Rescorer rescorer)
+{
+ if (second_phase_rank_drop_limit.has_value()) {
+ if (dropped != nullptr) {
+ add_rescored_hits(TrackingConditionalHitAdder(rs, second_phase_rank_drop_limit.value(), *dropped), hits, reranked_hits, rescorer);
+ } else {
+ add_rescored_hits(ConditionalHitAdder(rs, second_phase_rank_drop_limit.value()), hits, reranked_hits, rescorer);
}
- assert(rhCur != rhEnd); // the hits should be a subset of the hits in ranked hit array.
- result[rhCur]._rankValue = hit.second;
+ } else {
+ add_rescored_hits(SimpleHitAdder(rs), hits, reranked_hits, rescorer);
+ }
+}
+
+// Walks the docids and adds one hit per docid. A docid that equals the docid
+// of the next unconsumed entry in hits gets that entry's rescored value and
+// consumes the entry; every other docid gets default_value.
+template <typename HitAdder, typename Rescorer>
+void
+mixin_rescored_hits(HitAdder hit_adder, const std::vector<HitCollector::Hit>& hits, const std::vector<uint32_t>& docids, double default_value, Rescorer rescorer)
+{
+    auto next_hit = hits.begin();
+    const auto last_hit = hits.end();
+    for (const auto docid : docids) {
+        double rank_value = default_value;
+        if (next_hit != last_hit && docid == next_hit->first) {
+            rank_value = rescorer.rescore(docid, next_hit->second);
+            ++next_hit;
+        }
+        hit_adder.add(docid, rank_value);
+    }
+}
+
+// Mixes hits into the docids. When reranked hits exist, the rescorer is
+// wrapped in a RerankRescorer; otherwise it is used as is.
+template <typename HitAdder, typename Rescorer>
+void
+mixin_rescored_hits(HitAdder hit_adder, const std::vector<HitCollector::Hit>& hits, const std::vector<uint32_t>& docids, double default_value, const std::vector<HitCollector::Hit>& reranked_hits, Rescorer rescorer)
+{
+    if (!reranked_hits.empty()) {
+        mixin_rescored_hits(hit_adder, hits, docids, default_value, RerankRescorer(rescorer, reranked_hits));
+        return;
+    }
+    mixin_rescored_hits(hit_adder, hits, docids, default_value, rescorer);
+}
+
+// Selects the hit adder to use:
+//  - no drop limit:                 SimpleHitAdder
+//  - drop limit, dropped is null:   ConditionalHitAdder
+//  - drop limit, dropped is given:  TrackingConditionalHitAdder
+template <typename Rescorer>
+void
+mixin_rescored_hits(ResultSet& rs, const std::vector<HitCollector::Hit>& hits, const std::vector<uint32_t>& docids, double default_value, const std::vector<HitCollector::Hit>& reranked_hits, std::optional<double> second_phase_rank_drop_limit, std::vector<uint32_t>* dropped, Rescorer rescorer)
+{
+    if (!second_phase_rank_drop_limit.has_value()) {
+        mixin_rescored_hits(SimpleHitAdder(rs), hits, docids, default_value, reranked_hits, rescorer);
+        return;
+    }
+    const double drop_limit = second_phase_rank_drop_limit.value();
+    if (dropped == nullptr) {
+        mixin_rescored_hits(ConditionalHitAdder(rs, drop_limit), hits, docids, default_value, reranked_hits, rescorer);
+        return;
+    }
+    mixin_rescored_hits(TrackingConditionalHitAdder(rs, drop_limit, *dropped), hits, docids, default_value, reranked_hits, rescorer);
+}
+
+void
+add_bitvector_to_dropped(std::vector<uint32_t>& dropped, vespalib::ConstArrayRef<RankedHit> hits, const BitVector& bv)
+{
+ auto hits_cur = hits.begin();
+ auto hits_end = hits.end();
+ auto docid = bv.getFirstTrueBit();
+ auto docid_limit = bv.size();
+ while (docid < docid_limit) {
+ if (hits_cur != hits_end && hits_cur->getDocId() == docid) {
+ ++hits_cur;
+ } else {
+ dropped.emplace_back(docid);
+ }
+ docid = bv.getNextTrueBit(docid + 1);
}
}
}
std::unique_ptr<ResultSet>
-HitCollector::getResultSet(HitRank default_value)
+HitCollector::get_result_set(std::optional<double> second_phase_rank_drop_limit, std::vector<uint32_t>* dropped)
{
- bool needReScore = false;
- Scores &initHeapScores = _ranges.first;
- Scores &finalHeapScores = _ranges.second;
- if (initHeapScores.low > finalHeapScores.low) {
- // scale and adjust the score according to the range
- // of the initial and final heap score values to avoid that
- // a score from the first phase is larger than finalHeapScores.low
- feature_t initRange = initHeapScores.high - initHeapScores.low;
- if (initRange < 1.0) initRange = 1.0f;
- feature_t finalRange = finalHeapScores.high - finalHeapScores.low;
- if (finalRange < 1.0) finalRange = 1.0f;
- _scale = finalRange / initRange;
- _adjust = initHeapScores.low * _scale - finalHeapScores.low;
- needReScore = true;
+ /*
+ * Use default_rank_value (i.e. -HUGE_VAL) when hit collector saves
+ * rank scores, otherwise use zero_rank_value (i.e. 0.0).
+ */
+ auto default_value = save_rank_scores() ? search::default_rank_value : search::zero_rank_value;
+
+ bool needReScore = FirstPhaseRescorer::need_rescore(_ranges);
+ FirstPhaseRescorer rescorer(_ranges);
+
+ if (dropped != nullptr) {
+ dropped->clear();
}
// destroys the heap property or score sort order
sortHitsByDocId();
auto rs = std::make_unique<ResultSet>();
- if ( ! _collector->isDocIdCollector() ) {
- unsigned int iSize = _hits.size();
- rs->allocArray(iSize);
+ if ( ! _collector->isDocIdCollector() ||
+ (second_phase_rank_drop_limit.has_value() &&
+ (_bitVector || dropped == nullptr))) {
+ rs->allocArray(_hits.size());
+ auto* dropped_or_null = dropped;
+ if (second_phase_rank_drop_limit.has_value() && _bitVector) {
+ dropped_or_null = nullptr;
+ }
if (needReScore) {
- for (uint32_t i = 0; i < iSize; ++i) {
- rs->push_back(RankedHit(_hits[i].first, getReScore(_hits[i].second)));
- }
+ add_rescored_hits(*rs, _hits, _reRankedHits, second_phase_rank_drop_limit, dropped_or_null, rescorer);
} else {
- for (uint32_t i = 0; i < iSize; ++i) {
- rs->push_back(RankedHit(_hits[i].first, _hits[i].second));
- }
+ add_rescored_hits(*rs, _hits, _reRankedHits, second_phase_rank_drop_limit, dropped_or_null, NoRescorer());
}
} else {
if (_unordered) {
std::sort(_docIdVector.begin(), _docIdVector.end());
}
- unsigned int iSize = _hits.size();
- unsigned int jSize = _docIdVector.size();
- rs->allocArray(jSize);
- uint32_t i = 0;
+ rs->allocArray(_docIdVector.size());
if (needReScore) {
- for (uint32_t j = 0; j < jSize; ++j) {
- uint32_t docId = _docIdVector[j];
- if (i < iSize && docId == _hits[i].first) {
- rs->push_back(RankedHit(docId, getReScore(_hits[i].second)));
- ++i;
- } else {
- rs->push_back(RankedHit(docId, default_value));
- }
- }
+ mixin_rescored_hits(*rs, _hits, _docIdVector, default_value, _reRankedHits, second_phase_rank_drop_limit, dropped, rescorer);
} else {
- for (uint32_t j = 0; j < jSize; ++j) {
- uint32_t docId = _docIdVector[j];
- if (i < iSize && docId == _hits[i].first) {
- rs->push_back(RankedHit(docId, _hits[i].second));
- ++i;
- } else {
- rs->push_back(RankedHit(docId, default_value));
- }
- }
+ mixin_rescored_hits(*rs, _hits, _docIdVector, default_value, _reRankedHits, second_phase_rank_drop_limit, dropped, NoRescorer());
}
}
- if (!_reRankedHits.empty()) {
- mergeHitsIntoResultSet(_reRankedHits, *rs);
+ if (second_phase_rank_drop_limit.has_value() && _bitVector) {
+ if (dropped != nullptr) {
+ assert(dropped->empty());
+ add_bitvector_to_dropped(*dropped, {rs->getArray(), rs->getArrayUsed()}, *_bitVector);
+ }
+ _bitVector.reset();
}
if (_bitVector) {
@@ -285,4 +421,10 @@ HitCollector::getResultSet(HitRank default_value)
return rs;
}
+std::unique_ptr<ResultSet>
+HitCollector::getResultSet()
+{ // Plain variant: forwards to get_result_set() without a second phase rank drop limit and without a 'dropped' output vector.
+ return get_result_set(std::nullopt, nullptr);
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h
index 94ffe619bab..c23fb0a6ef6 100644
--- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h
+++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h
@@ -8,6 +8,7 @@
#include <vespa/searchlib/common/resultset.h>
#include <vespa/vespalib/util/sort.h>
#include <algorithm>
+#include <optional>
#include <vector>
namespace search::queryeval {
@@ -35,8 +36,6 @@ private:
std::vector<Hit> _reRankedHits;
std::pair<Scores, Scores> _ranges;
- feature_t _scale;
- feature_t _adjust;
struct ScoreComparator {
bool operator() (const Hit & lhs, const Hit & rhs) const noexcept {
@@ -120,12 +119,11 @@ private:
void collect(uint32_t docId, feature_t score) override;
};
- HitRank getReScore(feature_t score) const {
- return ((score * _scale) - _adjust);
- }
VESPA_DLL_LOCAL void sortHitsByScore(size_t topn);
VESPA_DLL_LOCAL void sortHitsByDocId();
+ bool save_rank_scores() const noexcept { return _maxHitsSize != 0; }
+
public:
HitCollector(const HitCollector &) = delete;
HitCollector &operator=(const HitCollector &) = delete;
@@ -169,15 +167,17 @@ public:
const std::pair<Scores, Scores> &getRanges() const { return _ranges; }
void setRanges(const std::pair<Scores, Scores> &ranges);
+ std::unique_ptr<ResultSet>
+ get_result_set(std::optional<double> second_phase_rank_drop_limit, std::vector<uint32_t>* dropped);
+
/**
* Returns a result set based on the content of this collector.
* Invoking this method will destroy the heap property of the
* ranked hits and the match data heap.
*
- * @param auto pointer to the result set
- * @param default_value rank value to be used for results without rank value
+ * @return unique pointer to the result set
**/
- std::unique_ptr<ResultSet> getResultSet(HitRank default_value = default_rank_value);
+ std::unique_ptr<ResultSet> getResultSet();
};
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
index 33b249572f0..93cb8d68c33 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
@@ -26,7 +26,7 @@ size_t lookup_create_source(std::vector<std::unique_ptr<CombineType> > &sources,
return i;
}
}
- sources.push_back(std::unique_ptr<CombineType>(new CombineType()));
+ sources.push_back(std::make_unique<CombineType>());
sources.back()->setSourceId(child_source);
sources.back()->setDocIdLimit(docid_limit);
return (sources.size() - 1);
@@ -318,11 +318,6 @@ OrBlueprint::calculate_flow_stats(uint32_t) const {
OrFlow::cost_of(get_children(), true) + flow::heap_cost(est, get_children().size())};
}
-double
-OrBlueprint::estimate_self_cost(InFlow in_flow) const noexcept {
- return in_flow.strict() ? flow::heap_cost(estimate(), get_children().size()) : 0.0;
-}
-
Blueprint::HitEstimate
OrBlueprint::combine(const std::vector<HitEstimate> &data) const
{
@@ -424,6 +419,13 @@ WeakAndBlueprint::my_flow(InFlow in_flow) const
return AnyFlow::create<OrFlow>(in_flow);
}
+WeakAndBlueprint::WeakAndBlueprint(uint32_t n, float idf_range, bool thread_safe)
+ : _scores(WeakAndPriorityQueue::createHeap(n, thread_safe)), // score heap tracking n scores; thread_safe is passed on to createHeap()
+ _n(n),
+ _idf_range(idf_range), // createIntermediateSearch() compares this to 0.0 to choose between TermFrequencyScorer and Bm25TermFrequencyScorer
+ _weights()
+{}
+
WeakAndBlueprint::~WeakAndBlueprint() = default;
FlowStats
@@ -436,11 +438,6 @@ WeakAndBlueprint::calculate_flow_stats(uint32_t docid_limit) const {
OrFlow::cost_of(get_children(), true) + flow::heap_cost(est, get_children().size())};
}
-double
-WeakAndBlueprint::estimate_self_cost(InFlow in_flow) const noexcept {
- return in_flow.strict() ? flow::heap_cost(estimate(), get_children().size()) : 0.0;
-}
-
Blueprint::HitEstimate
WeakAndBlueprint::combine(const std::vector<HitEstimate> &data) const
{
@@ -488,11 +485,12 @@ WeakAndBlueprint::createIntermediateSearch(MultiSearch::Children sub_searches,
assert(_weights.size() == childCnt());
for (size_t i = 0; i < sub_searches.size(); ++i) {
// TODO: pass ownership with unique_ptr
- terms.emplace_back(sub_searches[i].release(),
- _weights[i],
+ terms.emplace_back(sub_searches[i].release(), _weights[i],
getChild(i).getState().estimate().estHits);
}
- return WeakAndSearch::create(terms, _n, strict());
+ return (_idf_range == 0.0)
+ ? WeakAndSearch::create(terms, wand::MatchParams(*_scores), wand::TermFrequencyScorer(), _n, strict())
+ : WeakAndSearch::create(terms, wand::MatchParams(*_scores), wand::Bm25TermFrequencyScorer(get_docid_limit(), _idf_range), _n, strict());
}
SearchIterator::UP
@@ -517,11 +515,6 @@ NearBlueprint::calculate_flow_stats(uint32_t) const {
AndFlow::cost_of(get_children(), true) + childCnt() * est};
}
-double
-NearBlueprint::estimate_self_cost(InFlow) const noexcept {
- return childCnt() * estimate();
-}
-
Blueprint::HitEstimate
NearBlueprint::combine(const std::vector<HitEstimate> &data) const
{
@@ -562,7 +555,7 @@ NearBlueprint::createIntermediateSearch(MultiSearch::Children sub_searches,
tfmda.add(cs.field(j).resolve(md));
}
}
- return SearchIterator::UP(new NearSearch(std::move(sub_searches), tfmda, _window, strict()));
+ return std::make_unique<NearSearch>(std::move(sub_searches), tfmda, _window, strict());
}
SearchIterator::UP
@@ -587,11 +580,6 @@ ONearBlueprint::calculate_flow_stats(uint32_t) const {
AndFlow::cost_of(get_children(), true) + childCnt() * est};
}
-double
-ONearBlueprint::estimate_self_cost(InFlow) const noexcept {
- return childCnt() * estimate();
-}
-
Blueprint::HitEstimate
ONearBlueprint::combine(const std::vector<HitEstimate> &data) const
{
@@ -630,7 +618,7 @@ ONearBlueprint::createIntermediateSearch(MultiSearch::Children sub_searches,
}
// could sort sub_searches here
// but then strictness inheritance would also need to be fixed
- return SearchIterator::UP(new ONearSearch(std::move(sub_searches), tfmda, _window, strict()));
+ return std::make_unique<ONearSearch>(std::move(sub_searches), tfmda, _window, strict());
}
SearchIterator::UP
@@ -756,7 +744,8 @@ SourceBlenderBlueprint::calculate_flow_stats(uint32_t) const {
my_cost = std::max(my_cost, child->cost());
my_strict_cost = std::max(my_strict_cost, child->strict_cost());
}
- return {OrFlow::estimate_of(get_children()), my_cost, my_strict_cost};
+ double my_est = OrFlow::estimate_of(get_children());
+ return {my_est, my_cost + 1.0, my_strict_cost + my_est};
}
Blueprint::HitEstimate
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
index ade4c9318e4..87331ca83c5 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
@@ -4,6 +4,7 @@
#include "blueprint.h"
#include "multisearch.h"
+#include <vespa/searchlib/queryeval/wand/weak_and_heap.h>
namespace search::queryeval {
@@ -67,7 +68,6 @@ public:
~OrBlueprint() override;
bool supports_termwise_children() const override { return true; }
FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
- double estimate_self_cost(InFlow in_flow) const noexcept override;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
void optimize_self(OptimizePass pass) override;
@@ -89,13 +89,14 @@ private:
class WeakAndBlueprint : public IntermediateBlueprint
{
private:
+ std::unique_ptr<WeakAndPriorityQueue> _scores;
uint32_t _n;
+ float _idf_range;
std::vector<uint32_t> _weights;
AnyFlow my_flow(InFlow in_flow) const override;
public:
FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
- double estimate_self_cost(InFlow in_flow) const noexcept override;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
Blueprint::UP get_replacement() override;
@@ -107,14 +108,15 @@ public:
fef::MatchData &md) const override;
SearchIterator::UP createFilterSearch(FilterConstraint constraint) const override;
- explicit WeakAndBlueprint(uint32_t n) noexcept : _n(n) {}
+ explicit WeakAndBlueprint(uint32_t n) : WeakAndBlueprint(n, 0.0, true) {}
+ WeakAndBlueprint(uint32_t n, float idf_range, bool thread_safe);
~WeakAndBlueprint() override;
void addTerm(Blueprint::UP bp, uint32_t weight) {
addChild(std::move(bp));
_weights.push_back(weight);
}
- uint32_t getN() const { return _n; }
- const std::vector<uint32_t> &getWeights() const { return _weights; }
+ uint32_t getN() const noexcept { return _n; }
+ const std::vector<uint32_t> &getWeights() const noexcept { return _weights; }
};
//-----------------------------------------------------------------------------
@@ -127,7 +129,6 @@ private:
AnyFlow my_flow(InFlow in_flow) const override;
public:
FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
- double estimate_self_cost(InFlow in_flow) const noexcept override;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
void sort(Children &children, InFlow in_flow) const override;
@@ -150,7 +151,6 @@ private:
AnyFlow my_flow(InFlow in_flow) const override;
public:
FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
- double estimate_self_cost(InFlow in_flow) const noexcept override;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
void sort(Children &children, InFlow in_flow) const override;
diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
index d825c9e1a20..9d19ba87af7 100644
--- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
@@ -11,9 +11,9 @@ namespace search::queryeval {
//-----------------------------------------------------------------------------
FlowStats
-EmptyBlueprint::calculate_flow_stats(uint32_t docid_limit) const
+EmptyBlueprint::calculate_flow_stats(uint32_t) const
{
- return default_flow_stats(docid_limit, 0, 0);
+ return {0.0, 0.2, 0.0}; // constant stats, independent of docid_limit; NOTE(review): field order looks like {estimate, cost, strict cost} as in the other calculate_flow_stats() bodies - confirm
}
SearchIterator::UP
diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp
index 2b25aa29747..c5435b557b0 100644
--- a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp
@@ -191,16 +191,14 @@ SimplePhraseSearch::doSeek(uint32_t doc_id) {
void
SimplePhraseSearch::doStrictSeek(uint32_t doc_id) {
uint32_t next_candidate = doc_id;
- while (getDocId() < doc_id || getDocId() == beginId()) {
- getChildren()[0]->seek(next_candidate + 1);
- next_candidate = getChildren()[0]->getDocId();
+ auto &best_child = *getChildren()[_eval_order[0]]; // candidates now come from the child listed first in _eval_order instead of child 0
+ while (getDocId() < doc_id) { // keep going until this iterator has reached doc_id, or the driving child is exhausted
+ best_child.seek(next_candidate + 1); // step the driving child past the previous candidate
+ next_candidate = best_child.getDocId();
if (isAtEnd(next_candidate)) {
setAtEnd();
return;
}
- // child must behave as strict.
- assert(next_candidate > doc_id && next_candidate != beginId());
-
phraseSeek(next_candidate);
}
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp
index 4c55496822b..48bef125ec3 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp
@@ -1,42 +1,23 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "parallel_weak_and_blueprint.h"
-#include "wand_parts.h"
#include "parallel_weak_and_search.h"
#include <vespa/searchlib/queryeval/field_spec.hpp>
#include <vespa/searchlib/queryeval/searchiterator.h>
#include <vespa/searchlib/queryeval/flow_tuning.h>
-#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/vespalib/objects/visit.hpp>
#include <algorithm>
namespace search::queryeval {
-ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(FieldSpecBase field,
- uint32_t scoresToTrack,
- score_t scoreThreshold,
- double thresholdBoostFactor)
+ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(FieldSpecBase field, uint32_t scoresToTrack,
+ score_t scoreThreshold, double thresholdBoostFactor,
+ bool thread_safe)
: ComplexLeafBlueprint(field),
- _scores(scoresToTrack),
+ _scores(WeakAndPriorityQueue::createHeap(scoresToTrack, thread_safe)),
_scoreThreshold(scoreThreshold),
_thresholdBoostFactor(thresholdBoostFactor),
- _scoresAdjustFrequency(DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY),
- _layout(),
- _weights(),
- _terms()
-{
-}
-
-ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(FieldSpecBase field,
- uint32_t scoresToTrack,
- score_t scoreThreshold,
- double thresholdBoostFactor,
- uint32_t scoresAdjustFrequency)
- : ComplexLeafBlueprint(field),
- _scores(scoresToTrack),
- _scoreThreshold(scoreThreshold),
- _thresholdBoostFactor(thresholdBoostFactor),
- _scoresAdjustFrequency(scoresAdjustFrequency),
+ _scoresAdjustFrequency(wand::DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY),
_layout(),
_weights(),
_terms()
@@ -84,7 +65,7 @@ ParallelWeakAndBlueprint::calculate_flow_stats(uint32_t docid_limit) const
term->update_flow_stats(docid_limit);
}
double child_est = OrFlow::estimate_of(_terms);
- double my_est = abs_to_rel_est(_scores.getScoresToTrack(), docid_limit);
+ double my_est = abs_to_rel_est(_scores->getScoresToTrack(), docid_limit);
double est = (child_est + my_est) / 2.0;
return {est, OrFlow::cost_of(_terms, false),
OrFlow::cost_of(_terms, true) + flow::heap_cost(est, _terms.size())};
@@ -106,14 +87,11 @@ ParallelWeakAndBlueprint::createLeafSearch(const search::fef::TermFieldMatchData
childState.estimate().estHits,
childState.field(0).resolve(*childrenMatchData));
}
- return SearchIterator::UP
- (ParallelWeakAndSearch::create(terms,
- ParallelWeakAndSearch::MatchParams(_scores,
- _scoreThreshold,
- _thresholdBoostFactor,
- _scoresAdjustFrequency).setDocIdLimit(get_docid_limit()),
- ParallelWeakAndSearch::RankParams(*tfmda[0],
- std::move(childrenMatchData)), strict()));
+ return ParallelWeakAndSearch::create(terms,
+ ParallelWeakAndSearch::MatchParams(*_scores, _scoreThreshold, _thresholdBoostFactor,
+ _scoresAdjustFrequency, get_docid_limit()),
+ ParallelWeakAndSearch::RankParams(*tfmda[0],std::move(childrenMatchData)),
+ strict());
}
std::unique_ptr<SearchIterator>
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h
index 4a55bf14095..c34d366120e 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h
@@ -11,8 +11,6 @@
namespace search::queryeval {
-const uint32_t DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY = 4;
-
/**
* Blueprint for the parallel weak and search operator.
*/
@@ -21,32 +19,24 @@ class ParallelWeakAndBlueprint : public ComplexLeafBlueprint
private:
using score_t = wand::score_t;
- mutable SharedWeakAndPriorityQueue _scores;
- const wand::score_t _scoreThreshold;
- double _thresholdBoostFactor;
- const uint32_t _scoresAdjustFrequency;
- fef::MatchDataLayout _layout;
- std::vector<int32_t> _weights;
- std::vector<Blueprint::UP> _terms;
+ std::unique_ptr<WeakAndPriorityQueue> _scores;
+ const wand::score_t _scoreThreshold;
+ double _thresholdBoostFactor;
+ const uint32_t _scoresAdjustFrequency;
+ fef::MatchDataLayout _layout;
+ std::vector<int32_t> _weights;
+ std::vector<Blueprint::UP> _terms;
public:
ParallelWeakAndBlueprint(const ParallelWeakAndBlueprint &) = delete;
ParallelWeakAndBlueprint &operator=(const ParallelWeakAndBlueprint &) = delete;
- ParallelWeakAndBlueprint(FieldSpecBase field,
- uint32_t scoresToTrack,
- score_t scoreThreshold,
- double thresholdBoostFactor);
- ParallelWeakAndBlueprint(FieldSpecBase field,
- uint32_t scoresToTrack,
- score_t scoreThreshold,
- double thresholdBoostFactor,
- uint32_t scoresAdjustFrequency);
+ ParallelWeakAndBlueprint(FieldSpecBase field, uint32_t scoresToTrack,
+ score_t scoreThreshold, double thresholdBoostFactor,
+ bool thread_safe);
~ParallelWeakAndBlueprint() override;
- const WeakAndHeap &getScores() const { return _scores; }
-
+ const WeakAndHeap &getScores() const { return *_scores; }
score_t getScoreThreshold() const { return _scoreThreshold; }
-
double getThresholdBoostFactor() const { return _thresholdBoostFactor; }
// Used by create visitor
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp
index 9e887b9d0f7..78d97e8efa0 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp
@@ -77,6 +77,7 @@ public:
_matchParams(matchParams),
_localScores()
{
+ _localScores.reserve(_matchParams.scoresAdjustFrequency);
}
size_t get_num_terms() const override { return _terms.size(); }
int32_t get_term_weight(size_t idx) const override { return _terms.weight(idx); }
@@ -199,17 +200,17 @@ namespace {
template <typename VectorizedTerms, typename FutureHeap, typename PastHeap>
SearchIterator::UP create_helper(search::fef::TermFieldMatchData &tfmd, VectorizedTerms &&terms, const MatchParams &params, bool strict) {
if (strict) {
- return std::make_unique<wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, true>>(tfmd, std::move(terms), params);
+ return std::make_unique<wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, true>>(tfmd, std::forward<VectorizedTerms>(terms), params); // 'terms' is a forwarding reference: forward keeps the caller's value category, where std::move always produced an rvalue
} else {
- return std::make_unique<wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, false>>(tfmd, std::move(terms), params);
+ return std::make_unique<wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, false>>(tfmd, std::forward<VectorizedTerms>(terms), params); // same as above; the last template argument mirrors 'strict'
}
}
template <typename VectorizedTerms>
SearchIterator::UP create_helper(search::fef::TermFieldMatchData &tfmd, VectorizedTerms &&terms, const MatchParams &params, bool strict, bool use_array) {
return (use_array)
- ? create_helper<VectorizedTerms, vespalib::LeftArrayHeap, vespalib::RightArrayHeap>(tfmd, std::move(terms), params, strict)
- : create_helper<VectorizedTerms, vespalib::LeftHeap, vespalib::RightHeap>(tfmd, std::move(terms), params, strict);
+ ? create_helper<VectorizedTerms, vespalib::LeftArrayHeap, vespalib::RightArrayHeap>(tfmd, std::forward<VectorizedTerms>(terms), params, strict) // use_array: array-based heap pair
+ : create_helper<VectorizedTerms, vespalib::LeftHeap, vespalib::RightHeap>(tfmd, std::forward<VectorizedTerms>(terms), params, strict); // otherwise: LeftHeap/RightHeap pair; only one branch runs, so 'terms' is forwarded once
}
} // namespace search::queryeval::<unnamed>
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h
index bd173ab41eb..70520e267e6 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h
@@ -20,27 +20,25 @@ struct ParallelWeakAndSearch : public SearchIterator
/**
* Params used to tweak the behavior of the WAND algorithm.
*/
- struct MatchParams
+ struct MatchParams : wand::MatchParams // scores, scoreThreshold and scoresAdjustFrequency are now supplied by the wand::MatchParams base
{
- WeakAndHeap &scores;
- score_t scoreThreshold;
- double thresholdBoostFactor;
- uint32_t scoresAdjustFrequency;
- docid_t docIdLimit;
- MatchParams(WeakAndHeap &scores_,
- score_t scoreThreshold_,
- double thresholdBoostFactor_,
- uint32_t scoresAdjustFrequency_)
- : scores(scores_),
- scoreThreshold(scoreThreshold_),
- thresholdBoostFactor(thresholdBoostFactor_),
- scoresAdjustFrequency(scoresAdjustFrequency_),
- docIdLimit(0)
+ const double thresholdBoostFactor;
+ const docid_t docIdLimit; // const: fixed at construction, which is why the setDocIdLimit() setter is removed below
+ MatchParams(WeakAndHeap &scores_in,
+ score_t scoreThreshold_in,
+ double thresholdBoostFactor_in,
+ uint32_t scoresAdjustFrequency_in,
+ uint32_t docIdLimit_in) noexcept
+ : wand::MatchParams(scores_in, scoreThreshold_in, scoresAdjustFrequency_in),
+ thresholdBoostFactor(thresholdBoostFactor_in),
+ docIdLimit(docIdLimit_in)
+ {}
+ MatchParams(WeakAndHeap &scores_in,
+ score_t scoreThreshold_in,
+ double thresholdBoostFactor_in,
+ uint32_t scoresAdjustFrequency_in) noexcept
+ : MatchParams(scores_in, scoreThreshold_in, thresholdBoostFactor_in, scoresAdjustFrequency_in, 0) // delegates with docIdLimit 0, the value the removed constructor used
{}
- MatchParams &setDocIdLimit(docid_t value) {
- docIdLimit = value;
- return *this;
- }
};
/**
@@ -51,7 +49,7 @@ struct ParallelWeakAndSearch : public SearchIterator
fef::TermFieldMatchData &rootMatchData;
fef::MatchData::UP childrenMatchData;
RankParams(fef::TermFieldMatchData &rootMatchData_,
- fef::MatchData::UP &&childrenMatchData_)
+ fef::MatchData::UP &&childrenMatchData_) noexcept
: rootMatchData(rootMatchData_),
childrenMatchData(std::move(childrenMatchData_))
{}
@@ -68,12 +66,10 @@ struct ParallelWeakAndSearch : public SearchIterator
static SearchIterator::UP createHeapWand(const Terms &terms, const MatchParams &matchParams, RankParams &&rankParams, bool strict);
static SearchIterator::UP create(const Terms &terms, const MatchParams &matchParams, RankParams &&rankParams, bool strict);
- static SearchIterator::UP create(fef::TermFieldMatchData &tmd,
- const MatchParams &matchParams,
+ static SearchIterator::UP create(fef::TermFieldMatchData &tmd, const MatchParams &matchParams,
const std::vector<int32_t> &weights,
const std::vector<IDirectPostingStore::LookupResult> &dict_entries,
- const IDocidWithWeightPostingStore &attr,
- bool strict);
+ const IDocidWithWeightPostingStore &attr, bool strict);
};
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
index 4e781f8497b..9496090cca3 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
@@ -2,10 +2,9 @@
#pragma once
-#include <algorithm>
-#include <cmath>
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/features/bm25_feature.h>
#include <vespa/searchlib/queryeval/searchiterator.h>
#include <vespa/searchlib/queryeval/iterator_pack.h>
#include <vespa/searchlib/attribute/posting_iterator_pack.h>
@@ -13,23 +12,40 @@
#include <vespa/vespalib/util/priority_queue.h>
#include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h>
#include <vespa/vespalib/util/stringfmt.h>
+#include <cmath>
+namespace search::queryeval { class WeakAndHeap; }
namespace search::queryeval::wand {
//-----------------------------------------------------------------------------
-struct Term;
-using Terms = std::vector<Term>;
using score_t = int64_t;
using docid_t = uint32_t;
using ref_t = uint16_t;
-using Attr = IDirectPostingStore;
-using AttrDictEntry = Attr::LookupResult;
+const uint32_t DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY = 4;
//-----------------------------------------------------------------------------
/**
+ * Params used to tweak the behavior of the WAND algorithm.
+ *
+ * 'scores' is held by reference, so the heap must outlive this object.
+ * The single-argument constructor uses a score threshold of 1 and
+ * DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY as adjust frequency.
+ * NOTE(review): that constructor is not explicit, so a WeakAndHeap& converts
+ * implicitly to MatchParams - confirm this is intended.
+ */
+struct MatchParams
+{
+ WeakAndHeap &scores;
+ score_t scoreThreshold;
+ const uint32_t scoresAdjustFrequency;
+ MatchParams(WeakAndHeap &scores_in) noexcept
+ : MatchParams(scores_in, 1, DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY)
+ {}
+ MatchParams(WeakAndHeap &scores_in, score_t scoreThreshold_in, uint32_t scoresAdjustFrequency_in) noexcept
+ : scores(scores_in),
+ scoreThreshold(scoreThreshold_in),
+ scoresAdjustFrequency(scoresAdjustFrequency_in)
+ {}
+};
+
+/**
* Wrapper used to specify underlying terms during setup
**/
struct Term {
@@ -46,7 +62,7 @@ struct Term {
Term(SearchIterator *s, int32_t w, uint32_t e) noexcept : Term(s, w, e, nullptr) {}
Term(SearchIterator::UP s, int32_t w, uint32_t e) noexcept : Term(s.release(), w, e, nullptr) {}
};
-
+using Terms = std::vector<Term>;
//-----------------------------------------------------------------------------
// input manipulation utilities
@@ -75,7 +91,7 @@ auto assemble(const F &f, const Order &order)->std::vector<decltype(f(0))> {
}
int32_t get_max_weight(const SearchIterator &search) {
- const MinMaxPostingInfo *minMax = dynamic_cast<const MinMaxPostingInfo *>(search.getPostingInfo());
+ const auto *minMax = dynamic_cast<const MinMaxPostingInfo *>(search.getPostingInfo());
return (minMax != nullptr) ? minMax->getMaxWeight() : std::numeric_limits<int32_t>::max();
}
@@ -291,7 +307,7 @@ struct VectorizedAttributeTerms : VectorizedState<DocidWithWeightIteratorPack> {
**/
struct DocIdOrder {
const docid_t *termPos;
- explicit DocIdOrder(docid_t *pos) noexcept : termPos(pos) {}
+ explicit DocIdOrder(const docid_t *pos) noexcept : termPos(pos) {}
bool at_end(ref_t ref) const noexcept { return termPos[ref] == search::endDocId; }
docid_t get_pos(ref_t ref) const noexcept { return termPos[ref]; }
bool operator()(ref_t a, ref_t b) const noexcept {
@@ -389,7 +405,7 @@ DualHeap<FutureHeap, PastHeap>::stringify() const {
}
//-----------------------------------------------------------------------------
-#define TermFrequencyScorer_TERM_SCORE_FACTOR 1000000.0
+constexpr double TermFrequencyScorer_TERM_SCORE_FACTOR = 1000000.0;
/**
* Scorer used with WeakAndAlgorithm that calculates a pseudo term frequency
@@ -412,6 +428,38 @@ struct TermFrequencyScorer
}
};
+class Bm25TermFrequencyScorer
+{
+public:
+ using Bm25Executor = features::Bm25Executor;
+ Bm25TermFrequencyScorer(uint32_t num_docs, float range) noexcept
+ : _num_docs(num_docs),
+ _range(range),
+ _max_idf(Bm25Executor::calculate_inverse_document_frequency(1, _num_docs))
+ { }
+ double apply_range(double idf) const noexcept {
+ return (1.0 - _range)*_max_idf + _range * idf;
+ }
+ // weight * scaled_bm25_idf, scaled to fixedpoint
+ score_t calculateMaxScore(double estHits, double weight) const noexcept {
+ return score_t(TermFrequencyScorer_TERM_SCORE_FACTOR * weight *
+ apply_range(Bm25Executor::calculate_inverse_document_frequency(estHits, _num_docs)));
+ }
+
+ score_t calculateMaxScore(const Term &term) const noexcept {
+ return calculateMaxScore(term.estHits, term.weight) + 1;
+ }
+
+ template <typename Input>
+ score_t calculate_max_score(const Input &input, ref_t ref) const noexcept {
+ return calculateMaxScore(input.get_est_hits(ref), input.get_weight(ref)) + 1;
+ }
+private:
+ uint32_t _num_docs;
+ float _range;
+ double _max_idf;
+};
+
//-----------------------------------------------------------------------------
/**
@@ -453,14 +501,14 @@ struct DotProductScorer
// used with parallel wand where we can safely discard hits based on score
struct GreaterThan {
score_t threshold;
- GreaterThan(score_t t) : threshold(t) {}
+ explicit GreaterThan(score_t t) noexcept : threshold(t) {} // explicit: a bare score_t no longer converts implicitly into this predicate
bool operator()(score_t score) const { return (score > threshold); }
};
// used with old-style vespa wand to ensure at least AND'ish results
struct GreaterThanEqual {
score_t threshold;
- GreaterThanEqual(score_t t) : threshold(t) {}
+ explicit GreaterThanEqual(score_t t) noexcept : threshold(t) {} // explicit: a bare score_t no longer converts implicitly into this predicate
bool operator()(score_t score) const { return (score >= threshold); }
};
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp
index d4b92fd67e6..53ebb33e1ea 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp
@@ -4,23 +4,28 @@
namespace search::queryeval {
-SharedWeakAndPriorityQueue::SharedWeakAndPriorityQueue(uint32_t scoresToTrack) :
+WeakAndPriorityQueue::WeakAndPriorityQueue(uint32_t scoresToTrack) :
WeakAndHeap(scoresToTrack),
- _bestScores(),
- _lock()
-{
- _bestScores.reserve(scoresToTrack);
-}
+ _bestScores()
+{ } // NOTE(review): the previous constructor reserved scoresToTrack entries in _bestScores up front; that reserve is gone here - confirm it was dropped on purpose
-SharedWeakAndPriorityQueue::~SharedWeakAndPriorityQueue() = default;
+WeakAndPriorityQueue::~WeakAndPriorityQueue() = default;
+/*
+ * Factory for the score heap. Returns a SharedWeakAndPriorityQueue (whose
+ * adjust() takes a lock) when thread_safe is true, and a plain
+ * WeakAndPriorityQueue otherwise. Both track scoresToTrack scores.
+ */
+std::unique_ptr<WeakAndPriorityQueue>
+WeakAndPriorityQueue::createHeap(uint32_t scoresToTrack, bool thread_safe) {
+ if (thread_safe) {
+ return std::make_unique<queryeval::SharedWeakAndPriorityQueue>(scoresToTrack);
+ }
+ return std::make_unique<WeakAndPriorityQueue>(scoresToTrack);
+}
void
-SharedWeakAndPriorityQueue::adjust(score_t *begin, score_t *end)
+WeakAndPriorityQueue::adjust(score_t *begin, score_t *end)
{
if (getScoresToTrack() == 0) {
return;
}
- std::lock_guard guard(_lock);
+
for (score_t *itr = begin; itr != end; ++itr) {
score_t score = *itr;
if (!is_full()) {
@@ -35,4 +40,17 @@ SharedWeakAndPriorityQueue::adjust(score_t *begin, score_t *end)
}
}
+SharedWeakAndPriorityQueue::SharedWeakAndPriorityQueue(uint32_t scoresToTrack)
+ : WeakAndPriorityQueue(scoresToTrack), // all heap state lives in the base class
+ _lock() // the only addition of this subclass: the lock taken by adjust()
+{ }
+
+SharedWeakAndPriorityQueue::~SharedWeakAndPriorityQueue() = default;
+/*
+ * Locking wrapper around WeakAndPriorityQueue::adjust(): holds _lock for the
+ * whole call and delegates the actual heap update to the base class.
+ */
+void
+SharedWeakAndPriorityQueue::adjust(score_t *begin, score_t *end) {
+ std::lock_guard guard(_lock);
+ WeakAndPriorityQueue::adjust(begin, end);
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h
index f1c90f5e6ac..db3ddbc39d3 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h
@@ -17,13 +17,13 @@ namespace search::queryeval {
class WeakAndHeap {
public:
using score_t = wand::score_t;
- WeakAndHeap(uint32_t scoresToTrack) :
+ explicit WeakAndHeap(uint32_t scoresToTrack) noexcept :
_minScore((scoresToTrack == 0)
? std::numeric_limits<score_t>::max()
: 0),
_scoresToTrack(scoresToTrack)
{ }
- virtual ~WeakAndHeap() {}
+ virtual ~WeakAndHeap() = default;
/**
* Consider the given scores for insertion into the underlying structure.
* The implementation may change the given score array to speed up execution.
@@ -33,11 +33,13 @@ public:
/**
* The number of scores this heap is tracking.
**/
- uint32_t getScoresToTrack() const { return _scoresToTrack; }
+ uint32_t getScoresToTrack() const noexcept { return _scoresToTrack; }
- score_t getMinScore() const { return _minScore.load(std::memory_order_relaxed); }
+ score_t getMinScore() const noexcept { return _minScore.load(std::memory_order_relaxed); }
protected:
- void setMinScore(score_t minScore) { _minScore.store(minScore, std::memory_order_relaxed); }
+ void setMinScore(score_t minScore) noexcept {
+ _minScore.store(minScore, std::memory_order_relaxed);
+ }
private:
std::atomic<score_t> _minScore;
const uint32_t _scoresToTrack;
@@ -47,19 +49,28 @@ private:
* An implementation using an underlying priority queue to keep track of the N
* best hits that can be shared among multiple search iterators.
*/
-class SharedWeakAndPriorityQueue : public WeakAndHeap
+class WeakAndPriorityQueue : public WeakAndHeap
{
private:
using Scores = vespalib::PriorityQueue<score_t>;
Scores _bestScores;
- std::mutex _lock;
- bool is_full() const { return (_bestScores.size() >= getScoresToTrack()); }
+ bool is_full() const noexcept { return (_bestScores.size() >= getScoresToTrack()); }
+public:
+ explicit WeakAndPriorityQueue(uint32_t scoresToTrack);
+ ~WeakAndPriorityQueue() override;
+ Scores &getScores() noexcept { return _bestScores; }
+ void adjust(score_t *begin, score_t *end) override;
+ static std::unique_ptr<WeakAndPriorityQueue> createHeap(uint32_t scoresToTrack, bool thread_safe);
+};
+class SharedWeakAndPriorityQueue final : public WeakAndPriorityQueue
+{
+private:
+ std::mutex _lock;
public:
- SharedWeakAndPriorityQueue(uint32_t scoresToTrack);
- ~SharedWeakAndPriorityQueue();
- Scores &getScores() { return _bestScores; }
+ explicit SharedWeakAndPriorityQueue(uint32_t scoresToTrack);
+ ~SharedWeakAndPriorityQueue() override;
void adjust(score_t *begin, score_t *end) override;
};
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp
index 04b1cb75da4..33dd3e46fe5 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp
@@ -1,7 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "weak_and_search.h"
-#include "wand_parts.h"
+#include "weak_and_heap.h"
#include <vespa/searchlib/queryeval/orsearch.h>
#include <vespa/vespalib/util/left_right_heap.h>
#include <vespa/vespalib/util/priority_queue.h>
@@ -20,7 +20,8 @@ private:
DualHeap<FutureHeap, PastHeap> _heaps;
Algorithm _algo;
score_t _threshold; // current score threshold
- Scores _scores; // best n scores
+ MatchParams _matchParams;
+ std::vector<score_t> _localScores;
const uint32_t _n;
void seek_strict(uint32_t docid) {
@@ -40,16 +41,24 @@ private:
}
}
}
+ void updateThreshold(score_t newThreshold) {
+ if (newThreshold > _threshold) {
+ _threshold = newThreshold;
+ }
+ }
public:
- WeakAndSearchLR(const Terms &terms, uint32_t n)
- : _terms(terms, TermFrequencyScorer(), 0, {}),
+ template<typename Scorer>
+ WeakAndSearchLR(const Terms &terms, const MatchParams & matchParams, const Scorer & scorer, uint32_t n)
+ : _terms(terms, scorer, 0, {}),
_heaps(DocIdOrder(_terms.docId()), _terms.size()),
_algo(),
- _threshold(1),
- _scores(),
+ _threshold(matchParams.scoreThreshold),
+ _matchParams(matchParams),
+ _localScores(),
_n(n)
{
+ _localScores.reserve(_matchParams.scoresAdjustFrequency);
}
size_t get_num_terms() const override { return _terms.size(); }
int32_t get_term_weight(size_t idx) const override { return _terms.weight(idx); }
@@ -57,6 +66,7 @@ public:
const Terms &getTerms() const override { return _terms.input_terms(); }
uint32_t getN() const override { return _n; }
void doSeek(uint32_t docid) override {
+ updateThreshold(_matchParams.scores.getMinScore());
if (IS_STRICT) {
seek_strict(docid);
} else {
@@ -65,12 +75,11 @@ public:
}
void doUnpack(uint32_t docid) override {
_algo.find_matching_terms(_terms, _heaps);
- _scores.push(_algo.get_upper_bound());
- if (_scores.size() > _n) {
- _scores.pop_front();
- }
- if (_scores.size() == _n) {
- _threshold = _scores.front();
+ score_t score = _algo.get_upper_bound();
+ _localScores.push_back(score);
+ if (_localScores.size() == _matchParams.scoresAdjustFrequency) {
+ _matchParams.scores.adjust(&_localScores[0], &_localScores[0] + _localScores.size());
+ _localScores.clear();
}
ref_t *end = _heaps.present_end();
for (ref_t *ref = _heaps.present_begin(); ref != end; ++ref) {
@@ -102,36 +111,51 @@ WeakAndSearch::visitMembers(vespalib::ObjectVisitor &visitor) const
//-----------------------------------------------------------------------------
+template<typename Scorer>
SearchIterator::UP
-WeakAndSearch::createArrayWand(const Terms &terms, uint32_t n, bool strict)
+WeakAndSearch::createArrayWand(const Terms &terms, const MatchParams & params,
+ const Scorer & scorer, uint32_t n, bool strict)
{
if (strict) {
- return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, true>>(terms, n);
+ return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, true>>(terms, params, scorer, n);
} else {
- return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, false>>(terms, n);
+ return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, false>>(terms, params, scorer, n);
}
}
+template<typename Scorer>
SearchIterator::UP
-WeakAndSearch::createHeapWand(const Terms &terms, uint32_t n, bool strict)
+WeakAndSearch::createHeapWand(const Terms &terms, const MatchParams & params, const Scorer & scorer, uint32_t n, bool strict)
{
if (strict) {
- return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, true>>(terms, n);
+ return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, true>>(terms, params, scorer, n);
} else {
- return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, false>>(terms, n);
+ return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, false>>(terms, params, scorer, n);
}
}
+template<typename Scorer>
SearchIterator::UP
-WeakAndSearch::create(const Terms &terms, uint32_t n, bool strict)
+WeakAndSearch::create(const Terms &terms, const MatchParams & params, const Scorer & scorer, uint32_t n, bool strict)
{
if (terms.size() < 128) {
- return createArrayWand(terms, n, strict);
+ return createArrayWand(terms, params, scorer, n, strict);
} else {
- return createHeapWand(terms, n, strict);
+ return createHeapWand(terms, params, scorer, n, strict);
}
}
+SearchIterator::UP
+WeakAndSearch::create(const Terms &terms, const MatchParams & params, uint32_t n, bool strict)
+{
+ return create(terms, params, wand::TermFrequencyScorer(), n, strict);
+}
+
//-----------------------------------------------------------------------------
+template SearchIterator::UP WeakAndSearch::create<wand::TermFrequencyScorer>(const Terms &terms, const MatchParams & params, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict);
+template SearchIterator::UP WeakAndSearch::create<wand::Bm25TermFrequencyScorer>(const Terms &terms, const MatchParams & params, const wand::Bm25TermFrequencyScorer & scorer, uint32_t n, bool strict);
+template SearchIterator::UP WeakAndSearch::createArrayWand<wand::TermFrequencyScorer>(const Terms &terms, const MatchParams & params, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict);
+template SearchIterator::UP WeakAndSearch::createHeapWand<wand::TermFrequencyScorer>(const Terms &terms, const MatchParams & params, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict);
+
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h
index 6a56a04887c..30292af24ab 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h
@@ -9,15 +9,24 @@ namespace search::queryeval {
struct WeakAndSearch : SearchIterator {
using Terms = wand::Terms;
+ using MatchParams = wand::MatchParams;
virtual size_t get_num_terms() const = 0;
virtual int32_t get_term_weight(size_t idx) const = 0;
virtual wand::score_t get_max_score(size_t idx) const = 0;
virtual const Terms &getTerms() const = 0;
virtual uint32_t getN() const = 0;
void visitMembers(vespalib::ObjectVisitor &visitor) const override;
- static SearchIterator::UP createArrayWand(const Terms &terms, uint32_t n, bool strict);
- static SearchIterator::UP createHeapWand(const Terms &terms, uint32_t n, bool strict);
- static SearchIterator::UP create(const Terms &terms, uint32_t n, bool strict);
+ template<typename Scorer>
+ static SearchIterator::UP createArrayWand(const Terms &terms, const MatchParams & matchParams,
+ const Scorer & scorer, uint32_t n, bool strict);
+ template<typename Scorer>
+ static SearchIterator::UP createHeapWand(const Terms &terms, const MatchParams & matchParams,
+ const Scorer & scorer, uint32_t n, bool strict);
+ template<typename Scorer>
+ static SearchIterator::UP create(const Terms &terms, const MatchParams & matchParams,
+ const Scorer & scorer, uint32_t n, bool strict);
+ static SearchIterator::UP create(const Terms &terms, const MatchParams & matchParams,
+ uint32_t n, bool strict);
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
index af99260979d..d1ebb1f4e4e 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
@@ -9,6 +9,7 @@
using vespalib::typify_invoke;
using vespalib::eval::TypifyCellType;
using vespalib::eval::TypedCells;
+using vespalib::eval::Int8Float;
namespace search::tensor {
@@ -26,16 +27,16 @@ public:
_lhs(_tmpSpace.storeLhs(lhs))
{
auto a = _lhs.data();
- _lhs_norm_sq = _computer.dotProduct(a, a, lhs.size);
+ _lhs_norm_sq = _computer.dotProduct(cast(a), cast(a), lhs.size);
}
double calc(TypedCells rhs) const noexcept override {
size_t sz = _lhs.size();
vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs);
auto a = _lhs.data();
auto b = rhs_vector.data();
- double b_norm_sq = _computer.dotProduct(b, b, sz);
+ double b_norm_sq = _computer.dotProduct(cast(b), cast(b), sz);
double squared_norms = _lhs_norm_sq * b_norm_sq;
- double dot_product = _computer.dotProduct(a, b, sz);
+ double dot_product = _computer.dotProduct(cast(a), cast(b), sz);
double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0;
double cosine_similarity = dot_product / div;
double distance = 1.0 - cosine_similarity; // in range [0,2]
@@ -70,19 +71,20 @@ template class BoundAngularDistance<double>;
template <typename FloatType>
BoundDistanceFunction::UP
-AngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
+AngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
using DFT = BoundAngularDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
template <typename FloatType>
BoundDistanceFunction::UP
-AngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
+AngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
using DFT = BoundAngularDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
template class AngularDistanceFunctionFactory<float>;
template class AngularDistanceFunctionFactory<double>;
+template class AngularDistanceFunctionFactory<Int8Float>;
}
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.h b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
index 5e0a060e060..aa51f58b3cd 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
@@ -15,8 +15,8 @@ template <typename FloatType>
class AngularDistanceFunctionFactory : public DistanceFunctionFactory {
public:
AngularDistanceFunctionFactory() = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h b/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h
index 85089196a7a..318271835ad 100644
--- a/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h
+++ b/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h
@@ -19,6 +19,7 @@ class BoundDistanceFunction : public DistanceConverter {
public:
using UP = std::unique_ptr<BoundDistanceFunction>;
using TypedCells = vespalib::eval::TypedCells;
+ using Int8Float = vespalib::eval::Int8Float;
BoundDistanceFunction() noexcept = default;
@@ -29,6 +30,10 @@ public:
// calculate internal distance, early return allowed if > limit
virtual double calc_with_limit(TypedCells rhs, double limit) const noexcept = 0;
+protected:
+ static const double *cast(const double * p) { return p; }
+ static const float *cast(const float * p) { return p; }
+ static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); }
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
index ed08df5866e..f39994dfdcf 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
@@ -6,6 +6,7 @@
using search::attribute::DistanceMetric;
using vespalib::eval::CellType;
+using vespalib::eval::Int8Float;
namespace search::tensor {
@@ -16,25 +17,27 @@ make_distance_function_factory(DistanceMetric variant, CellType cell_type)
case DistanceMetric::Angular:
switch (cell_type) {
case CellType::DOUBLE: return std::make_unique<AngularDistanceFunctionFactory<double>>();
+ case CellType::INT8: return std::make_unique<AngularDistanceFunctionFactory<Int8Float>>();
default: return std::make_unique<AngularDistanceFunctionFactory<float>>();
}
case DistanceMetric::Euclidean:
switch (cell_type) {
- case CellType::DOUBLE: return std::make_unique<EuclideanDistanceFunctionFactory<double>>();
- case CellType::INT8: return std::make_unique<EuclideanDistanceFunctionFactory<vespalib::eval::Int8Float>>();
+ case CellType::DOUBLE: return std::make_unique<EuclideanDistanceFunctionFactory<double>>();
+ case CellType::INT8: return std::make_unique<EuclideanDistanceFunctionFactory<Int8Float>>();
case CellType::BFLOAT16: return std::make_unique<EuclideanDistanceFunctionFactory<vespalib::BFloat16>>();
- default: return std::make_unique<EuclideanDistanceFunctionFactory<float>>();
+ default: return std::make_unique<EuclideanDistanceFunctionFactory<float>>();
}
case DistanceMetric::InnerProduct:
case DistanceMetric::PrenormalizedAngular:
switch (cell_type) {
case CellType::DOUBLE: return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<double>>();
+ case CellType::INT8: return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<Int8Float>>();
default: return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<float>>();
}
case DistanceMetric::Dotproduct:
switch (cell_type) {
case CellType::DOUBLE: return std::make_unique<MipsDistanceFunctionFactory<double>>();
- case CellType::INT8: return std::make_unique<MipsDistanceFunctionFactory<vespalib::eval::Int8Float>>();
+ case CellType::INT8: return std::make_unique<MipsDistanceFunctionFactory<Int8Float>>();
default: return std::make_unique<MipsDistanceFunctionFactory<float>>();
}
case DistanceMetric::GeoDegrees:
@@ -42,7 +45,7 @@ make_distance_function_factory(DistanceMetric variant, CellType cell_type)
case DistanceMetric::Hamming:
switch (cell_type) {
case CellType::DOUBLE: return std::make_unique<HammingDistanceFunctionFactory<double>>();
- case CellType::INT8: return std::make_unique<HammingDistanceFunctionFactory<vespalib::eval::Int8Float>>();
+ case CellType::INT8: return std::make_unique<HammingDistanceFunctionFactory<Int8Float>>();
default: return std::make_unique<HammingDistanceFunctionFactory<float>>();
}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
index 356366d6a77..3b0a0ac91fd 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
@@ -17,8 +17,8 @@ struct DistanceFunctionFactory {
using TypedCells = vespalib::eval::TypedCells;
DistanceFunctionFactory() noexcept = default;
virtual ~DistanceFunctionFactory() = default;
- virtual BoundDistanceFunction::UP for_query_vector(TypedCells lhs) = 0;
- virtual BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) = 0;
+ virtual BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const = 0;
+ virtual BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const = 0;
using UP = std::unique_ptr<DistanceFunctionFactory>;
};
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
index 3ab3a1123eb..62b92b43ad9 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
@@ -16,14 +16,11 @@ using vespalib::BFloat16;
template<typename AttributeCellType>
class BoundEuclideanDistance final : public BoundDistanceFunction {
- using FloatType = std::conditional_t<std::is_same<AttributeCellType,BFloat16>::value,float,AttributeCellType>;
+ using FloatType = std::conditional_t<std::is_same<AttributeCellType, BFloat16>::value, float, AttributeCellType>;
private:
const vespalib::hwaccelrated::IAccelrated & _computer;
mutable TemporaryVectorStore<FloatType> _tmpSpace;
const vespalib::ConstArrayRef<FloatType> _lhs_vector;
- static const double *cast(const double * p) { return p; }
- static const float *cast(const float * p) { return p; }
- static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); }
public:
explicit BoundEuclideanDistance(TypedCells lhs)
: _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()),
@@ -44,16 +41,8 @@ public:
double score = 1.0 / (1.0 + d);
return score;
}
- double calc_with_limit(TypedCells rhs, double limit) const noexcept override {
- vespalib::ConstArrayRef<AttributeCellType> rhs_vector = rhs.typify<AttributeCellType>();
- double sum = 0.0;
- size_t sz = _lhs_vector.size();
- assert(sz == rhs_vector.size());
- for (size_t i = 0; i < sz && sum <= limit; ++i) {
- double diff = _lhs_vector[i] - rhs_vector[i];
- sum += diff*diff;
- }
- return sum;
+ double calc_with_limit(TypedCells rhs, double) const noexcept override {
+ return calc(rhs);
}
};
@@ -64,14 +53,14 @@ template class BoundEuclideanDistance<double>;
template <typename FloatType>
BoundDistanceFunction::UP
-EuclideanDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
+EuclideanDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
using DFT = BoundEuclideanDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
template <typename FloatType>
BoundDistanceFunction::UP
-EuclideanDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
+EuclideanDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
using DFT = BoundEuclideanDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
index 8c39a12bf86..78460c93307 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
@@ -15,8 +15,8 @@ template <typename FloatType>
class EuclideanDistanceFunctionFactory : public DistanceFunctionFactory {
public:
EuclideanDistanceFunctionFactory() noexcept = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
index f5484f40271..a8a48ae4116 100644
--- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
@@ -82,12 +82,12 @@ public:
};
BoundDistanceFunction::UP
-GeoDistanceFunctionFactory::for_query_vector(TypedCells lhs) {
+GeoDistanceFunctionFactory::for_query_vector(TypedCells lhs) const {
return std::make_unique<BoundGeoDistance>(lhs);
}
BoundDistanceFunction::UP
-GeoDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) {
+GeoDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) const {
return std::make_unique<BoundGeoDistance>(lhs);
}
diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
index 1464898421b..a85e31e8ecc 100644
--- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
@@ -14,8 +14,8 @@ namespace search::tensor {
class GeoDistanceFunctionFactory : public DistanceFunctionFactory {
public:
GeoDistanceFunctionFactory() = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
index 7f29a100492..7ea2e440a51 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
@@ -49,14 +49,14 @@ public:
template <typename FloatType>
BoundDistanceFunction::UP
-HammingDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
+HammingDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
using DFT = BoundHammingDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
template <typename FloatType>
BoundDistanceFunction::UP
-HammingDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
+HammingDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
using DFT = BoundHammingDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
index 6e7f96e1e2f..2e3b75cc61f 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
@@ -16,8 +16,8 @@ template <typename FloatType>
class HammingDistanceFunctionFactory : public DistanceFunctionFactory {
public:
HammingDistanceFunctionFactory() = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 1db688156e0..b542c422f50 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -672,15 +672,18 @@ HnswIndex<type>::mutual_reconnect(const LinkArrayRef &cluster, uint32_t level)
std::vector<PairDist> pairs;
for (uint32_t i = 0; i + 1 < cluster.size(); ++i) {
uint32_t n_id_1 = cluster[i];
+ TypedCells n_cells_1 = get_vector(n_id_1);
+ if (n_cells_1.non_existing_attribute_value()) [[unlikely]] continue;
LinkArrayRef n_list_1 = _graph.get_link_array(n_id_1, level);
- std::unique_ptr<BoundDistanceFunction> df;
+ std::unique_ptr<BoundDistanceFunction> df = _distance_ff->for_insertion_vector(n_cells_1);
for (uint32_t j = i + 1; j < cluster.size(); ++j) {
uint32_t n_id_2 = cluster[j];
- if (has_link_to(n_list_1, n_id_2)) continue;
- if (!df) {
- df = _distance_ff->for_insertion_vector(get_vector(n_id_1));
+ if ( ! has_link_to(n_list_1, n_id_2)) {
+ auto n_cells_2 = get_vector(n_id_2);
+ if (!n_cells_2.non_existing_attribute_value()) {
+ pairs.emplace_back(n_id_1, n_id_2, df->calc(n_cells_2));
+ }
}
- pairs.emplace_back(n_id_1, n_id_2, calc_distance(*df, n_id_2));
}
}
std::sort(pairs.begin(), pairs.end());
@@ -1120,6 +1123,32 @@ HnswIndex<type>::count_reachable_nodes() const
return {found_cnt, true};
}
+template <HnswIndexType type>
+uint32_t
+HnswIndex<type>::get_subspaces(uint32_t docid) const noexcept
+{
+ if constexpr (type == HnswIndexType::SINGLE) {
+ return (docid < _graph.nodes.get_size() && _graph.nodes.get_elem_ref(docid).levels_ref().load_relaxed().valid()) ? 1 : 0;
+ } else {
+ return _id_mapping.get_ids(docid).size();
+ }
+}
+
+template <HnswIndexType type>
+uint32_t
+HnswIndex<type>::check_consistency(uint32_t docid_limit) const noexcept
+{
+ uint32_t inconsistencies = 0;
+ for (uint32_t docid = 1; docid < docid_limit; ++docid) {
+ auto index_subspaces = get_subspaces(docid);
+ auto store_subspaces = get_vectors(docid).subspaces();
+ if (index_subspaces != store_subspaces) {
+ ++inconsistencies;
+ }
+ }
+ return inconsistencies;
+}
+
template class HnswIndex<HnswIndexType::SINGLE>;
template class HnswIndex<HnswIndexType::MULTI>;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index 616140f426f..4d4440c1bcb 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -193,6 +193,9 @@ protected:
LinkArray filter_valid_nodeids(uint32_t level, const internal::PreparedAddNode::Links &neighbors, uint32_t self_nodeid);
void internal_complete_add(uint32_t docid, internal::PreparedAddDoc &op);
void internal_complete_add_node(uint32_t nodeid, uint32_t docid, uint32_t subspace, internal::PreparedAddNode &prepared_node);
+
+ // Called from writer only.
+ uint32_t get_subspaces(uint32_t docid) const noexcept;
public:
HnswIndex(const DocVectorAccess& vectors, DistanceFunctionFactory::UP distance_ff,
RandomLevelGenerator::UP level_generator, const HnswIndexConfig& cfg);
@@ -248,6 +251,9 @@ public:
uint32_t get_active_nodes() const noexcept { return _graph.get_active_nodes(); }
+ // Called from writer only.
+ uint32_t check_consistency(uint32_t docid_limit) const noexcept override;
+
// Should only be used by unit tests.
HnswTestNode get_node(uint32_t nodeid) const;
void set_node(uint32_t nodeid, const HnswTestNode &node);
diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
index c42242d8dc8..5bc727ebd97 100644
--- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
@@ -19,9 +19,6 @@ class BoundMipsDistanceFunction final : public BoundDistanceFunction {
using ExtraDimT = std::conditional_t<extra_dim,double,std::monostate>;
[[no_unique_address]] ExtraDimT _lhs_extra_dim;
- static const double *cast(const double * p) { return p; }
- static const float *cast(const float * p) { return p; }
- static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); }
public:
BoundMipsDistanceFunction(TypedCells lhs, MaximumSquaredNormStore& sq_norm_store)
: BoundDistanceFunction(),
@@ -76,13 +73,13 @@ public:
template<typename FloatType>
BoundDistanceFunction::UP
-MipsDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
+MipsDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
return std::make_unique<BoundMipsDistanceFunction<FloatType, false>>(lhs, *_sq_norm_store);
}
template<typename FloatType>
BoundDistanceFunction::UP
-MipsDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
+MipsDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
return std::make_unique<BoundMipsDistanceFunction<FloatType, true>>(lhs, *_sq_norm_store);
};
diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
index 67a6eb58de0..336511ab78f 100644
--- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
+++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
@@ -62,8 +62,8 @@ public:
MipsDistanceFunctionFactory() noexcept = default;
~MipsDistanceFunctionFactory() override = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
index 8462ff05eca..c2bbd17ce63 100644
--- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
@@ -114,6 +114,12 @@ public:
double distance_threshold) const = 0;
virtual DistanceFunctionFactory &distance_function_factory() const = 0;
+
+ /*
+ * Used when checking consistency during load.
+ * Called from writer only.
+ */
+ virtual uint32_t check_consistency(uint32_t docid_limit) const noexcept = 0;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
index 4bc90001227..6f0966e7fb3 100644
--- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
@@ -6,6 +6,7 @@
using vespalib::typify_invoke;
using vespalib::eval::TypifyCellType;
+using vespalib::eval::Int8Float;
namespace search::tensor {
@@ -23,7 +24,7 @@ public:
_lhs(_tmpSpace.storeLhs(lhs))
{
auto a = _lhs.data();
- _lhs_norm_sq = _computer.dotProduct(a, a, lhs.size);
+ _lhs_norm_sq = _computer.dotProduct(cast(a), cast(a), lhs.size);
if (_lhs_norm_sq <= 0.0) {
_lhs_norm_sq = 1.0;
}
@@ -32,7 +33,7 @@ public:
vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs);
auto a = _lhs.data();
auto b = rhs_vector.data();
- double dot_product = _computer.dotProduct(a, b, _lhs.size());
+ double dot_product = _computer.dotProduct(cast(a), cast(b), _lhs.size());
double distance = _lhs_norm_sq - dot_product;
return distance;
}
@@ -62,19 +63,20 @@ template class BoundPrenormalizedAngularDistance<double>;
template <typename FloatType>
BoundDistanceFunction::UP
-PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
+PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
using DFT = BoundPrenormalizedAngularDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
template <typename FloatType>
BoundDistanceFunction::UP
-PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
+PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
using DFT = BoundPrenormalizedAngularDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
template class PrenormalizedAngularDistanceFunctionFactory<float>;
template class PrenormalizedAngularDistanceFunctionFactory<double>;
+template class PrenormalizedAngularDistanceFunctionFactory<Int8Float>;
}
diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
index 7e3a8c2c676..6a791e0b6ec 100644
--- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
@@ -14,8 +14,8 @@ template <typename FloatType>
class PrenormalizedAngularDistanceFunctionFactory : public DistanceFunctionFactory {
public:
PrenormalizedAngularDistanceFunctionFactory() = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp
index 4753e9d7c87..097ea67cc9e 100644
--- a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp
@@ -1,11 +1,13 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "temporary_vector_store.h"
+#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
using vespalib::ConstArrayRef;
using vespalib::ArrayRef;
using vespalib::eval::CellType;
using vespalib::eval::TypedCells;
+using vespalib::hwaccelrated::IAccelrated;
namespace search::tensor {
@@ -13,18 +15,29 @@ namespace {
+// Forward declaration of the generic convert_cells template; it exists to
+// attach the noinline attribute to the definition that follows.
template<typename FromType, typename ToType>
ConstArrayRef<ToType>
+convert_cells(ArrayRef<ToType> space, TypedCells cells) noexcept __attribute__((noinline));
+
+// Copies every cell of `cells`, viewed as FromType, into `space`,
+// converting each value with static_cast<ToType>, and returns `space`.
+// No size check is performed here: the former assert that cells.size
+// equals space.size() is removed by this change, so `space` must have
+// room for all cells. NOTE(review): confirm that callers guarantee this.
+template<typename FromType, typename ToType>
+ConstArrayRef<ToType>
convert_cells(ArrayRef<ToType> space, TypedCells cells) noexcept
{
- assert(cells.size == space.size());
- auto old_cells = cells.typify<FromType>();
+ auto old_cells = cells.unsafe_typify<FromType>();
ToType *p = space.data();
for (FromType value : old_cells) {
- ToType conv(value);
- *p++ = conv;
+ *p++ = static_cast<ToType>(value);
}
return space;
}
+// Specialization for bfloat16 -> float. Instead of converting cell by cell,
+// the whole conversion is handed to the IAccelrated implementation.
+// The number of converted cells is space.size(); cells.size is not consulted.
+template<>
+ConstArrayRef<float>
+convert_cells<vespalib::BFloat16, float>(ArrayRef<float> space, TypedCells cells) noexcept
+{
+    static const IAccelrated & hw_accel = IAccelrated::getAccelerator();
+    const auto * src = reinterpret_cast<const uint16_t *>(cells.data);
+    float * dst = space.data();
+    hw_accel.convert_bfloat16_to_float(src, dst, space.size());
+    return space;
+}
+
template <typename ToType>
struct ConvertCellsSelector
{
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
index a5d670096ab..9f551166a1d 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
@@ -418,18 +418,25 @@ TensorAttribute::complete_set_tensor(DocId docid, const vespalib::eval::Value& t
std::unique_ptr<PrepareResult> prepare_result)
{
if (_index && !prepare_result) {
- // The tensor cells are unchanged
- if (!_is_dense) {
- // but labels might have changed.
- EntryRef ref = _tensorStore.store_tensor(tensor);
- assert(ref.valid());
- setTensorRef(docid, ref);
+ VectorBundle vectors(tensor.cells().data, tensor.index().size(), _subspace_type);
+ if (tensor_cells_are_unchanged(docid, vectors)) {
+ // The tensor cells are unchanged
+ if (!_is_dense) {
+ // but labels might have changed.
+ EntryRef ref = _tensorStore.store_tensor(tensor);
+ assert(ref.valid());
+ setTensorRef(docid, ref);
+ }
+ return;
}
- return;
}
internal_set_tensor(docid, tensor);
if (_index) {
- _index->complete_add_document(docid, std::move(prepare_result));
+ if (prepare_result) {
+ _index->complete_add_document(docid, std::move(prepare_result));
+ } else {
+ _index->add_document(docid);
+ }
}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp
index 28c4099c38b..223c9d7d1f2 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp
@@ -322,6 +322,9 @@ TensorAttributeLoader::on_load(vespalib::Executor* executor)
if (!load_index()) {
return false;
}
+ if (dense_store == nullptr) {
+ check_consistency(docid_limit);
+ }
} else {
build_index(executor, docid_limit);
}
@@ -329,4 +332,15 @@ TensorAttributeLoader::on_load(vespalib::Executor* executor)
return true;
}
+/*
+ * Runs the consistency check of the index, measures how long it takes
+ * with the steady clock, and logs the reported inconsistency count,
+ * the attribute name and the elapsed time in seconds.
+ */
+void
+TensorAttributeLoader::check_consistency(uint32_t docid_limit)
+{
+    const auto start_time = vespalib::steady_clock::now();
+    const uint32_t num_inconsistencies = _index->check_consistency(docid_limit);
+    const auto end_time = vespalib::steady_clock::now();
+    const double elapsed_s = vespalib::to_s(end_time - start_time);
+    LOG(info, "%u inconsistencies detected after loading index for attribute %s, (check used %6.3fs)",
+        num_inconsistencies, _attr.getName().c_str(), elapsed_s);
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h
index 6bf68957adc..59baaf0b6dc 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h
@@ -34,6 +34,7 @@ class TensorAttributeLoader {
void load_tensor_store(search::attribute::BlobSequenceReader& reader, uint32_t docid_limit);
void build_index(vespalib::Executor* executor, uint32_t docid_limit);
bool load_index();
+ void check_consistency(uint32_t docid_limit);
public:
TensorAttributeLoader(TensorAttribute& attr, GenerationHandler& generation_handler, RefVector& ref_vector, TensorStore& store, NearestNeighborIndex* index);
diff --git a/searchlib/src/vespa/searchlib/test/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/CMakeLists.txt
index 83e185dbfb6..4685ad07808 100644
--- a/searchlib/src/vespa/searchlib/test/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/test/CMakeLists.txt
@@ -14,6 +14,7 @@ vespa_add_library(searchlib_test
schema_builder.cpp
string_field_builder.cpp
vector_buffer_writer.cpp
+ weightedchildrenverifiers.cpp
$<TARGET_OBJECTS:searchlib_test_fakedata>
$<TARGET_OBJECTS:searchlib_searchlib_test_diskindex>
$<TARGET_OBJECTS:searchlib_test_gtest_migration>
diff --git a/searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.cpp b/searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.cpp
new file mode 100644
index 00000000000..b22dd1a3aa9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.cpp
@@ -0,0 +1,71 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "weightedchildrenverifiers.h"
+
+using search::queryeval::SearchIterator;
+
+namespace search::test {
+
+// Initializes _weights with _num_children entries, each set to 1.
+WeightedChildrenVerifier::WeightedChildrenVerifier()
+    : _weights(_num_children, 1)
+{
+}
+
+WeightedChildrenVerifier::~WeightedChildrenVerifier() = default;
+
+
+// Deals the expected docids round-robin over _num_children split lists:
+// the docid at position i ends up in list (i % _num_children).
+IteratorChildrenVerifier::IteratorChildrenVerifier()
+    : WeightedChildrenVerifier(),
+      _split_lists(_num_children)
+{
+    auto expected_docids = getExpectedDocIds();
+    size_t target_list = 0;
+    for (auto docid : expected_docids) {
+        _split_lists[target_list].push_back(docid);
+        target_list = (target_list + 1) % _num_children;
+    }
+}
+
+IteratorChildrenVerifier::~IteratorChildrenVerifier() = default;
+
+// Creates one child iterator per split list and passes the raw pointers
+// to the children-taking create() overload.
+// The `strict` argument is ignored: every child is created with `true`.
+SearchIterator::UP
+IteratorChildrenVerifier::create(bool strict) const {
+    (void) strict;
+    std::vector<SearchIterator*> children;
+    // Reserve up front so that push_back cannot reallocate (and throw)
+    // after release() has turned the iterator into a raw owning pointer.
+    children.reserve(_num_children);
+    for (size_t i = 0; i < _num_children; ++i) {
+        children.push_back(createIterator(_split_lists[i], true).release());
+    }
+    return create(children);
+}
+
+// Default implementation: ignores the children and returns an empty
+// SearchIterator::UP.
+SearchIterator::UP
+IteratorChildrenVerifier::create(const std::vector<SearchIterator*> &) const {
+    return SearchIterator::UP();
+}
+
+
+// Adds getDocIdLimit() docs to the helper, then registers each expected
+// docid via set_doc() with key (position % _num_children) and the value 1.
+DwwIteratorChildrenVerifier::DwwIteratorChildrenVerifier()
+    : WeightedChildrenVerifier(),
+      _helper()
+{
+    _helper.add_docs(getDocIdLimit());
+    auto expected_docids = getExpectedDocIds();
+    size_t position = 0;
+    for (auto docid : expected_docids) {
+        _helper.set_doc(docid, position % _num_children, 1);
+        ++position;
+    }
+}
+
+DwwIteratorChildrenVerifier::~DwwIteratorChildrenVerifier() = default;
+
+// Looks up the terms "0" .. "<_num_children - 1>" in the helper's store,
+// lets the store append iterators for each posting list to `children`,
+// and forwards them to the children-taking create() overload.
+// The `strict` argument is ignored.
+SearchIterator::UP
+DwwIteratorChildrenVerifier::create(bool strict) const {
+    (void) strict;
+    std::vector<DocidWithWeightIterator> children;
+    for (size_t child = 0; child < _num_children; ++child) {
+        const auto term = vespalib::make_string("%zu", child);
+        auto dict_entry = _helper.dww().lookup(term.c_str(), _helper.dww().get_dictionary_snapshot());
+        _helper.dww().create(dict_entry.posting_idx, children);
+    }
+    return create(std::move(children));
+}
+// Default implementation: ignores the children and returns an empty
+// SearchIterator::UP.
+SearchIterator::UP
+DwwIteratorChildrenVerifier::create(std::vector<DocidWithWeightIterator> &&) const {
+    return SearchIterator::UP();
+}
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.h b/searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.h
index 86d2fb9aa67..037d1086950 100644
--- a/searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.h
+++ b/searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.h
@@ -8,11 +8,8 @@ namespace search::test {
class WeightedChildrenVerifier : public SearchIteratorVerifier {
public:
- WeightedChildrenVerifier()
- : _weights(_num_children, 1)
- { }
- ~WeightedChildrenVerifier() override {}
-
+ WeightedChildrenVerifier();
+ ~WeightedChildrenVerifier() override;
protected:
static constexpr size_t _num_children = 7;
mutable fef::TermFieldMatchData _tfmd;
@@ -21,58 +18,21 @@ protected:
class IteratorChildrenVerifier : public WeightedChildrenVerifier {
public:
- IteratorChildrenVerifier()
- : WeightedChildrenVerifier(),
- _split_lists(_num_children)
- {
- auto full_list = getExpectedDocIds();
- for (size_t i = 0; i < full_list.size(); ++i) {
- _split_lists[i % _num_children].push_back(full_list[i]);
- }
- }
- ~IteratorChildrenVerifier() override { }
- SearchIterator::UP create(bool strict) const override {
- (void) strict;
- std::vector<SearchIterator*> children;
- for (size_t i = 0; i < _num_children; ++i) {
- children.push_back(createIterator(_split_lists[i], true).release());
- }
- return create(children);
- }
+ IteratorChildrenVerifier();
+ ~IteratorChildrenVerifier() override;
+ SearchIterator::UP create(bool strict) const override;
protected:
- virtual SearchIterator::UP create(const std::vector<SearchIterator*> &children) const {
- (void) children;
- return SearchIterator::UP();
- }
+ virtual SearchIterator::UP create(const std::vector<SearchIterator*> &children) const;
std::vector<DocIds> _split_lists;
};
class DwwIteratorChildrenVerifier : public WeightedChildrenVerifier {
public:
- DwwIteratorChildrenVerifier() :
- WeightedChildrenVerifier(),
- _helper()
- {
- _helper.add_docs(getDocIdLimit());
- auto full_list = getExpectedDocIds();
- for (size_t i = 0; i < full_list.size(); ++i) {
- _helper.set_doc(full_list[i], i % _num_children, 1);
- }
- }
- ~DwwIteratorChildrenVerifier() override {}
- SearchIterator::UP create(bool strict) const override {
- (void) strict;
- std::vector<DocidWithWeightIterator> children;
- for (size_t i = 0; i < _num_children; ++i) {
- auto dict_entry = _helper.dww().lookup(vespalib::make_string("%zu", i).c_str(), _helper.dww().get_dictionary_snapshot());
- _helper.dww().create(dict_entry.posting_idx, children);
- }
- return create(std::move(children));
- }
+ DwwIteratorChildrenVerifier();
+ ~DwwIteratorChildrenVerifier() override;
+ SearchIterator::UP create(bool strict) const override;
protected:
- virtual SearchIterator::UP create(std::vector<DocidWithWeightIterator> &&) const {
- return {};
- }
+ virtual SearchIterator::UP create(std::vector<DocidWithWeightIterator> &&) const;
DocumentWeightAttributeHelper _helper;
};
diff --git a/searchlib/src/vespa/searchlib/util/token_extractor.cpp b/searchlib/src/vespa/searchlib/util/token_extractor.cpp
index a78f30afe21..6e1573c4551 100644
--- a/searchlib/src/vespa/searchlib/util/token_extractor.cpp
+++ b/searchlib/src/vespa/searchlib/util/token_extractor.cpp
@@ -143,8 +143,6 @@ TokenExtractor::extract(std::vector<SpanTerm>& terms, const document::StringFiel
{
auto tree = StringFieldValue::findTree(trees, SPANTREE_NAME);
if (tree == nullptr) {
- /* field might not be annotated if match type is exact */
- consider_word(terms, text, Span(0, text.size()), nullptr, doc);
return;
}
for (const Annotation & annotation : *tree) {