aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Geir Storli <geirst@vespa.ai> 2024-04-16 20:43:25 +0200
committer GitHub <noreply@github.com> 2024-04-16 20:43:25 +0200
commit 91510296ab4fad61f3755de09cccd2b1bb9fc562 (patch)
tree a0f38da275439888bcc82f06e8b7f7a2d74e0285
parent d3c2f37d6565bbb5ba671a16125f61d2a453317b (diff)
parent d4885cda1fee09b8c2828f27a6d06d177a9b8bec (diff)
Merge pull request #30935 from vespa-engine/geirst/adjust-bitvector-strict-cost
Adjust strict cost of bitvector after benchmarking.
-rw-r--r-- searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp 10
-rw-r--r-- searchlib/src/tests/queryeval/iterator_benchmark/common.cpp 6
-rw-r--r-- searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp 19
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/flow_tuning.h 11
4 files changed, 41 insertions, 5 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp
index 87004d7e5f2..0c986422be6 100644
--- a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp
+++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp
@@ -2,10 +2,11 @@
#include "lid_allocator.h"
#include <vespa/searchlib/common/bitvectoriterator.h>
-#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/searchlib/fef/matchdata.h>
-#include <vespa/searchlib/queryeval/full_search.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/queryeval/flow_tuning.h>
+#include <vespa/searchlib/queryeval/full_search.h>
#include <mutex>
#include <vespa/log/log.h>
@@ -19,6 +20,8 @@ using search::queryeval::SearchIterator;
using search::queryeval::SimpleLeafBlueprint;
using vespalib::GenerationHolder;
+using namespace search::queryeval::flow;
+
namespace proton::documentmetastore {
LidAllocator::LidAllocator(uint32_t size,
@@ -206,7 +209,8 @@ private:
return search::BitVectorIterator::create(&_activeLids, get_docid_limit(), *tfmd, strict);
}
FlowStats calculate_flow_stats(uint32_t docid_limit) const override {
- return default_flow_stats(docid_limit, _activeLids.size(), 0);
+ double rel_est = abs_to_rel_est(_activeLids.size(), docid_limit);
+ return {rel_est, bitvector_cost(), bitvector_strict_cost(rel_est)};
}
SearchIterator::UP
createLeafSearch(const TermFieldMatchDataArray &tfmda) const override
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/common.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/common.cpp
index d2b5ec2cb8b..1db9cd58d46 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/common.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/common.cpp
@@ -20,7 +20,11 @@ to_string(const Config& attr_config)
oss << col_type.asString() << "<" << basic_type.asString() << ">";
}
if (attr_config.fastSearch()) {
- oss << "(fs)";
+ oss << "(fs";
+ if (attr_config.getIsFilter()) {
+ oss << ",rf";
+ }
+ oss << ")";
}
return oss.str();
}
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
index d162ef05b06..f7a358efb26 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
@@ -792,10 +792,11 @@ gen_ratios(double middle, double range_multiplier, size_t num_samples)
}
FieldConfig
-make_attr_config(BasicType basic_type, CollectionType col_type, bool fast_search)
+make_attr_config(BasicType basic_type, CollectionType col_type, bool fast_search, bool rank_filter = false)
{
Config cfg(basic_type, col_type);
cfg.setFastSearch(fast_search);
+ cfg.setIsFilter(rank_filter);
return FieldConfig(cfg);
}
@@ -812,6 +813,7 @@ const std::vector<double> base_hit_ratios = {0.0001, 0.001, 0.01, 0.1, 0.5, 1.0}
const std::vector<double> filter_hit_ratios = {0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0};
const auto int32 = make_attr_config(BasicType::INT32, CollectionType::SINGLE, false);
const auto int32_fs = make_attr_config(BasicType::INT32, CollectionType::SINGLE, true);
+const auto int32_fs_rf = make_attr_config(BasicType::INT32, CollectionType::SINGLE, true, true);
const auto int32_array = make_attr_config(BasicType::INT32, CollectionType::ARRAY, false);
const auto int32_array_fs = make_attr_config(BasicType::INT32, CollectionType::ARRAY, true);
const auto int32_wset = make_attr_config(BasicType::INT32, CollectionType::WSET, false);
@@ -940,6 +942,15 @@ TEST(IteratorBenchmark, analyze_and_with_filter_vs_in)
}
}
+TEST(IteratorBenchmark, analyze_and_with_bitvector_vs_in)
+{
+ for (uint32_t children: {10, 100, 1000, 10000}) {
+ run_and_benchmark({int32_fs, QueryOperator::In, {0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.40, 0.45, 0.50, 0.55, 0.60}, children, true},
+ {int32_fs_rf, QueryOperator::Term, {1.0}, 1, true}, // A single term with hit ratio 1.0 on the fast-search + rank-filter attribute: this setup returns a bitvector matching all documents.
+ num_docs);
+ }
+}
+
TEST(IteratorBenchmark, analyze_and_with_filter_vs_in_array)
{
for (uint32_t children: {10, 100, 1000}) {
@@ -958,6 +969,12 @@ TEST(IteratorBenchmark, analyze_and_with_filter_vs_or)
}
}
+TEST(IteratorBenchmark, analyze_btree_vs_bitvector_iterators_strict)
+{
+ BenchmarkSetup setup(num_docs, {int32_fs, int32_fs_rf}, {QueryOperator::Term}, {true}, {0.1, 0.2, 0.4, 0.5, 0.6, 0.8, 1.0}, {1}); // Term queries on the fast-search attribute with and without rank-filter; presumably {true} selects strict iteration and {1} a single child (confirm against BenchmarkSetup).
+ run_benchmarks(setup);
+}
+
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
int res = RUN_ALL_TESTS();
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
index dae0bd82cd0..356ecd4c992 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
@@ -61,6 +61,17 @@ inline double btree_strict_cost(double my_est) {
return my_est;
}
+// Non-strict cost of matching in a bitvector: a constant 1.0 that does not depend on any hit estimate.
+inline double bitvector_cost() {
+ return 1.0;
+}
+
+// Strict cost of matching in a bitvector: 1.5 times the estimate passed in as my_est.
+// Test used: IteratorBenchmark::analyze_btree_vs_bitvector_iterators_strict
+inline double bitvector_strict_cost(double my_est) {
+ return 1.5 * my_est;
+}
+
// Non-strict cost of matching in a disk index posting list.
inline double disk_index_cost() {
return 1.5;