diff options
author | Geir Storli <geirst@vespa.ai> | 2024-04-16 20:43:25 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-16 20:43:25 +0200 |
commit | 91510296ab4fad61f3755de09cccd2b1bb9fc562 (patch) | |
tree | a0f38da275439888bcc82f06e8b7f7a2d74e0285 | |
parent | d3c2f37d6565bbb5ba671a16125f61d2a453317b (diff) | |
parent | d4885cda1fee09b8c2828f27a6d06d177a9b8bec (diff) |
Merge pull request #30935 from vespa-engine/geirst/adjust-bitvector-strict-cost
Adjust strict cost of bitvector after benchmarking.
4 files changed, 41 insertions, 5 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp index 87004d7e5f2..0c986422be6 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp @@ -2,10 +2,11 @@ #include "lid_allocator.h" #include <vespa/searchlib/common/bitvectoriterator.h> -#include <vespa/searchlib/fef/termfieldmatchdataarray.h> #include <vespa/searchlib/fef/matchdata.h> -#include <vespa/searchlib/queryeval/full_search.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> #include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/queryeval/flow_tuning.h> +#include <vespa/searchlib/queryeval/full_search.h> #include <mutex> #include <vespa/log/log.h> @@ -19,6 +20,8 @@ using search::queryeval::SearchIterator; using search::queryeval::SimpleLeafBlueprint; using vespalib::GenerationHolder; +using namespace search::queryeval::flow; + namespace proton::documentmetastore { LidAllocator::LidAllocator(uint32_t size, @@ -206,7 +209,8 @@ private: return search::BitVectorIterator::create(&_activeLids, get_docid_limit(), *tfmd, strict); } FlowStats calculate_flow_stats(uint32_t docid_limit) const override { - return default_flow_stats(docid_limit, _activeLids.size(), 0); + double rel_est = abs_to_rel_est(_activeLids.size(), docid_limit); + return {rel_est, bitvector_cost(), bitvector_strict_cost(rel_est)}; } SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda) const override diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/common.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/common.cpp index d2b5ec2cb8b..1db9cd58d46 100644 --- a/searchlib/src/tests/queryeval/iterator_benchmark/common.cpp +++ b/searchlib/src/tests/queryeval/iterator_benchmark/common.cpp @@ -20,7 +20,11 @@ to_string(const Config& attr_config) oss << col_type.asString() << "<" << basic_type.asString() << ">"; } if (attr_config.fastSearch()) { - oss << "(fs)"; + oss << "(fs"; + if (attr_config.getIsFilter()) { + oss << ",rf"; + } + oss << ")"; } return oss.str(); } diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp index d162ef05b06..f7a358efb26 100644 --- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp +++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp @@ -792,10 +792,11 @@ gen_ratios(double middle, double range_multiplier, size_t num_samples) } FieldConfig -make_attr_config(BasicType basic_type, CollectionType col_type, bool fast_search) +make_attr_config(BasicType basic_type, CollectionType col_type, bool fast_search, bool rank_filter = false) { Config cfg(basic_type, col_type); cfg.setFastSearch(fast_search); + cfg.setIsFilter(rank_filter); return FieldConfig(cfg); } @@ -812,6 +813,7 @@ const std::vector<double> base_hit_ratios = {0.0001, 0.001, 0.01, 0.1, 0.5, 1.0} const std::vector<double> filter_hit_ratios = {0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0}; const auto int32 = make_attr_config(BasicType::INT32, CollectionType::SINGLE, false); const auto int32_fs = make_attr_config(BasicType::INT32, CollectionType::SINGLE, true); +const auto int32_fs_rf = make_attr_config(BasicType::INT32, CollectionType::SINGLE, true, true); const auto int32_array = make_attr_config(BasicType::INT32, CollectionType::ARRAY, false); const auto int32_array_fs = make_attr_config(BasicType::INT32, CollectionType::ARRAY, true); const auto int32_wset = make_attr_config(BasicType::INT32, CollectionType::WSET, false); @@ -940,6 +942,15 @@ TEST(IteratorBenchmark, analyze_and_with_filter_vs_in) } } +TEST(IteratorBenchmark, analyze_and_with_bitvector_vs_in) +{ + for (uint32_t children: {10, 100, 1000, 10000}) { + run_and_benchmark({int32_fs, QueryOperator::In, {0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.40, 0.45, 0.50, 0.55, 0.60}, children, true}, + {int32_fs_rf, QueryOperator::Term, {1.0}, 1, true}, // this setup returns a bitvector matching all documents. + num_docs); + } +} + TEST(IteratorBenchmark, analyze_and_with_filter_vs_in_array) { for (uint32_t children: {10, 100, 1000}) { @@ -958,6 +969,12 @@ TEST(IteratorBenchmark, analyze_and_with_filter_vs_or) } } +TEST(IteratorBenchmark, analyze_btree_vs_bitvector_iterators_strict) +{ + BenchmarkSetup setup(num_docs, {int32_fs, int32_fs_rf}, {QueryOperator::Term}, {true}, {0.1, 0.2, 0.4, 0.5, 0.6, 0.8, 1.0}, {1}); + run_benchmarks(setup); +} + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); int res = RUN_ALL_TESTS(); diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h index dae0bd82cd0..356ecd4c992 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h @@ -61,6 +61,17 @@ inline double btree_strict_cost(double my_est) { return my_est; } +// Non-strict cost of matching in a bitvector. +inline double bitvector_cost() { + return 1.0; +} + +// Strict cost of matching in a bitvector. +// Test used: IteratorBenchmark::analyze_btree_vs_bitvector_iterators_strict +inline double bitvector_strict_cost(double my_est) { + return 1.5 * my_est; +} + // Non-strict cost of matching in a disk index posting list. inline double disk_index_cost() { return 1.5; |