summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Geir Storli <geirst@vespa.ai> 2024-03-05 16:25:46 +0100
committer GitHub <noreply@github.com> 2024-03-05 16:25:46 +0100
commit d6e55aade839b51243eac107cce220e4d063c898 (patch)
tree 523500fb934b0a9d91962d3fe79777b6aa7ee03a /searchlib
parent c70ca1e6d3b86df2f3f2f9575d2fe863b371b1c0 (diff)
parent 65895c139c33e1fc7dd86b580530916decc2e27d (diff)
Merge pull request #30493 from vespa-engine/geirst/flow-tuning-integration
Integrate flow tuning for attribute, memory and disk index search.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp 35
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h 7
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp 10
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_index.cpp 8
4 files changed, 47 insertions, 13 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index a5ca37906ba..2129ac40724 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -11,6 +11,7 @@
#include "multi_term_or_filter_search.h"
#include "predicate_attribute.h"
#include <vespa/eval/eval/value.h>
+#include <vespa/searchcommon/attribute/config.h>
#include <vespa/searchlib/common/location.h>
#include <vespa/searchlib/common/locationiterators.h>
#include <vespa/searchlib/query/query_term_decoder.h>
@@ -46,10 +47,11 @@
LOG_SETUP(".searchlib.attribute.attribute_blueprint_factory");
using search::attribute::BasicType;
-using search::attribute::SearchContextParams;
using search::attribute::CollectionType;
+using search::attribute::Config;
using search::attribute::IAttributeVector;
using search::attribute::ISearchContext;
+using search::attribute::SearchContextParams;
using search::fef::TermFieldMatchData;
using search::fef::TermFieldMatchDataArray;
using search::fef::TermFieldMatchDataPosition;
@@ -87,6 +89,10 @@ using search::queryeval::SearchIterator;
using search::queryeval::Searchable;
using search::queryeval::SimpleLeafBlueprint;
using search::queryeval::WeightedSetTermBlueprint;
+using search::queryeval::flow::btree_cost;
+using search::queryeval::flow::btree_strict_cost;
+using search::queryeval::flow::lookup_cost;
+using search::queryeval::flow::lookup_strict_cost;
using search::tensor::DenseTensorAttribute;
using search::tensor::ITensorAttribute;
using vespalib::Issue;
@@ -115,6 +121,19 @@ private:
};
//-----------------------------------------------------------------------------
+/**
+ * Counts the indirections implied by an attribute's type: one when the basic type
+ * is STRING, plus one when the collection type is anything other than SINGLE.
+ * The result is therefore 0, 1 or 2.
+ **/
+size_t
+get_num_indirections(const BasicType& basic_type, const CollectionType& col_type)
+{
+    // Each condition contributes independently, so compute the two terms separately.
+    const size_t string_hops = (basic_type == BasicType::STRING) ? 1 : 0;
+    const size_t collection_hops = (col_type != CollectionType::SINGLE) ? 1 : 0;
+    return string_hops + collection_hops;
+}
+
/**
* Blueprint for creating regular, stack-based attribute iterators.
**/
@@ -141,11 +160,12 @@ public:
if (_hit_estimate.is_unknown()) {
// E.g. attributes without fast-search are not able to provide a hit estimate.
// In this case we just assume matching half of the document corpus.
- // In addition, we are not able to skip documents efficiently when being strict.
- return {0.5, 1.0, 1.0};
+ // In addition, matching is lookup based, and we are not able to skip documents efficiently when being strict.
+ size_t indirections = get_num_indirections(_attr.getBasicType(), _attr.getCollectionType());
+ return {0.5, lookup_cost(indirections), lookup_strict_cost(indirections)};
} else {
double rel_est = abs_to_rel_est(_hit_estimate.est_hits(), docid_limit);
- return {rel_est, 1.0, rel_est};
+ return {rel_est, btree_cost(), btree_strict_cost(rel_est)};
}
}
@@ -480,9 +500,12 @@ public:
 double estimate(const IDirectPostingStore::LookupResult &term) const noexcept { // hit estimate for a single term
 return abs_to_rel_est(term.posting_size, docid_limit); // abs_to_rel_est applied to the term's posting_size and docid_limit
 }
- double cost(const IDirectPostingStore::LookupResult &) const noexcept { return 1.0; }
+ double cost(const IDirectPostingStore::LookupResult &) const noexcept { // non-strict cost of a term; the lookup result itself is ignored
+ return btree_cost(); // flow-tuning value replacing the previously hard-coded 1.0
+ }
 double strict_cost(const IDirectPostingStore::LookupResult &term) const noexcept { // strict cost of a term, derived from its relative estimate
- return abs_to_rel_est(term.posting_size, docid_limit);
+ double rel_est = abs_to_rel_est(term.posting_size, docid_limit); // same expression estimate() evaluates for this term
+ return btree_strict_cost(rel_est); // previously the relative estimate itself was returned as the strict cost
 }
};
double child_est = OrFlow::estimate_of(MyAdapter(docid_limit), _terms);
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
index 076c375091a..5aafe4af72b 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
@@ -80,9 +80,12 @@ public:
 double estimate(const IDirectPostingStore::LookupResult &term) const noexcept { // hit estimate for a single term
 return abs_to_rel_est(term.posting_size, docid_limit); // abs_to_rel_est applied to the term's posting_size and docid_limit
 }
- double cost(const IDirectPostingStore::LookupResult &) const noexcept { return 1.0; }
+ double cost(const IDirectPostingStore::LookupResult &) const noexcept { // non-strict cost of a term; the lookup result itself is ignored
+ return search::queryeval::flow::btree_cost(); // flow-tuning value replacing the previously hard-coded 1.0
+ }
 double strict_cost(const IDirectPostingStore::LookupResult &term) const noexcept { // strict cost of a term, derived from its relative estimate
- return abs_to_rel_est(term.posting_size, docid_limit);
+ double rel_est = abs_to_rel_est(term.posting_size, docid_limit); // same expression estimate() evaluates for this term
+ return search::queryeval::flow::btree_strict_cost(rel_est); // previously the relative estimate itself was returned as the strict cost
 }
};
double est = OrFlow::estimate_of(MyAdapter(docid_limit), _terms);
diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp
index 97c8dd391ba..99be653a398 100644
--- a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp
@@ -4,6 +4,7 @@
#include <vespa/searchlib/common/bitvectoriterator.h>
#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h>
#include <vespa/searchlib/queryeval/filter_wrapper.h>
+#include <vespa/searchlib/queryeval/flow_tuning.h>
#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
#include <vespa/vespalib/objects/visit.h>
#include <vespa/vespalib/util/stringfmt.h>
@@ -14,12 +15,14 @@ LOG_SETUP(".diskindex.disktermblueprint");
using search::BitVectorIterator;
using search::fef::TermFieldMatchDataArray;
using search::index::Schema;
+using search::queryeval::Blueprint;
using search::queryeval::BooleanMatchIteratorWrapper;
using search::queryeval::FieldSpec;
using search::queryeval::FieldSpecBaseList;
-using search::queryeval::SearchIterator;
using search::queryeval::LeafBlueprint;
-using search::queryeval::Blueprint;
+using search::queryeval::SearchIterator;
+using search::queryeval::flow::disk_index_cost;
+using search::queryeval::flow::disk_index_strict_cost;
namespace search::diskindex {
@@ -68,7 +71,8 @@ DiskTermBlueprint::fetchPostings(const queryeval::ExecuteInfo &execInfo)
queryeval::FlowStats
DiskTermBlueprint::calculate_flow_stats(uint32_t docid_limit) const
{
- return default_flow_stats(docid_limit, _lookupRes->counts._numDocs, 0);
+ double rel_est = abs_to_rel_est(_lookupRes->counts._numDocs, docid_limit); // relative estimate from the lookup result's _numDocs and docid_limit
+ return {rel_est, disk_index_cost(), disk_index_strict_cost(rel_est)}; // the estimate, then the disk index non-strict and strict costs
}
SearchIterator::UP
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
index db3a0019d94..e2bb5e76751 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
@@ -4,9 +4,10 @@
#include "ordered_field_index_inserter.h"
#include "posting_iterator.h"
#include <vespa/searchlib/bitcompression/posocccompression.h>
-#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h>
#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h>
#include <vespa/searchlib/queryeval/filter_wrapper.h>
+#include <vespa/searchlib/queryeval/flow_tuning.h>
#include <vespa/searchlib/queryeval/searchiterator.h>
#include <vespa/vespalib/btree/btree.hpp>
#include <vespa/vespalib/btree/btreeiterator.hpp>
@@ -30,6 +31,8 @@ using search::queryeval::BooleanMatchIteratorWrapper;
using search::queryeval::FieldSpecBase;
using search::queryeval::SearchIterator;
using search::queryeval::SimpleLeafBlueprint;
+using search::queryeval::flow::btree_cost;
+using search::queryeval::flow::btree_strict_cost;
using vespalib::GenerationHandler;
namespace search::memoryindex {
@@ -257,7 +260,8 @@ public:
}
 queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override {
- return default_flow_stats(docid_limit, _posting_itr.size(), 0);
+ double rel_est = abs_to_rel_est(_posting_itr.size(), docid_limit); // relative estimate from the posting iterator's size and docid_limit
+ return {rel_est, btree_cost(), btree_strict_cost(rel_est)}; // the estimate, then the btree non-strict and strict costs
 }
SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray& tfmda, bool) const override {