aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/vespa')
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp34
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h10
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp27
-rw-r--r--searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h3
-rw-r--r--searchlib/src/vespa/searchlib/features/CMakeLists.txt3
-rw-r--r--searchlib/src/vespa/searchlib/features/bm25_feature.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/features/bm25_feature.h2
-rw-r--r--searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp71
-rw-r--r--searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h40
-rw-r--r--searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp67
-rw-r--r--searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h32
-rw-r--r--searchlib/src/vespa/searchlib/features/second_phase_feature.cpp57
-rw-r--r--searchlib/src/vespa/searchlib/features/second_phase_feature.h35
-rw-r--r--searchlib/src/vespa/searchlib/features/setup.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.cpp56
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.h37
-rw-r--r--searchlib/src/vespa/searchlib/fef/objectstore.cpp7
-rw-r--r--searchlib/src/vespa/searchlib/fef/objectstore.h3
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.h19
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp38
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h10
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/field_index.cpp15
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/field_index.h8
-rw-r--r--searchlib/src/vespa/searchlib/query/query_term_simple.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt3
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.cpp115
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.h28
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h1
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/exact_nearest_neighbor_iterator.cpp (renamed from searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp)28
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/exact_nearest_neighbor_iterator.h (renamed from searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h)6
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/first_phase_rescorer.cpp38
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/first_phase_rescorer.h25
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/flow.h10
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/flow_tuning.h8
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp288
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/hitcollector.h16
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp43
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h14
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/matching_phase.h18
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/scores.h3
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp10
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp44
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h32
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp9
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h44
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h72
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp36
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h33
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp66
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h15
-rw-r--r--searchlib/src/vespa/searchlib/tensor/angular_distance.cpp36
-rw-r--r--searchlib/src/vespa/searchlib/tensor/angular_distance.h14
-rw-r--r--searchlib/src/vespa/searchlib/tensor/bound_distance_function.h5
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp31
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_function_factory.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp48
-rw-r--r--searchlib/src/vespa/searchlib/tensor/euclidean_distance.h14
-rw-r--r--searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp29
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hamming_distance.h21
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_graph.h8
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp99
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.h50
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h5
-rw-r--r--searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp23
-rw-r--r--searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h16
-rw-r--r--searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp38
-rw-r--r--searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h14
-rw-r--r--searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp21
-rw-r--r--searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h28
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp23
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp14
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h1
-rw-r--r--searchlib/src/vespa/searchlib/test/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/test/mock_attribute_context.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/test/mock_attribute_context.h4
-rw-r--r--searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.cpp71
-rw-r--r--searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.h60
-rw-r--r--searchlib/src/vespa/searchlib/util/token_extractor.cpp2
88 files changed, 1607 insertions, 726 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 5b17b491a20..635851f9f1d 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -11,7 +11,6 @@
#include "in_term_search.h"
#include "multi_term_or_filter_search.h"
#include "predicate_attribute.h"
-#include <vespa/eval/eval/value.h>
#include <vespa/searchcommon/attribute/config.h>
#include <vespa/searchcommon/attribute/hit_estimate_flow_stats_adapter.h>
#include <vespa/searchlib/common/location.h>
@@ -94,6 +93,7 @@ using search::queryeval::StrictHeapOrSearch;
using search::queryeval::WeightedSetTermBlueprint;
using search::queryeval::flow::btree_cost;
using search::queryeval::flow::btree_strict_cost;
+using search::queryeval::flow::estimate_when_unknown;
using search::queryeval::flow::get_num_indirections;
using search::queryeval::flow::lookup_cost;
using search::queryeval::flow::lookup_strict_cost;
@@ -150,10 +150,9 @@ public:
search::queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override {
if (_hit_estimate.is_unknown()) {
// E.g. attributes without fast-search are not able to provide a hit estimate.
- // In this case we just assume matching half of the document corpus.
// In addition, matching is lookup based, and we are not able to skip documents efficiently when being strict.
size_t indirections = get_num_indirections(_attr.getBasicType(), _attr.getCollectionType());
- return {0.5, lookup_cost(indirections), lookup_strict_cost(indirections)};
+ return {estimate_when_unknown(), lookup_cost(indirections), lookup_strict_cost(indirections)};
} else {
double rel_est = abs_to_rel_est(_hit_estimate.est_hits(), docid_limit);
return {rel_est, btree_cost(rel_est), btree_strict_cost(rel_est)};
@@ -443,7 +442,8 @@ private:
class DirectWandBlueprint : public queryeval::ComplexLeafBlueprint
{
private:
- mutable queryeval::SharedWeakAndPriorityQueue _scores;
+ using WeakAndPriorityQueue = queryeval::WeakAndPriorityQueue;
+ std::unique_ptr<WeakAndPriorityQueue> _scores;
const queryeval::wand::score_t _scoreThreshold;
double _thresholdBoostFactor;
const uint32_t _scoresAdjustFrequency;
@@ -452,14 +452,16 @@ private:
const IDocidWithWeightPostingStore &_attr;
vespalib::datastore::EntryRef _dictionary_snapshot;
+
public:
DirectWandBlueprint(const FieldSpec &field, const IDocidWithWeightPostingStore &attr, uint32_t scoresToTrack,
- queryeval::wand::score_t scoreThreshold, double thresholdBoostFactor, size_t size_hint)
+ queryeval::wand::score_t scoreThreshold, double thresholdBoostFactor, size_t size_hint,
+ bool thread_safe)
: ComplexLeafBlueprint(field),
- _scores(scoresToTrack),
+ _scores(WeakAndPriorityQueue::createHeap(scoresToTrack, thread_safe)),
_scoreThreshold(scoreThreshold),
_thresholdBoostFactor(thresholdBoostFactor),
- _scoresAdjustFrequency(queryeval::DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY),
+ _scoresAdjustFrequency(queryeval::wand::DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY),
_weights(),
_terms(),
_attr(attr),
@@ -496,7 +498,7 @@ public:
using OrFlow = search::queryeval::OrFlow;
using MyAdapter = attribute::DirectPostingStoreFlowStatsAdapter;
double child_est = OrFlow::estimate_of(MyAdapter(docid_limit), _terms);
- double my_est = abs_to_rel_est(_scores.getScoresToTrack(), docid_limit);
+ double my_est = abs_to_rel_est(_scores->getScoresToTrack(), docid_limit);
double est = (child_est + my_est) / 2.0;
return {est, OrFlow::cost_of(MyAdapter(docid_limit), _terms, false),
OrFlow::cost_of(MyAdapter(docid_limit), _terms, true) + queryeval::flow::heap_cost(est, _terms.size())};
@@ -508,9 +510,8 @@ public:
return std::make_unique<queryeval::EmptySearch>();
}
return queryeval::ParallelWeakAndSearch::create(*tfmda[0],
- queryeval::ParallelWeakAndSearch::MatchParams(_scores, _scoreThreshold,
- _thresholdBoostFactor, _scoresAdjustFrequency)
- .setDocIdLimit(get_docid_limit()),
+ queryeval::ParallelWeakAndSearch::MatchParams(*_scores, _scoreThreshold, _thresholdBoostFactor,
+ _scoresAdjustFrequency, get_docid_limit()),
_weights, _terms, _attr, strict());
}
std::unique_ptr<SearchIterator> createFilterSearch(FilterConstraint constraint) const override;
@@ -712,15 +713,12 @@ public:
void visit(query::WandTerm &n) override {
if (has_always_btree_iterators_with_docid_and_weight()) {
- auto *bp = new DirectWandBlueprint(_field, *_dwwps,
- n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(),
- n.getNumTerms());
+ auto *bp = new DirectWandBlueprint(_field, *_dwwps, n.getTargetNumHits(), n.getScoreThreshold(),
+ n.getThresholdBoostFactor(), n.getNumTerms(), is_search_multi_threaded());
createDirectMultiTerm(bp, n);
} else {
- auto *bp = new ParallelWeakAndBlueprint(_field,
- n.getTargetNumHits(),
- n.getScoreThreshold(),
- n.getThresholdBoostFactor());
+ auto *bp = new ParallelWeakAndBlueprint(_field, n.getTargetNumHits(), n.getScoreThreshold(),
+ n.getThresholdBoostFactor(), is_search_multi_threaded());
createShallowWeightedSet(bp, n, _field, _attr.isIntegerType());
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
index e2928710a32..ac6fc6f603a 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
@@ -16,15 +16,18 @@ struct AttributeBlueprintParams
double global_filter_upper_limit;
double target_hits_max_adjustment_factor;
vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm;
+ double weakand_range;
AttributeBlueprintParams(double global_filter_lower_limit_in,
double global_filter_upper_limit_in,
double target_hits_max_adjustment_factor_in,
- vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm_in)
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm_in,
+ double weakand_range_in)
: global_filter_lower_limit(global_filter_lower_limit_in),
global_filter_upper_limit(global_filter_upper_limit_in),
target_hits_max_adjustment_factor(target_hits_max_adjustment_factor_in),
- fuzzy_matching_algorithm(fuzzy_matching_algorithm_in)
+ fuzzy_matching_algorithm(fuzzy_matching_algorithm_in),
+ weakand_range(weakand_range_in)
{
}
@@ -32,7 +35,8 @@ struct AttributeBlueprintParams
: AttributeBlueprintParams(fef::indexproperties::matching::GlobalFilterLowerLimit::DEFAULT_VALUE,
fef::indexproperties::matching::GlobalFilterUpperLimit::DEFAULT_VALUE,
fef::indexproperties::matching::TargetHitsMaxAdjustmentFactor::DEFAULT_VALUE,
- fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE)
+ fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE,
+ fef::indexproperties::temporary::WeakAndRange::DEFAULT_VALUE)
{
}
};
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
index 928023c0f94..e2d7f0fe312 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
@@ -132,7 +132,7 @@ AttributeWeightedSetBlueprint::calculate_flow_stats(uint32_t docid_limit) const
using MyAdapter = attribute::HitEstimateFlowStatsAdapter;
size_t num_indirections = queryeval::flow::get_num_indirections(_attr.getBasicType(), _attr.getCollectionType());
double est = OrFlow::estimate_of(MyAdapter(docid_limit, num_indirections), _estimates);
- return {est, OrFlow::cost_of(MyAdapter(docid_limit, num_indirections), _estimates, false),
+ return {est, queryeval::flow::reverse_hash_lookup(),
OrFlow::cost_of(MyAdapter(docid_limit, num_indirections), _estimates, true) + queryeval::flow::heap_cost(est, _estimates.size())};
}
diff --git a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp
index e20d02afe50..6762c0516b2 100644
--- a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp
@@ -3,6 +3,7 @@
#include "bitvector_search_cache.h"
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/vespalib/stllike/hash_map.hpp>
+#include <vespa/vespalib/util/memoryusage.h>
#include <mutex>
namespace search::attribute {
@@ -10,6 +11,7 @@ namespace search::attribute {
BitVectorSearchCache::BitVectorSearchCache()
: _mutex(),
_size(0),
+ _entries_extra_memory_usage(0),
_cache()
{}
@@ -18,9 +20,19 @@ BitVectorSearchCache::~BitVectorSearchCache() = default;
void
BitVectorSearchCache::insert(const vespalib::string &term, std::shared_ptr<Entry> entry)
{
+ size_t entry_extra_memory_usage = 0;
+ if (entry) {
+ entry_extra_memory_usage = sizeof(Entry);
+ if (entry->bitVector) {
+ entry_extra_memory_usage += entry->bitVector->getFileBytes();
+ }
+ }
std::unique_lock guard(_mutex);
- _cache.insert(std::make_pair(term, std::move(entry)));
+ auto ins_res = _cache.insert(std::make_pair(term, std::move(entry)));
_size.store(_cache.size());
+ if (ins_res.second) {
+ _entries_extra_memory_usage += entry_extra_memory_usage;
+ }
}
std::shared_ptr<BitVectorSearchCache::Entry>
@@ -36,12 +48,25 @@ BitVectorSearchCache::find(const vespalib::string &term) const
return {};
}
+vespalib::MemoryUsage
+BitVectorSearchCache::get_memory_usage() const
+{
+ std::lock_guard guard(_mutex);
+ size_t cache_memory_consumption = _cache.getMemoryConsumption();
+ size_t cache_memory_used = _cache.getMemoryUsed();
+ size_t self_memory_used = sizeof(BitVectorSearchCache) - sizeof(_cache);
+ size_t allocated = self_memory_used + cache_memory_consumption + _entries_extra_memory_usage;
+ size_t used = self_memory_used + cache_memory_used + _entries_extra_memory_usage;
+ return vespalib::MemoryUsage(allocated, used, 0, 0);
+}
+
void
BitVectorSearchCache::clear()
{
std::unique_lock guard(_mutex);
_cache.clear();
_size.store(0ul, std::memory_order_relaxed);
+ _entries_extra_memory_usage = 0;
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h
index 233f8315aaf..3a38cdcea26 100644
--- a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h
+++ b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h
@@ -10,6 +10,8 @@
#include <atomic>
namespace search { class BitVector; }
+namespace vespalib { class MemoryUsage; }
+
namespace search::attribute {
/**
@@ -37,6 +39,7 @@ private:
mutable std::shared_mutex _mutex;
std::atomic<uint64_t> _size;
+ size_t _entries_extra_memory_usage;
Cache _cache;
public:
@@ -45,6 +48,7 @@ public:
void insert(const vespalib::string &term, std::shared_ptr<Entry> entry);
std::shared_ptr<Entry> find(const vespalib::string &term) const;
size_t size() const { return _size.load(std::memory_order_relaxed); }
+ vespalib::MemoryUsage get_memory_usage() const;
void clear();
};
diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp
index 029dc155785..f6a33165f0c 100644
--- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp
@@ -3,6 +3,7 @@
#include "imported_attribute_vector.h"
#include "imported_attribute_vector_read_guard.h"
#include "imported_search_context.h"
+#include <vespa/vespalib/util/memoryusage.h>
namespace search::attribute {
@@ -58,4 +59,15 @@ void ImportedAttributeVector::clearSearchCache() {
}
}
+vespalib::MemoryUsage
+ImportedAttributeVector::get_memory_usage() const
+{
+ constexpr auto self_memory_usage = sizeof(ImportedAttributeVector);
+ vespalib::MemoryUsage result(self_memory_usage, self_memory_usage, 0, 0);
+ if (_search_cache) {
+ result.merge(_search_cache->get_memory_usage());
+ }
+ return result;
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h
index bd018df5273..5b68957b7f5 100644
--- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h
+++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h
@@ -6,6 +6,8 @@
#include <vespa/searchcommon/attribute/i_document_meta_store_context.h>
#include <vespa/vespalib/stllike/string.h>
+namespace vespalib { class MemoryUsage; }
+
namespace search::attribute {
class BitVectorSearchCache;
@@ -62,6 +64,7 @@ public:
std::unique_ptr<AttributeReadGuard> makeReadGuard(bool stableEnumGuard) const override;
virtual std::unique_ptr<AttributeReadGuard> makeReadGuard(std::shared_ptr<MetaStoreReadGuard> targetMetaStoreReadGuard, bool stableEnumGuard) const;
+ vespalib::MemoryUsage get_memory_usage() const;
protected:
vespalib::string _name;
diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
index 0690801ee61..27c2b6d5e41 100644
--- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
@@ -26,6 +26,8 @@ vespa_add_library(searchlib_features OBJECT
fieldmatchfeature.cpp
fieldtermmatchfeature.cpp
firstphasefeature.cpp
+ first_phase_rank_feature.cpp
+ first_phase_rank_lookup.cpp
flow_completeness_feature.cpp
foreachfeature.cpp
freshnessfeature.cpp
@@ -57,6 +59,7 @@ vespa_add_library(searchlib_features OBJECT
rankingexpressionfeature.cpp
raw_score_feature.cpp
reverseproximityfeature.cpp
+ second_phase_feature.cpp
setup.cpp
subqueries_feature.cpp
tensor_attribute_executor.cpp
diff --git a/searchlib/src/vespa/searchlib/features/bm25_feature.cpp b/searchlib/src/vespa/searchlib/features/bm25_feature.cpp
index 505b8166ee7..03d2e94b5d0 100644
--- a/searchlib/src/vespa/searchlib/features/bm25_feature.cpp
+++ b/searchlib/src/vespa/searchlib/features/bm25_feature.cpp
@@ -68,7 +68,7 @@ Bm25Executor::Bm25Executor(const fef::FieldInfo& field,
}
double
-Bm25Executor::calculate_inverse_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count)
+Bm25Executor::calculate_inverse_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count) noexcept
{
return std::log(1 + (static_cast<double>(total_doc_count - matching_doc_count + 0.5) /
static_cast<double>(matching_doc_count + 0.5)));
diff --git a/searchlib/src/vespa/searchlib/features/bm25_feature.h b/searchlib/src/vespa/searchlib/features/bm25_feature.h
index a1b45375285..637d656990b 100644
--- a/searchlib/src/vespa/searchlib/features/bm25_feature.h
+++ b/searchlib/src/vespa/searchlib/features/bm25_feature.h
@@ -39,7 +39,7 @@ public:
double k1_param,
double b_param);
- double static calculate_inverse_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count);
+ double static calculate_inverse_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count) noexcept;
void handle_bind_match_data(const fef::MatchData& match_data) override;
void execute(uint32_t docId) override;
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp
new file mode 100644
index 00000000000..5c8a9a391ff
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp
@@ -0,0 +1,71 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "first_phase_rank_feature.h"
+#include "valuefeature.h"
+#include <vespa/vespalib/util/stash.h>
+
+namespace search::features {
+
+FirstPhaseRankExecutor::FirstPhaseRankExecutor(const FirstPhaseRankLookup& lookup)
+ : FeatureExecutor(),
+ _lookup(lookup)
+{
+}
+FirstPhaseRankExecutor::~FirstPhaseRankExecutor() = default;
+
+void
+FirstPhaseRankExecutor::execute(uint32_t docid)
+{
+ outputs().set_number(0, _lookup.lookup(docid));
+}
+
+FirstPhaseRankBlueprint::FirstPhaseRankBlueprint()
+ : Blueprint("firstPhaseRank")
+{
+}
+
+FirstPhaseRankBlueprint::~FirstPhaseRankBlueprint() = default;
+
+void
+FirstPhaseRankBlueprint::visitDumpFeatures(const fef::IIndexEnvironment&, fef::IDumpFeatureVisitor&) const
+{
+}
+
+std::unique_ptr<fef::Blueprint>
+FirstPhaseRankBlueprint::createInstance() const
+{
+ return std::make_unique<FirstPhaseRankBlueprint>();
+}
+
+fef::ParameterDescriptions
+FirstPhaseRankBlueprint::getDescriptions() const
+{
+ return fef::ParameterDescriptions().desc();
+}
+
+bool
+FirstPhaseRankBlueprint::setup(const fef::IIndexEnvironment&, const fef::ParameterList&)
+{
+ describeOutput("score", "The first phase rank.");
+ return true;
+}
+
+void
+FirstPhaseRankBlueprint::prepareSharedState(const fef::IQueryEnvironment&, fef::IObjectStore& store) const
+{
+ FirstPhaseRankLookup::make_shared_state(store);
+}
+
+fef::FeatureExecutor&
+FirstPhaseRankBlueprint::createExecutor(const fef::IQueryEnvironment& env, vespalib::Stash& stash) const
+{
+ const auto* lookup = FirstPhaseRankLookup::get_shared_state(env.getObjectStore());
+ if (lookup != nullptr) {
+ return stash.create<FirstPhaseRankExecutor>(*lookup);
+ } else {
+ std::vector<feature_t> values{std::numeric_limits<feature_t>::max()};
+ return stash.create<ValueExecutor>(values);
+ }
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h
new file mode 100644
index 00000000000..f90ea26f859
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h
@@ -0,0 +1,40 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "first_phase_rank_lookup.h"
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search::features {
+
+class FirstPhaseRankLookup;
+
+/*
+ * Executor for first phase rank feature that outputs the first phase rank
+ * for the given docid on this search node (1.0, 2.0, 3.0, etc.).
+ */
+class FirstPhaseRankExecutor : public fef::FeatureExecutor {
+ const FirstPhaseRankLookup& _lookup;
+public:
+ FirstPhaseRankExecutor(const FirstPhaseRankLookup& lookup);
+ ~FirstPhaseRankExecutor() override;
+ void execute(uint32_t docid) override;
+};
+
+/*
+ * Blueprint for first phase rank feature.
+ */
+class FirstPhaseRankBlueprint : public fef::Blueprint {
+public:
+ FirstPhaseRankBlueprint();
+ ~FirstPhaseRankBlueprint() override;
+ void visitDumpFeatures(const fef::IIndexEnvironment& env, fef::IDumpFeatureVisitor& visitor) const override;
+ std::unique_ptr<fef::Blueprint> createInstance() const override;
+ fef::ParameterDescriptions getDescriptions() const override;
+ bool setup(const fef::IIndexEnvironment& env, const fef::ParameterList& params) override;
+ void prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const override;
+ fef::FeatureExecutor& createExecutor(const fef::IQueryEnvironment& env, vespalib::Stash& stash) const override;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp
new file mode 100644
index 00000000000..2dfaabb8326
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp
@@ -0,0 +1,67 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "first_phase_rank_lookup.h"
+#include <vespa/searchlib/fef/objectstore.h>
+#include <cassert>
+#include <limits>
+
+using search::fef::AnyWrapper;
+
+namespace search::features {
+
+namespace {
+
+const vespalib::string key = "firstPhaseRankLookup";
+
+}
+
+FirstPhaseRankLookup::FirstPhaseRankLookup()
+ : _map()
+{
+}
+
+FirstPhaseRankLookup::FirstPhaseRankLookup(FirstPhaseRankLookup&&) = default;
+
+FirstPhaseRankLookup::~FirstPhaseRankLookup() = default;
+
+feature_t
+FirstPhaseRankLookup::lookup(uint32_t docid) const noexcept
+{
+ auto itr = _map.find(docid);
+ if (itr != _map.end()) [[likely]] {
+ return itr->second;
+ } else {
+ return std::numeric_limits<feature_t>::max();
+ }
+}
+
+void
+FirstPhaseRankLookup::add(uint32_t docid, uint32_t rank)
+{
+ auto insres = _map.insert(std::make_pair(docid, rank));
+ assert(insres.second);
+}
+
+void
+FirstPhaseRankLookup::make_shared_state(fef::IObjectStore& store)
+{
+ if (store.get(key) == nullptr) {
+ store.add(key, std::make_unique<AnyWrapper<FirstPhaseRankLookup>>(FirstPhaseRankLookup()));
+ }
+}
+
+FirstPhaseRankLookup*
+FirstPhaseRankLookup::get_mutable_shared_state(fef::IObjectStore& store)
+{
+ auto* wrapper = dynamic_cast<AnyWrapper<FirstPhaseRankLookup>*>(store.get_mutable(key));
+ return (wrapper == nullptr) ? nullptr : &wrapper->getValue();
+}
+
+const FirstPhaseRankLookup*
+FirstPhaseRankLookup::get_shared_state(const fef::IObjectStore& store)
+{
+ const auto* wrapper = dynamic_cast<const AnyWrapper<FirstPhaseRankLookup>*>(store.get(key));
+ return (wrapper == nullptr) ? nullptr : &wrapper->getValue();
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h
new file mode 100644
index 00000000000..83d89ed2dd1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h
@@ -0,0 +1,32 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/common/feature.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+
+namespace search::fef { class IObjectStore; }
+
+namespace search::features {
+
+/*
+ * This class contains a mapping from docids used by second phase to
+ * first phase rank.
+ */
+class FirstPhaseRankLookup {
+ vespalib::hash_map<uint32_t, uint32_t> _map;
+public:
+ FirstPhaseRankLookup();
+ FirstPhaseRankLookup(const FirstPhaseRankLookup&) = delete;
+ FirstPhaseRankLookup(FirstPhaseRankLookup&&);
+ ~FirstPhaseRankLookup();
+ FirstPhaseRankLookup& operator=(const FirstPhaseRankLookup&) = delete;
+ FirstPhaseRankLookup& operator=(FirstPhaseRankLookup&&) = delete;
+ feature_t lookup(uint32_t docid) const noexcept;
+ void add(uint32_t docid, uint32_t rank);
+ static void make_shared_state(fef::IObjectStore& store);
+ static FirstPhaseRankLookup* get_mutable_shared_state(fef::IObjectStore& store);
+ static const FirstPhaseRankLookup* get_shared_state(const fef::IObjectStore& store);
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/second_phase_feature.cpp b/searchlib/src/vespa/searchlib/features/second_phase_feature.cpp
new file mode 100644
index 00000000000..82ce36be859
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/second_phase_feature.cpp
@@ -0,0 +1,57 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "second_phase_feature.h"
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/vespalib/util/stash.h>
+
+using namespace search::fef;
+
+namespace search::features {
+
+void
+SecondPhaseExecutor::execute(uint32_t)
+{
+ outputs().set_number(0, inputs().get_number(0));
+}
+
+
+SecondPhaseBlueprint::SecondPhaseBlueprint()
+ : Blueprint("secondPhase")
+{
+}
+
+void
+SecondPhaseBlueprint::visitDumpFeatures(const IIndexEnvironment&,
+ IDumpFeatureVisitor&) const
+{
+}
+
+Blueprint::UP
+SecondPhaseBlueprint::createInstance() const
+{
+ return std::make_unique<SecondPhaseBlueprint>();
+}
+
+bool
+SecondPhaseBlueprint::setup(const IIndexEnvironment& env,
+ const ParameterList&)
+{
+ if (auto maybe_input = defineInput(indexproperties::rank::SecondPhase::lookup(env.getProperties()),
+ AcceptInput::ANY))
+ {
+ describeOutput("score", "The ranking score for second phase.", maybe_input.value());
+ return true;
+ } else {
+ return false;
+ }
+}
+
+FeatureExecutor &
+SecondPhaseBlueprint::createExecutor(const IQueryEnvironment&, vespalib::Stash& stash) const
+{
+ return stash.create<SecondPhaseExecutor>();
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/second_phase_feature.h b/searchlib/src/vespa/searchlib/features/second_phase_feature.h
new file mode 100644
index 00000000000..61805186453
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/second_phase_feature.h
@@ -0,0 +1,35 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+
+namespace search::features {
+
+/**
+ * Implements the executor outputting the second phase ranking.
+ */
+class SecondPhaseExecutor : public fef::FeatureExecutor {
+public:
+ bool isPure() override { return true; }
+ void execute(uint32_t docId) override;
+};
+
+/**
+ * Implements the blueprint for the second phase feature.
+ */
+class SecondPhaseBlueprint : public fef::Blueprint {
+public:
+ SecondPhaseBlueprint();
+ void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override;
+ fef::Blueprint::UP createInstance() const override;
+
+ fef::ParameterDescriptions getDescriptions() const override {
+ return fef::ParameterDescriptions().desc();
+ }
+ bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override;
+
+ fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp
index 71e083e2326..d65459817f0 100644
--- a/searchlib/src/vespa/searchlib/features/setup.cpp
+++ b/searchlib/src/vespa/searchlib/features/setup.cpp
@@ -22,6 +22,7 @@
#include "fieldmatchfeature.h"
#include "fieldtermmatchfeature.h"
#include "firstphasefeature.h"
+#include "first_phase_rank_feature.h"
#include "flow_completeness_feature.h"
#include "foreachfeature.h"
#include "freshnessfeature.h"
@@ -48,6 +49,7 @@
#include "rankingexpressionfeature.h"
#include "raw_score_feature.h"
#include "reverseproximityfeature.h"
+#include "second_phase_feature.h"
#include "subqueries_feature.h"
#include "tensor_from_labels_feature.h"
#include "tensor_from_weighted_set_feature.h"
@@ -90,6 +92,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry)
registry.addPrototype(std::make_shared<FieldMatchBlueprint>());
registry.addPrototype(std::make_shared<FieldTermMatchBlueprint>());
registry.addPrototype(std::make_shared<FirstPhaseBlueprint>());
+ registry.addPrototype(std::make_shared<FirstPhaseRankBlueprint>());
registry.addPrototype(std::make_shared<FlowCompletenessBlueprint>());
registry.addPrototype(std::make_shared<ForeachBlueprint>());
registry.addPrototype(std::make_shared<FreshnessBlueprint>());
@@ -109,6 +112,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry)
registry.addPrototype(std::make_shared<RandomNormalBlueprint>());
registry.addPrototype(std::make_shared<RandomNormalStableBlueprint>());
registry.addPrototype(std::make_shared<RawScoreBlueprint>());
+ registry.addPrototype(std::make_shared<SecondPhaseBlueprint>());
registry.addPrototype(std::make_shared<SubqueriesBlueprint>());
registry.addPrototype(std::make_shared<TensorFromLabelsBlueprint>());
registry.addPrototype(std::make_shared<TensorFromWeightedSetBlueprint>());
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
index 4637ad5a4e8..3ca53d3f777 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
@@ -36,14 +36,20 @@ lookupStringVector(const Properties &props, const vespalib::string &name,
return defaultValue;
}
-double
-lookupDouble(const Properties &props, const vespalib::string &name, double defaultValue)
+std::optional<double>
+lookup_opt_double(const Properties &props, vespalib::stringref name, std::optional<double> default_value)
{
Property p = props.lookup(name);
if (p.found()) {
return vespalib::locale::c::strtod(p.get().c_str(), nullptr);
}
- return defaultValue;
+ return default_value;
+}
+
+double
+lookupDouble(const Properties &props, const vespalib::string &name, double defaultValue)
+{
+ return lookup_opt_double(props, name, defaultValue).value();
}
uint32_t
@@ -179,6 +185,21 @@ namespace onsummary {
namespace temporary {
+const vespalib::string WeakAndRange::NAME("vespa.weakand.range");
+const double WeakAndRange::DEFAULT_VALUE(1.0);
+
+double
+WeakAndRange::lookup(const Properties &props)
+{
+ return lookup(props, DEFAULT_VALUE);
+}
+
+double
+WeakAndRange::lookup(const Properties &props, double defaultValue)
+{
+ return lookupDouble(props, NAME, defaultValue);
+}
+
}
namespace mutate {
@@ -668,19 +689,34 @@ EstimateLimit::lookup(const Properties &props)
return lookupUint32(props, NAME, DEFAULT_VALUE);
}
-const vespalib::string RankScoreDropLimit::NAME("vespa.hitcollector.rankscoredroplimit");
-const feature_t RankScoreDropLimit::DEFAULT_VALUE(-std::numeric_limits<feature_t>::quiet_NaN());
+const vespalib::string FirstPhaseRankScoreDropLimit::NAME("vespa.hitcollector.rankscoredroplimit");
+const std::optional<feature_t> FirstPhaseRankScoreDropLimit::DEFAULT_VALUE(std::nullopt);
-feature_t
-RankScoreDropLimit::lookup(const Properties &props)
+std::optional<feature_t>
+FirstPhaseRankScoreDropLimit::lookup(const Properties &props)
{
return lookup(props, DEFAULT_VALUE);
}
-feature_t
-RankScoreDropLimit::lookup(const Properties &props, feature_t defaultValue)
+std::optional<feature_t>
+FirstPhaseRankScoreDropLimit::lookup(const Properties &props, std::optional<feature_t> default_value)
{
- return lookupDouble(props, NAME, defaultValue);
+ return lookup_opt_double(props, NAME, default_value);
+}
+
+const vespalib::string SecondPhaseRankScoreDropLimit::NAME("vespa.hitcollector.secondphase.rankscoredroplimit");
+const std::optional<feature_t> SecondPhaseRankScoreDropLimit::DEFAULT_VALUE(std::nullopt);
+
+std::optional<feature_t>
+SecondPhaseRankScoreDropLimit::lookup(const Properties &props)
+{
+ return lookup_opt_double(props, NAME, DEFAULT_VALUE);
+}
+
+std::optional<feature_t>
+SecondPhaseRankScoreDropLimit::lookup(const Properties &props, std::optional<feature_t> default_value)
+{
+ return lookup_opt_double(props, NAME, default_value);
}
} // namspace hitcollector
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h
index db8de8209a9..afaf7ac1a61 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.h
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h
@@ -5,6 +5,7 @@
#include <vespa/searchlib/common/feature.h>
#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
#include <vespa/vespalib/stllike/string.h>
+#include <optional>
#include <vector>
namespace search::fef { class Properties; }
@@ -178,6 +179,18 @@ namespace mutate {
// Add temporary flags used for safe rollout of new features here
namespace temporary {
+/**
+ * A number in the range [0,1] for the effective idf range for WeakAndOperator.
+ * 1.0 will give the complete range as used by default by bm25.
+ * scaled_idf = (1.0 - range) * max_idf + (range * idf)
+ * 0.0 which is default gives default legacy behavior.
+ **/
+struct WeakAndRange {
+ static const vespalib::string NAME;
+ static const double DEFAULT_VALUE;
+ static double lookup(const Properties &props);
+ static double lookup(const Properties &props, double defaultValue);
+};
}
namespace mutate::on_match {
@@ -550,16 +563,28 @@ namespace hitcollector {
};
/**
- * Property for the rank score drop limit used in parallel query evaluation.
- * Drop a hit if the rank score <= drop limit.
+ * Property for the first phase rank score drop limit used in parallel
+ * query evaluation.
+ * Drop a hit if the first phase rank score <= drop limit.
**/
- struct RankScoreDropLimit {
+ struct FirstPhaseRankScoreDropLimit {
static const vespalib::string NAME;
- static const feature_t DEFAULT_VALUE;
- static feature_t lookup(const Properties &props);
- static feature_t lookup(const Properties &props, feature_t defaultValue);
+ static const std::optional<feature_t> DEFAULT_VALUE;
+ static std::optional<feature_t> lookup(const Properties &props);
+ static std::optional<feature_t> lookup(const Properties &props, std::optional<feature_t> default_value);
};
+ /**
+ * Property for the second phase rank score drop limit used in
+ * parallel query evaluation. Drop a hit if the score (reranked or
+ * rescored) <= drop limit.
+ **/
+ struct SecondPhaseRankScoreDropLimit {
+ static const vespalib::string NAME;
+ static const std::optional<feature_t> DEFAULT_VALUE;
+ static std::optional<feature_t> lookup(const Properties &props);
+ static std::optional<feature_t> lookup(const Properties &props, std::optional<double> default_value);
+ };
} // namespace hitcollector
diff --git a/searchlib/src/vespa/searchlib/fef/objectstore.cpp b/searchlib/src/vespa/searchlib/fef/objectstore.cpp
index 3e5baf49116..a90702a88a6 100644
--- a/searchlib/src/vespa/searchlib/fef/objectstore.cpp
+++ b/searchlib/src/vespa/searchlib/fef/objectstore.cpp
@@ -35,4 +35,11 @@ ObjectStore::get(const vespalib::string & key) const
return (found != _objectMap.end()) ? found->second : NULL;
}
+Anything *
+ObjectStore::get_mutable(const vespalib::string& key)
+{
+ auto found = _objectMap.find(key);
+ return (found != _objectMap.end()) ? found->second : nullptr;
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/fef/objectstore.h b/searchlib/src/vespa/searchlib/fef/objectstore.h
index 9d1671e521c..d2d768ee338 100644
--- a/searchlib/src/vespa/searchlib/fef/objectstore.h
+++ b/searchlib/src/vespa/searchlib/fef/objectstore.h
@@ -24,6 +24,7 @@ class AnyWrapper : public Anything
public:
explicit AnyWrapper(T value) : _value(std::move(value)) { }
const T & getValue() const { return _value; }
+ T& getValue() { return _value; }
static const T & getValue(const Anything & any) { return static_cast<const AnyWrapper &>(any).getValue(); }
private:
T _value;
@@ -38,6 +39,7 @@ public:
virtual ~IObjectStore() = default;
virtual void add(const vespalib::string & key, Anything::UP value) = 0;
virtual const Anything * get(const vespalib::string & key) const = 0;
+ virtual Anything* get_mutable(const vespalib::string& key) = 0;
};
/**
@@ -50,6 +52,7 @@ public:
~ObjectStore() override;
void add(const vespalib::string & key, Anything::UP value) override;
const Anything * get(const vespalib::string & key) const override;
+ Anything* get_mutable(const vespalib::string & key) override;
private:
using ObjectMap = vespalib::hash_map<vespalib::string, Anything *>;
ObjectMap _objectMap;
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
index aadc5300ede..f62dda66b76 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
@@ -50,7 +50,8 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i
_degradationMaxFilterCoverage(1.0),
_degradationSamplePercentage(0.2),
_degradationPostFilterMultiplier(1.0),
- _rankScoreDropLimit(0),
+ _first_phase_rank_score_drop_limit(),
+ _second_phase_rank_score_drop_limit(),
_match_features(),
_summaryFeatures(),
_dumpFeatures(),
@@ -71,6 +72,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i
_global_filter_lower_limit(0.0),
_global_filter_upper_limit(1.0),
_target_hits_max_adjustment_factor(20.0),
+ _weakand_range(0.0),
_fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm::DfaTable),
_mutateOnMatch(),
_mutateOnFirstPhase(),
@@ -119,13 +121,15 @@ RankSetup::configure()
setDiversityCutoffStrategy(matchphase::DiversityCutoffStrategy::lookup(_indexEnv.getProperties()));
setEstimatePoint(hitcollector::EstimatePoint::lookup(_indexEnv.getProperties()));
setEstimateLimit(hitcollector::EstimateLimit::lookup(_indexEnv.getProperties()));
- setRankScoreDropLimit(hitcollector::RankScoreDropLimit::lookup(_indexEnv.getProperties()));
+ set_first_phase_rank_score_drop_limit(hitcollector::FirstPhaseRankScoreDropLimit::lookup(_indexEnv.getProperties()));
+ set_second_phase_rank_score_drop_limit(hitcollector::SecondPhaseRankScoreDropLimit::lookup(_indexEnv.getProperties()));
setSoftTimeoutEnabled(softtimeout::Enabled::lookup(_indexEnv.getProperties()));
setSoftTimeoutTailCost(softtimeout::TailCost::lookup(_indexEnv.getProperties()));
set_global_filter_lower_limit(matching::GlobalFilterLowerLimit::lookup(_indexEnv.getProperties()));
set_global_filter_upper_limit(matching::GlobalFilterUpperLimit::lookup(_indexEnv.getProperties()));
set_target_hits_max_adjustment_factor(matching::TargetHitsMaxAdjustmentFactor::lookup(_indexEnv.getProperties()));
set_fuzzy_matching_algorithm(matching::FuzzyAlgorithm::lookup(_indexEnv.getProperties()));
+ set_weakand_range(temporary::WeakAndRange::lookup(_indexEnv.getProperties()));
_mutateOnMatch._attribute = mutate::on_match::Attribute::lookup(_indexEnv.getProperties());
_mutateOnMatch._operation = mutate::on_match::Operation::lookup(_indexEnv.getProperties());
_mutateOnFirstPhase._attribute = mutate::on_first_phase::Attribute::lookup(_indexEnv.getProperties());
@@ -193,7 +197,7 @@ RankSetup::compile()
_firstPhaseRankFeature = parser.featureName();
_first_phase_resolver->addSeed(_firstPhaseRankFeature);
} else {
- vespalib::string e = fmt("invalid feature name for initial rank: '%s'", _firstPhaseRankFeature.c_str());
+ vespalib::string e = fmt("invalid feature name for first phase rank: '%s'", _firstPhaseRankFeature.c_str());
_warnings.emplace_back(e);
_compileError = true;
}
@@ -204,7 +208,7 @@ RankSetup::compile()
_secondPhaseRankFeature = parser.featureName();
_second_phase_resolver->addSeed(_secondPhaseRankFeature);
} else {
- vespalib::string e = fmt("invalid feature name for final rank: '%s'", _secondPhaseRankFeature.c_str());
+ vespalib::string e = fmt("invalid feature name for second phase rank: '%s'", _secondPhaseRankFeature.c_str());
_warnings.emplace_back(e);
_compileError = true;
}
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h
index d8b977a0331..b5309b128e2 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.h
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h
@@ -9,6 +9,7 @@
#include "rank_program.h"
#include <vespa/searchlib/common/stringmap.h>
#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
+#include <optional>
namespace search::fef {
@@ -59,7 +60,8 @@ private:
double _degradationMaxFilterCoverage;
double _degradationSamplePercentage;
double _degradationPostFilterMultiplier;
- feature_t _rankScoreDropLimit;
+ std::optional<feature_t> _first_phase_rank_score_drop_limit;
+ std::optional<feature_t> _second_phase_rank_score_drop_limit;
std::vector<vespalib::string> _match_features;
std::vector<vespalib::string> _summaryFeatures;
std::vector<vespalib::string> _dumpFeatures;
@@ -80,6 +82,7 @@ private:
double _global_filter_lower_limit;
double _global_filter_upper_limit;
double _target_hits_max_adjustment_factor;
+ double _weakand_range;
vespalib::FuzzyMatchingAlgorithm _fuzzy_matching_algorithm;
MutateOperation _mutateOnMatch;
MutateOperation _mutateOnFirstPhase;
@@ -331,18 +334,22 @@ public:
uint32_t getEstimateLimit() const { return _estimateLimit; }
/**
- * Sets the rank score drop limit to be used in parallel query evaluation.
+ * Sets the first phase rank score drop limit to be used in parallel query evaluation.
*
- * @param rankScoreDropLimit the rank score drop limit
+ * @param value the first phase rank score drop limit
**/
- void setRankScoreDropLimit(feature_t rankScoreDropLimit) { _rankScoreDropLimit = rankScoreDropLimit; }
+ void set_first_phase_rank_score_drop_limit(std::optional<feature_t> value) { _first_phase_rank_score_drop_limit = value; }
/**
* Returns the rank score drop limit to be used in parallel query evaluation.
*
* @return the rank score drop limit
**/
- feature_t getRankScoreDropLimit() const { return _rankScoreDropLimit; }
+ std::optional<feature_t> get_first_phase_rank_score_drop_limit() const noexcept { return _first_phase_rank_score_drop_limit; }
+
+ void set_second_phase_rank_score_drop_limit(std::optional<feature_t> value) { _second_phase_rank_score_drop_limit = value; }
+
+ std::optional<feature_t> get_second_phase_rank_score_drop_limit() const noexcept { return _second_phase_rank_score_drop_limit; }
/**
* This method may be used to indicate that certain features
@@ -402,6 +409,8 @@ public:
double get_target_hits_max_adjustment_factor() const { return _target_hits_max_adjustment_factor; }
void set_fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm v) { _fuzzy_matching_algorithm = v; }
vespalib::FuzzyMatchingAlgorithm get_fuzzy_matching_algorithm() const { return _fuzzy_matching_algorithm; }
+ void set_weakand_range(double v) { _weakand_range = v; }
+ double get_weakand_range() const { return _weakand_range; }
/**
* This method may be used to indicate that certain features
diff --git a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp
index 3645496e4fb..41551ac1062 100644
--- a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp
+++ b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp
@@ -10,11 +10,11 @@ LOG_SETUP(".fef.matchdatabuilder");
namespace search::fef::test {
-MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data) :
- _queryEnv(queryEnv),
- _data(data),
- _index(),
- _match()
+MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data)
+ : _queryEnv(queryEnv),
+ _data(data),
+ _index(),
+ _match()
{
// reset all match data objects.
for (TermFieldHandle handle = 0; handle < _data.getNumTermFields(); ++handle) {
@@ -22,7 +22,7 @@ MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data)
}
}
-MatchDataBuilder::~MatchDataBuilder() {}
+MatchDataBuilder::~MatchDataBuilder() = default;
TermFieldMatchData *
MatchDataBuilder::getTermFieldMatchData(uint32_t termId, uint32_t fieldId)
@@ -59,7 +59,7 @@ MatchDataBuilder::addElement(const vespalib::string &fieldName, int32_t weight,
LOG(error, "Field '%s' does not exist.", fieldName.c_str());
return false;
}
- _index[info->id()].elements.push_back(MyElement(weight, length));
+ _index[info->id()].elements.emplace_back(weight, length);
return true;
}
@@ -77,8 +77,7 @@ MatchDataBuilder::addOccurence(const vespalib::string &fieldName, uint32_t termI
}
const ITermFieldData *tfd = _queryEnv.getTerm(termId)->lookupField(info->id());
if (tfd == nullptr) {
- LOG(error, "Field '%s' is not searched by the given term.",
- fieldName.c_str());
+ LOG(error, "Field '%s' is not searched by the given term.", fieldName.c_str());
return false;
}
_match[termId][info->id()].insert(Position(pos, element));
@@ -99,14 +98,13 @@ MatchDataBuilder::setWeight(const vespalib::string &fieldName, uint32_t termId,
}
const ITermFieldData *tfd = _queryEnv.getTerm(termId)->lookupField(info->id());
if (tfd == nullptr) {
- LOG(error, "Field '%s' is not searched by the given term.",
- fieldName.c_str());
+ LOG(error, "Field '%s' is not searched by the given term.", fieldName.c_str());
return false;
}
uint32_t eid = _index[info->id()].elements.size();
_match[termId][info->id()].clear();
_match[termId][info->id()].insert(Position(0, eid));
- _index[info->id()].elements.push_back(MyElement(weight, 1));
+ _index[info->id()].elements.emplace_back(weight, 1);
return true;
}
@@ -142,19 +140,13 @@ MatchDataBuilder::apply(uint32_t docId)
// For each occurence of that term, in that field, do
for (const auto& occ : field_elem.second) {
// Append a term match position to the term match data.
- match->appendPosition(TermFieldMatchDataPosition(
- occ.eid,
- occ.pos,
- field.getWeight(occ.eid),
- field.getLength(occ.eid)));
- LOG(debug,
- "Added occurence of term '%u' in field '%s'"
- " at position '%u'.",
+ match->appendPosition(TermFieldMatchDataPosition(occ.eid, occ.pos,
+ field.getWeight(occ.eid),
+ field.getLength(occ.eid)));
+ LOG(debug, "Added occurence of term '%u' in field '%s' at position '%u'.",
termId, name.c_str(), occ.pos);
if (occ.pos >= field.getLength(occ.eid)) {
- LOG(warning,
- "Added occurence of term '%u' in field '%s'"
- " at position '%u' >= fieldLen '%u'.",
+ LOG(warning, "Added occurence of term '%u' in field '%s' at position '%u' >= fieldLen '%u'.",
termId, name.c_str(), occ.pos, field.getLength(occ.eid));
}
}
diff --git a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h
index 0e5025efd37..753e1596520 100644
--- a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h
+++ b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h
@@ -13,7 +13,7 @@ public:
struct MyElement {
int32_t weight;
uint32_t length;
- MyElement(int32_t w, uint32_t l) : weight(w), length(l) {}
+ MyElement(int32_t w, uint32_t l) noexcept : weight(w), length(l) {}
};
struct MyField {
uint32_t fieldLength;
@@ -21,7 +21,7 @@ public:
MyField() : fieldLength(0), elements() {}
MyElement &getElement(uint32_t eid) {
while (elements.size() <= eid) {
- elements.push_back(MyElement(0, 0));
+ elements.emplace_back(0, 0);
}
return elements[eid];
}
@@ -68,6 +68,8 @@ public:
* @param data The match data to build in.
*/
MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data);
+ MatchDataBuilder(const MatchDataBuilder &) = delete;
+ MatchDataBuilder & operator=(const MatchDataBuilder &) = delete;
~MatchDataBuilder();
/**
@@ -133,10 +135,6 @@ public:
bool apply(uint32_t docId);
private:
- MatchDataBuilder(const MatchDataBuilder &); // hide
- MatchDataBuilder & operator=(const MatchDataBuilder &); // hide
-
-private:
QueryEnvironment &_queryEnv;
MatchData &_data;
IndexData _index;
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
index 2bc94073c92..49a0f0621d2 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
@@ -213,6 +213,17 @@ FieldIndex<interleaved_features>::getMemoryUsage() const
}
template <bool interleaved_features>
+void
+FieldIndex<interleaved_features>::commit()
+{
+ _remover.flush();
+ freeze();
+ assign_generation();
+ incGeneration();
+ reclaim_memory();
+}
+
+template <bool interleaved_features>
queryeval::SearchIterator::UP
FieldIndex<interleaved_features>::make_search_iterator(const vespalib::string& term,
uint32_t field_id,
@@ -248,7 +259,7 @@ public:
: SimpleLeafBlueprint(field),
_guard(),
_field(field),
- _posting_itr(posting_itr),
+ _posting_itr(std::move(posting_itr)),
_feature_store(feature_store),
_field_id(field_id),
_query_term(query_term),
@@ -302,7 +313,7 @@ FieldIndex<interleaved_features>::make_term_blueprint(const vespalib::string& te
auto posting_itr = findFrozen(term);
bool use_bit_vector = field.isFilter();
return std::make_unique<MemoryTermBlueprint<interleaved_features>>
- (std::move(guard), posting_itr, getFeatureStore(), field, field_id, term, use_bit_vector);
+ (std::move(guard), std::move(posting_itr), getFeatureStore(), field, field_id, term, use_bit_vector);
}
template class FieldIndex<false>;
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
index 0b245300a7b..18e60cf2194 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
@@ -87,13 +87,7 @@ public:
vespalib::MemoryUsage getMemoryUsage() const override;
PostingListStore &getPostingListStore() { return _postingListStore; }
- void commit() override {
- _remover.flush();
- freeze();
- assign_generation();
- incGeneration();
- reclaim_memory();
- }
+ void commit() override;
/**
* Should only by used by unit tests.
diff --git a/searchlib/src/vespa/searchlib/query/query_term_simple.cpp b/searchlib/src/vespa/searchlib/query/query_term_simple.cpp
index 09fc443cf0e..9e46df57416 100644
--- a/searchlib/src/vespa/searchlib/query/query_term_simple.cpp
+++ b/searchlib/src/vespa/searchlib/query/query_term_simple.cpp
@@ -49,7 +49,7 @@ template <typename T>
struct FloatDecoder {
static T fromstr(const char * q, const char * qend, const char ** end) noexcept {
T v(0);
-#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 180000
+#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 190000
vespalib::string tmp(q, qend - q);
char* tmp_end = nullptr;
const char *tmp_cstring = tmp.c_str();
diff --git a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
index 51fe2d12637..31799b6935c 100644
--- a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
@@ -14,6 +14,7 @@ vespa_add_library(searchlib_queryeval OBJECT
emptysearch.cpp
equiv_blueprint.cpp
equivsearch.cpp
+ exact_nearest_neighbor_iterator.cpp
executeinfo.cpp
fake_requestcontext.cpp
fake_result.cpp
@@ -21,6 +22,7 @@ vespa_add_library(searchlib_queryeval OBJECT
fake_searchable.cpp
field_spec.cpp
filter_wrapper.cpp
+ first_phase_rescorer.cpp
flow.cpp
full_search.cpp
get_weight_from_node.cpp
@@ -37,7 +39,6 @@ vespa_add_library(searchlib_queryeval OBJECT
multibitvectoriterator.cpp
multisearch.cpp
nearest_neighbor_blueprint.cpp
- nearest_neighbor_iterator.cpp
nearsearch.cpp
nns_index_iterator.cpp
orsearch.cpp
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
index 7334db4b716..f86edfc3faa 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
@@ -1,14 +1,15 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "blueprint.h"
-#include "leaf_blueprints.h"
+#include "andnotsearch.h"
+#include "andsearch.h"
#include "emptysearch.h"
-#include "full_search.h"
#include "field_spec.hpp"
-#include "andsearch.h"
-#include "orsearch.h"
-#include "andnotsearch.h"
+#include "flow_tuning.h"
+#include "full_search.h"
+#include "leaf_blueprints.h"
#include "matching_elements_search.h"
+#include "orsearch.h"
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/vespalib/objects/visit.hpp>
#include <vespa/vespalib/objects/objectdumper.h>
@@ -121,6 +122,7 @@ Blueprint::Blueprint() noexcept
_flow_stats(0.0, 0.0, 0.0),
_sourceId(0xffffffff),
_docid_limit(0),
+ _id(0),
_strict(false),
_frozen(false)
{
@@ -140,6 +142,13 @@ Blueprint::resolve_strict(InFlow &in_flow) noexcept
_strict = in_flow.strict();
}
+uint32_t
+Blueprint::enumerate(uint32_t next_id) noexcept
+{
+ set_id(next_id++);
+ return next_id;
+}
+
void
Blueprint::each_node_post_order(const std::function<void(Blueprint&)> &f)
{
@@ -168,31 +177,6 @@ Blueprint::null_plan(InFlow in_flow, uint32_t docid_limit)
sort(in_flow);
}
-double
-Blueprint::estimate_actual_cost(InFlow in_flow) const noexcept
-{
- double res = estimate_strict_cost_diff(in_flow);
- if (in_flow.strict()) {
- res += strict_cost();
- } else {
- res += in_flow.rate() * cost();
- }
- return res;
-}
-
-double
-Blueprint::estimate_strict_cost_diff(InFlow &in_flow) const noexcept
-{
- if (in_flow.strict()) {
- REQUIRE(strict());
- } else if (strict()) {
- double rate = in_flow.rate();
- in_flow.force_strict();
- return flow::strict_cost_diff(estimate(), rate);
- }
- return 0.0;
-}
-
Blueprint::UP
Blueprint::optimize(Blueprint::UP bp) {
Blueprint *root = bp.release();
@@ -238,7 +222,7 @@ Blueprint::default_flow_stats(uint32_t docid_limit, uint32_t abs_est, size_t chi
FlowStats
Blueprint::default_flow_stats(size_t child_cnt)
{
- return {0.5, 1.0 + child_cnt, 1.0 + child_cnt};
+ return {flow::estimate_when_unknown(), 1.0 + child_cnt, 1.0 + child_cnt};
}
std::unique_ptr<MatchingElementsSearch>
@@ -278,8 +262,8 @@ create_op_filter(const Blueprint::Children &children, bool strict, Blueprint::Fi
MultiSearch::Children list;
std::unique_ptr<SearchIterator> spare;
list.reserve(children.size());
- for (size_t i = 0; i < children.size(); ++i) {
- auto filter = children[i]->createFilterSearch(constraint);
+ for (const auto & child : children) {
+ auto filter = child->createFilterSearch(constraint);
auto matches_any = filter->matches_any();
if (should_short_circuit<Op>(matches_any)) {
return filter;
@@ -435,6 +419,7 @@ Blueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
visitor.visitFloat("strict_cost", strict_cost());
visitor.visitInt("sourceId", _sourceId);
visitor.visitInt("docid_limit", _docid_limit);
+ visitor.visitInt("id", _id);
visitor.visitBool("strict", _strict);
}
@@ -474,11 +459,21 @@ IntermediateBlueprint::setDocIdLimit(uint32_t limit) noexcept
}
}
+uint32_t
+IntermediateBlueprint::enumerate(uint32_t next_id) noexcept
+{
+ set_id(next_id++);
+ for (Blueprint::UP &child: _children) {
+ next_id = child->enumerate(next_id);
+ }
+ return next_id;
+}
+
void
IntermediateBlueprint::each_node_post_order(const std::function<void(Blueprint&)> &f)
{
- for (Blueprint::UP &child : _children) {
- f(*child);
+ for (Blueprint::UP &child: _children) {
+ child->each_node_post_order(f);
}
f(*this);
}
@@ -623,24 +618,6 @@ IntermediateBlueprint::should_do_termwise_eval(const UnpackInfo &unpack, double
return (count_termwise_nodes(unpack) > 1);
}
-double
-IntermediateBlueprint::estimate_self_cost(InFlow) const noexcept
-{
- return 0.0;
-}
-
-double
-IntermediateBlueprint::estimate_actual_cost(InFlow in_flow) const noexcept
-{
- double res = estimate_strict_cost_diff(in_flow);
- auto cost_of = [](const auto &child, InFlow child_flow)noexcept{
- return child->estimate_actual_cost(child_flow);
- };
- res += flow::actual_cost_of(flow::DefaultAdapter(), _children, my_flow(in_flow), cost_of);
- res += estimate_self_cost(in_flow);
- return res;
-}
-
void
IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass)
{
@@ -665,9 +642,9 @@ IntermediateBlueprint::sort(InFlow in_flow)
sort(_children, in_flow);
}
auto flow = my_flow(in_flow);
- for (size_t i = 0; i < _children.size(); ++i) {
- _children[i]->sort(InFlow(flow.strict(), flow.flow()));
- flow.add(_children[i]->estimate());
+ for (const auto & child : _children) {
+ child->sort(InFlow(flow.strict(), flow.flow()));
+ flow.add(child->estimate());
}
}
@@ -686,8 +663,8 @@ IntermediateBlueprint::createSearch(fef::MatchData &md) const
{
MultiSearch::Children subSearches;
subSearches.reserve(_children.size());
- for (size_t i = 0; i < _children.size(); ++i) {
- subSearches.push_back(_children[i]->createSearch(md));
+ for (const auto & child : _children) {
+ subSearches.push_back(child->createSearch(md));
}
return createIntermediateSearch(std::move(subSearches), md);
}
@@ -735,23 +712,30 @@ void
IntermediateBlueprint::fetchPostings(const ExecuteInfo &execInfo)
{
auto flow = my_flow(InFlow(strict(), execInfo.hit_rate()));
- for (size_t i = 0; i < _children.size(); ++i) {
+ for (const auto & child : _children) {
double nextHitRate = flow.flow();
- Blueprint & child = *_children[i];
- child.fetchPostings(ExecuteInfo::create(nextHitRate, execInfo));
- flow.add(child.estimate());
+ child->fetchPostings(ExecuteInfo::create(nextHitRate, execInfo));
+ flow.add(child->estimate());
}
}
void
IntermediateBlueprint::freeze()
{
- for (Blueprint::UP &child: _children) {
+ for (auto &child: _children) {
child->freeze();
}
freeze_self();
}
+void
+IntermediateBlueprint::set_matching_phase(MatchingPhase matching_phase) noexcept
+{
+ for (auto &child : _children) {
+ child->set_matching_phase(matching_phase);
+ }
+}
+
namespace {
bool
@@ -826,6 +810,11 @@ LeafBlueprint::freeze()
freeze_self();
}
+void
+LeafBlueprint::set_matching_phase(MatchingPhase) noexcept
+{
+}
+
SearchIterator::UP
LeafBlueprint::createSearch(fef::MatchData &md) const
{
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
index a493c725407..5e156853ffb 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
@@ -7,6 +7,7 @@
#include "unpackinfo.h"
#include "executeinfo.h"
#include "global_filter.h"
+#include "matching_phase.h"
#include "multisearch.h"
#include <vespa/searchlib/common/bitvector.h>
@@ -240,6 +241,7 @@ private:
FlowStats _flow_stats;
uint32_t _sourceId;
uint32_t _docid_limit;
+ uint32_t _id;
bool _strict;
bool _frozen;
@@ -288,6 +290,10 @@ public:
virtual void setDocIdLimit(uint32_t limit) noexcept { _docid_limit = limit; }
uint32_t get_docid_limit() const noexcept { return _docid_limit; }
+ void set_id(uint32_t value) noexcept { _id = value; }
+ uint32_t id() const noexcept { return _id; }
+ virtual uint32_t enumerate(uint32_t next_id) noexcept;
+
bool strict() const noexcept { return _strict; }
virtual void each_node_post_order(const std::function<void(Blueprint&)> &f);
@@ -313,20 +319,6 @@ public:
// optimal ordering. Used for testing.
void null_plan(InFlow in_flow, uint32_t docid_limit);
- // Estimate the actual cost of evaluating the (sub-)query
- // represented by this blueprint with the given in-flow. This
- // function should be called after query planning has been
- // performed. This function could be useful to predict very
- // expensive queries, but the initial use-case is to understand
- // query cost better in micro-benchmarks to improve low-level cost
- // tuning.
- virtual double estimate_actual_cost(InFlow in_flow) const noexcept;
- // Estimate the change in cost caused by having a strict iterator
- // with a non-strict in-flow. Note that this function might force
- // the in_flow to be strict in order to align it with the
- // strictness of this blueprint.
- double estimate_strict_cost_diff(InFlow &in_flow) const noexcept;
-
static Blueprint::UP optimize(Blueprint::UP bp);
virtual void sort(InFlow in_flow) = 0;
static Blueprint::UP optimize_and_sort(Blueprint::UP bp, InFlow in_flow, const Options &opts) {
@@ -396,6 +388,8 @@ public:
virtual void freeze() = 0;
bool frozen() const { return _frozen; }
+ virtual void set_matching_phase(MatchingPhase matching_phase) noexcept = 0;
+
virtual SearchIteratorUP createSearch(fef::MatchData &md) const = 0;
virtual SearchIteratorUP createFilterSearch(FilterConstraint constraint) const = 0;
static SearchIteratorUP create_and_filter(const Children &children, bool strict, FilterConstraint constraint);
@@ -494,11 +488,9 @@ public:
~IntermediateBlueprint() override;
void setDocIdLimit(uint32_t limit) noexcept final;
+ uint32_t enumerate(uint32_t next_id) noexcept override;
void each_node_post_order(const std::function<void(Blueprint&)> &f) override;
- // additional cost not attributed to the children flow (heap merge/unpack/etc)
- virtual double estimate_self_cost(InFlow in_flow) const noexcept;
- double estimate_actual_cost(InFlow in_flow) const noexcept override;
void optimize(Blueprint* &self, OptimizePass pass) final;
void sort(InFlow in_flow) override;
void set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) override;
@@ -523,6 +515,7 @@ public:
void visitMembers(vespalib::ObjectVisitor &visitor) const override;
void fetchPostings(const ExecuteInfo &execInfo) override;
void freeze() final;
+ void set_matching_phase(MatchingPhase matching_phase) noexcept override;
UnpackInfo calculateUnpackInfo(const fef::MatchData & md) const;
IntermediateBlueprint * asIntermediate() noexcept final { return this; }
@@ -569,6 +562,7 @@ public:
const State &getState() const final { return _state; }
void fetchPostings(const ExecuteInfo &execInfo) override;
void freeze() final;
+ void set_matching_phase(MatchingPhase matching_phase) noexcept override;
SearchIteratorUP createSearch(fef::MatchData &md) const override;
const LeafBlueprint * asLeaf() const noexcept final { return this; }
diff --git a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp
index 27ff0d235a3..c4aea7deae8 100644
--- a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp
@@ -22,6 +22,11 @@ CreateBlueprintVisitorHelper::CreateBlueprintVisitorHelper(Searchable &searchabl
CreateBlueprintVisitorHelper::~CreateBlueprintVisitorHelper() = default;
+bool
+CreateBlueprintVisitorHelper::is_search_multi_threaded() const noexcept {
+ return getRequestContext().thread_bundle().size() > 1;
+}
+
attribute::SearchContextParams
CreateBlueprintVisitorHelper::createContextParams() const {
return attribute::SearchContextParams().metaStoreReadGuard(_requestContext.getMetaStoreReadGuard());
@@ -104,7 +109,8 @@ void
CreateBlueprintVisitorHelper::visitWandTerm(query::WandTerm &n)
{
createWeightedSet(std::make_unique<ParallelWeakAndBlueprint>(_field, n.getTargetNumHits(),
- n.getScoreThreshold(), n.getThresholdBoostFactor()),
+ n.getScoreThreshold(), n.getThresholdBoostFactor(),
+ is_search_multi_threaded()),
n);
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h
index 98f62fa3249..ec163260dc3 100644
--- a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h
+++ b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h
@@ -29,6 +29,7 @@ protected:
const IRequestContext & getRequestContext() const { return _requestContext; }
attribute::SearchContextParams createContextParams() const;
attribute::SearchContextParams createContextParams(bool isFilter) const;
+ bool is_search_multi_threaded() const noexcept;
public:
CreateBlueprintVisitorHelper(Searchable &searchable, const FieldSpec &field, const IRequestContext & requestContext);
~CreateBlueprintVisitorHelper() override;
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/exact_nearest_neighbor_iterator.cpp
index c76fe3363e4..791c99d062b 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/exact_nearest_neighbor_iterator.cpp
@@ -1,6 +1,6 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "nearest_neighbor_iterator.h"
+#include "exact_nearest_neighbor_iterator.h"
#include "global_filter.h"
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/tensor/distance_calculator.h>
@@ -20,16 +20,16 @@ namespace search::queryeval {
* Currently always does brute-force scanning, which is very expensive.
**/
template <bool strict, bool has_filter, bool has_single_subspace>
-class NearestNeighborImpl final : public NearestNeighborIterator
+class ExactNearestNeighborImpl final : public ExactNearestNeighborIterator
{
public:
- explicit NearestNeighborImpl(Params params_in)
- : NearestNeighborIterator(std::move(params_in)),
+ explicit ExactNearestNeighborImpl(Params params_in)
+ : ExactNearestNeighborIterator(std::move(params_in)),
_lastScore(0.0)
{
}
- ~NearestNeighborImpl() override;
+ ~ExactNearestNeighborImpl() override;
void doSeek(uint32_t docId) override {
double distanceLimit = params().distanceHeap.distanceLimit();
@@ -68,26 +68,26 @@ private:
};
template <bool strict, bool has_filter, bool has_single_subspace>
-NearestNeighborImpl<strict, has_filter, has_single_subspace>::~NearestNeighborImpl() = default;
+ExactNearestNeighborImpl<strict, has_filter, has_single_subspace>::~ExactNearestNeighborImpl() = default;
namespace {
template <bool strict, bool has_filter>
-std::unique_ptr<NearestNeighborIterator>
-resolve_single_subspace(NearestNeighborIterator::Params params)
+std::unique_ptr<ExactNearestNeighborIterator>
+resolve_single_subspace(ExactNearestNeighborIterator::Params params)
{
if (params.distance_calc->has_single_subspace()) {
- using NNI = NearestNeighborImpl<strict, has_filter, true>;
+ using NNI = ExactNearestNeighborImpl<strict, has_filter, true>;
return std::make_unique<NNI>(std::move(params));
} else {
- using NNI = NearestNeighborImpl<strict, has_filter, false>;
+ using NNI = ExactNearestNeighborImpl<strict, has_filter, false>;
return std::make_unique<NNI>(std::move(params));
}
}
template <bool has_filter>
-std::unique_ptr<NearestNeighborIterator>
-resolve_strict(bool strict, NearestNeighborIterator::Params params)
+std::unique_ptr<ExactNearestNeighborIterator>
+resolve_strict(bool strict, ExactNearestNeighborIterator::Params params)
{
if (strict) {
return resolve_single_subspace<true, has_filter>(std::move(params));
@@ -98,8 +98,8 @@ resolve_strict(bool strict, NearestNeighborIterator::Params params)
} // namespace <unnamed>
-std::unique_ptr<NearestNeighborIterator>
-NearestNeighborIterator::create(bool strict, fef::TermFieldMatchData &tfmd,
+std::unique_ptr<ExactNearestNeighborIterator>
+ExactNearestNeighborIterator::create(bool strict, fef::TermFieldMatchData &tfmd,
std::unique_ptr<search::tensor::DistanceCalculator> distance_calc,
NearestNeighborDistanceHeap &distanceHeap, const GlobalFilter &filter)
{
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h b/searchlib/src/vespa/searchlib/queryeval/exact_nearest_neighbor_iterator.h
index 177c732a44d..73a0232a870 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h
+++ b/searchlib/src/vespa/searchlib/queryeval/exact_nearest_neighbor_iterator.h
@@ -16,7 +16,7 @@ namespace search::queryeval {
class GlobalFilter;
-class NearestNeighborIterator : public SearchIterator
+class ExactNearestNeighborIterator : public SearchIterator
{
public:
using ITensorAttribute = search::tensor::ITensorAttribute;
@@ -39,11 +39,11 @@ public:
{}
};
- explicit NearestNeighborIterator(Params params_in)
+ explicit ExactNearestNeighborIterator(Params params_in)
: _params(std::move(params_in))
{}
- static std::unique_ptr<NearestNeighborIterator> create(
+ static std::unique_ptr<ExactNearestNeighborIterator> create(
bool strict,
fef::TermFieldMatchData &tfmd,
std::unique_ptr<search::tensor::DistanceCalculator> distance_calc,
diff --git a/searchlib/src/vespa/searchlib/queryeval/first_phase_rescorer.cpp b/searchlib/src/vespa/searchlib/queryeval/first_phase_rescorer.cpp
new file mode 100644
index 00000000000..a7b1e3a7c92
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/first_phase_rescorer.cpp
@@ -0,0 +1,38 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "first_phase_rescorer.h"
+
+namespace search::queryeval {
+
+FirstPhaseRescorer::FirstPhaseRescorer(const std::pair<Scores,Scores>& ranges)
+ : _scale(1.0),
+ _adjust(0.0)
+{
+ if (need_rescore(ranges)) {
+ auto& first_phase_scores = ranges.first;
+ auto& second_phase_scores = ranges.second;
+ // scale and adjust the first phase score according to the
+ // first phase and second phase heap score values to avoid that
+ // a score from the first phase is larger than second_phase_scores.low
+ double first_phase_range = first_phase_scores.high - first_phase_scores.low;
+ if (first_phase_range < 1.0) {
+ first_phase_range = 1.0;
+ }
+ double second_phase_range = second_phase_scores.high - second_phase_scores.low;
+ if (second_phase_range < 1.0) {
+ second_phase_range = 1.0;
+ }
+ _scale = second_phase_range / first_phase_range;
+ _adjust = first_phase_scores.low * _scale - second_phase_scores.low;
+ }
+}
+
+bool
+FirstPhaseRescorer::need_rescore(const std::pair<Scores,Scores>& ranges)
+{
+ auto& first_phase_scores = ranges.first;
+ auto& second_phase_scores = ranges.second;
+ return (first_phase_scores.low > second_phase_scores.low);
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/first_phase_rescorer.h b/searchlib/src/vespa/searchlib/queryeval/first_phase_rescorer.h
new file mode 100644
index 00000000000..301e2aa78d0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/first_phase_rescorer.h
@@ -0,0 +1,25 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "scores.h"
+#include <cstdint>
+
+namespace search::queryeval {
+
+/*
+ * Rescore hits not selected for second phase to prevent them from getting
+ * a better score than hits selected for second phase ranking.
+ */
+class FirstPhaseRescorer {
+ double _scale;
+ double _adjust;
+public:
+ FirstPhaseRescorer(const std::pair<Scores,Scores>& ranges);
+ static bool need_rescore(const std::pair<Scores,Scores>& ranges);
+ double rescore(uint32_t, double score) const noexcept {
+ return ((score * _scale) - _adjust);
+ }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
index be7b9031c00..b7841dc2017 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -204,16 +204,6 @@ double ordered_cost_of(ADAPTER adapter, const T &children, F flow, bool allow_fo
return total_cost;
}
-static double actual_cost_of(auto adapter, const auto &children, auto flow, auto cost_of) noexcept {
- double total_cost = 0.0;
- for (const auto &child: children) {
- double child_cost = cost_of(child, InFlow(flow.strict(), flow.flow()));
- flow.update_cost(total_cost, child_cost);
- flow.add(adapter.estimate(child));
- }
- return total_cost;
-}
-
auto select_strict_and_child(auto adapter, const auto &children, size_t first, double est, bool native_strict) {
double cost = 0.0;
size_t best_idx = first;
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
index cf1d1a8c09f..5ed61ef9fc8 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
@@ -60,6 +60,12 @@ inline size_t get_num_indirections(const attribute::BasicType& basic_type,
return res;
}
+// Some blueprints are not able to provide a hit estimate (e.g. attributes without fast-search).
+// In such cases the following estimate is used instead. In most cases this is an overestimate.
+inline double estimate_when_unknown() {
+ return 0.1;
+}
+
// Non-strict cost of lookup based matching in an attribute (not fast-search).
// Test used: IteratorBenchmark::analyze_term_search_in_attributes_non_strict
inline double lookup_cost(size_t num_indirections) {
@@ -90,7 +96,7 @@ inline double lookup_strict_cost(size_t num_indirections) {
* as the latency (time) penalty is higher if choosing wrong.
*/
inline double non_strict_cost_of_strict_iterator(double estimate, double strict_cost) {
- return strict_cost + strict_cost_diff(estimate, 1.0);
+ return 2.0 * (strict_cost + strict_cost_diff(estimate, 0.5));
}
// Strict cost of matching in a btree posting list (e.g. fast-search attribute or memory index field).
diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
index bf7f44f0e7a..01587ef485a 100644
--- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
@@ -1,6 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "hitcollector.h"
+#include "first_phase_rescorer.h"
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/common/sort.h>
#include <cassert>
@@ -43,9 +44,7 @@ HitCollector::HitCollector(uint32_t numDocs, uint32_t maxHitsSize)
_unordered(false),
_docIdVector(),
_bitVector(),
- _reRankedHits(),
- _scale(1.0),
- _adjust(0)
+ _reRankedHits()
{
if (_maxHitsSize > 0) {
_collector = std::make_unique<RankedHitCollector>(*this);
@@ -71,7 +70,7 @@ HitCollector::RankedHitCollector::collect(uint32_t docId, feature_t score)
}
hc._hits.emplace_back(docId, score);
} else {
- collectAndChangeCollector(docId, score);
+ collectAndChangeCollector(docId, score); // note - self-destruct.
}
}
@@ -101,11 +100,10 @@ HitCollector::RankedHitCollector::collectAndChangeCollector(uint32_t docId, feat
if (hc._maxDocIdVectorSize > hc._maxHitsSize) {
// start using docid vector
hc._docIdVector.reserve(hc._maxDocIdVectorSize);
- uint32_t iSize = hc._hits.size();
- for (uint32_t i = 0; i < iSize; ++i) {
- hc._docIdVector.push_back(hc._hits[i].first);
+ for (const auto& hit : hc._hits) {
+ hc._docIdVector.push_back(hit.first);
}
- if ((iSize > 0) && (docId < hc._docIdVector.back())) {
+ if (!hc._docIdVector.empty() && (docId < hc._docIdVector.back())) {
hc._unordered = true;
}
hc._docIdVector.push_back(docId);
@@ -114,9 +112,8 @@ HitCollector::RankedHitCollector::collectAndChangeCollector(uint32_t docId, feat
// start using bit vector
hc._bitVector = BitVector::create(hc._numDocs);
hc._bitVector->invalidateCachedCount();
- uint32_t iSize = hc._hits.size();
- for (uint32_t i = 0; i < iSize; ++i) {
- hc._bitVector->setBit(hc._hits[i].first);
+ for (const auto& hit : _hc._hits) {
+ hc._bitVector->setBit(hit.first);
}
hc._bitVector->setBit(docId);
newCollector = std::make_unique<BitVectorCollector<true>>(hc);
@@ -125,7 +122,7 @@ HitCollector::RankedHitCollector::collectAndChangeCollector(uint32_t docId, feat
std::make_heap(hc._hits.begin(), hc._hits.end(), ScoreComparator());
hc._hitsSortOrder = SortOrder::HEAP;
this->considerForHitVector(docId, score);
- hc._collector = std::move(newCollector);
+ hc._collector = std::move(newCollector); // note - self-destruct.
}
template<bool CollectRankedHit>
@@ -145,7 +142,7 @@ HitCollector::DocIdCollector<CollectRankedHit>::collect(uint32_t docId, feature_
}
hc._docIdVector.push_back(docId);
} else {
- collectAndChangeCollector(docId);
+ collectAndChangeCollector(docId); // note - self-destruct.
}
}
@@ -157,9 +154,8 @@ HitCollector::DocIdCollector<CollectRankedHit>::collectAndChangeCollector(uint32
// start using bit vector instead of docid array.
hc._bitVector = BitVector::create(hc._numDocs);
hc._bitVector->invalidateCachedCount();
- uint32_t iSize = static_cast<uint32_t>(hc._docIdVector.size());
- for (uint32_t i = 0; i < iSize; ++i) {
- hc._bitVector->setBit(hc._docIdVector[i]);
+ for (auto docid : hc._docIdVector) {
+ hc._bitVector->setBit(docid);
}
std::vector<uint32_t> emptyVector;
emptyVector.swap(hc._docIdVector);
@@ -191,91 +187,231 @@ HitCollector::setRanges(const std::pair<Scores, Scores> &ranges)
namespace {
+struct NoRescorer
+{
+ static double rescore(uint32_t, double score) noexcept { return score; }
+};
+
+template <typename Rescorer>
+class RerankRescorer {
+ Rescorer _rescorer;
+ using HitVector = std::vector<HitCollector::Hit>;
+ using Iterator = typename HitVector::const_iterator;
+ Iterator _reranked_cur;
+ Iterator _reranked_end;
+public:
+ RerankRescorer(const Rescorer& rescorer,
+ const HitVector& reranked_hits)
+ : _rescorer(rescorer),
+ _reranked_cur(reranked_hits.begin()),
+ _reranked_end(reranked_hits.end())
+ {
+ }
+
+ double rescore(uint32_t docid, double score) noexcept {
+ if (_reranked_cur != _reranked_end && _reranked_cur->first == docid) {
+ double result = _reranked_cur->second;
+ ++_reranked_cur;
+ return result;
+ } else {
+ return _rescorer.rescore(docid, score);
+ }
+ }
+};
+
+class SimpleHitAdder {
+protected:
+ ResultSet& _rs;
+public:
+ SimpleHitAdder(ResultSet& rs)
+ : _rs(rs)
+ {
+ }
+ void add(uint32_t docid, double rank_value) {
+ _rs.push_back({docid, rank_value});
+ }
+};
+
+class ConditionalHitAdder : public SimpleHitAdder {
+protected:
+ double _second_phase_rank_drop_limit;
+public:
+ ConditionalHitAdder(ResultSet& rs, double second_phase_rank_drop_limit)
+ : SimpleHitAdder(rs),
+ _second_phase_rank_drop_limit(second_phase_rank_drop_limit)
+ {
+ }
+ void add(uint32_t docid, double rank_value) {
+ if (rank_value > _second_phase_rank_drop_limit) {
+ _rs.push_back({docid, rank_value});
+ }
+ }
+};
+
+class TrackingConditionalHitAdder : public ConditionalHitAdder {
+ std::vector<uint32_t>& _dropped;
+public:
+ TrackingConditionalHitAdder(ResultSet& rs, double second_phase_rank_drop_limit, std::vector<uint32_t>& dropped)
+ : ConditionalHitAdder(rs, second_phase_rank_drop_limit),
+ _dropped(dropped)
+ {
+ }
+ void add(uint32_t docid, double rank_value) {
+ if (rank_value > _second_phase_rank_drop_limit) {
+ _rs.push_back({docid, rank_value});
+ } else {
+ _dropped.emplace_back(docid);
+ }
+ }
+};
+
+template <typename HitAdder, typename Rescorer>
void
-mergeHitsIntoResultSet(const std::vector<HitCollector::Hit> &hits, ResultSet &result)
+add_rescored_hits(HitAdder hit_adder, const std::vector<HitCollector::Hit>& hits, Rescorer rescorer)
{
- uint32_t rhCur(0);
- uint32_t rhEnd(result.getArrayUsed());
- for (const auto &hit : hits) {
- while (rhCur != rhEnd && result[rhCur].getDocId() != hit.first) {
- // just set the iterators right
- ++rhCur;
+ for (auto& hit : hits) {
+ hit_adder.add(hit.first, rescorer.rescore(hit.first, hit.second));
+ }
+}
+
+template <typename HitAdder, typename Rescorer>
+void
+add_rescored_hits(HitAdder hit_adder, const std::vector<HitCollector::Hit>& hits, const std::vector<HitCollector::Hit>& reranked_hits, Rescorer rescorer)
+{
+ if (reranked_hits.empty()) {
+ add_rescored_hits(hit_adder, hits, rescorer);
+ } else {
+ add_rescored_hits(hit_adder, hits, RerankRescorer(rescorer, reranked_hits));
+ }
+}
+
+template <typename Rescorer>
+void
+add_rescored_hits(ResultSet& rs, const std::vector<HitCollector::Hit>& hits, const std::vector<HitCollector::Hit>& reranked_hits, std::optional<double> second_phase_rank_drop_limit, std::vector<uint32_t>* dropped, Rescorer rescorer)
+{
+ if (second_phase_rank_drop_limit.has_value()) {
+ if (dropped != nullptr) {
+ add_rescored_hits(TrackingConditionalHitAdder(rs, second_phase_rank_drop_limit.value(), *dropped), hits, reranked_hits, rescorer);
+ } else {
+ add_rescored_hits(ConditionalHitAdder(rs, second_phase_rank_drop_limit.value()), hits, reranked_hits, rescorer);
}
- assert(rhCur != rhEnd); // the hits should be a subset of the hits in ranked hit array.
- result[rhCur]._rankValue = hit.second;
+ } else {
+ add_rescored_hits(SimpleHitAdder(rs), hits, reranked_hits, rescorer);
+ }
+}
+
+template <typename HitAdder, typename Rescorer>
+void
+mixin_rescored_hits(HitAdder hit_adder, const std::vector<HitCollector::Hit>& hits, const std::vector<uint32_t>& docids, double default_value, Rescorer rescorer)
+{
+ auto hits_cur = hits.begin();
+ auto hits_end = hits.end();
+ for (auto docid : docids) {
+ if (hits_cur != hits_end && docid == hits_cur->first) {
+ hit_adder.add(docid, rescorer.rescore(docid, hits_cur->second));
+ ++hits_cur;
+ } else {
+ hit_adder.add(docid, default_value);
+ }
+ }
+}
+
+template <typename HitAdder, typename Rescorer>
+void
+mixin_rescored_hits(HitAdder hit_adder, const std::vector<HitCollector::Hit>& hits, const std::vector<uint32_t>& docids, double default_value, const std::vector<HitCollector::Hit>& reranked_hits, Rescorer rescorer)
+{
+ if (reranked_hits.empty()) {
+ mixin_rescored_hits(hit_adder, hits, docids, default_value, rescorer);
+ } else {
+ mixin_rescored_hits(hit_adder, hits, docids, default_value, RerankRescorer(rescorer, reranked_hits));
+ }
+}
+
+template <typename Rescorer>
+void
+mixin_rescored_hits(ResultSet& rs, const std::vector<HitCollector::Hit>& hits, const std::vector<uint32_t>& docids, double default_value, const std::vector<HitCollector::Hit>& reranked_hits, std::optional<double> second_phase_rank_drop_limit, std::vector<uint32_t>* dropped, Rescorer rescorer)
+{
+ if (second_phase_rank_drop_limit.has_value()) {
+ if (dropped != nullptr) {
+ mixin_rescored_hits(TrackingConditionalHitAdder(rs, second_phase_rank_drop_limit.value(), *dropped), hits, docids, default_value, reranked_hits, rescorer);
+ } else {
+ mixin_rescored_hits(ConditionalHitAdder(rs, second_phase_rank_drop_limit.value()), hits, docids, default_value, reranked_hits, rescorer);
+ }
+ } else {
+ mixin_rescored_hits(SimpleHitAdder(rs), hits, docids, default_value, reranked_hits, rescorer);
+ }
+}
+
+void
+add_bitvector_to_dropped(std::vector<uint32_t>& dropped, vespalib::ConstArrayRef<RankedHit> hits, const BitVector& bv)
+{
+ auto hits_cur = hits.begin();
+ auto hits_end = hits.end();
+ auto docid = bv.getFirstTrueBit();
+ auto docid_limit = bv.size();
+ while (docid < docid_limit) {
+ if (hits_cur != hits_end && hits_cur->getDocId() == docid) {
+ ++hits_cur;
+ } else {
+ dropped.emplace_back(docid);
+ }
+ docid = bv.getNextTrueBit(docid + 1);
}
}
}
std::unique_ptr<ResultSet>
-HitCollector::getResultSet(HitRank default_value)
+HitCollector::get_result_set(std::optional<double> second_phase_rank_drop_limit, std::vector<uint32_t>* dropped)
{
- bool needReScore = false;
- Scores &initHeapScores = _ranges.first;
- Scores &finalHeapScores = _ranges.second;
- if (initHeapScores.low > finalHeapScores.low) {
- // scale and adjust the score according to the range
- // of the initial and final heap score values to avoid that
- // a score from the first phase is larger than finalHeapScores.low
- feature_t initRange = initHeapScores.high - initHeapScores.low;
- if (initRange < 1.0) initRange = 1.0f;
- feature_t finalRange = finalHeapScores.high - finalHeapScores.low;
- if (finalRange < 1.0) finalRange = 1.0f;
- _scale = finalRange / initRange;
- _adjust = initHeapScores.low * _scale - finalHeapScores.low;
- needReScore = true;
+ /*
+ * Use default_rank_value (i.e. -HUGE_VAL) when hit collector saves
+ * rank scores, otherwise use zero_rank_value (i.e. 0.0).
+ */
+ auto default_value = save_rank_scores() ? search::default_rank_value : search::zero_rank_value;
+
+ bool needReScore = FirstPhaseRescorer::need_rescore(_ranges);
+ FirstPhaseRescorer rescorer(_ranges);
+
+ if (dropped != nullptr) {
+ dropped->clear();
}
// destroys the heap property or score sort order
sortHitsByDocId();
auto rs = std::make_unique<ResultSet>();
- if ( ! _collector->isDocIdCollector() ) {
- unsigned int iSize = _hits.size();
- rs->allocArray(iSize);
+ if ( ! _collector->isDocIdCollector() ||
+ (second_phase_rank_drop_limit.has_value() &&
+ (_bitVector || dropped == nullptr))) {
+ rs->allocArray(_hits.size());
+ auto* dropped_or_null = dropped;
+ if (second_phase_rank_drop_limit.has_value() && _bitVector) {
+ dropped_or_null = nullptr;
+ }
if (needReScore) {
- for (uint32_t i = 0; i < iSize; ++i) {
- rs->push_back(RankedHit(_hits[i].first, getReScore(_hits[i].second)));
- }
+ add_rescored_hits(*rs, _hits, _reRankedHits, second_phase_rank_drop_limit, dropped_or_null, rescorer);
} else {
- for (uint32_t i = 0; i < iSize; ++i) {
- rs->push_back(RankedHit(_hits[i].first, _hits[i].second));
- }
+ add_rescored_hits(*rs, _hits, _reRankedHits, second_phase_rank_drop_limit, dropped_or_null, NoRescorer());
}
} else {
if (_unordered) {
std::sort(_docIdVector.begin(), _docIdVector.end());
}
- unsigned int iSize = _hits.size();
- unsigned int jSize = _docIdVector.size();
- rs->allocArray(jSize);
- uint32_t i = 0;
+ rs->allocArray(_docIdVector.size());
if (needReScore) {
- for (uint32_t j = 0; j < jSize; ++j) {
- uint32_t docId = _docIdVector[j];
- if (i < iSize && docId == _hits[i].first) {
- rs->push_back(RankedHit(docId, getReScore(_hits[i].second)));
- ++i;
- } else {
- rs->push_back(RankedHit(docId, default_value));
- }
- }
+ mixin_rescored_hits(*rs, _hits, _docIdVector, default_value, _reRankedHits, second_phase_rank_drop_limit, dropped, rescorer);
} else {
- for (uint32_t j = 0; j < jSize; ++j) {
- uint32_t docId = _docIdVector[j];
- if (i < iSize && docId == _hits[i].first) {
- rs->push_back(RankedHit(docId, _hits[i].second));
- ++i;
- } else {
- rs->push_back(RankedHit(docId, default_value));
- }
- }
+ mixin_rescored_hits(*rs, _hits, _docIdVector, default_value, _reRankedHits, second_phase_rank_drop_limit, dropped, NoRescorer());
}
}
- if (!_reRankedHits.empty()) {
- mergeHitsIntoResultSet(_reRankedHits, *rs);
+ if (second_phase_rank_drop_limit.has_value() && _bitVector) {
+ if (dropped != nullptr) {
+ assert(dropped->empty());
+ add_bitvector_to_dropped(*dropped, {rs->getArray(), rs->getArrayUsed()}, *_bitVector);
+ }
+ _bitVector.reset();
}
if (_bitVector) {
@@ -285,4 +421,10 @@ HitCollector::getResultSet(HitRank default_value)
return rs;
}
+std::unique_ptr<ResultSet>
+HitCollector::getResultSet()
+{
+ return get_result_set(std::nullopt, nullptr);
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h
index 94ffe619bab..c23fb0a6ef6 100644
--- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h
+++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h
@@ -8,6 +8,7 @@
#include <vespa/searchlib/common/resultset.h>
#include <vespa/vespalib/util/sort.h>
#include <algorithm>
+#include <optional>
#include <vector>
namespace search::queryeval {
@@ -35,8 +36,6 @@ private:
std::vector<Hit> _reRankedHits;
std::pair<Scores, Scores> _ranges;
- feature_t _scale;
- feature_t _adjust;
struct ScoreComparator {
bool operator() (const Hit & lhs, const Hit & rhs) const noexcept {
@@ -120,12 +119,11 @@ private:
void collect(uint32_t docId, feature_t score) override;
};
- HitRank getReScore(feature_t score) const {
- return ((score * _scale) - _adjust);
- }
VESPA_DLL_LOCAL void sortHitsByScore(size_t topn);
VESPA_DLL_LOCAL void sortHitsByDocId();
+ bool save_rank_scores() const noexcept { return _maxHitsSize != 0; }
+
public:
HitCollector(const HitCollector &) = delete;
HitCollector &operator=(const HitCollector &) = delete;
@@ -169,15 +167,17 @@ public:
const std::pair<Scores, Scores> &getRanges() const { return _ranges; }
void setRanges(const std::pair<Scores, Scores> &ranges);
+ std::unique_ptr<ResultSet>
+ get_result_set(std::optional<double> second_phase_rank_drop_limit, std::vector<uint32_t>* dropped);
+
/**
* Returns a result set based on the content of this collector.
* Invoking this method will destroy the heap property of the
* ranked hits and the match data heap.
*
- * @param auto pointer to the result set
- * @param default_value rank value to be used for results without rank value
+ * @return unique pointer to the result set
**/
- std::unique_ptr<ResultSet> getResultSet(HitRank default_value = default_rank_value);
+ std::unique_ptr<ResultSet> getResultSet();
};
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
index 33b249572f0..93cb8d68c33 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
@@ -26,7 +26,7 @@ size_t lookup_create_source(std::vector<std::unique_ptr<CombineType> > &sources,
return i;
}
}
- sources.push_back(std::unique_ptr<CombineType>(new CombineType()));
+ sources.push_back(std::make_unique<CombineType>());
sources.back()->setSourceId(child_source);
sources.back()->setDocIdLimit(docid_limit);
return (sources.size() - 1);
@@ -318,11 +318,6 @@ OrBlueprint::calculate_flow_stats(uint32_t) const {
OrFlow::cost_of(get_children(), true) + flow::heap_cost(est, get_children().size())};
}
-double
-OrBlueprint::estimate_self_cost(InFlow in_flow) const noexcept {
- return in_flow.strict() ? flow::heap_cost(estimate(), get_children().size()) : 0.0;
-}
-
Blueprint::HitEstimate
OrBlueprint::combine(const std::vector<HitEstimate> &data) const
{
@@ -424,6 +419,13 @@ WeakAndBlueprint::my_flow(InFlow in_flow) const
return AnyFlow::create<OrFlow>(in_flow);
}
+WeakAndBlueprint::WeakAndBlueprint(uint32_t n, float idf_range, bool thread_safe)
+ : _scores(WeakAndPriorityQueue::createHeap(n, thread_safe)),
+ _n(n),
+ _idf_range(idf_range),
+ _weights()
+{}
+
WeakAndBlueprint::~WeakAndBlueprint() = default;
FlowStats
@@ -436,11 +438,6 @@ WeakAndBlueprint::calculate_flow_stats(uint32_t docid_limit) const {
OrFlow::cost_of(get_children(), true) + flow::heap_cost(est, get_children().size())};
}
-double
-WeakAndBlueprint::estimate_self_cost(InFlow in_flow) const noexcept {
- return in_flow.strict() ? flow::heap_cost(estimate(), get_children().size()) : 0.0;
-}
-
Blueprint::HitEstimate
WeakAndBlueprint::combine(const std::vector<HitEstimate> &data) const
{
@@ -488,11 +485,12 @@ WeakAndBlueprint::createIntermediateSearch(MultiSearch::Children sub_searches,
assert(_weights.size() == childCnt());
for (size_t i = 0; i < sub_searches.size(); ++i) {
// TODO: pass ownership with unique_ptr
- terms.emplace_back(sub_searches[i].release(),
- _weights[i],
+ terms.emplace_back(sub_searches[i].release(), _weights[i],
getChild(i).getState().estimate().estHits);
}
- return WeakAndSearch::create(terms, _n, strict());
+ return (_idf_range == 0.0)
+ ? WeakAndSearch::create(terms, wand::MatchParams(*_scores), wand::TermFrequencyScorer(), _n, strict())
+ : WeakAndSearch::create(terms, wand::MatchParams(*_scores), wand::Bm25TermFrequencyScorer(get_docid_limit(), _idf_range), _n, strict());
}
SearchIterator::UP
@@ -517,11 +515,6 @@ NearBlueprint::calculate_flow_stats(uint32_t) const {
AndFlow::cost_of(get_children(), true) + childCnt() * est};
}
-double
-NearBlueprint::estimate_self_cost(InFlow) const noexcept {
- return childCnt() * estimate();
-}
-
Blueprint::HitEstimate
NearBlueprint::combine(const std::vector<HitEstimate> &data) const
{
@@ -562,7 +555,7 @@ NearBlueprint::createIntermediateSearch(MultiSearch::Children sub_searches,
tfmda.add(cs.field(j).resolve(md));
}
}
- return SearchIterator::UP(new NearSearch(std::move(sub_searches), tfmda, _window, strict()));
+ return std::make_unique<NearSearch>(std::move(sub_searches), tfmda, _window, strict());
}
SearchIterator::UP
@@ -587,11 +580,6 @@ ONearBlueprint::calculate_flow_stats(uint32_t) const {
AndFlow::cost_of(get_children(), true) + childCnt() * est};
}
-double
-ONearBlueprint::estimate_self_cost(InFlow) const noexcept {
- return childCnt() * estimate();
-}
-
Blueprint::HitEstimate
ONearBlueprint::combine(const std::vector<HitEstimate> &data) const
{
@@ -630,7 +618,7 @@ ONearBlueprint::createIntermediateSearch(MultiSearch::Children sub_searches,
}
// could sort sub_searches here
// but then strictness inheritance would also need to be fixed
- return SearchIterator::UP(new ONearSearch(std::move(sub_searches), tfmda, _window, strict()));
+ return std::make_unique<ONearSearch>(std::move(sub_searches), tfmda, _window, strict());
}
SearchIterator::UP
@@ -756,7 +744,8 @@ SourceBlenderBlueprint::calculate_flow_stats(uint32_t) const {
my_cost = std::max(my_cost, child->cost());
my_strict_cost = std::max(my_strict_cost, child->strict_cost());
}
- return {OrFlow::estimate_of(get_children()), my_cost, my_strict_cost};
+ double my_est = OrFlow::estimate_of(get_children());
+ return {my_est, my_cost + 1.0, my_strict_cost + my_est};
}
Blueprint::HitEstimate
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
index ade4c9318e4..87331ca83c5 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
@@ -4,6 +4,7 @@
#include "blueprint.h"
#include "multisearch.h"
+#include <vespa/searchlib/queryeval/wand/weak_and_heap.h>
namespace search::queryeval {
@@ -67,7 +68,6 @@ public:
~OrBlueprint() override;
bool supports_termwise_children() const override { return true; }
FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
- double estimate_self_cost(InFlow in_flow) const noexcept override;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
void optimize_self(OptimizePass pass) override;
@@ -89,13 +89,14 @@ private:
class WeakAndBlueprint : public IntermediateBlueprint
{
private:
+ std::unique_ptr<WeakAndPriorityQueue> _scores;
uint32_t _n;
+ float _idf_range;
std::vector<uint32_t> _weights;
AnyFlow my_flow(InFlow in_flow) const override;
public:
FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
- double estimate_self_cost(InFlow in_flow) const noexcept override;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
Blueprint::UP get_replacement() override;
@@ -107,14 +108,15 @@ public:
fef::MatchData &md) const override;
SearchIterator::UP createFilterSearch(FilterConstraint constraint) const override;
- explicit WeakAndBlueprint(uint32_t n) noexcept : _n(n) {}
+ explicit WeakAndBlueprint(uint32_t n) : WeakAndBlueprint(n, 0.0, true) {}
+ WeakAndBlueprint(uint32_t n, float idf_range, bool thread_safe);
~WeakAndBlueprint() override;
void addTerm(Blueprint::UP bp, uint32_t weight) {
addChild(std::move(bp));
_weights.push_back(weight);
}
- uint32_t getN() const { return _n; }
- const std::vector<uint32_t> &getWeights() const { return _weights; }
+ uint32_t getN() const noexcept { return _n; }
+ const std::vector<uint32_t> &getWeights() const noexcept { return _weights; }
};
//-----------------------------------------------------------------------------
@@ -127,7 +129,6 @@ private:
AnyFlow my_flow(InFlow in_flow) const override;
public:
FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
- double estimate_self_cost(InFlow in_flow) const noexcept override;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
void sort(Children &children, InFlow in_flow) const override;
@@ -150,7 +151,6 @@ private:
AnyFlow my_flow(InFlow in_flow) const override;
public:
FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
- double estimate_self_cost(InFlow in_flow) const noexcept override;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
void sort(Children &children, InFlow in_flow) const override;
diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
index d825c9e1a20..9d19ba87af7 100644
--- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
@@ -11,9 +11,9 @@ namespace search::queryeval {
//-----------------------------------------------------------------------------
FlowStats
-EmptyBlueprint::calculate_flow_stats(uint32_t docid_limit) const
+EmptyBlueprint::calculate_flow_stats(uint32_t) const
{
- return default_flow_stats(docid_limit, 0, 0);
+ return {0.0, 0.2, 0.0};
}
SearchIterator::UP
diff --git a/searchlib/src/vespa/searchlib/queryeval/matching_phase.h b/searchlib/src/vespa/searchlib/queryeval/matching_phase.h
new file mode 100644
index 00000000000..cf475099c58
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/matching_phase.h
@@ -0,0 +1,18 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search::queryeval {
+
+/*
+ * The different matching phases when evaluating a query.
+ */
+enum class MatchingPhase {
+ FIRST_PHASE,
+ SECOND_PHASE,
+ MATCH_FEATURES,
+ SUMMARY_FEATURES,
+ DUMP_FEATURES
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
index da1e29875be..0a303b0a613 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
@@ -2,7 +2,7 @@
#include "nearest_neighbor_blueprint.h"
#include "emptysearch.h"
-#include "nearest_neighbor_iterator.h"
+#include "exact_nearest_neighbor_iterator.h"
#include "nns_index_iterator.h"
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/searchlib/tensor/dense_tensor_attribute.h>
@@ -143,9 +143,9 @@ NearestNeighborBlueprint::createLeafSearch(const search::fef::TermFieldMatchData
default:
;
}
- return NearestNeighborIterator::create(strict(), tfmd,
- std::make_unique<search::tensor::DistanceCalculator>(_attr_tensor, _query_tensor),
- _distance_heap, *_global_filter);
+ return ExactNearestNeighborIterator::create(strict(), tfmd,
+ std::make_unique<search::tensor::DistanceCalculator>(_attr_tensor, _query_tensor),
+ _distance_heap, *_global_filter);
}
void
diff --git a/searchlib/src/vespa/searchlib/queryeval/scores.h b/searchlib/src/vespa/searchlib/queryeval/scores.h
index 19976a2831b..83b1ba9d406 100644
--- a/searchlib/src/vespa/searchlib/queryeval/scores.h
+++ b/searchlib/src/vespa/searchlib/queryeval/scores.h
@@ -24,6 +24,9 @@ struct Scores {
high = score;
}
}
+ bool operator==(const Scores& rhs) const {
+ return low == rhs.low && high == rhs.high;
+ }
};
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp
index 2b25aa29747..c5435b557b0 100644
--- a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp
@@ -191,16 +191,14 @@ SimplePhraseSearch::doSeek(uint32_t doc_id) {
void
SimplePhraseSearch::doStrictSeek(uint32_t doc_id) {
uint32_t next_candidate = doc_id;
- while (getDocId() < doc_id || getDocId() == beginId()) {
- getChildren()[0]->seek(next_candidate + 1);
- next_candidate = getChildren()[0]->getDocId();
+ auto &best_child = *getChildren()[_eval_order[0]];
+ while (getDocId() < doc_id) {
+ best_child.seek(next_candidate + 1);
+ next_candidate = best_child.getDocId();
if (isAtEnd(next_candidate)) {
setAtEnd();
return;
}
- // child must behave as strict.
- assert(next_candidate > doc_id && next_candidate != beginId());
-
phraseSeek(next_candidate);
}
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp
index 4c55496822b..48bef125ec3 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp
@@ -1,42 +1,23 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "parallel_weak_and_blueprint.h"
-#include "wand_parts.h"
#include "parallel_weak_and_search.h"
#include <vespa/searchlib/queryeval/field_spec.hpp>
#include <vespa/searchlib/queryeval/searchiterator.h>
#include <vespa/searchlib/queryeval/flow_tuning.h>
-#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/vespalib/objects/visit.hpp>
#include <algorithm>
namespace search::queryeval {
-ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(FieldSpecBase field,
- uint32_t scoresToTrack,
- score_t scoreThreshold,
- double thresholdBoostFactor)
+ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(FieldSpecBase field, uint32_t scoresToTrack,
+ score_t scoreThreshold, double thresholdBoostFactor,
+ bool thread_safe)
: ComplexLeafBlueprint(field),
- _scores(scoresToTrack),
+ _scores(WeakAndPriorityQueue::createHeap(scoresToTrack, thread_safe)),
_scoreThreshold(scoreThreshold),
_thresholdBoostFactor(thresholdBoostFactor),
- _scoresAdjustFrequency(DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY),
- _layout(),
- _weights(),
- _terms()
-{
-}
-
-ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(FieldSpecBase field,
- uint32_t scoresToTrack,
- score_t scoreThreshold,
- double thresholdBoostFactor,
- uint32_t scoresAdjustFrequency)
- : ComplexLeafBlueprint(field),
- _scores(scoresToTrack),
- _scoreThreshold(scoreThreshold),
- _thresholdBoostFactor(thresholdBoostFactor),
- _scoresAdjustFrequency(scoresAdjustFrequency),
+ _scoresAdjustFrequency(wand::DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY),
_layout(),
_weights(),
_terms()
@@ -84,7 +65,7 @@ ParallelWeakAndBlueprint::calculate_flow_stats(uint32_t docid_limit) const
term->update_flow_stats(docid_limit);
}
double child_est = OrFlow::estimate_of(_terms);
- double my_est = abs_to_rel_est(_scores.getScoresToTrack(), docid_limit);
+ double my_est = abs_to_rel_est(_scores->getScoresToTrack(), docid_limit);
double est = (child_est + my_est) / 2.0;
return {est, OrFlow::cost_of(_terms, false),
OrFlow::cost_of(_terms, true) + flow::heap_cost(est, _terms.size())};
@@ -106,14 +87,11 @@ ParallelWeakAndBlueprint::createLeafSearch(const search::fef::TermFieldMatchData
childState.estimate().estHits,
childState.field(0).resolve(*childrenMatchData));
}
- return SearchIterator::UP
- (ParallelWeakAndSearch::create(terms,
- ParallelWeakAndSearch::MatchParams(_scores,
- _scoreThreshold,
- _thresholdBoostFactor,
- _scoresAdjustFrequency).setDocIdLimit(get_docid_limit()),
- ParallelWeakAndSearch::RankParams(*tfmda[0],
- std::move(childrenMatchData)), strict()));
+ return ParallelWeakAndSearch::create(terms,
+ ParallelWeakAndSearch::MatchParams(*_scores, _scoreThreshold, _thresholdBoostFactor,
+ _scoresAdjustFrequency, get_docid_limit()),
+ ParallelWeakAndSearch::RankParams(*tfmda[0],std::move(childrenMatchData)),
+ strict());
}
std::unique_ptr<SearchIterator>
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h
index 4a55bf14095..c34d366120e 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h
@@ -11,8 +11,6 @@
namespace search::queryeval {
-const uint32_t DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY = 4;
-
/**
* Blueprint for the parallel weak and search operator.
*/
@@ -21,32 +19,24 @@ class ParallelWeakAndBlueprint : public ComplexLeafBlueprint
private:
using score_t = wand::score_t;
- mutable SharedWeakAndPriorityQueue _scores;
- const wand::score_t _scoreThreshold;
- double _thresholdBoostFactor;
- const uint32_t _scoresAdjustFrequency;
- fef::MatchDataLayout _layout;
- std::vector<int32_t> _weights;
- std::vector<Blueprint::UP> _terms;
+ std::unique_ptr<WeakAndPriorityQueue> _scores;
+ const wand::score_t _scoreThreshold;
+ double _thresholdBoostFactor;
+ const uint32_t _scoresAdjustFrequency;
+ fef::MatchDataLayout _layout;
+ std::vector<int32_t> _weights;
+ std::vector<Blueprint::UP> _terms;
public:
ParallelWeakAndBlueprint(const ParallelWeakAndBlueprint &) = delete;
ParallelWeakAndBlueprint &operator=(const ParallelWeakAndBlueprint &) = delete;
- ParallelWeakAndBlueprint(FieldSpecBase field,
- uint32_t scoresToTrack,
- score_t scoreThreshold,
- double thresholdBoostFactor);
- ParallelWeakAndBlueprint(FieldSpecBase field,
- uint32_t scoresToTrack,
- score_t scoreThreshold,
- double thresholdBoostFactor,
- uint32_t scoresAdjustFrequency);
+ ParallelWeakAndBlueprint(FieldSpecBase field, uint32_t scoresToTrack,
+ score_t scoreThreshold, double thresholdBoostFactor,
+ bool thread_safe);
~ParallelWeakAndBlueprint() override;
- const WeakAndHeap &getScores() const { return _scores; }
-
+ const WeakAndHeap &getScores() const { return *_scores; }
score_t getScoreThreshold() const { return _scoreThreshold; }
-
double getThresholdBoostFactor() const { return _thresholdBoostFactor; }
// Used by create visitor
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp
index 9e887b9d0f7..78d97e8efa0 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp
@@ -77,6 +77,7 @@ public:
_matchParams(matchParams),
_localScores()
{
+ _localScores.reserve(_matchParams.scoresAdjustFrequency);
}
size_t get_num_terms() const override { return _terms.size(); }
int32_t get_term_weight(size_t idx) const override { return _terms.weight(idx); }
@@ -199,17 +200,17 @@ namespace {
template <typename VectorizedTerms, typename FutureHeap, typename PastHeap>
SearchIterator::UP create_helper(search::fef::TermFieldMatchData &tfmd, VectorizedTerms &&terms, const MatchParams &params, bool strict) {
if (strict) {
- return std::make_unique<wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, true>>(tfmd, std::move(terms), params);
+ return std::make_unique<wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, true>>(tfmd, std::forward<VectorizedTerms>(terms), params);
} else {
- return std::make_unique<wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, false>>(tfmd, std::move(terms), params);
+ return std::make_unique<wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, false>>(tfmd, std::forward<VectorizedTerms>(terms), params);
}
}
template <typename VectorizedTerms>
SearchIterator::UP create_helper(search::fef::TermFieldMatchData &tfmd, VectorizedTerms &&terms, const MatchParams &params, bool strict, bool use_array) {
return (use_array)
- ? create_helper<VectorizedTerms, vespalib::LeftArrayHeap, vespalib::RightArrayHeap>(tfmd, std::move(terms), params, strict)
- : create_helper<VectorizedTerms, vespalib::LeftHeap, vespalib::RightHeap>(tfmd, std::move(terms), params, strict);
+ ? create_helper<VectorizedTerms, vespalib::LeftArrayHeap, vespalib::RightArrayHeap>(tfmd, std::forward<VectorizedTerms>(terms), params, strict)
+ : create_helper<VectorizedTerms, vespalib::LeftHeap, vespalib::RightHeap>(tfmd, std::forward<VectorizedTerms>(terms), params, strict);
}
} // namespace search::queryeval::<unnamed>
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h
index bd173ab41eb..70520e267e6 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h
@@ -20,27 +20,25 @@ struct ParallelWeakAndSearch : public SearchIterator
/**
* Params used to tweak the behavior of the WAND algorithm.
*/
- struct MatchParams
+ struct MatchParams : wand::MatchParams
{
- WeakAndHeap &scores;
- score_t scoreThreshold;
- double thresholdBoostFactor;
- uint32_t scoresAdjustFrequency;
- docid_t docIdLimit;
- MatchParams(WeakAndHeap &scores_,
- score_t scoreThreshold_,
- double thresholdBoostFactor_,
- uint32_t scoresAdjustFrequency_)
- : scores(scores_),
- scoreThreshold(scoreThreshold_),
- thresholdBoostFactor(thresholdBoostFactor_),
- scoresAdjustFrequency(scoresAdjustFrequency_),
- docIdLimit(0)
+ const double thresholdBoostFactor;
+ const docid_t docIdLimit;
+ MatchParams(WeakAndHeap &scores_in,
+ score_t scoreThreshold_in,
+ double thresholdBoostFactor_in,
+ uint32_t scoresAdjustFrequency_in,
+ uint32_t docIdLimit_in) noexcept
+ : wand::MatchParams(scores_in, scoreThreshold_in, scoresAdjustFrequency_in),
+ thresholdBoostFactor(thresholdBoostFactor_in),
+ docIdLimit(docIdLimit_in)
+ {}
+ MatchParams(WeakAndHeap &scores_in,
+ score_t scoreThreshold_in,
+ double thresholdBoostFactor_in,
+ uint32_t scoresAdjustFrequency_in) noexcept
+ : MatchParams(scores_in, scoreThreshold_in, thresholdBoostFactor_in, scoresAdjustFrequency_in, 0)
{}
- MatchParams &setDocIdLimit(docid_t value) {
- docIdLimit = value;
- return *this;
- }
};
/**
@@ -51,7 +49,7 @@ struct ParallelWeakAndSearch : public SearchIterator
fef::TermFieldMatchData &rootMatchData;
fef::MatchData::UP childrenMatchData;
RankParams(fef::TermFieldMatchData &rootMatchData_,
- fef::MatchData::UP &&childrenMatchData_)
+ fef::MatchData::UP &&childrenMatchData_) noexcept
: rootMatchData(rootMatchData_),
childrenMatchData(std::move(childrenMatchData_))
{}
@@ -68,12 +66,10 @@ struct ParallelWeakAndSearch : public SearchIterator
static SearchIterator::UP createHeapWand(const Terms &terms, const MatchParams &matchParams, RankParams &&rankParams, bool strict);
static SearchIterator::UP create(const Terms &terms, const MatchParams &matchParams, RankParams &&rankParams, bool strict);
- static SearchIterator::UP create(fef::TermFieldMatchData &tmd,
- const MatchParams &matchParams,
+ static SearchIterator::UP create(fef::TermFieldMatchData &tmd, const MatchParams &matchParams,
const std::vector<int32_t> &weights,
const std::vector<IDirectPostingStore::LookupResult> &dict_entries,
- const IDocidWithWeightPostingStore &attr,
- bool strict);
+ const IDocidWithWeightPostingStore &attr, bool strict);
};
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
index 4e781f8497b..9496090cca3 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
@@ -2,10 +2,9 @@
#pragma once
-#include <algorithm>
-#include <cmath>
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/features/bm25_feature.h>
#include <vespa/searchlib/queryeval/searchiterator.h>
#include <vespa/searchlib/queryeval/iterator_pack.h>
#include <vespa/searchlib/attribute/posting_iterator_pack.h>
@@ -13,23 +12,40 @@
#include <vespa/vespalib/util/priority_queue.h>
#include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h>
#include <vespa/vespalib/util/stringfmt.h>
+#include <cmath>
+namespace search::queryeval { class WeakAndHeap; }
namespace search::queryeval::wand {
//-----------------------------------------------------------------------------
-struct Term;
-using Terms = std::vector<Term>;
using score_t = int64_t;
using docid_t = uint32_t;
using ref_t = uint16_t;
-using Attr = IDirectPostingStore;
-using AttrDictEntry = Attr::LookupResult;
+const uint32_t DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY = 4;
//-----------------------------------------------------------------------------
/**
+ * Params used to tweak the behavior of the WAND algorithm.
+ */
+struct MatchParams
+{
+ WeakAndHeap &scores;
+ score_t scoreThreshold;
+ const uint32_t scoresAdjustFrequency;
+ MatchParams(WeakAndHeap &scores_in) noexcept
+ : MatchParams(scores_in, 1, DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY)
+ {}
+ MatchParams(WeakAndHeap &scores_in, score_t scoreThreshold_in, uint32_t scoresAdjustFrequency_in) noexcept
+ : scores(scores_in),
+ scoreThreshold(scoreThreshold_in),
+ scoresAdjustFrequency(scoresAdjustFrequency_in)
+ {}
+};
+
+/**
* Wrapper used to specify underlying terms during setup
**/
struct Term {
@@ -46,7 +62,7 @@ struct Term {
Term(SearchIterator *s, int32_t w, uint32_t e) noexcept : Term(s, w, e, nullptr) {}
Term(SearchIterator::UP s, int32_t w, uint32_t e) noexcept : Term(s.release(), w, e, nullptr) {}
};
-
+using Terms = std::vector<Term>;
//-----------------------------------------------------------------------------
// input manipulation utilities
@@ -75,7 +91,7 @@ auto assemble(const F &f, const Order &order)->std::vector<decltype(f(0))> {
}
int32_t get_max_weight(const SearchIterator &search) {
- const MinMaxPostingInfo *minMax = dynamic_cast<const MinMaxPostingInfo *>(search.getPostingInfo());
+ const auto *minMax = dynamic_cast<const MinMaxPostingInfo *>(search.getPostingInfo());
return (minMax != nullptr) ? minMax->getMaxWeight() : std::numeric_limits<int32_t>::max();
}
@@ -291,7 +307,7 @@ struct VectorizedAttributeTerms : VectorizedState<DocidWithWeightIteratorPack> {
**/
struct DocIdOrder {
const docid_t *termPos;
- explicit DocIdOrder(docid_t *pos) noexcept : termPos(pos) {}
+ explicit DocIdOrder(const docid_t *pos) noexcept : termPos(pos) {}
bool at_end(ref_t ref) const noexcept { return termPos[ref] == search::endDocId; }
docid_t get_pos(ref_t ref) const noexcept { return termPos[ref]; }
bool operator()(ref_t a, ref_t b) const noexcept {
@@ -389,7 +405,7 @@ DualHeap<FutureHeap, PastHeap>::stringify() const {
}
//-----------------------------------------------------------------------------
-#define TermFrequencyScorer_TERM_SCORE_FACTOR 1000000.0
+constexpr double TermFrequencyScorer_TERM_SCORE_FACTOR = 1000000.0;
/**
* Scorer used with WeakAndAlgorithm that calculates a pseudo term frequency
@@ -412,6 +428,38 @@ struct TermFrequencyScorer
}
};
+class Bm25TermFrequencyScorer
+{
+public:
+ using Bm25Executor = features::Bm25Executor;
+ Bm25TermFrequencyScorer(uint32_t num_docs, float range) noexcept
+ : _num_docs(num_docs),
+ _range(range),
+ _max_idf(Bm25Executor::calculate_inverse_document_frequency(1, _num_docs))
+ { }
+ double apply_range(double idf) const noexcept {
+ return (1.0 - _range)*_max_idf + _range * idf;
+ }
+ // weight * scaled_bm25_idf, scaled to fixedpoint
+ score_t calculateMaxScore(double estHits, double weight) const noexcept {
+ return score_t(TermFrequencyScorer_TERM_SCORE_FACTOR * weight *
+ apply_range(Bm25Executor::calculate_inverse_document_frequency(estHits, _num_docs)));
+ }
+
+ score_t calculateMaxScore(const Term &term) const noexcept {
+ return calculateMaxScore(term.estHits, term.weight) + 1;
+ }
+
+ template <typename Input>
+ score_t calculate_max_score(const Input &input, ref_t ref) const noexcept {
+ return calculateMaxScore(input.get_est_hits(ref), input.get_weight(ref)) + 1;
+ }
+private:
+ uint32_t _num_docs;
+ float _range;
+ double _max_idf;
+};
+
//-----------------------------------------------------------------------------
/**
@@ -453,14 +501,14 @@ struct DotProductScorer
// used with parallel wand where we can safely discard hits based on score
struct GreaterThan {
score_t threshold;
- GreaterThan(score_t t) : threshold(t) {}
+ explicit GreaterThan(score_t t) noexcept : threshold(t) {}
bool operator()(score_t score) const { return (score > threshold); }
};
// used with old-style vespa wand to ensure at least AND'ish results
struct GreaterThanEqual {
score_t threshold;
- GreaterThanEqual(score_t t) : threshold(t) {}
+ explicit GreaterThanEqual(score_t t) noexcept : threshold(t) {}
bool operator()(score_t score) const { return (score >= threshold); }
};
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp
index d4b92fd67e6..53ebb33e1ea 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp
@@ -4,23 +4,28 @@
namespace search::queryeval {
-SharedWeakAndPriorityQueue::SharedWeakAndPriorityQueue(uint32_t scoresToTrack) :
+WeakAndPriorityQueue::WeakAndPriorityQueue(uint32_t scoresToTrack) :
WeakAndHeap(scoresToTrack),
- _bestScores(),
- _lock()
-{
- _bestScores.reserve(scoresToTrack);
-}
+ _bestScores()
+{ }
-SharedWeakAndPriorityQueue::~SharedWeakAndPriorityQueue() = default;
+WeakAndPriorityQueue::~WeakAndPriorityQueue() = default;
+
+std::unique_ptr<WeakAndPriorityQueue>
+WeakAndPriorityQueue::createHeap(uint32_t scoresToTrack, bool thread_safe) {
+ if (thread_safe) {
+ return std::make_unique<queryeval::SharedWeakAndPriorityQueue>(scoresToTrack);
+ }
+ return std::make_unique<WeakAndPriorityQueue>(scoresToTrack);
+}
void
-SharedWeakAndPriorityQueue::adjust(score_t *begin, score_t *end)
+WeakAndPriorityQueue::adjust(score_t *begin, score_t *end)
{
if (getScoresToTrack() == 0) {
return;
}
- std::lock_guard guard(_lock);
+
for (score_t *itr = begin; itr != end; ++itr) {
score_t score = *itr;
if (!is_full()) {
@@ -35,4 +40,17 @@ SharedWeakAndPriorityQueue::adjust(score_t *begin, score_t *end)
}
}
+SharedWeakAndPriorityQueue::SharedWeakAndPriorityQueue(uint32_t scoresToTrack)
+ : WeakAndPriorityQueue(scoresToTrack),
+ _lock()
+{ }
+
+SharedWeakAndPriorityQueue::~SharedWeakAndPriorityQueue() = default;
+
+void
+SharedWeakAndPriorityQueue::adjust(score_t *begin, score_t *end) {
+ std::lock_guard guard(_lock);
+ WeakAndPriorityQueue::adjust(begin, end);
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h
index f1c90f5e6ac..db3ddbc39d3 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h
@@ -17,13 +17,13 @@ namespace search::queryeval {
class WeakAndHeap {
public:
using score_t = wand::score_t;
- WeakAndHeap(uint32_t scoresToTrack) :
+ explicit WeakAndHeap(uint32_t scoresToTrack) noexcept :
_minScore((scoresToTrack == 0)
? std::numeric_limits<score_t>::max()
: 0),
_scoresToTrack(scoresToTrack)
{ }
- virtual ~WeakAndHeap() {}
+ virtual ~WeakAndHeap() = default;
/**
* Consider the given scores for insertion into the underlying structure.
* The implementation may change the given score array to speed up execution.
@@ -33,11 +33,13 @@ public:
/**
* The number of scores this heap is tracking.
**/
- uint32_t getScoresToTrack() const { return _scoresToTrack; }
+ uint32_t getScoresToTrack() const noexcept { return _scoresToTrack; }
- score_t getMinScore() const { return _minScore.load(std::memory_order_relaxed); }
+ score_t getMinScore() const noexcept { return _minScore.load(std::memory_order_relaxed); }
protected:
- void setMinScore(score_t minScore) { _minScore.store(minScore, std::memory_order_relaxed); }
+ void setMinScore(score_t minScore) noexcept {
+ _minScore.store(minScore, std::memory_order_relaxed);
+ }
private:
std::atomic<score_t> _minScore;
const uint32_t _scoresToTrack;
@@ -47,19 +49,28 @@ private:
* An implementation using an underlying priority queue to keep track of the N
* best hits that can be shared among multiple search iterators.
*/
-class SharedWeakAndPriorityQueue : public WeakAndHeap
+class WeakAndPriorityQueue : public WeakAndHeap
{
private:
using Scores = vespalib::PriorityQueue<score_t>;
Scores _bestScores;
- std::mutex _lock;
- bool is_full() const { return (_bestScores.size() >= getScoresToTrack()); }
+ bool is_full() const noexcept { return (_bestScores.size() >= getScoresToTrack()); }
+public:
+ explicit WeakAndPriorityQueue(uint32_t scoresToTrack);
+ ~WeakAndPriorityQueue() override;
+ Scores &getScores() noexcept { return _bestScores; }
+ void adjust(score_t *begin, score_t *end) override;
+ static std::unique_ptr<WeakAndPriorityQueue> createHeap(uint32_t scoresToTrack, bool thread_safe);
+};
+class SharedWeakAndPriorityQueue final : public WeakAndPriorityQueue
+{
+private:
+ std::mutex _lock;
public:
- SharedWeakAndPriorityQueue(uint32_t scoresToTrack);
- ~SharedWeakAndPriorityQueue();
- Scores &getScores() { return _bestScores; }
+ explicit SharedWeakAndPriorityQueue(uint32_t scoresToTrack);
+ ~SharedWeakAndPriorityQueue() override;
void adjust(score_t *begin, score_t *end) override;
};
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp
index 04b1cb75da4..33dd3e46fe5 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp
@@ -1,7 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "weak_and_search.h"
-#include "wand_parts.h"
+#include "weak_and_heap.h"
#include <vespa/searchlib/queryeval/orsearch.h>
#include <vespa/vespalib/util/left_right_heap.h>
#include <vespa/vespalib/util/priority_queue.h>
@@ -20,7 +20,8 @@ private:
DualHeap<FutureHeap, PastHeap> _heaps;
Algorithm _algo;
score_t _threshold; // current score threshold
- Scores _scores; // best n scores
+ MatchParams _matchParams;
+ std::vector<score_t> _localScores;
const uint32_t _n;
void seek_strict(uint32_t docid) {
@@ -40,16 +41,24 @@ private:
}
}
}
+ void updateThreshold(score_t newThreshold) {
+ if (newThreshold > _threshold) {
+ _threshold = newThreshold;
+ }
+ }
public:
- WeakAndSearchLR(const Terms &terms, uint32_t n)
- : _terms(terms, TermFrequencyScorer(), 0, {}),
+ template<typename Scorer>
+ WeakAndSearchLR(const Terms &terms, const MatchParams & matchParams, const Scorer & scorer, uint32_t n)
+ : _terms(terms, scorer, 0, {}),
_heaps(DocIdOrder(_terms.docId()), _terms.size()),
_algo(),
- _threshold(1),
- _scores(),
+ _threshold(matchParams.scoreThreshold),
+ _matchParams(matchParams),
+ _localScores(),
_n(n)
{
+ _localScores.reserve(_matchParams.scoresAdjustFrequency);
}
size_t get_num_terms() const override { return _terms.size(); }
int32_t get_term_weight(size_t idx) const override { return _terms.weight(idx); }
@@ -57,6 +66,7 @@ public:
const Terms &getTerms() const override { return _terms.input_terms(); }
uint32_t getN() const override { return _n; }
void doSeek(uint32_t docid) override {
+ updateThreshold(_matchParams.scores.getMinScore());
if (IS_STRICT) {
seek_strict(docid);
} else {
@@ -65,12 +75,11 @@ public:
}
void doUnpack(uint32_t docid) override {
_algo.find_matching_terms(_terms, _heaps);
- _scores.push(_algo.get_upper_bound());
- if (_scores.size() > _n) {
- _scores.pop_front();
- }
- if (_scores.size() == _n) {
- _threshold = _scores.front();
+ score_t score = _algo.get_upper_bound();
+ _localScores.push_back(score);
+ if (_localScores.size() == _matchParams.scoresAdjustFrequency) {
+ _matchParams.scores.adjust(&_localScores[0], &_localScores[0] + _localScores.size());
+ _localScores.clear();
}
ref_t *end = _heaps.present_end();
for (ref_t *ref = _heaps.present_begin(); ref != end; ++ref) {
@@ -102,36 +111,51 @@ WeakAndSearch::visitMembers(vespalib::ObjectVisitor &visitor) const
//-----------------------------------------------------------------------------
+template<typename Scorer>
SearchIterator::UP
-WeakAndSearch::createArrayWand(const Terms &terms, uint32_t n, bool strict)
+WeakAndSearch::createArrayWand(const Terms &terms, const MatchParams & params,
+ const Scorer & scorer, uint32_t n, bool strict)
{
if (strict) {
- return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, true>>(terms, n);
+ return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, true>>(terms, params, scorer, n);
} else {
- return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, false>>(terms, n);
+ return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, false>>(terms, params, scorer, n);
}
}
+template<typename Scorer>
SearchIterator::UP
-WeakAndSearch::createHeapWand(const Terms &terms, uint32_t n, bool strict)
+WeakAndSearch::createHeapWand(const Terms &terms, const MatchParams & params, const Scorer & scorer, uint32_t n, bool strict)
{
if (strict) {
- return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, true>>(terms, n);
+ return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, true>>(terms, params, scorer, n);
} else {
- return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, false>>(terms, n);
+ return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, false>>(terms, params, scorer, n);
}
}
+template<typename Scorer>
SearchIterator::UP
-WeakAndSearch::create(const Terms &terms, uint32_t n, bool strict)
+WeakAndSearch::create(const Terms &terms, const MatchParams & params, const Scorer & scorer, uint32_t n, bool strict)
{
if (terms.size() < 128) {
- return createArrayWand(terms, n, strict);
+ return createArrayWand(terms, params, scorer, n, strict);
} else {
- return createHeapWand(terms, n, strict);
+ return createHeapWand(terms, params, scorer, n, strict);
}
}
+SearchIterator::UP
+WeakAndSearch::create(const Terms &terms, const MatchParams & params, uint32_t n, bool strict)
+{
+ return create(terms, params, wand::TermFrequencyScorer(), n, strict);
+}
+
//-----------------------------------------------------------------------------
+template SearchIterator::UP WeakAndSearch::create<wand::TermFrequencyScorer>(const Terms &terms, const MatchParams & params, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict);
+template SearchIterator::UP WeakAndSearch::create<wand::Bm25TermFrequencyScorer>(const Terms &terms, const MatchParams & params, const wand::Bm25TermFrequencyScorer & scorer, uint32_t n, bool strict);
+template SearchIterator::UP WeakAndSearch::createArrayWand<wand::TermFrequencyScorer>(const Terms &terms, const MatchParams & params, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict);
+template SearchIterator::UP WeakAndSearch::createHeapWand<wand::TermFrequencyScorer>(const Terms &terms, const MatchParams & params, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict);
+
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h
index 6a56a04887c..30292af24ab 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h
@@ -9,15 +9,24 @@ namespace search::queryeval {
struct WeakAndSearch : SearchIterator {
using Terms = wand::Terms;
+ using MatchParams = wand::MatchParams;
virtual size_t get_num_terms() const = 0;
virtual int32_t get_term_weight(size_t idx) const = 0;
virtual wand::score_t get_max_score(size_t idx) const = 0;
virtual const Terms &getTerms() const = 0;
virtual uint32_t getN() const = 0;
void visitMembers(vespalib::ObjectVisitor &visitor) const override;
- static SearchIterator::UP createArrayWand(const Terms &terms, uint32_t n, bool strict);
- static SearchIterator::UP createHeapWand(const Terms &terms, uint32_t n, bool strict);
- static SearchIterator::UP create(const Terms &terms, uint32_t n, bool strict);
+ template<typename Scorer>
+ static SearchIterator::UP createArrayWand(const Terms &terms, const MatchParams & matchParams,
+ const Scorer & scorer, uint32_t n, bool strict);
+ template<typename Scorer>
+ static SearchIterator::UP createHeapWand(const Terms &terms, const MatchParams & matchParams,
+ const Scorer & scorer, uint32_t n, bool strict);
+ template<typename Scorer>
+ static SearchIterator::UP create(const Terms &terms, const MatchParams & matchParams,
+ const Scorer & scorer, uint32_t n, bool strict);
+ static SearchIterator::UP create(const Terms &terms, const MatchParams & matchParams,
+ uint32_t n, bool strict);
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
index af99260979d..2f07fa4cdc7 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
@@ -9,14 +9,16 @@
using vespalib::typify_invoke;
using vespalib::eval::TypifyCellType;
using vespalib::eval::TypedCells;
+using vespalib::eval::Int8Float;
namespace search::tensor {
-template<typename FloatType>
+template <typename VectorStoreType>
class BoundAngularDistance final : public BoundDistanceFunction {
private:
+ using FloatType = VectorStoreType::FloatType;
const vespalib::hwaccelrated::IAccelrated & _computer;
- mutable TemporaryVectorStore<FloatType> _tmpSpace;
+ mutable VectorStoreType _tmpSpace;
const vespalib::ConstArrayRef<FloatType> _lhs;
double _lhs_norm_sq;
public:
@@ -26,16 +28,16 @@ public:
_lhs(_tmpSpace.storeLhs(lhs))
{
auto a = _lhs.data();
- _lhs_norm_sq = _computer.dotProduct(a, a, lhs.size);
+ _lhs_norm_sq = _computer.dotProduct(cast(a), cast(a), lhs.size);
}
double calc(TypedCells rhs) const noexcept override {
size_t sz = _lhs.size();
vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs);
auto a = _lhs.data();
auto b = rhs_vector.data();
- double b_norm_sq = _computer.dotProduct(b, b, sz);
+ double b_norm_sq = _computer.dotProduct(cast(b), cast(b), sz);
double squared_norms = _lhs_norm_sq * b_norm_sq;
- double dot_product = _computer.dotProduct(a, b, sz);
+ double dot_product = _computer.dotProduct(cast(a), cast(b), sz);
double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0;
double cosine_similarity = dot_product / div;
double distance = 1.0 - cosine_similarity; // in range [0,2]
@@ -65,24 +67,34 @@ public:
}
};
-template class BoundAngularDistance<float>;
-template class BoundAngularDistance<double>;
+template class BoundAngularDistance<TemporaryVectorStore<float>>;
+template class BoundAngularDistance<TemporaryVectorStore<double>>;
+template class BoundAngularDistance<TemporaryVectorStore<Int8Float>>;
+template class BoundAngularDistance<ReferenceVectorStore<float>>;
+template class BoundAngularDistance<ReferenceVectorStore<double>>;
+template class BoundAngularDistance<ReferenceVectorStore<Int8Float>>;
template <typename FloatType>
BoundDistanceFunction::UP
-AngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
- using DFT = BoundAngularDistance<FloatType>;
+AngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
+ using DFT = BoundAngularDistance<TemporaryVectorStore<FloatType>>;
return std::make_unique<DFT>(lhs);
}
template <typename FloatType>
BoundDistanceFunction::UP
-AngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
- using DFT = BoundAngularDistance<FloatType>;
- return std::make_unique<DFT>(lhs);
+AngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
+ if (_reference_insertion_vector) {
+ using DFT = BoundAngularDistance<ReferenceVectorStore<FloatType>>;
+ return std::make_unique<DFT>(lhs);
+ } else {
+ using DFT = BoundAngularDistance<TemporaryVectorStore<FloatType>>;
+ return std::make_unique<DFT>(lhs);
+ }
}
template class AngularDistanceFunctionFactory<float>;
template class AngularDistanceFunctionFactory<double>;
+template class AngularDistanceFunctionFactory<Int8Float>;
}
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.h b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
index 5e0a060e060..7dcc6e80484 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
@@ -10,13 +10,21 @@ namespace search::tensor {
* Calculates angular distance between vectors
* Will use instruction optimal for the cpu it is running on
* after converting both vectors to an optimal cell type.
+ *
+ * When reference_insertion_vector == true:
+ * - Vectors passed to for_insertion_vector() and BoundDistanceFunction::calc() are assumed to have the same type as FloatType.
+ * - The TypedCells memory is just referenced and used directly in calculations,
+ * and thus no transformation via a temporary memory buffer occurs.
*/
template <typename FloatType>
class AngularDistanceFunctionFactory : public DistanceFunctionFactory {
+private:
+ bool _reference_insertion_vector;
public:
- AngularDistanceFunctionFactory() = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ AngularDistanceFunctionFactory() noexcept : AngularDistanceFunctionFactory(false) {}
+ AngularDistanceFunctionFactory(bool reference_insertion_vector) noexcept : _reference_insertion_vector(reference_insertion_vector) {}
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h b/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h
index 85089196a7a..318271835ad 100644
--- a/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h
+++ b/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h
@@ -19,6 +19,7 @@ class BoundDistanceFunction : public DistanceConverter {
public:
using UP = std::unique_ptr<BoundDistanceFunction>;
using TypedCells = vespalib::eval::TypedCells;
+ using Int8Float = vespalib::eval::Int8Float;
BoundDistanceFunction() noexcept = default;
@@ -29,6 +30,10 @@ public:
// calculate internal distance, early return allowed if > limit
virtual double calc_with_limit(TypedCells rhs, double limit) const noexcept = 0;
+protected:
+ static const double *cast(const double * p) { return p; }
+ static const float *cast(const float * p) { return p; }
+ static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); }
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
index ed08df5866e..b8918e23ce7 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
@@ -6,6 +6,7 @@
using search::attribute::DistanceMetric;
using vespalib::eval::CellType;
+using vespalib::eval::Int8Float;
namespace search::tensor {
@@ -15,38 +16,44 @@ make_distance_function_factory(DistanceMetric variant, CellType cell_type)
switch (variant) {
case DistanceMetric::Angular:
switch (cell_type) {
- case CellType::DOUBLE: return std::make_unique<AngularDistanceFunctionFactory<double>>();
+ case CellType::DOUBLE: return std::make_unique<AngularDistanceFunctionFactory<double>>(true);
+ case CellType::INT8: return std::make_unique<AngularDistanceFunctionFactory<Int8Float>>(true);
+ case CellType::FLOAT: return std::make_unique<AngularDistanceFunctionFactory<float>>(true);
default: return std::make_unique<AngularDistanceFunctionFactory<float>>();
}
case DistanceMetric::Euclidean:
switch (cell_type) {
- case CellType::DOUBLE: return std::make_unique<EuclideanDistanceFunctionFactory<double>>();
- case CellType::INT8: return std::make_unique<EuclideanDistanceFunctionFactory<vespalib::eval::Int8Float>>();
- case CellType::BFLOAT16: return std::make_unique<EuclideanDistanceFunctionFactory<vespalib::BFloat16>>();
- default: return std::make_unique<EuclideanDistanceFunctionFactory<float>>();
+ case CellType::DOUBLE: return std::make_unique<EuclideanDistanceFunctionFactory<double>>(true);
+ case CellType::INT8: return std::make_unique<EuclideanDistanceFunctionFactory<Int8Float>>(true);
+ case CellType::FLOAT: return std::make_unique<EuclideanDistanceFunctionFactory<float>>(true);
+ default: return std::make_unique<EuclideanDistanceFunctionFactory<float>>();
}
case DistanceMetric::InnerProduct:
case DistanceMetric::PrenormalizedAngular:
switch (cell_type) {
- case CellType::DOUBLE: return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<double>>();
- default: return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<float>>();
+ case CellType::DOUBLE: return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<double>>(true);
+ case CellType::INT8: return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<Int8Float>>(true);
+ case CellType::FLOAT: return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<float>>(true);
+ default: return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<float>>();
}
case DistanceMetric::Dotproduct:
switch (cell_type) {
- case CellType::DOUBLE: return std::make_unique<MipsDistanceFunctionFactory<double>>();
- case CellType::INT8: return std::make_unique<MipsDistanceFunctionFactory<vespalib::eval::Int8Float>>();
+ case CellType::DOUBLE: return std::make_unique<MipsDistanceFunctionFactory<double>>(true);
+ case CellType::INT8: return std::make_unique<MipsDistanceFunctionFactory<Int8Float>>(true);
+ case CellType::FLOAT: return std::make_unique<MipsDistanceFunctionFactory<float>>(true);
default: return std::make_unique<MipsDistanceFunctionFactory<float>>();
}
case DistanceMetric::GeoDegrees:
return std::make_unique<GeoDistanceFunctionFactory>();
case DistanceMetric::Hamming:
switch (cell_type) {
- case CellType::DOUBLE: return std::make_unique<HammingDistanceFunctionFactory<double>>();
- case CellType::INT8: return std::make_unique<HammingDistanceFunctionFactory<vespalib::eval::Int8Float>>();
+ case CellType::DOUBLE: return std::make_unique<HammingDistanceFunctionFactory<double>>(true);
+ case CellType::INT8: return std::make_unique<HammingDistanceFunctionFactory<Int8Float>>(true);
+ case CellType::FLOAT: return std::make_unique<HammingDistanceFunctionFactory<float>>(true);
default: return std::make_unique<HammingDistanceFunctionFactory<float>>();
}
}
- // not reached:
+ // Not reached:
return {};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
index 356366d6a77..3b0a0ac91fd 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
@@ -17,8 +17,8 @@ struct DistanceFunctionFactory {
using TypedCells = vespalib::eval::TypedCells;
DistanceFunctionFactory() noexcept = default;
virtual ~DistanceFunctionFactory() = default;
- virtual BoundDistanceFunction::UP for_query_vector(TypedCells lhs) = 0;
- virtual BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) = 0;
+ virtual BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const = 0;
+ virtual BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const = 0;
using UP = std::unique_ptr<DistanceFunctionFactory>;
};
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
index 3ab3a1123eb..02da4f496af 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
@@ -12,18 +12,14 @@ using vespalib::eval::TypedCells;
namespace search::tensor {
using vespalib::eval::Int8Float;
-using vespalib::BFloat16;
-template<typename AttributeCellType>
+template <typename VectorStoreType>
class BoundEuclideanDistance final : public BoundDistanceFunction {
- using FloatType = std::conditional_t<std::is_same<AttributeCellType,BFloat16>::value,float,AttributeCellType>;
private:
+ using FloatType = VectorStoreType::FloatType;
const vespalib::hwaccelrated::IAccelrated & _computer;
- mutable TemporaryVectorStore<FloatType> _tmpSpace;
+ mutable VectorStoreType _tmpSpace;
const vespalib::ConstArrayRef<FloatType> _lhs_vector;
- static const double *cast(const double * p) { return p; }
- static const float *cast(const float * p) { return p; }
- static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); }
public:
explicit BoundEuclideanDistance(TypedCells lhs)
: _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()),
@@ -44,40 +40,38 @@ public:
double score = 1.0 / (1.0 + d);
return score;
}
- double calc_with_limit(TypedCells rhs, double limit) const noexcept override {
- vespalib::ConstArrayRef<AttributeCellType> rhs_vector = rhs.typify<AttributeCellType>();
- double sum = 0.0;
- size_t sz = _lhs_vector.size();
- assert(sz == rhs_vector.size());
- for (size_t i = 0; i < sz && sum <= limit; ++i) {
- double diff = _lhs_vector[i] - rhs_vector[i];
- sum += diff*diff;
- }
- return sum;
+ double calc_with_limit(TypedCells rhs, double) const noexcept override {
+ return calc(rhs);
}
};
-template class BoundEuclideanDistance<Int8Float>;
-template class BoundEuclideanDistance<BFloat16>;
-template class BoundEuclideanDistance<float>;
-template class BoundEuclideanDistance<double>;
+template class BoundEuclideanDistance<TemporaryVectorStore<Int8Float>>;
+template class BoundEuclideanDistance<TemporaryVectorStore<float>>;
+template class BoundEuclideanDistance<TemporaryVectorStore<double>>;
+template class BoundEuclideanDistance<ReferenceVectorStore<Int8Float>>;
+template class BoundEuclideanDistance<ReferenceVectorStore<float>>;
+template class BoundEuclideanDistance<ReferenceVectorStore<double>>;
template <typename FloatType>
BoundDistanceFunction::UP
-EuclideanDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
- using DFT = BoundEuclideanDistance<FloatType>;
+EuclideanDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
+ using DFT = BoundEuclideanDistance<TemporaryVectorStore<FloatType>>;
return std::make_unique<DFT>(lhs);
}
template <typename FloatType>
BoundDistanceFunction::UP
-EuclideanDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
- using DFT = BoundEuclideanDistance<FloatType>;
- return std::make_unique<DFT>(lhs);
+EuclideanDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
+ if (_reference_insertion_vector) {
+ using DFT = BoundEuclideanDistance<ReferenceVectorStore<FloatType>>;
+ return std::make_unique<DFT>(lhs);
+ } else {
+ using DFT = BoundEuclideanDistance<TemporaryVectorStore<FloatType>>;
+ return std::make_unique<DFT>(lhs);
+ }
}
template class EuclideanDistanceFunctionFactory<Int8Float>;
-template class EuclideanDistanceFunctionFactory<BFloat16>;
template class EuclideanDistanceFunctionFactory<float>;
template class EuclideanDistanceFunctionFactory<double>;
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
index 8c39a12bf86..bd82e48fb0b 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
@@ -10,13 +10,21 @@ namespace search::tensor {
* Calculates the square of the standard Euclidean distance.
* Will use instruction optimal for the cpu it is running on
* after converting both vectors to an optimal cell type.
+ *
+ * When reference_insertion_vector == true:
+ * - Vectors passed to for_insertion_vector() and BoundDistanceFunction::calc() are assumed to have the same type as FloatType.
+ * - The TypedCells memory is just referenced and used directly in calculations,
+ * and thus no transformation via a temporary memory buffer occurs.
*/
template <typename FloatType>
class EuclideanDistanceFunctionFactory : public DistanceFunctionFactory {
+private:
+ bool _reference_insertion_vector;
public:
- EuclideanDistanceFunctionFactory() noexcept = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ EuclideanDistanceFunctionFactory() noexcept : EuclideanDistanceFunctionFactory(false) {}
+ EuclideanDistanceFunctionFactory(bool reference_insertion_vector) noexcept : _reference_insertion_vector(reference_insertion_vector) {}
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
index f5484f40271..a8a48ae4116 100644
--- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
@@ -82,12 +82,12 @@ public:
};
BoundDistanceFunction::UP
-GeoDistanceFunctionFactory::for_query_vector(TypedCells lhs) {
+GeoDistanceFunctionFactory::for_query_vector(TypedCells lhs) const {
return std::make_unique<BoundGeoDistance>(lhs);
}
BoundDistanceFunction::UP
-GeoDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) {
+GeoDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) const {
return std::make_unique<BoundGeoDistance>(lhs);
}
diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
index 1464898421b..a85e31e8ecc 100644
--- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
@@ -14,8 +14,8 @@ namespace search::tensor {
class GeoDistanceFunctionFactory : public DistanceFunctionFactory {
public:
GeoDistanceFunctionFactory() = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
index 7f29a100492..281a20cef87 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
@@ -5,17 +5,18 @@
#include <vespa/vespalib/util/binary_hamming_distance.h>
using vespalib::typify_invoke;
-using vespalib::eval::TypifyCellType;
using vespalib::eval::TypedCells;
+using vespalib::eval::TypifyCellType;
namespace search::tensor {
using vespalib::eval::Int8Float;
-template<typename FloatType>
+template <typename VectorStoreType>
class BoundHammingDistance final : public BoundDistanceFunction {
private:
- mutable TemporaryVectorStore<FloatType> _tmpSpace;
+ using FloatType = VectorStoreType::FloatType;
+ mutable VectorStoreType _tmpSpace;
const vespalib::ConstArrayRef<FloatType> _lhs_vector;
public:
explicit BoundHammingDistance(TypedCells lhs)
@@ -47,18 +48,30 @@ public:
}
};
+template class BoundHammingDistance<TemporaryVectorStore<Int8Float>>;
+template class BoundHammingDistance<TemporaryVectorStore<float>>;
+template class BoundHammingDistance<TemporaryVectorStore<double>>;
+template class BoundHammingDistance<ReferenceVectorStore<Int8Float>>;
+template class BoundHammingDistance<ReferenceVectorStore<float>>;
+template class BoundHammingDistance<ReferenceVectorStore<double>>;
+
template <typename FloatType>
BoundDistanceFunction::UP
-HammingDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
- using DFT = BoundHammingDistance<FloatType>;
+HammingDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
+ using DFT = BoundHammingDistance<TemporaryVectorStore<FloatType>>;
return std::make_unique<DFT>(lhs);
}
template <typename FloatType>
BoundDistanceFunction::UP
-HammingDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
- using DFT = BoundHammingDistance<FloatType>;
- return std::make_unique<DFT>(lhs);
+HammingDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
+ if (_reference_insertion_vector) {
+ using DFT = BoundHammingDistance<ReferenceVectorStore<FloatType>>;
+ return std::make_unique<DFT>(lhs);
+ } else {
+ using DFT = BoundHammingDistance<TemporaryVectorStore<FloatType>>;
+ return std::make_unique<DFT>(lhs);
+ }
}
template class HammingDistanceFunctionFactory<Int8Float>;
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
index 6e7f96e1e2f..768665653c0 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
@@ -7,17 +7,24 @@
namespace search::tensor {
/**
- * Calculates the Hamming distance defined as
- * "number of cells where the values are different"
- * or (for int8 cells, aka binary data only)
- * "number of bits that are different"
+ * Calculates the Hamming distance defined as "number of cells where the values are different"
+ * or (for int8 cells, aka binary data only) "number of bits that are different".
+ *
+ * When reference_insertion_vector == true:
+ * - Vectors passed to for_insertion_vector() and BoundDistanceFunction::calc() are assumed to have the same type as FloatType.
+ * - The TypedCells memory is referenced and used directly in calculations,
+ * and thus no transformation via a temporary memory buffer occurs.
*/
template <typename FloatType>
class HammingDistanceFunctionFactory : public DistanceFunctionFactory {
+private:
+ bool _reference_insertion_vector;
public:
- HammingDistanceFunctionFactory() = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ HammingDistanceFunctionFactory() noexcept : HammingDistanceFunctionFactory(false) {}
+ HammingDistanceFunctionFactory(bool reference_insertion_vector) noexcept : _reference_insertion_vector(reference_insertion_vector) {}
+
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
index a1a9e9632be..88b7681f23c 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
@@ -82,7 +82,7 @@ struct HnswGraph {
if (levels_ref.valid()) {
return levels_store.get(levels_ref);
}
- return LevelArrayRef();
+ return {};
}
LevelArrayRef get_level_array(uint32_t nodeid) const {
@@ -102,7 +102,7 @@ struct HnswGraph {
return links_store.get(links_ref);
}
}
- return LinkArrayRef();
+ return {};
}
LinkArrayRef get_link_array(uint32_t nodeid, uint32_t level) const {
@@ -126,12 +126,12 @@ struct HnswGraph {
uint32_t nodeid;
LevelsRef levels_ref;
int32_t level;
- EntryNode()
+ EntryNode() noexcept
: nodeid(0), // Note that nodeid 0 is reserved and never used
levels_ref(),
level(-1)
{}
- EntryNode(uint32_t nodeid_in, LevelsRef levels_ref_in, int32_t level_in)
+ EntryNode(uint32_t nodeid_in, LevelsRef levels_ref_in, int32_t level_in) noexcept
: nodeid(nodeid_in),
levels_ref(levels_ref_in),
level(level_in)
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 1db688156e0..f9ab5d705c9 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -155,8 +155,8 @@ HnswIndex<type>::make_default_level_array_store_config()
vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE,
vespalib::alloc::MemoryAllocator::PAGE_SIZE,
ArrayStoreConfig::default_max_buffer_size,
- min_num_arrays_for_new_buffer,
- alloc_grow_factor).enable_free_lists(true);
+ min_num_arrays_for_new_buffer,
+ alloc_grow_factor).enable_free_lists(true);
}
template <HnswIndexType type>
@@ -373,9 +373,7 @@ HnswIndex<type>::estimate_visited_nodes(uint32_t level, uint32_t nodeid_limit, u
template <HnswIndexType type>
HnswCandidate
-HnswIndex<type>::find_nearest_in_layer(
- const BoundDistanceFunction &df,
- const HnswCandidate& entry_point, uint32_t level) const
+HnswIndex<type>::find_nearest_in_layer(const BoundDistanceFunction &df, const HnswCandidate& entry_point, uint32_t level) const
{
HnswCandidate nearest = entry_point;
bool keep_searching = true;
@@ -401,12 +399,10 @@ HnswIndex<type>::find_nearest_in_layer(
template <HnswIndexType type>
template <class VisitedTracker, class BestNeighbors>
void
-HnswIndex<type>::search_layer_helper(
- const BoundDistanceFunction &df,
- uint32_t neighbors_to_find,
- BestNeighbors& best_neighbors, uint32_t level, const GlobalFilter *filter,
- uint32_t nodeid_limit, const vespalib::Doom* const doom,
- uint32_t estimated_visited_nodes) const
+HnswIndex<type>::search_layer_helper(const BoundDistanceFunction &df, uint32_t neighbors_to_find,
+ BestNeighbors& best_neighbors, uint32_t level, const GlobalFilter *filter,
+ uint32_t nodeid_limit, const vespalib::Doom* const doom,
+ uint32_t estimated_visited_nodes) const
{
NearestPriQ candidates;
GlobalFilterWrapper<type> filter_wrapper(filter);
@@ -471,11 +467,8 @@ HnswIndex<type>::search_layer_helper(
template <HnswIndexType type>
template <class BestNeighbors>
void
-HnswIndex<type>::search_layer(
- const BoundDistanceFunction &df,
- uint32_t neighbors_to_find,
- BestNeighbors& best_neighbors, uint32_t level,
- const vespalib::Doom* const doom, const GlobalFilter *filter) const
+HnswIndex<type>::search_layer(const BoundDistanceFunction &df, uint32_t neighbors_to_find, BestNeighbors& best_neighbors,
+ uint32_t level, const vespalib::Doom* const doom, const GlobalFilter *filter) const
{
uint32_t nodeid_limit = _graph.nodes_size.load(std::memory_order_acquire);
uint32_t estimated_visited_nodes = estimate_visited_nodes(level, nodeid_limit, neighbors_to_find, filter);
@@ -488,7 +481,7 @@ HnswIndex<type>::search_layer(
template <HnswIndexType type>
HnswIndex<type>::HnswIndex(const DocVectorAccess& vectors, DistanceFunctionFactory::UP distance_ff,
- RandomLevelGenerator::UP level_generator, const HnswIndexConfig& cfg)
+ RandomLevelGenerator::UP level_generator, const HnswIndexConfig& cfg)
: _graph(),
_vectors(vectors),
_distance_ff(std::move(distance_ff)),
@@ -633,9 +626,7 @@ HnswIndex<type>::internal_complete_add_node(uint32_t nodeid, uint32_t docid, uin
template <HnswIndexType type>
std::unique_ptr<PrepareResult>
-HnswIndex<type>::prepare_add_document(uint32_t docid,
- VectorBundle vectors,
- vespalib::GenerationHandler::Guard read_guard) const
+HnswIndex<type>::prepare_add_document(uint32_t docid, VectorBundle vectors, vespalib::GenerationHandler::Guard read_guard) const
{
uint32_t active_nodes = _graph.get_active_nodes();
if (active_nodes < _cfg.min_size_before_two_phase()) {
@@ -672,15 +663,18 @@ HnswIndex<type>::mutual_reconnect(const LinkArrayRef &cluster, uint32_t level)
std::vector<PairDist> pairs;
for (uint32_t i = 0; i + 1 < cluster.size(); ++i) {
uint32_t n_id_1 = cluster[i];
+ TypedCells n_cells_1 = get_vector(n_id_1);
+ if (n_cells_1.non_existing_attribute_value()) [[unlikely]] continue;
LinkArrayRef n_list_1 = _graph.get_link_array(n_id_1, level);
- std::unique_ptr<BoundDistanceFunction> df;
+ std::unique_ptr<BoundDistanceFunction> df = _distance_ff->for_insertion_vector(n_cells_1);
for (uint32_t j = i + 1; j < cluster.size(); ++j) {
uint32_t n_id_2 = cluster[j];
- if (has_link_to(n_list_1, n_id_2)) continue;
- if (!df) {
- df = _distance_ff->for_insertion_vector(get_vector(n_id_1));
+ if ( ! has_link_to(n_list_1, n_id_2)) {
+ auto n_cells_2 = get_vector(n_id_2);
+ if (!n_cells_2.non_existing_attribute_value()) {
+ pairs.emplace_back(n_id_1, n_id_2, df->calc(n_cells_2));
+ }
}
- pairs.emplace_back(n_id_1, n_id_2, calc_distance(*df, n_id_2));
}
}
std::sort(pairs.begin(), pairs.end());
@@ -927,12 +921,8 @@ struct NeighborsByDocId {
template <HnswIndexType type>
std::vector<NearestNeighborIndex::Neighbor>
-HnswIndex<type>::top_k_by_docid(
- uint32_t k,
- const BoundDistanceFunction &df,
- const GlobalFilter *filter, uint32_t explore_k,
- const vespalib::Doom& doom,
- double distance_threshold) const
+HnswIndex<type>::top_k_by_docid(uint32_t k, const BoundDistanceFunction &df, const GlobalFilter *filter,
+ uint32_t explore_k, const vespalib::Doom& doom, double distance_threshold) const
{
SearchBestNeighbors candidates = top_k_candidates(df, std::max(k, explore_k), filter, doom);
auto result = candidates.get_neighbors(k, distance_threshold);
@@ -942,34 +932,23 @@ HnswIndex<type>::top_k_by_docid(
template <HnswIndexType type>
std::vector<NearestNeighborIndex::Neighbor>
-HnswIndex<type>::find_top_k(
- uint32_t k,
- const BoundDistanceFunction &df,
- uint32_t explore_k,
- const vespalib::Doom& doom,
- double distance_threshold) const
+HnswIndex<type>::find_top_k(uint32_t k, const BoundDistanceFunction &df, uint32_t explore_k,
+ const vespalib::Doom& doom, double distance_threshold) const
{
return top_k_by_docid(k, df, nullptr, explore_k, doom, distance_threshold);
}
template <HnswIndexType type>
std::vector<NearestNeighborIndex::Neighbor>
-HnswIndex<type>::find_top_k_with_filter(
- uint32_t k,
- const BoundDistanceFunction &df,
- const GlobalFilter &filter, uint32_t explore_k,
- const vespalib::Doom& doom,
- double distance_threshold) const
+HnswIndex<type>::find_top_k_with_filter(uint32_t k, const BoundDistanceFunction &df, const GlobalFilter &filter,
+ uint32_t explore_k, const vespalib::Doom& doom, double distance_threshold) const
{
return top_k_by_docid(k, df, &filter, explore_k, doom, distance_threshold);
}
template <HnswIndexType type>
typename HnswIndex<type>::SearchBestNeighbors
-HnswIndex<type>::top_k_candidates(
- const BoundDistanceFunction &df,
- uint32_t k, const GlobalFilter *filter,
- const vespalib::Doom& doom) const
+HnswIndex<type>::top_k_candidates(const BoundDistanceFunction &df, uint32_t k, const GlobalFilter *filter, const vespalib::Doom& doom) const
{
SearchBestNeighbors best_neighbors;
auto entry = _graph.get_entry_node();
@@ -1120,6 +1099,32 @@ HnswIndex<type>::count_reachable_nodes() const
return {found_cnt, true};
}
+template <HnswIndexType type>
+uint32_t
+HnswIndex<type>::get_subspaces(uint32_t docid) const noexcept
+{
+ if constexpr (type == HnswIndexType::SINGLE) {
+ return (docid < _graph.nodes.get_size() && _graph.nodes.get_elem_ref(docid).levels_ref().load_relaxed().valid()) ? 1 : 0;
+ } else {
+ return _id_mapping.get_ids(docid).size();
+ }
+}
+
+template <HnswIndexType type>
+uint32_t
+HnswIndex<type>::check_consistency(uint32_t docid_limit) const noexcept
+{
+ uint32_t inconsistencies = 0;
+ for (uint32_t docid = 1; docid < docid_limit; ++docid) {
+ auto index_subspaces = get_subspaces(docid);
+ auto store_subspaces = get_vectors(docid).subspaces();
+ if (index_subspaces != store_subspaces) {
+ ++inconsistencies;
+ }
+ }
+ return inconsistencies;
+}
+
template class HnswIndex<HnswIndexType::SINGLE>;
template class HnswIndex<HnswIndexType::MULTI>;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index 616140f426f..8c74f1e5264 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -171,28 +171,26 @@ protected:
/**
* Performs a greedy search in the given layer to find the candidate that is nearest the input vector.
*/
- HnswCandidate find_nearest_in_layer(const BoundDistanceFunction &df, const HnswCandidate& entry_point, uint32_t level) const;
+ HnswCandidate find_nearest_in_layer(const BoundDistanceFunction &df, const HnswCandidate& entry_point, uint32_t level) const __attribute__((noinline));
template <class VisitedTracker, class BestNeighbors>
void search_layer_helper(const BoundDistanceFunction &df, uint32_t neighbors_to_find, BestNeighbors& best_neighbors,
- uint32_t level, const GlobalFilter *filter,
- uint32_t nodeid_limit,
- const vespalib::Doom* const doom,
- uint32_t estimated_visited_nodes) const;
+ uint32_t level, const GlobalFilter *filter, uint32_t nodeid_limit,
+ const vespalib::Doom* const doom, uint32_t estimated_visited_nodes) const __attribute__((noinline));
template <class BestNeighbors>
void search_layer(const BoundDistanceFunction &df, uint32_t neighbors_to_find, BestNeighbors& best_neighbors,
- uint32_t level, const vespalib::Doom* const doom,
- const GlobalFilter *filter = nullptr) const;
- std::vector<Neighbor> top_k_by_docid(uint32_t k, const BoundDistanceFunction &df,
- const GlobalFilter *filter, uint32_t explore_k,
- const vespalib::Doom& doom,
- double distance_threshold) const;
+ uint32_t level, const vespalib::Doom* const doom, const GlobalFilter *filter = nullptr) const;
+ std::vector<Neighbor> top_k_by_docid(uint32_t k, const BoundDistanceFunction &df, const GlobalFilter *filter,
+ uint32_t explore_k, const vespalib::Doom& doom, double distance_threshold) const;
internal::PreparedAddDoc internal_prepare_add(uint32_t docid, VectorBundle input_vectors,
- vespalib::GenerationHandler::Guard read_guard) const;
+ vespalib::GenerationHandler::Guard read_guard) const;
void internal_prepare_add_node(internal::PreparedAddDoc& op, TypedCells input_vector, const typename GraphType::EntryNode& entry) const;
LinkArray filter_valid_nodeids(uint32_t level, const internal::PreparedAddNode::Links &neighbors, uint32_t self_nodeid);
void internal_complete_add(uint32_t docid, internal::PreparedAddDoc &op);
void internal_complete_add_node(uint32_t nodeid, uint32_t docid, uint32_t subspace, internal::PreparedAddNode &prepared_node);
+
+ // Called from writer only.
+ uint32_t get_subspaces(uint32_t docid) const noexcept;
public:
HnswIndex(const DocVectorAccess& vectors, DistanceFunctionFactory::UP distance_ff,
RandomLevelGenerator::UP level_generator, const HnswIndexConfig& cfg);
@@ -202,8 +200,7 @@ public:
// Implements NearestNeighborIndex
void add_document(uint32_t docid) override;
- std::unique_ptr<PrepareResult> prepare_add_document(uint32_t docid,
- VectorBundle vectors,
+ std::unique_ptr<PrepareResult> prepare_add_document(uint32_t docid, VectorBundle vectors,
vespalib::GenerationHandler::Guard read_guard) const override;
void complete_add_document(uint32_t docid, std::unique_ptr<PrepareResult> prepare_result) override;
void remove_node(uint32_t nodeid);
@@ -222,32 +219,25 @@ public:
std::unique_ptr<NearestNeighborIndexSaver> make_saver(vespalib::GenericHeader& header) const override;
std::unique_ptr<NearestNeighborIndexLoader> make_loader(FastOS_FileInterface& file, const vespalib::GenericHeader& header) override;
- std::vector<Neighbor> find_top_k(
- uint32_t k,
- const BoundDistanceFunction &df,
- uint32_t explore_k,
- const vespalib::Doom& doom,
- double distance_threshold) const override;
+ std::vector<Neighbor> find_top_k(uint32_t k, const BoundDistanceFunction &df, uint32_t explore_k,
+ const vespalib::Doom& doom, double distance_threshold) const override;
- std::vector<Neighbor> find_top_k_with_filter(
- uint32_t k,
- const BoundDistanceFunction &df,
- const GlobalFilter &filter, uint32_t explore_k,
- const vespalib::Doom& doom,
- double distance_threshold) const override;
+ std::vector<Neighbor> find_top_k_with_filter(uint32_t k, const BoundDistanceFunction &df, const GlobalFilter &filter,
+ uint32_t explore_k, const vespalib::Doom& doom, double distance_threshold) const override;
DistanceFunctionFactory &distance_function_factory() const override { return *_distance_ff; }
- SearchBestNeighbors top_k_candidates(
- const BoundDistanceFunction &df,
- uint32_t k, const GlobalFilter *filter,
- const vespalib::Doom& doom) const;
+ SearchBestNeighbors top_k_candidates(const BoundDistanceFunction &df, uint32_t k, const GlobalFilter *filter,
+ const vespalib::Doom& doom) const;
uint32_t get_entry_nodeid() const { return _graph.get_entry_node().nodeid; }
int32_t get_entry_level() const { return _graph.get_entry_node().level; }
uint32_t get_active_nodes() const noexcept { return _graph.get_active_nodes(); }
+ // Called from writer only.
+ uint32_t check_consistency(uint32_t docid_limit) const noexcept override;
+
// Should only be used by unit tests.
HnswTestNode get_node(uint32_t nodeid) const;
void set_node(uint32_t nodeid, const HnswTestNode &node);
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h b/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h
index b8189090079..33a9fa2503f 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h
@@ -16,10 +16,7 @@ class HnswSimpleNode {
AtomicEntryRef _levels_ref;
public:
- HnswSimpleNode()
- : _levels_ref()
- {
- }
+ HnswSimpleNode() noexcept : _levels_ref() { }
AtomicEntryRef& levels_ref() noexcept { return _levels_ref; }
const AtomicEntryRef& levels_ref() const noexcept { return _levels_ref; }
void store_docid(uint32_t docid) noexcept { (void) docid; }
diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
index c42242d8dc8..f3a60668141 100644
--- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
@@ -10,18 +10,17 @@ using vespalib::eval::Int8Float;
namespace search::tensor {
-template<typename FloatType, bool extra_dim>
+template <typename VectorStoreType, bool extra_dim>
class BoundMipsDistanceFunction final : public BoundDistanceFunction {
- mutable TemporaryVectorStore<FloatType> _tmpSpace;
+private:
+ using FloatType = VectorStoreType::FloatType;
+ mutable VectorStoreType _tmpSpace;
const vespalib::ConstArrayRef<FloatType> _lhs_vector;
const vespalib::hwaccelrated::IAccelrated & _computer;
double _max_sq_norm;
using ExtraDimT = std::conditional_t<extra_dim,double,std::monostate>;
[[no_unique_address]] ExtraDimT _lhs_extra_dim;
- static const double *cast(const double * p) { return p; }
- static const float *cast(const float * p) { return p; }
- static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); }
public:
BoundMipsDistanceFunction(TypedCells lhs, MaximumSquaredNormStore& sq_norm_store)
: BoundDistanceFunction(),
@@ -50,7 +49,7 @@ public:
double dp = _computer.dotProduct(cast(a), cast(b), rhs.size);
if constexpr (extra_dim) {
double rhs_sq_norm = _computer.dotProduct(cast(b), cast(b), rhs.size);
- // avoid sqrt(negative) for robustness:
+ // avoid sqrt(negative) for robustness:
double diff = std::max(0.0, _max_sq_norm - rhs_sq_norm);
double rhs_extra_dim = std::sqrt(diff);
dp += _lhs_extra_dim * rhs_extra_dim;
@@ -76,14 +75,18 @@ public:
template<typename FloatType>
BoundDistanceFunction::UP
-MipsDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
- return std::make_unique<BoundMipsDistanceFunction<FloatType, false>>(lhs, *_sq_norm_store);
+MipsDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
+ return std::make_unique<BoundMipsDistanceFunction<TemporaryVectorStore<FloatType>, false>>(lhs, *_sq_norm_store);
}
template<typename FloatType>
BoundDistanceFunction::UP
-MipsDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
- return std::make_unique<BoundMipsDistanceFunction<FloatType, true>>(lhs, *_sq_norm_store);
+MipsDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
+ if (_reference_insertion_vector) {
+ return std::make_unique<BoundMipsDistanceFunction<ReferenceVectorStore<FloatType>, true>>(lhs, *_sq_norm_store);
+ } else {
+ return std::make_unique<BoundMipsDistanceFunction<TemporaryVectorStore<FloatType>, true>>(lhs, *_sq_norm_store);
+ }
};
template class MipsDistanceFunctionFactory<Int8Float>;
diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
index 67a6eb58de0..7b82661179f 100644
--- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
+++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
@@ -55,15 +55,23 @@ public:
* problem. When inserting vectors, an extra dimension is
* added ensuring behavior "as if" all vectors had length equal
* to the longest vector inserted so far, or at least length 1.
+ *
+ * When reference_insertion_vector == true:
+ * - Vectors passed to for_insertion_vector() and BoundDistanceFunction::calc() are assumed to have the same type as FloatType.
+ * - The TypedCells memory is just referenced and used directly in calculations,
+ * and thus no transformation via a temporary memory buffer occurs.
*/
-template<typename FloatType>
+template <typename FloatType>
class MipsDistanceFunctionFactory : public MipsDistanceFunctionFactoryBase {
+private:
+ bool _reference_insertion_vector;
public:
- MipsDistanceFunctionFactory() noexcept = default;
+ MipsDistanceFunctionFactory() noexcept : MipsDistanceFunctionFactory(false) {}
+ MipsDistanceFunctionFactory(bool reference_insertion_vector) noexcept : _reference_insertion_vector(reference_insertion_vector) {}
~MipsDistanceFunctionFactory() override = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
index 8462ff05eca..c2bbd17ce63 100644
--- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
@@ -114,6 +114,12 @@ public:
double distance_threshold) const = 0;
virtual DistanceFunctionFactory &distance_function_factory() const = 0;
+
+ /*
+ * Used when checking consistency during load.
+ * Called from writer only.
+ */
+ virtual uint32_t check_consistency(uint32_t docid_limit) const noexcept = 0;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
index 4bc90001227..7c7a660f7ec 100644
--- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
@@ -4,16 +4,18 @@
#include "temporary_vector_store.h"
#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
-using vespalib::typify_invoke;
+using vespalib::eval::Int8Float;
using vespalib::eval::TypifyCellType;
+using vespalib::typify_invoke;
namespace search::tensor {
-template<typename FloatType>
+template <typename VectorStoreType>
class BoundPrenormalizedAngularDistance final : public BoundDistanceFunction {
private:
+ using FloatType = VectorStoreType::FloatType;
const vespalib::hwaccelrated::IAccelrated & _computer;
- mutable TemporaryVectorStore<FloatType> _tmpSpace;
+ mutable VectorStoreType _tmpSpace;
const vespalib::ConstArrayRef<FloatType> _lhs;
double _lhs_norm_sq;
public:
@@ -23,7 +25,7 @@ public:
_lhs(_tmpSpace.storeLhs(lhs))
{
auto a = _lhs.data();
- _lhs_norm_sq = _computer.dotProduct(a, a, lhs.size);
+ _lhs_norm_sq = _computer.dotProduct(cast(a), cast(a), lhs.size);
if (_lhs_norm_sq <= 0.0) {
_lhs_norm_sq = 1.0;
}
@@ -32,7 +34,7 @@ public:
vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs);
auto a = _lhs.data();
auto b = rhs_vector.data();
- double dot_product = _computer.dotProduct(a, b, _lhs.size());
+ double dot_product = _computer.dotProduct(cast(a), cast(b), _lhs.size());
double distance = _lhs_norm_sq - dot_product;
return distance;
}
@@ -45,7 +47,7 @@ public:
double to_rawscore(double distance) const noexcept override {
double dot_product = _lhs_norm_sq - distance;
double cosine_similarity = dot_product / _lhs_norm_sq;
- // should be in in range [-1,1] but roundoff may cause problems:
+ // should be in range [-1,1] but roundoff may cause problems:
cosine_similarity = std::min(1.0, cosine_similarity);
cosine_similarity = std::max(-1.0, cosine_similarity);
double cosine_distance = 1.0 - cosine_similarity; // in range [0,2]
@@ -57,24 +59,34 @@ public:
}
};
-template class BoundPrenormalizedAngularDistance<float>;
-template class BoundPrenormalizedAngularDistance<double>;
+template class BoundPrenormalizedAngularDistance<TemporaryVectorStore<float>>;
+template class BoundPrenormalizedAngularDistance<TemporaryVectorStore<double>>;
+template class BoundPrenormalizedAngularDistance<TemporaryVectorStore<Int8Float>>;
+template class BoundPrenormalizedAngularDistance<ReferenceVectorStore<float>>;
+template class BoundPrenormalizedAngularDistance<ReferenceVectorStore<double>>;
+template class BoundPrenormalizedAngularDistance<ReferenceVectorStore<Int8Float>>;
template <typename FloatType>
BoundDistanceFunction::UP
-PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
- using DFT = BoundPrenormalizedAngularDistance<FloatType>;
+PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
+ using DFT = BoundPrenormalizedAngularDistance<TemporaryVectorStore<FloatType>>;
return std::make_unique<DFT>(lhs);
}
template <typename FloatType>
BoundDistanceFunction::UP
-PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
- using DFT = BoundPrenormalizedAngularDistance<FloatType>;
- return std::make_unique<DFT>(lhs);
+PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
+ if (_reference_insertion_vector) {
+ using DFT = BoundPrenormalizedAngularDistance<ReferenceVectorStore<FloatType>>;
+ return std::make_unique<DFT>(lhs);
+ } else {
+ using DFT = BoundPrenormalizedAngularDistance<TemporaryVectorStore<FloatType>>;
+ return std::make_unique<DFT>(lhs);
+ }
}
template class PrenormalizedAngularDistanceFunctionFactory<float>;
template class PrenormalizedAngularDistanceFunctionFactory<double>;
+template class PrenormalizedAngularDistanceFunctionFactory<Int8Float>;
}
diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
index 7e3a8c2c676..639138df574 100644
--- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
@@ -9,13 +9,21 @@ namespace search::tensor {
/**
* Calculates inner-product "distance" between vectors assuming a common norm.
* Should give same ordering as Angular distance, but is less expensive.
+ *
+ * When reference_insertion_vector == true:
+ * - Vectors passed to for_insertion_vector() and BoundDistanceFunction::calc() are assumed to have the same type as FloatType.
+ * - The TypedCells memory is just referenced and used directly in calculations,
+ * and thus no transformation via a temporary memory buffer occurs.
*/
template <typename FloatType>
class PrenormalizedAngularDistanceFunctionFactory : public DistanceFunctionFactory {
+private:
+ bool _reference_insertion_vector;
public:
- PrenormalizedAngularDistanceFunctionFactory() = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ PrenormalizedAngularDistanceFunctionFactory() noexcept : PrenormalizedAngularDistanceFunctionFactory(false) {}
+ PrenormalizedAngularDistanceFunctionFactory(bool reference_insertion_vector) noexcept : _reference_insertion_vector(reference_insertion_vector) {}
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp
index 4753e9d7c87..097ea67cc9e 100644
--- a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp
@@ -1,11 +1,13 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "temporary_vector_store.h"
+#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
using vespalib::ConstArrayRef;
using vespalib::ArrayRef;
using vespalib::eval::CellType;
using vespalib::eval::TypedCells;
+using vespalib::hwaccelrated::IAccelrated;
namespace search::tensor {
@@ -13,18 +15,29 @@ namespace {
template<typename FromType, typename ToType>
ConstArrayRef<ToType>
+convert_cells(ArrayRef<ToType> space, TypedCells cells) noexcept __attribute__((noinline));
+
+template<typename FromType, typename ToType>
+ConstArrayRef<ToType>
convert_cells(ArrayRef<ToType> space, TypedCells cells) noexcept
{
- assert(cells.size == space.size());
- auto old_cells = cells.typify<FromType>();
+ auto old_cells = cells.unsafe_typify<FromType>();
ToType *p = space.data();
for (FromType value : old_cells) {
- ToType conv(value);
- *p++ = conv;
+ *p++ = static_cast<ToType>(value);
}
return space;
}
+template<>
+ConstArrayRef<float>
+convert_cells<vespalib::BFloat16, float>(ArrayRef<float> space, TypedCells cells) noexcept
+{
+ static const IAccelrated & accelerator = IAccelrated::getAccelerator();
+ accelerator.convert_bfloat16_to_float(reinterpret_cast<const uint16_t *>(cells.data), space.data(), space.size());
+ return space;
+}
+
template <typename ToType>
struct ConvertCellsSelector
{
diff --git a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h
index 3dc237c85a4..d6702d8278a 100644
--- a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h
+++ b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h
@@ -6,9 +6,13 @@
namespace search::tensor {
-/** helper class - temporary storage of possibly-converted vector cells */
-template <typename FloatType>
+/**
+ * Helper class containing temporary memory storage for possibly converted vector cells.
+ */
+template <typename FloatTypeT>
class TemporaryVectorStore {
+public:
+ using FloatType = FloatTypeT;
private:
using TypedCells = vespalib::eval::TypedCells;
std::vector<FloatType> _tmpSpace;
@@ -27,4 +31,24 @@ public:
}
};
+/**
+ * Helper class used when TypedCells vector memory is just referenced,
+ * and used directly in calculations without any transforms.
+ */
+template <typename FloatTypeT>
+class ReferenceVectorStore {
+public:
+ using FloatType = FloatTypeT;
+private:
+ using TypedCells = vespalib::eval::TypedCells;
+public:
+ explicit ReferenceVectorStore(size_t vector_size) noexcept { (void) vector_size; }
+ vespalib::ConstArrayRef<FloatType> storeLhs(TypedCells cells) noexcept {
+ return cells.unsafe_typify<FloatType>();
+ }
+ vespalib::ConstArrayRef<FloatType> convertRhs(TypedCells cells) noexcept {
+ return cells.unsafe_typify<FloatType>();
+ }
+};
+
}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
index a5d670096ab..9f551166a1d 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
@@ -418,18 +418,25 @@ TensorAttribute::complete_set_tensor(DocId docid, const vespalib::eval::Value& t
std::unique_ptr<PrepareResult> prepare_result)
{
if (_index && !prepare_result) {
- // The tensor cells are unchanged
- if (!_is_dense) {
- // but labels might have changed.
- EntryRef ref = _tensorStore.store_tensor(tensor);
- assert(ref.valid());
- setTensorRef(docid, ref);
+ VectorBundle vectors(tensor.cells().data, tensor.index().size(), _subspace_type);
+ if (tensor_cells_are_unchanged(docid, vectors)) {
+ // The tensor cells are unchanged
+ if (!_is_dense) {
+ // but labels might have changed.
+ EntryRef ref = _tensorStore.store_tensor(tensor);
+ assert(ref.valid());
+ setTensorRef(docid, ref);
+ }
+ return;
}
- return;
}
internal_set_tensor(docid, tensor);
if (_index) {
- _index->complete_add_document(docid, std::move(prepare_result));
+ if (prepare_result) {
+ _index->complete_add_document(docid, std::move(prepare_result));
+ } else {
+ _index->add_document(docid);
+ }
}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp
index 28c4099c38b..223c9d7d1f2 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp
@@ -322,6 +322,9 @@ TensorAttributeLoader::on_load(vespalib::Executor* executor)
if (!load_index()) {
return false;
}
+ if (dense_store == nullptr) {
+ check_consistency(docid_limit);
+ }
} else {
build_index(executor, docid_limit);
}
@@ -329,4 +332,15 @@ TensorAttributeLoader::on_load(vespalib::Executor* executor)
return true;
}
+void
+TensorAttributeLoader::check_consistency(uint32_t docid_limit)
+{
+ auto before = vespalib::steady_clock::now();
+ uint32_t inconsistencies = _index->check_consistency(docid_limit);
+ auto after = vespalib::steady_clock::now();
+ double elapsed = vespalib::to_s(after - before);
+ LOG(info, "%u inconsistencies detected after loading index for attribute %s, (check used %6.3fs)",
+ inconsistencies, _attr.getName().c_str(), elapsed);
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h
index 6bf68957adc..59baaf0b6dc 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h
@@ -34,6 +34,7 @@ class TensorAttributeLoader {
void load_tensor_store(search::attribute::BlobSequenceReader& reader, uint32_t docid_limit);
void build_index(vespalib::Executor* executor, uint32_t docid_limit);
bool load_index();
+ void check_consistency(uint32_t docid_limit);
public:
TensorAttributeLoader(TensorAttribute& attr, GenerationHandler& generation_handler, RefVector& ref_vector, TensorStore& store, NearestNeighborIndex* index);
diff --git a/searchlib/src/vespa/searchlib/test/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/CMakeLists.txt
index 83e185dbfb6..4685ad07808 100644
--- a/searchlib/src/vespa/searchlib/test/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/test/CMakeLists.txt
@@ -14,6 +14,7 @@ vespa_add_library(searchlib_test
schema_builder.cpp
string_field_builder.cpp
vector_buffer_writer.cpp
+ weightedchildrenverifiers.cpp
$<TARGET_OBJECTS:searchlib_test_fakedata>
$<TARGET_OBJECTS:searchlib_searchlib_test_diskindex>
$<TARGET_OBJECTS:searchlib_test_gtest_migration>
diff --git a/searchlib/src/vespa/searchlib/test/mock_attribute_context.cpp b/searchlib/src/vespa/searchlib/test/mock_attribute_context.cpp
index 75e85d9d828..1003160249d 100644
--- a/searchlib/src/vespa/searchlib/test/mock_attribute_context.cpp
+++ b/searchlib/src/vespa/searchlib/test/mock_attribute_context.cpp
@@ -28,7 +28,7 @@ MockAttributeContext::getAttributeList(std::vector<const IAttributeVector *> & l
MockAttributeContext::~MockAttributeContext() = default;
void
-MockAttributeContext::add(std::shared_ptr<IAttributeVector> attr) {
+MockAttributeContext::add(std::shared_ptr<const IAttributeVector> attr) {
_vectors[attr->getName()] = attr;
}
diff --git a/searchlib/src/vespa/searchlib/test/mock_attribute_context.h b/searchlib/src/vespa/searchlib/test/mock_attribute_context.h
index 5b522e907b1..165a37994b9 100644
--- a/searchlib/src/vespa/searchlib/test/mock_attribute_context.h
+++ b/searchlib/src/vespa/searchlib/test/mock_attribute_context.h
@@ -11,12 +11,12 @@ namespace search::attribute::test {
class MockAttributeContext : public IAttributeContext
{
private:
- using Map = std::map<string, std::shared_ptr<IAttributeVector>>;
+ using Map = std::map<string, std::shared_ptr<const IAttributeVector>>;
Map _vectors;
public:
~MockAttributeContext() override;
- void add(std::shared_ptr<IAttributeVector> attr);
+ void add(std::shared_ptr<const IAttributeVector> attr);
const IAttributeVector *get(const string &name) const;
const IAttributeVector * getAttribute(const string &name) const override;
diff --git a/searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.cpp b/searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.cpp
new file mode 100644
index 00000000000..b22dd1a3aa9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.cpp
@@ -0,0 +1,71 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "weightedchildrenverifiers.h"
+
+using search::queryeval::SearchIterator;
+
+namespace search::test {
+
+WeightedChildrenVerifier::WeightedChildrenVerifier()
+ : _weights(_num_children, 1)
+{ }
+WeightedChildrenVerifier::~WeightedChildrenVerifier() = default;
+
+
+IteratorChildrenVerifier::IteratorChildrenVerifier()
+ : WeightedChildrenVerifier(),
+ _split_lists(_num_children)
+{
+ auto full_list = getExpectedDocIds();
+ for (size_t i = 0; i < full_list.size(); ++i) {
+ _split_lists[i % _num_children].push_back(full_list[i]);
+ }
+}
+IteratorChildrenVerifier::~IteratorChildrenVerifier() = default;
+
+SearchIterator::UP
+IteratorChildrenVerifier::create(bool strict) const {
+ (void) strict;
+ std::vector<SearchIterator*> children;
+ for (size_t i = 0; i < _num_children; ++i) {
+ children.push_back(createIterator(_split_lists[i], true).release());
+ }
+ return create(children);
+}
+
+SearchIterator::UP
+IteratorChildrenVerifier::create(const std::vector<SearchIterator*> &children) const {
+ (void) children;
+ return {};
+}
+
+
+DwwIteratorChildrenVerifier::DwwIteratorChildrenVerifier()
+ : WeightedChildrenVerifier(),
+ _helper()
+{
+ _helper.add_docs(getDocIdLimit());
+ auto full_list = getExpectedDocIds();
+ for (size_t i = 0; i < full_list.size(); ++i) {
+ _helper.set_doc(full_list[i], i % _num_children, 1);
+ }
+}
+DwwIteratorChildrenVerifier::~DwwIteratorChildrenVerifier() = default;
+
+SearchIterator::UP
+DwwIteratorChildrenVerifier::create(bool strict) const {
+ (void) strict;
+ std::vector<DocidWithWeightIterator> children;
+ for (size_t i = 0; i < _num_children; ++i) {
+ auto dict_entry = _helper.dww().lookup(vespalib::make_string("%zu", i).c_str(), _helper.dww().get_dictionary_snapshot());
+ _helper.dww().create(dict_entry.posting_idx, children);
+ }
+ return create(std::move(children));
+}
+SearchIterator::UP
+DwwIteratorChildrenVerifier::create(std::vector<DocidWithWeightIterator> &&) const {
+ return {};
+}
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.h b/searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.h
index 86d2fb9aa67..037d1086950 100644
--- a/searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.h
+++ b/searchlib/src/vespa/searchlib/test/weightedchildrenverifiers.h
@@ -8,11 +8,8 @@ namespace search::test {
class WeightedChildrenVerifier : public SearchIteratorVerifier {
public:
- WeightedChildrenVerifier()
- : _weights(_num_children, 1)
- { }
- ~WeightedChildrenVerifier() override {}
-
+ WeightedChildrenVerifier();
+ ~WeightedChildrenVerifier() override;
protected:
static constexpr size_t _num_children = 7;
mutable fef::TermFieldMatchData _tfmd;
@@ -21,58 +18,21 @@ protected:
class IteratorChildrenVerifier : public WeightedChildrenVerifier {
public:
- IteratorChildrenVerifier()
- : WeightedChildrenVerifier(),
- _split_lists(_num_children)
- {
- auto full_list = getExpectedDocIds();
- for (size_t i = 0; i < full_list.size(); ++i) {
- _split_lists[i % _num_children].push_back(full_list[i]);
- }
- }
- ~IteratorChildrenVerifier() override { }
- SearchIterator::UP create(bool strict) const override {
- (void) strict;
- std::vector<SearchIterator*> children;
- for (size_t i = 0; i < _num_children; ++i) {
- children.push_back(createIterator(_split_lists[i], true).release());
- }
- return create(children);
- }
+ IteratorChildrenVerifier();
+ ~IteratorChildrenVerifier() override;
+ SearchIterator::UP create(bool strict) const override;
protected:
- virtual SearchIterator::UP create(const std::vector<SearchIterator*> &children) const {
- (void) children;
- return SearchIterator::UP();
- }
+ virtual SearchIterator::UP create(const std::vector<SearchIterator*> &children) const;
std::vector<DocIds> _split_lists;
};
class DwwIteratorChildrenVerifier : public WeightedChildrenVerifier {
public:
- DwwIteratorChildrenVerifier() :
- WeightedChildrenVerifier(),
- _helper()
- {
- _helper.add_docs(getDocIdLimit());
- auto full_list = getExpectedDocIds();
- for (size_t i = 0; i < full_list.size(); ++i) {
- _helper.set_doc(full_list[i], i % _num_children, 1);
- }
- }
- ~DwwIteratorChildrenVerifier() override {}
- SearchIterator::UP create(bool strict) const override {
- (void) strict;
- std::vector<DocidWithWeightIterator> children;
- for (size_t i = 0; i < _num_children; ++i) {
- auto dict_entry = _helper.dww().lookup(vespalib::make_string("%zu", i).c_str(), _helper.dww().get_dictionary_snapshot());
- _helper.dww().create(dict_entry.posting_idx, children);
- }
- return create(std::move(children));
- }
+ DwwIteratorChildrenVerifier();
+ ~DwwIteratorChildrenVerifier() override;
+ SearchIterator::UP create(bool strict) const override;
protected:
- virtual SearchIterator::UP create(std::vector<DocidWithWeightIterator> &&) const {
- return {};
- }
+ virtual SearchIterator::UP create(std::vector<DocidWithWeightIterator> &&) const;
DocumentWeightAttributeHelper _helper;
};
diff --git a/searchlib/src/vespa/searchlib/util/token_extractor.cpp b/searchlib/src/vespa/searchlib/util/token_extractor.cpp
index a78f30afe21..6e1573c4551 100644
--- a/searchlib/src/vespa/searchlib/util/token_extractor.cpp
+++ b/searchlib/src/vespa/searchlib/util/token_extractor.cpp
@@ -143,8 +143,6 @@ TokenExtractor::extract(std::vector<SpanTerm>& terms, const document::StringFiel
{
auto tree = StringFieldValue::findTree(trees, SPANTREE_NAME);
if (tree == nullptr) {
- /* field might not be annotated if match type is exact */
- consider_word(terms, text, Span(0, text.size()), nullptr, doc);
return;
}
for (const Annotation & annotation : *tree) {