aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/queryeval/wand
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/vespa/searchlib/queryeval/wand')
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp44
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h32
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp9
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h44
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h72
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp36
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h33
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp66
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h15
9 files changed, 213 insertions, 138 deletions
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp
index 4c55496822b..48bef125ec3 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp
@@ -1,42 +1,23 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "parallel_weak_and_blueprint.h"
-#include "wand_parts.h"
#include "parallel_weak_and_search.h"
#include <vespa/searchlib/queryeval/field_spec.hpp>
#include <vespa/searchlib/queryeval/searchiterator.h>
#include <vespa/searchlib/queryeval/flow_tuning.h>
-#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/vespalib/objects/visit.hpp>
#include <algorithm>
namespace search::queryeval {
-ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(FieldSpecBase field,
- uint32_t scoresToTrack,
- score_t scoreThreshold,
- double thresholdBoostFactor)
+ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(FieldSpecBase field, uint32_t scoresToTrack,
+ score_t scoreThreshold, double thresholdBoostFactor,
+ bool thread_safe)
: ComplexLeafBlueprint(field),
- _scores(scoresToTrack),
+ _scores(WeakAndPriorityQueue::createHeap(scoresToTrack, thread_safe)),
_scoreThreshold(scoreThreshold),
_thresholdBoostFactor(thresholdBoostFactor),
- _scoresAdjustFrequency(DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY),
- _layout(),
- _weights(),
- _terms()
-{
-}
-
-ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(FieldSpecBase field,
- uint32_t scoresToTrack,
- score_t scoreThreshold,
- double thresholdBoostFactor,
- uint32_t scoresAdjustFrequency)
- : ComplexLeafBlueprint(field),
- _scores(scoresToTrack),
- _scoreThreshold(scoreThreshold),
- _thresholdBoostFactor(thresholdBoostFactor),
- _scoresAdjustFrequency(scoresAdjustFrequency),
+ _scoresAdjustFrequency(wand::DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY),
_layout(),
_weights(),
_terms()
@@ -84,7 +65,7 @@ ParallelWeakAndBlueprint::calculate_flow_stats(uint32_t docid_limit) const
term->update_flow_stats(docid_limit);
}
double child_est = OrFlow::estimate_of(_terms);
- double my_est = abs_to_rel_est(_scores.getScoresToTrack(), docid_limit);
+ double my_est = abs_to_rel_est(_scores->getScoresToTrack(), docid_limit);
double est = (child_est + my_est) / 2.0;
return {est, OrFlow::cost_of(_terms, false),
OrFlow::cost_of(_terms, true) + flow::heap_cost(est, _terms.size())};
@@ -106,14 +87,11 @@ ParallelWeakAndBlueprint::createLeafSearch(const search::fef::TermFieldMatchData
childState.estimate().estHits,
childState.field(0).resolve(*childrenMatchData));
}
- return SearchIterator::UP
- (ParallelWeakAndSearch::create(terms,
- ParallelWeakAndSearch::MatchParams(_scores,
- _scoreThreshold,
- _thresholdBoostFactor,
- _scoresAdjustFrequency).setDocIdLimit(get_docid_limit()),
- ParallelWeakAndSearch::RankParams(*tfmda[0],
- std::move(childrenMatchData)), strict()));
+ return ParallelWeakAndSearch::create(terms,
+ ParallelWeakAndSearch::MatchParams(*_scores, _scoreThreshold, _thresholdBoostFactor,
+ _scoresAdjustFrequency, get_docid_limit()),
+ ParallelWeakAndSearch::RankParams(*tfmda[0],std::move(childrenMatchData)),
+ strict());
}
std::unique_ptr<SearchIterator>
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h
index 4a55bf14095..c34d366120e 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h
@@ -11,8 +11,6 @@
namespace search::queryeval {
-const uint32_t DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY = 4;
-
/**
* Blueprint for the parallel weak and search operator.
*/
@@ -21,32 +19,24 @@ class ParallelWeakAndBlueprint : public ComplexLeafBlueprint
private:
using score_t = wand::score_t;
- mutable SharedWeakAndPriorityQueue _scores;
- const wand::score_t _scoreThreshold;
- double _thresholdBoostFactor;
- const uint32_t _scoresAdjustFrequency;
- fef::MatchDataLayout _layout;
- std::vector<int32_t> _weights;
- std::vector<Blueprint::UP> _terms;
+ std::unique_ptr<WeakAndPriorityQueue> _scores;
+ const wand::score_t _scoreThreshold;
+ double _thresholdBoostFactor;
+ const uint32_t _scoresAdjustFrequency;
+ fef::MatchDataLayout _layout;
+ std::vector<int32_t> _weights;
+ std::vector<Blueprint::UP> _terms;
public:
ParallelWeakAndBlueprint(const ParallelWeakAndBlueprint &) = delete;
ParallelWeakAndBlueprint &operator=(const ParallelWeakAndBlueprint &) = delete;
- ParallelWeakAndBlueprint(FieldSpecBase field,
- uint32_t scoresToTrack,
- score_t scoreThreshold,
- double thresholdBoostFactor);
- ParallelWeakAndBlueprint(FieldSpecBase field,
- uint32_t scoresToTrack,
- score_t scoreThreshold,
- double thresholdBoostFactor,
- uint32_t scoresAdjustFrequency);
+ ParallelWeakAndBlueprint(FieldSpecBase field, uint32_t scoresToTrack,
+ score_t scoreThreshold, double thresholdBoostFactor,
+ bool thread_safe);
~ParallelWeakAndBlueprint() override;
- const WeakAndHeap &getScores() const { return _scores; }
-
+ const WeakAndHeap &getScores() const { return *_scores; }
score_t getScoreThreshold() const { return _scoreThreshold; }
-
double getThresholdBoostFactor() const { return _thresholdBoostFactor; }
// Used by create visitor
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp
index 9e887b9d0f7..78d97e8efa0 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp
@@ -77,6 +77,7 @@ public:
_matchParams(matchParams),
_localScores()
{
+ _localScores.reserve(_matchParams.scoresAdjustFrequency);
}
size_t get_num_terms() const override { return _terms.size(); }
int32_t get_term_weight(size_t idx) const override { return _terms.weight(idx); }
@@ -199,17 +200,17 @@ namespace {
template <typename VectorizedTerms, typename FutureHeap, typename PastHeap>
SearchIterator::UP create_helper(search::fef::TermFieldMatchData &tfmd, VectorizedTerms &&terms, const MatchParams &params, bool strict) {
if (strict) {
- return std::make_unique<wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, true>>(tfmd, std::move(terms), params);
+ return std::make_unique<wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, true>>(tfmd, std::forward<VectorizedTerms>(terms), params);
} else {
- return std::make_unique<wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, false>>(tfmd, std::move(terms), params);
+ return std::make_unique<wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, false>>(tfmd, std::forward<VectorizedTerms>(terms), params);
}
}
template <typename VectorizedTerms>
SearchIterator::UP create_helper(search::fef::TermFieldMatchData &tfmd, VectorizedTerms &&terms, const MatchParams &params, bool strict, bool use_array) {
return (use_array)
- ? create_helper<VectorizedTerms, vespalib::LeftArrayHeap, vespalib::RightArrayHeap>(tfmd, std::move(terms), params, strict)
- : create_helper<VectorizedTerms, vespalib::LeftHeap, vespalib::RightHeap>(tfmd, std::move(terms), params, strict);
+ ? create_helper<VectorizedTerms, vespalib::LeftArrayHeap, vespalib::RightArrayHeap>(tfmd, std::forward<VectorizedTerms>(terms), params, strict)
+ : create_helper<VectorizedTerms, vespalib::LeftHeap, vespalib::RightHeap>(tfmd, std::forward<VectorizedTerms>(terms), params, strict);
}
} // namespace search::queryeval::<unnamed>
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h
index bd173ab41eb..70520e267e6 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h
@@ -20,27 +20,25 @@ struct ParallelWeakAndSearch : public SearchIterator
/**
* Params used to tweak the behavior of the WAND algorithm.
*/
- struct MatchParams
+ struct MatchParams : wand::MatchParams
{
- WeakAndHeap &scores;
- score_t scoreThreshold;
- double thresholdBoostFactor;
- uint32_t scoresAdjustFrequency;
- docid_t docIdLimit;
- MatchParams(WeakAndHeap &scores_,
- score_t scoreThreshold_,
- double thresholdBoostFactor_,
- uint32_t scoresAdjustFrequency_)
- : scores(scores_),
- scoreThreshold(scoreThreshold_),
- thresholdBoostFactor(thresholdBoostFactor_),
- scoresAdjustFrequency(scoresAdjustFrequency_),
- docIdLimit(0)
+ const double thresholdBoostFactor;
+ const docid_t docIdLimit;
+ MatchParams(WeakAndHeap &scores_in,
+ score_t scoreThreshold_in,
+ double thresholdBoostFactor_in,
+ uint32_t scoresAdjustFrequency_in,
+ uint32_t docIdLimit_in) noexcept
+ : wand::MatchParams(scores_in, scoreThreshold_in, scoresAdjustFrequency_in),
+ thresholdBoostFactor(thresholdBoostFactor_in),
+ docIdLimit(docIdLimit_in)
+ {}
+ MatchParams(WeakAndHeap &scores_in,
+ score_t scoreThreshold_in,
+ double thresholdBoostFactor_in,
+ uint32_t scoresAdjustFrequency_in) noexcept
+ : MatchParams(scores_in, scoreThreshold_in, thresholdBoostFactor_in, scoresAdjustFrequency_in, 0)
{}
- MatchParams &setDocIdLimit(docid_t value) {
- docIdLimit = value;
- return *this;
- }
};
/**
@@ -51,7 +49,7 @@ struct ParallelWeakAndSearch : public SearchIterator
fef::TermFieldMatchData &rootMatchData;
fef::MatchData::UP childrenMatchData;
RankParams(fef::TermFieldMatchData &rootMatchData_,
- fef::MatchData::UP &&childrenMatchData_)
+ fef::MatchData::UP &&childrenMatchData_) noexcept
: rootMatchData(rootMatchData_),
childrenMatchData(std::move(childrenMatchData_))
{}
@@ -68,12 +66,10 @@ struct ParallelWeakAndSearch : public SearchIterator
static SearchIterator::UP createHeapWand(const Terms &terms, const MatchParams &matchParams, RankParams &&rankParams, bool strict);
static SearchIterator::UP create(const Terms &terms, const MatchParams &matchParams, RankParams &&rankParams, bool strict);
- static SearchIterator::UP create(fef::TermFieldMatchData &tmd,
- const MatchParams &matchParams,
+ static SearchIterator::UP create(fef::TermFieldMatchData &tmd, const MatchParams &matchParams,
const std::vector<int32_t> &weights,
const std::vector<IDirectPostingStore::LookupResult> &dict_entries,
- const IDocidWithWeightPostingStore &attr,
- bool strict);
+ const IDocidWithWeightPostingStore &attr, bool strict);
};
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
index 4e781f8497b..9496090cca3 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
@@ -2,10 +2,9 @@
#pragma once
-#include <algorithm>
-#include <cmath>
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/features/bm25_feature.h>
#include <vespa/searchlib/queryeval/searchiterator.h>
#include <vespa/searchlib/queryeval/iterator_pack.h>
#include <vespa/searchlib/attribute/posting_iterator_pack.h>
@@ -13,23 +12,40 @@
#include <vespa/vespalib/util/priority_queue.h>
#include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h>
#include <vespa/vespalib/util/stringfmt.h>
+#include <cmath>
+namespace search::queryeval { class WeakAndHeap; }
namespace search::queryeval::wand {
//-----------------------------------------------------------------------------
-struct Term;
-using Terms = std::vector<Term>;
using score_t = int64_t;
using docid_t = uint32_t;
using ref_t = uint16_t;
-using Attr = IDirectPostingStore;
-using AttrDictEntry = Attr::LookupResult;
+const uint32_t DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY = 4;
//-----------------------------------------------------------------------------
/**
+ * Params used to tweak the behavior of the WAND algorithm.
+ */
+struct MatchParams
+{
+ WeakAndHeap &scores;
+ score_t scoreThreshold;
+ const uint32_t scoresAdjustFrequency;
+ MatchParams(WeakAndHeap &scores_in) noexcept
+ : MatchParams(scores_in, 1, DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY)
+ {}
+ MatchParams(WeakAndHeap &scores_in, score_t scoreThreshold_in, uint32_t scoresAdjustFrequency_in) noexcept
+ : scores(scores_in),
+ scoreThreshold(scoreThreshold_in),
+ scoresAdjustFrequency(scoresAdjustFrequency_in)
+ {}
+};
+
+/**
* Wrapper used to specify underlying terms during setup
**/
struct Term {
@@ -46,7 +62,7 @@ struct Term {
Term(SearchIterator *s, int32_t w, uint32_t e) noexcept : Term(s, w, e, nullptr) {}
Term(SearchIterator::UP s, int32_t w, uint32_t e) noexcept : Term(s.release(), w, e, nullptr) {}
};
-
+using Terms = std::vector<Term>;
//-----------------------------------------------------------------------------
// input manipulation utilities
@@ -75,7 +91,7 @@ auto assemble(const F &f, const Order &order)->std::vector<decltype(f(0))> {
}
int32_t get_max_weight(const SearchIterator &search) {
- const MinMaxPostingInfo *minMax = dynamic_cast<const MinMaxPostingInfo *>(search.getPostingInfo());
+ const auto *minMax = dynamic_cast<const MinMaxPostingInfo *>(search.getPostingInfo());
return (minMax != nullptr) ? minMax->getMaxWeight() : std::numeric_limits<int32_t>::max();
}
@@ -291,7 +307,7 @@ struct VectorizedAttributeTerms : VectorizedState<DocidWithWeightIteratorPack> {
**/
struct DocIdOrder {
const docid_t *termPos;
- explicit DocIdOrder(docid_t *pos) noexcept : termPos(pos) {}
+ explicit DocIdOrder(const docid_t *pos) noexcept : termPos(pos) {}
bool at_end(ref_t ref) const noexcept { return termPos[ref] == search::endDocId; }
docid_t get_pos(ref_t ref) const noexcept { return termPos[ref]; }
bool operator()(ref_t a, ref_t b) const noexcept {
@@ -389,7 +405,7 @@ DualHeap<FutureHeap, PastHeap>::stringify() const {
}
//-----------------------------------------------------------------------------
-#define TermFrequencyScorer_TERM_SCORE_FACTOR 1000000.0
+constexpr double TermFrequencyScorer_TERM_SCORE_FACTOR = 1000000.0;
/**
* Scorer used with WeakAndAlgorithm that calculates a pseudo term frequency
@@ -412,6 +428,38 @@ struct TermFrequencyScorer
}
};
+class Bm25TermFrequencyScorer
+{
+public:
+ using Bm25Executor = features::Bm25Executor;
+ Bm25TermFrequencyScorer(uint32_t num_docs, float range) noexcept
+ : _num_docs(num_docs),
+ _range(range),
+ _max_idf(Bm25Executor::calculate_inverse_document_frequency(1, _num_docs))
+ { }
+ double apply_range(double idf) const noexcept {
+ return (1.0 - _range)*_max_idf + _range * idf;
+ }
+ // weight * scaled_bm25_idf, scaled to fixedpoint
+ score_t calculateMaxScore(double estHits, double weight) const noexcept {
+ return score_t(TermFrequencyScorer_TERM_SCORE_FACTOR * weight *
+ apply_range(Bm25Executor::calculate_inverse_document_frequency(estHits, _num_docs)));
+ }
+
+ score_t calculateMaxScore(const Term &term) const noexcept {
+ return calculateMaxScore(term.estHits, term.weight) + 1;
+ }
+
+ template <typename Input>
+ score_t calculate_max_score(const Input &input, ref_t ref) const noexcept {
+ return calculateMaxScore(input.get_est_hits(ref), input.get_weight(ref)) + 1;
+ }
+private:
+ uint32_t _num_docs;
+ float _range;
+ double _max_idf;
+};
+
//-----------------------------------------------------------------------------
/**
@@ -453,14 +501,14 @@ struct DotProductScorer
// used with parallel wand where we can safely discard hits based on score
struct GreaterThan {
score_t threshold;
- GreaterThan(score_t t) : threshold(t) {}
+ explicit GreaterThan(score_t t) noexcept : threshold(t) {}
bool operator()(score_t score) const { return (score > threshold); }
};
// used with old-style vespa wand to ensure at least AND'ish results
struct GreaterThanEqual {
score_t threshold;
- GreaterThanEqual(score_t t) : threshold(t) {}
+ explicit GreaterThanEqual(score_t t) noexcept : threshold(t) {}
bool operator()(score_t score) const { return (score >= threshold); }
};
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp
index d4b92fd67e6..53ebb33e1ea 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp
@@ -4,23 +4,28 @@
namespace search::queryeval {
-SharedWeakAndPriorityQueue::SharedWeakAndPriorityQueue(uint32_t scoresToTrack) :
+WeakAndPriorityQueue::WeakAndPriorityQueue(uint32_t scoresToTrack) :
WeakAndHeap(scoresToTrack),
- _bestScores(),
- _lock()
-{
- _bestScores.reserve(scoresToTrack);
-}
+ _bestScores()
+{ }
-SharedWeakAndPriorityQueue::~SharedWeakAndPriorityQueue() = default;
+WeakAndPriorityQueue::~WeakAndPriorityQueue() = default;
+
+std::unique_ptr<WeakAndPriorityQueue>
+WeakAndPriorityQueue::createHeap(uint32_t scoresToTrack, bool thread_safe) {
+ if (thread_safe) {
+ return std::make_unique<queryeval::SharedWeakAndPriorityQueue>(scoresToTrack);
+ }
+ return std::make_unique<WeakAndPriorityQueue>(scoresToTrack);
+}
void
-SharedWeakAndPriorityQueue::adjust(score_t *begin, score_t *end)
+WeakAndPriorityQueue::adjust(score_t *begin, score_t *end)
{
if (getScoresToTrack() == 0) {
return;
}
- std::lock_guard guard(_lock);
+
for (score_t *itr = begin; itr != end; ++itr) {
score_t score = *itr;
if (!is_full()) {
@@ -35,4 +40,17 @@ SharedWeakAndPriorityQueue::adjust(score_t *begin, score_t *end)
}
}
+SharedWeakAndPriorityQueue::SharedWeakAndPriorityQueue(uint32_t scoresToTrack)
+ : WeakAndPriorityQueue(scoresToTrack),
+ _lock()
+{ }
+
+SharedWeakAndPriorityQueue::~SharedWeakAndPriorityQueue() = default;
+
+void
+SharedWeakAndPriorityQueue::adjust(score_t *begin, score_t *end) {
+ std::lock_guard guard(_lock);
+ WeakAndPriorityQueue::adjust(begin, end);
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h
index f1c90f5e6ac..db3ddbc39d3 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h
@@ -17,13 +17,13 @@ namespace search::queryeval {
class WeakAndHeap {
public:
using score_t = wand::score_t;
- WeakAndHeap(uint32_t scoresToTrack) :
+ explicit WeakAndHeap(uint32_t scoresToTrack) noexcept :
_minScore((scoresToTrack == 0)
? std::numeric_limits<score_t>::max()
: 0),
_scoresToTrack(scoresToTrack)
{ }
- virtual ~WeakAndHeap() {}
+ virtual ~WeakAndHeap() = default;
/**
* Consider the given scores for insertion into the underlying structure.
* The implementation may change the given score array to speed up execution.
@@ -33,11 +33,13 @@ public:
/**
* The number of scores this heap is tracking.
**/
- uint32_t getScoresToTrack() const { return _scoresToTrack; }
+ uint32_t getScoresToTrack() const noexcept { return _scoresToTrack; }
- score_t getMinScore() const { return _minScore.load(std::memory_order_relaxed); }
+ score_t getMinScore() const noexcept { return _minScore.load(std::memory_order_relaxed); }
protected:
- void setMinScore(score_t minScore) { _minScore.store(minScore, std::memory_order_relaxed); }
+ void setMinScore(score_t minScore) noexcept {
+ _minScore.store(minScore, std::memory_order_relaxed);
+ }
private:
std::atomic<score_t> _minScore;
const uint32_t _scoresToTrack;
@@ -47,19 +49,28 @@ private:
* An implementation using an underlying priority queue to keep track of the N
* best hits that can be shared among multiple search iterators.
*/
-class SharedWeakAndPriorityQueue : public WeakAndHeap
+class WeakAndPriorityQueue : public WeakAndHeap
{
private:
using Scores = vespalib::PriorityQueue<score_t>;
Scores _bestScores;
- std::mutex _lock;
- bool is_full() const { return (_bestScores.size() >= getScoresToTrack()); }
+ bool is_full() const noexcept { return (_bestScores.size() >= getScoresToTrack()); }
+public:
+ explicit WeakAndPriorityQueue(uint32_t scoresToTrack);
+ ~WeakAndPriorityQueue() override;
+ Scores &getScores() noexcept { return _bestScores; }
+ void adjust(score_t *begin, score_t *end) override;
+ static std::unique_ptr<WeakAndPriorityQueue> createHeap(uint32_t scoresToTrack, bool thread_safe);
+};
+class SharedWeakAndPriorityQueue final : public WeakAndPriorityQueue
+{
+private:
+ std::mutex _lock;
public:
- SharedWeakAndPriorityQueue(uint32_t scoresToTrack);
- ~SharedWeakAndPriorityQueue();
- Scores &getScores() { return _bestScores; }
+ explicit SharedWeakAndPriorityQueue(uint32_t scoresToTrack);
+ ~SharedWeakAndPriorityQueue() override;
void adjust(score_t *begin, score_t *end) override;
};
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp
index 04b1cb75da4..33dd3e46fe5 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp
@@ -1,7 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "weak_and_search.h"
-#include "wand_parts.h"
+#include "weak_and_heap.h"
#include <vespa/searchlib/queryeval/orsearch.h>
#include <vespa/vespalib/util/left_right_heap.h>
#include <vespa/vespalib/util/priority_queue.h>
@@ -20,7 +20,8 @@ private:
DualHeap<FutureHeap, PastHeap> _heaps;
Algorithm _algo;
score_t _threshold; // current score threshold
- Scores _scores; // best n scores
+ MatchParams _matchParams;
+ std::vector<score_t> _localScores;
const uint32_t _n;
void seek_strict(uint32_t docid) {
@@ -40,16 +41,24 @@ private:
}
}
}
+ void updateThreshold(score_t newThreshold) {
+ if (newThreshold > _threshold) {
+ _threshold = newThreshold;
+ }
+ }
public:
- WeakAndSearchLR(const Terms &terms, uint32_t n)
- : _terms(terms, TermFrequencyScorer(), 0, {}),
+ template<typename Scorer>
+ WeakAndSearchLR(const Terms &terms, const MatchParams & matchParams, const Scorer & scorer, uint32_t n)
+ : _terms(terms, scorer, 0, {}),
_heaps(DocIdOrder(_terms.docId()), _terms.size()),
_algo(),
- _threshold(1),
- _scores(),
+ _threshold(matchParams.scoreThreshold),
+ _matchParams(matchParams),
+ _localScores(),
_n(n)
{
+ _localScores.reserve(_matchParams.scoresAdjustFrequency);
}
size_t get_num_terms() const override { return _terms.size(); }
int32_t get_term_weight(size_t idx) const override { return _terms.weight(idx); }
@@ -57,6 +66,7 @@ public:
const Terms &getTerms() const override { return _terms.input_terms(); }
uint32_t getN() const override { return _n; }
void doSeek(uint32_t docid) override {
+ updateThreshold(_matchParams.scores.getMinScore());
if (IS_STRICT) {
seek_strict(docid);
} else {
@@ -65,12 +75,11 @@ public:
}
void doUnpack(uint32_t docid) override {
_algo.find_matching_terms(_terms, _heaps);
- _scores.push(_algo.get_upper_bound());
- if (_scores.size() > _n) {
- _scores.pop_front();
- }
- if (_scores.size() == _n) {
- _threshold = _scores.front();
+ score_t score = _algo.get_upper_bound();
+ _localScores.push_back(score);
+ if (_localScores.size() == _matchParams.scoresAdjustFrequency) {
+ _matchParams.scores.adjust(&_localScores[0], &_localScores[0] + _localScores.size());
+ _localScores.clear();
}
ref_t *end = _heaps.present_end();
for (ref_t *ref = _heaps.present_begin(); ref != end; ++ref) {
@@ -102,36 +111,51 @@ WeakAndSearch::visitMembers(vespalib::ObjectVisitor &visitor) const
//-----------------------------------------------------------------------------
+template<typename Scorer>
SearchIterator::UP
-WeakAndSearch::createArrayWand(const Terms &terms, uint32_t n, bool strict)
+WeakAndSearch::createArrayWand(const Terms &terms, const MatchParams & params,
+ const Scorer & scorer, uint32_t n, bool strict)
{
if (strict) {
- return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, true>>(terms, n);
+ return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, true>>(terms, params, scorer, n);
} else {
- return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, false>>(terms, n);
+ return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, false>>(terms, params, scorer, n);
}
}
+template<typename Scorer>
SearchIterator::UP
-WeakAndSearch::createHeapWand(const Terms &terms, uint32_t n, bool strict)
+WeakAndSearch::createHeapWand(const Terms &terms, const MatchParams & params, const Scorer & scorer, uint32_t n, bool strict)
{
if (strict) {
- return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, true>>(terms, n);
+ return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, true>>(terms, params, scorer, n);
} else {
- return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, false>>(terms, n);
+ return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, false>>(terms, params, scorer, n);
}
}
+template<typename Scorer>
SearchIterator::UP
-WeakAndSearch::create(const Terms &terms, uint32_t n, bool strict)
+WeakAndSearch::create(const Terms &terms, const MatchParams & params, const Scorer & scorer, uint32_t n, bool strict)
{
if (terms.size() < 128) {
- return createArrayWand(terms, n, strict);
+ return createArrayWand(terms, params, scorer, n, strict);
} else {
- return createHeapWand(terms, n, strict);
+ return createHeapWand(terms, params, scorer, n, strict);
}
}
+SearchIterator::UP
+WeakAndSearch::create(const Terms &terms, const MatchParams & params, uint32_t n, bool strict)
+{
+ return create(terms, params, wand::TermFrequencyScorer(), n, strict);
+}
+
//-----------------------------------------------------------------------------
+template SearchIterator::UP WeakAndSearch::create<wand::TermFrequencyScorer>(const Terms &terms, const MatchParams & params, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict);
+template SearchIterator::UP WeakAndSearch::create<wand::Bm25TermFrequencyScorer>(const Terms &terms, const MatchParams & params, const wand::Bm25TermFrequencyScorer & scorer, uint32_t n, bool strict);
+template SearchIterator::UP WeakAndSearch::createArrayWand<wand::TermFrequencyScorer>(const Terms &terms, const MatchParams & params, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict);
+template SearchIterator::UP WeakAndSearch::createHeapWand<wand::TermFrequencyScorer>(const Terms &terms, const MatchParams & params, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict);
+
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h
index 6a56a04887c..30292af24ab 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h
@@ -9,15 +9,24 @@ namespace search::queryeval {
struct WeakAndSearch : SearchIterator {
using Terms = wand::Terms;
+ using MatchParams = wand::MatchParams;
virtual size_t get_num_terms() const = 0;
virtual int32_t get_term_weight(size_t idx) const = 0;
virtual wand::score_t get_max_score(size_t idx) const = 0;
virtual const Terms &getTerms() const = 0;
virtual uint32_t getN() const = 0;
void visitMembers(vespalib::ObjectVisitor &visitor) const override;
- static SearchIterator::UP createArrayWand(const Terms &terms, uint32_t n, bool strict);
- static SearchIterator::UP createHeapWand(const Terms &terms, uint32_t n, bool strict);
- static SearchIterator::UP create(const Terms &terms, uint32_t n, bool strict);
+ template<typename Scorer>
+ static SearchIterator::UP createArrayWand(const Terms &terms, const MatchParams & matchParams,
+ const Scorer & scorer, uint32_t n, bool strict);
+ template<typename Scorer>
+ static SearchIterator::UP createHeapWand(const Terms &terms, const MatchParams & matchParams,
+ const Scorer & scorer, uint32_t n, bool strict);
+ template<typename Scorer>
+ static SearchIterator::UP create(const Terms &terms, const MatchParams & matchParams,
+ const Scorer & scorer, uint32_t n, bool strict);
+ static SearchIterator::UP create(const Terms &terms, const MatchParams & matchParams,
+ uint32_t n, bool strict);
};
}