aboutsummaryrefslogtreecommitdiffstats
path: root/searchcore/src
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2024-05-27 11:39:15 +0200
committerTor Egge <Tor.Egge@online.no>2024-05-27 11:39:15 +0200
commit1496f7a7818069c16018620d732213dab1e78f9c (patch)
treecf79cef7872ed419740af09fd774d49e0d1f39a3 /searchcore/src
parent56796667a88c7976279a24803976b6744a208d62 (diff)
Integrate firstPhaseRank feature with match loop.
Diffstat (limited to 'searchcore/src')
-rw-r--r--searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp38
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp69
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h14
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_master.cpp6
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp5
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_tools.h4
6 files changed, 114 insertions, 22 deletions
diff --git a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
index d5ee88e1617..5994385b4aa 100644
--- a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
+++ b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
@@ -1,6 +1,8 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/test/insertion_operators.h>
#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/searchcore/proton/matching/match_loop_communicator.h>
+#include <vespa/searchlib/features/first_phase_rank_lookup.h>
#include <algorithm>
using namespace proton::matching;
@@ -12,6 +14,7 @@ using Hit = MatchLoopCommunicator::Hit;
using Hits = MatchLoopCommunicator::Hits;
using TaggedHit = MatchLoopCommunicator::TaggedHit;
using TaggedHits = MatchLoopCommunicator::TaggedHits;
+using search::features::FirstPhaseRankLookup;
using search::queryeval::SortedHitSequence;
std::vector<Hit> hit_vec(std::vector<Hit> list) { return list; }
@@ -96,7 +99,7 @@ TEST_F("require that selectBest gives appropriate results for single thread", Ma
}
TEST_F("require that selectBest gives appropriate results for single thread with filter",
- MatchLoopCommunicator(num_threads, 3, std::make_unique<EveryOdd>()))
+ MatchLoopCommunicator(num_threads, 3, std::make_unique<EveryOdd>(), nullptr))
{
TEST_DO(equal(1u, hit_vec({{1, 5}}), selectBest(f1, hit_vec({{1, 5}, {2, 4}}), thread_id)));
TEST_DO(equal(2u, hit_vec({{1, 5}, {3, 3}}), selectBest(f1, hit_vec({{1, 5}, {2, 4}, {3, 3}}), thread_id)));
@@ -154,8 +157,8 @@ TEST_MT_F("require that rangeCover works with no hits", 10, MatchLoopCommunicato
TEST_FFF("require that hits dropped due to lack of diversity affects range cover result",
MatchLoopCommunicator(num_threads, 3),
- MatchLoopCommunicator(num_threads, 3, std::make_unique<EveryOdd>()),
- MatchLoopCommunicator(num_threads, 3, std::make_unique<None>()))
+ MatchLoopCommunicator(num_threads, 3, std::make_unique<EveryOdd>(), nullptr),
+ MatchLoopCommunicator(num_threads, 3, std::make_unique<None>(), nullptr))
{
auto hits_in = hit_vec({{1, 5}, {2, 4}, {3, 3}, {4, 2}, {5, 1}});
auto [my_work1, hits1, ranges1] = second_phase(f1, hits_in, thread_id, 10);
@@ -207,4 +210,33 @@ TEST_MT_F("require that second phase work is evenly distributed among search thr
}
}
+namespace {
+
+std::vector<double> extract_ranks(const FirstPhaseRankLookup& l) {
+ std::vector<double> result;
+ for (uint32_t docid = 21; docid < 26; ++docid) {
+ result.emplace_back(l.lookup(docid));
+ }
+ return result;
+}
+
+search::feature_t unranked = std::numeric_limits<search::feature_t>::max();
+
+using FeatureVec = std::vector<search::feature_t>;
+
+}
+
+TEST("require that first phase rank lookup is populated")
+{
+ FirstPhaseRankLookup l1;
+ FirstPhaseRankLookup l2;
+ MatchLoopCommunicator f1(num_threads, 3, {}, &l1);
+ MatchLoopCommunicator f2(num_threads, 3, std::make_unique<EveryOdd>(), &l2);
+ auto hits_in = hit_vec({{21, 5}, {22, 4}, {23, 3}, {24, 2}, {25, 1}});
+ auto res1 = second_phase(f1, hits_in, thread_id, 10);
+ auto res2 = second_phase(f2, hits_in, thread_id, 10);
+ EXPECT_EQUAL(FeatureVec({1, 2, 3, unranked, unranked}), extract_ranks(l1));
+ EXPECT_EQUAL(FeatureVec({1, unranked, 3, unranked, 5}), extract_ranks(l2));
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
index 01a9508220d..37ae78404a3 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
@@ -1,18 +1,21 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "match_loop_communicator.h"
+#include <vespa/searchlib/features/first_phase_rank_lookup.h>
#include <vespa/vespalib/util/priority_queue.h>
+using search::features::FirstPhaseRankLookup;
+
namespace proton:: matching {
MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN)
- : MatchLoopCommunicator(threads, topN, std::unique_ptr<IDiversifier>())
+ : MatchLoopCommunicator(threads, topN, {}, nullptr)
{}
-MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier> diversifier)
+MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier> diversifier, FirstPhaseRankLookup* first_phase_rank_lookup)
: _best_scores(),
_best_dropped(),
_estimate_match_frequency(threads),
- _get_second_phase_work(threads, topN, _best_scores, _best_dropped, std::move(diversifier)),
+ _get_second_phase_work(threads, topN, _best_scores, _best_dropped, std::move(diversifier), first_phase_rank_lookup),
_complete_second_phase(threads, topN, _best_scores, _best_dropped)
{}
MatchLoopCommunicator::~MatchLoopCommunicator() = default;
@@ -34,18 +37,43 @@ MatchLoopCommunicator::EstimateMatchFrequency::mingle()
}
}
-MatchLoopCommunicator::GetSecondPhaseWork::GetSecondPhaseWork(size_t n, size_t topN_in, Range &best_scores_in, BestDropped &best_dropped_in, std::unique_ptr<IDiversifier> diversifier)
+namespace {
+
+class NoRegisterFirstPhaseRank {
+public:
+ static void pick(uint32_t) noexcept { };
+ static void drop() noexcept { }
+};
+
+class RegisterFirstPhaseRank {
+ FirstPhaseRankLookup& _first_phase_rank_lookup;
+ uint32_t _rank;
+public:
+ RegisterFirstPhaseRank(FirstPhaseRankLookup& first_phase_rank_lookup)
+ : _first_phase_rank_lookup(first_phase_rank_lookup),
+ _rank(0)
+ {
+ }
+ void pick(uint32_t docid) noexcept { _first_phase_rank_lookup.add(docid, ++_rank); }
+ void drop() noexcept { ++_rank; }
+};
+
+}
+
+MatchLoopCommunicator::GetSecondPhaseWork::GetSecondPhaseWork(size_t n, size_t topN_in, Range &best_scores_in, BestDropped &best_dropped_in, std::unique_ptr<IDiversifier> diversifier, FirstPhaseRankLookup* first_phase_rank_lookup)
: vespalib::Rendezvous<SortedHitSequence, TaggedHits, true>(n),
topN(topN_in),
best_scores(best_scores_in),
best_dropped(best_dropped_in),
- _diversifier(std::move(diversifier))
+ _diversifier(std::move(diversifier)),
+ _first_phase_rank_lookup(first_phase_rank_lookup)
{}
+
MatchLoopCommunicator::GetSecondPhaseWork::~GetSecondPhaseWork() = default;
-template<typename Q, typename F>
+template<typename Q, typename F, typename R>
void
-MatchLoopCommunicator::GetSecondPhaseWork::mingle(Q &queue, F &&accept)
+MatchLoopCommunicator::GetSecondPhaseWork::mingle(Q &queue, F &&accept, R register_first_phase_rank)
{
size_t picked = 0;
search::feature_t last_score = 0.0;
@@ -53,14 +81,18 @@ MatchLoopCommunicator::GetSecondPhaseWork::mingle(Q &queue, F &&accept)
uint32_t i = queue.front();
const Hit & hit = in(i).get();
if (accept(hit.first)) {
+ register_first_phase_rank.pick(hit.first);
out(picked % size()).emplace_back(hit, i);
last_score = hit.second;
if (++picked == 1) {
best_scores.high = hit.second;
}
- } else if (!best_dropped.valid) {
- best_dropped.valid = true;
- best_dropped.score = hit.second;
+ } else {
+ if (!best_dropped.valid) {
+ best_dropped.valid = true;
+ best_dropped.score = hit.second;
+ }
+ register_first_phase_rank.drop();
}
in(i).next();
if (in(i).valid()) {
@@ -74,6 +106,17 @@ MatchLoopCommunicator::GetSecondPhaseWork::mingle(Q &queue, F &&accept)
}
}
+template<typename Q, typename R>
+void
+MatchLoopCommunicator::GetSecondPhaseWork::mingle(Q &queue, R register_first_phase_rank)
+{
+ if (_diversifier) {
+ mingle(queue, [diversifier=_diversifier.get()](uint32_t docId) { return diversifier->accepted(docId);}, register_first_phase_rank);
+ } else {
+ mingle(queue, [](uint32_t) { return true;}, register_first_phase_rank);
+ }
+}
+
void
MatchLoopCommunicator::GetSecondPhaseWork::mingle()
{
@@ -87,10 +130,10 @@ MatchLoopCommunicator::GetSecondPhaseWork::mingle()
queue.push(i);
}
}
- if (_diversifier) {
- mingle(queue, [diversifier=_diversifier.get()](uint32_t docId) { return diversifier->accepted(docId);});
+ if (_first_phase_rank_lookup != nullptr) {
+ mingle(queue, RegisterFirstPhaseRank(*_first_phase_rank_lookup));
} else {
- mingle(queue, [](uint32_t) { return true;});
+ mingle(queue, NoRegisterFirstPhaseRank());
}
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
index eb93bdb68d5..d2fdf00ba38 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
@@ -6,12 +6,15 @@
#include <vespa/searchlib/queryeval/idiversifier.h>
#include <vespa/vespalib/util/rendezvous.h>
+namespace search::features { class FirstPhaseRankLookup; }
+
namespace proton::matching {
class MatchLoopCommunicator final : public IMatchLoopCommunicator
{
private:
using IDiversifier = search::queryeval::IDiversifier;
+ using FirstPhaseRankLookup = search::features::FirstPhaseRankLookup;
struct BestDropped {
bool valid = false;
search::feature_t score = 0.0;
@@ -25,11 +28,14 @@ private:
Range &best_scores;
BestDropped &best_dropped;
std::unique_ptr<IDiversifier> _diversifier;
- GetSecondPhaseWork(size_t n, size_t topN_in, Range &best_scores_in, BestDropped &best_dropped_in, std::unique_ptr<IDiversifier>);
+ FirstPhaseRankLookup* _first_phase_rank_lookup;
+ GetSecondPhaseWork(size_t n, size_t topN_in, Range &best_scores_in, BestDropped &best_dropped_in, std::unique_ptr<IDiversifier> diversifier, FirstPhaseRankLookup* first_phase_rank_lookup);
~GetSecondPhaseWork() override;
void mingle() override;
- template<typename Q, typename F>
- void mingle(Q &queue, F &&accept);
+ template<typename Q, typename R>
+ void mingle(Q &queue, R register_first_phase_rank);
+ template<typename Q, typename F, typename R>
+ void mingle(Q &queue, F &&accept, R register_first_phase_rank);
bool cmp(uint32_t a, uint32_t b) {
return (in(a).get().second > in(b).get().second);
}
@@ -59,7 +65,7 @@ private:
public:
MatchLoopCommunicator(size_t threads, size_t topN);
- MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier>);
+ MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier>, FirstPhaseRankLookup* first_phase_rank_lookup);
~MatchLoopCommunicator();
double estimate_match_frequency(const Matches &matches) override {
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
index 89cc97767bf..6dd889da33b 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
@@ -85,7 +85,11 @@ MatchMaster::match(search::engine::Trace & trace,
{
vespalib::Timer query_latency_time;
vespalib::DualMergeDirector mergeDirector(threadBundle.size());
- MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize, mtf.createDiversifier(params.heapSize));
+ /*
+ * We need a non-const first phase rank lookup since it will be populated
+ * later on when selecting documents for second phase ranking.
+ */
+ MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize, mtf.createDiversifier(params.heapSize), mtf.get_first_phase_rank_lookup());
TimedMatchLoopCommunicator timedCommunicator(communicator);
DocidRangeScheduler::UP scheduler = createScheduler(threadBundle.size(), numSearchPartitions, params.numDocs);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
index 06290386a31..ff64ece4494 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
@@ -9,6 +9,7 @@
#include <vespa/searchlib/attribute/diversity.h>
#include <vespa/searchlib/queryeval/flow.h>
#include <vespa/searchlib/engine/trace.h>
+#include <vespa/searchlib/features/first_phase_rank_lookup.h>
#include <vespa/searchlib/fef/indexproperties.h>
#include <vespa/searchlib/fef/ranksetup.h>
#include <vespa/vespalib/util/issue.h>
@@ -190,7 +191,8 @@ MatchToolsFactory(QueryLimiter & queryLimiter,
_rankSetup(rankSetup),
_featureOverrides(featureOverrides),
_diversityParams(),
- _valid(false)
+ _valid(false),
+ _first_phase_rank_lookup(nullptr)
{
if (doom.soft_doom()) return;
auto trace = root_trace.make_trace();
@@ -219,6 +221,7 @@ MatchToolsFactory(QueryLimiter & queryLimiter,
_query.freeze();
trace.addEvent(5, "Prepare shared state for multi-threaded rank executors");
_rankSetup.prepareSharedState(_queryEnv, _queryEnv.getObjectStore());
+ _first_phase_rank_lookup = FirstPhaseRankLookup::get_mutable_shared_state(_queryEnv.getObjectStore());
_diversityParams = extractDiversityParams(_rankSetup, rankProperties);
vespalib::string attribute = DegradationAttribute::lookup(rankProperties, _rankSetup.getDegradationAttribute());
DegradationParams degradationParams = extractDegradationParams(_rankSetup, attribute, rankProperties);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
index 759fe68eea2..da18a8b0a2f 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
@@ -21,6 +21,7 @@ namespace vespalib { class ExecutionProfiler; }
namespace vespalib { struct ThreadBundle; }
namespace search::engine { class Trace; }
+namespace search::features { class FirstPhaseRankLookup; }
namespace search::fef {
class RankProgram;
@@ -119,6 +120,7 @@ private:
using RankSetup = search::fef::RankSetup;
using IIndexEnvironment = search::fef::IIndexEnvironment;
using IDiversifier = search::queryeval::IDiversifier;
+ using FirstPhaseRankLookup = search::features::FirstPhaseRankLookup;
QueryLimiter & _queryLimiter;
AttributeBlueprintParams _attribute_blueprint_params;
Query _query;
@@ -131,6 +133,7 @@ private:
const Properties & _featureOverrides;
DiversityParams _diversityParams;
bool _valid;
+ FirstPhaseRankLookup* _first_phase_rank_lookup;
std::unique_ptr<AttributeOperationTask>
createTask(vespalib::stringref attribute, vespalib::stringref operation) const;
@@ -186,6 +189,7 @@ public:
static AttributeBlueprintParams
extract_attribute_blueprint_params(const RankSetup& rank_setup, const Properties& rank_properties,
uint32_t active_docids, uint32_t docid_limit);
+ FirstPhaseRankLookup* get_first_phase_rank_lookup() const noexcept { return _first_phase_rank_lookup; }
};
}