diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-10-27 06:54:37 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2021-10-27 09:37:13 +0000 |
commit | e7d5df01a80af0734fbe31a3a765e45abeb8dc13 (patch) | |
tree | 4864b8b0dbd186aa8e9df82914648f9ad2db9d2b | |
parent | 69e163ab82de73f5c294cfd2ca3de0e9be434a5b (diff) |
Add and clarify on-first-phase
8 files changed, 42 insertions, 11 deletions
diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp index a4952fa3154..d690fb29795 100644 --- a/searchcore/src/tests/proton/matching/matching_test.cpp +++ b/searchcore/src/tests/proton/matching/matching_test.cpp @@ -785,8 +785,21 @@ TEST("require that match params are set up straight with ranking on") { ASSERT_EQUAL(0.7, p.rankDropLimit); ASSERT_EQUAL(0u, p.offset); ASSERT_EQUAL(1u, p.hits); + ASSERT_TRUE(p.has_rank_drop_limit()); } +TEST("require that match params can turn off rank-drop-limit") { + MatchParams p(1, 2, 4, -std::numeric_limits<feature_t>::quiet_NaN(), 0, 1, true, true); + ASSERT_EQUAL(1u, p.numDocs); + ASSERT_EQUAL(2u, p.heapSize); + ASSERT_EQUAL(4u, p.arraySize); + ASSERT_TRUE(std::isnan(p.rankDropLimit)); + ASSERT_EQUAL(0u, p.offset); + ASSERT_EQUAL(1u, p.hits); + ASSERT_FALSE(p.has_rank_drop_limit()); +} + + TEST("require that match params are set up straight with ranking on arraySize is atleast the size of heapSize") { MatchParams p(1, 6, 4, 0.7, 1, 1, true, true); ASSERT_EQUAL(1u, p.numDocs); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_params.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_params.cpp index 983e09716c3..7465088d8f0 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_params.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_params.cpp @@ -2,6 +2,7 @@ #include "match_params.h" #include <algorithm> +#include <cmath> namespace proton::matching { @@ -32,4 +33,8 @@ MatchParams::MatchParams(uint32_t numDocs_in, rankDropLimit(rankDropLimit_in) { } +bool MatchParams::has_rank_drop_limit() const { + return ! std::isnan(rankDropLimit); +} + } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_params.h b/searchcore/src/vespa/searchcore/proton/matching/match_params.h index 40f33e25914..efcd507c9af 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_params.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_params.h @@ -28,6 +28,7 @@ struct MatchParams { bool hasFinalRank, bool needRanking=true); bool save_rank_scores() const { return ((heapSize + arraySize) != 0); } + bool has_rank_drop_limit() const; }; } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp index 87b4f918d11..c9ec2a06353 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp @@ -10,7 +10,6 @@ #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/queryeval/multibitvectoriterator.h> #include <vespa/searchlib/queryeval/andnotsearch.h> -#include <vespa/vespalib/util/thread_bundle.h> #include <vespa/vespalib/data/slime/cursor.h> #include <vespa/vespalib/data/slime/inserter.h> @@ -234,10 +233,10 @@ template <bool do_rank, bool do_limit, bool do_share> void MatchThread::match_loop_helper_rank_limit_share(MatchTools &tools, HitCollector &hits) { - if (std::isnan(matchParams.rankDropLimit)) { - match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, false>(tools, hits); - } else { + if (matchParams.has_rank_drop_limit()) { match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, true>(tools, hits); + } else { + match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, false>(tools, hits); } } @@ -376,8 +375,14 @@ MatchThread::processResult(const Doom & doom, } } } - if (auto onMatchTask = matchToolsFactory.createOnMatchTask()) { - onMatchTask->run(search::ResultSet::stealResult(std::move(*result))); + + if (auto task = matchToolsFactory.createOnMatchTask()) { + // This is not correct, as it should use the results before rank-drop-limit + // But keeping like this for now as on-first-phase should be a subset of -on-match + task->run(result->copyResult()); + } + if (auto task = matchToolsFactory.createOnFirstPhaseTask()) { + task->run(search::ResultSet::stealResult(std::move(*result))); } if (hasGrouping) { context.grouping->setDistributionKey(_distributionKey); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index 9506ca99b66..d652435cbca 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -253,13 +253,15 @@ MatchToolsFactory::createTask(vespalib::stringref attribute, vespalib::stringref std::unique_ptr<AttributeOperationTask> MatchToolsFactory::createOnMatchTask() const { const auto & op = _rankSetup.getMutateOnMatch(); - return createTask(execute::onmatch::Attribute::lookup(_queryEnv.getProperties(), op._attribute), - execute::onmatch::Operation::lookup(_queryEnv.getProperties(), op._operation)); + return createTask(op._attribute, op._operation); } std::unique_ptr<AttributeOperationTask> MatchToolsFactory::createOnFirstPhaseTask() const { const auto & op = _rankSetup.getMutateOnFirstPhase(); - return createTask(op._attribute, op._operation); + // Note that combining onmatch in query with first-phase is not a bug. + // It is intentional, as the semantics of onmatch in query are identical to on-first-phase. + return createTask(execute::onmatch::Attribute::lookup(_queryEnv.getProperties(), op._attribute), + execute::onmatch::Operation::lookup(_queryEnv.getProperties(), op._operation)); } std::unique_ptr<AttributeOperationTask> MatchToolsFactory::createOnSecondPhaseTask() const { diff --git a/searchlib/src/vespa/searchlib/common/resultset.cpp b/searchlib/src/vespa/searchlib/common/resultset.cpp index 8f5d2d6d105..2e1e431ad82 100644 --- a/searchlib/src/vespa/searchlib/common/resultset.cpp +++ b/searchlib/src/vespa/searchlib/common/resultset.cpp @@ -103,6 +103,12 @@ ResultSet::sort(FastS_IResultSorter & sorter, unsigned int ntop) { } std::pair<std::unique_ptr<BitVector>, vespalib::Array<RankedHit>> +ResultSet::copyResult() const { + std::unique_ptr<BitVector> copy = _bitOverflow ? BitVector::create(*_bitOverflow) : std::unique_ptr<BitVector>(); + return std::make_pair(std::move(copy), _rankedHitsArray); +} + +std::pair<std::unique_ptr<BitVector>, vespalib::Array<RankedHit>> ResultSet::stealResult(ResultSet && rhs) { return std::make_pair(std::move(rhs._bitOverflow), std::move(rhs._rankedHitsArray)); } diff --git a/searchlib/src/vespa/searchlib/common/resultset.h b/searchlib/src/vespa/searchlib/common/resultset.h index e18888dbaf7..6824fc4170d 100644 --- a/searchlib/src/vespa/searchlib/common/resultset.h +++ b/searchlib/src/vespa/searchlib/common/resultset.h @@ -36,6 +36,7 @@ public: unsigned int getNumHits() const; void mergeWithBitOverflow(HitRank default_value = default_rank_value); void sort(FastS_IResultSorter & sorter, unsigned int ntop); + std::pair<std::unique_ptr<BitVector>, vespalib::Array<RankedHit>> copyResult() const; static std::pair<std::unique_ptr<BitVector>, vespalib::Array<RankedHit>> stealResult(ResultSet && rhs); }; diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h index 0d6b470d699..85bbe5ee950 100644 --- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h +++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h @@ -80,7 +80,6 @@ private: typedef std::unique_ptr<Collector> UP; virtual ~Collector() {} virtual void collect(uint32_t docId, feature_t score) = 0; - virtual bool isRankedHitCollector() const { return false; } virtual bool isDocIdCollector() const { return false; } }; @@ -104,7 +103,6 @@ private: RankedHitCollector(HitCollector &hc) : CollectorBase(hc) { } void collect(uint32_t docId, feature_t score) override; void collectAndChangeCollector(uint32_t docId, feature_t score) __attribute__((noinline)); - bool isRankedHitCollector() const override { return true; } }; template <bool CollectRankedHit> |