diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2018-08-10 13:33:17 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-08-10 13:33:17 +0200 |
commit | 1cd6b0060677ff81135493ce09d371c8160a482c (patch) | |
tree | f2ce2dba211a926811a1e5f560e3b7d35ed24faa | |
parent | 852108790beef2df7707ebcfa1706afd326dd9ad (diff) | |
parent | 77368ff9a425b836f14c587d4ebe44b347a260af (diff) |
Merge pull request #6535 from vespa-engine/balder/create-diversity-on-demand
Balder/create diversity on demand
10 files changed, 276 insertions, 118 deletions
diff --git a/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp index b153b2ca5e0..d1e7adfedb8 100644 --- a/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp +++ b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp @@ -209,7 +209,9 @@ TEST("require that no limiter has no behavior") { TEST("require that the match phase limiter may chose not to limit the query") { FakeRequestContext requestContext; MockSearchable searchable; - MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, "limiter_attribute", 1000, true, 1.0, 0.2, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE); + MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, + DegradationParams("limiter_attribute", 1000, true, 1.0, 0.2, 1.0), + DiversityParams("", 1, 10.0, AttributeLimiter::LOOSE)); MaybeMatchPhaseLimiter &limiter = yes_limiter; EXPECT_TRUE(limiter.is_enabled()); EXPECT_EQUAL(20u, limiter.sample_hits_per_thread(10)); @@ -229,7 +231,9 @@ struct MaxFilterCoverageLimiterFixture { MockSearchable searchable; MatchPhaseLimiter::UP getMaxFilterCoverageLimiter() { - MatchPhaseLimiter::UP yes_limiter(new MatchPhaseLimiter(10000, searchable, requestContext, "limiter_attribute", 10000, true, 0.05, 1.0, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE)); + auto yes_limiter = std::make_unique<MatchPhaseLimiter>(10000, searchable, requestContext, + DegradationParams("limiter_attribute", 10000, true, 0.05, 1.0, 1.0), + DiversityParams("", 1, 10.0, AttributeLimiter::LOOSE)); MaybeMatchPhaseLimiter &limiter = *yes_limiter; EXPECT_TRUE(limiter.is_enabled()); EXPECT_EQUAL(1000u, limiter.sample_hits_per_thread(10)); @@ -271,7 +275,9 @@ TEST_F("require that the match phase limiter may chose to limit the query even w TEST("require that the match phase limiter is able to pre-limit the query") { FakeRequestContext requestContext; MockSearchable searchable; - MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, "limiter_attribute", 500, true, 1.0, 0.2, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE); + MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, + DegradationParams("limiter_attribute", 500, true, 1.0, 0.2, 1.0), + DiversityParams("", 1, 10.0, AttributeLimiter::LOOSE)); MaybeMatchPhaseLimiter &limiter = yes_limiter; EXPECT_TRUE(limiter.is_enabled()); EXPECT_EQUAL(12u, limiter.sample_hits_per_thread(10)); @@ -301,7 +307,9 @@ TEST("require that the match phase limiter is able to pre-limit the query") { TEST("require that the match phase limiter is able to post-limit the query") { MockSearchable searchable; FakeRequestContext requestContext; - MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,"limiter_attribute", 1500, true, 1.0, 0.2, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE); + MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, + DegradationParams("limiter_attribute", 1500, true, 1.0, 0.2, 1.0), + DiversityParams("", 1, 10.0, AttributeLimiter::LOOSE)); MaybeMatchPhaseLimiter &limiter = yes_limiter; EXPECT_TRUE(limiter.is_enabled()); EXPECT_EQUAL(30u, limiter.sample_hits_per_thread(10)); @@ -331,7 +339,9 @@ void verifyDiversity(AttributeLimiter::DiversityCutoffStrategy strategy) { MockSearchable searchable; FakeRequestContext requestContext; - MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,"limiter_attribute", 500, true, 1.0, 0.2, 1.0, "category", 10, 13.1, strategy); + MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, + DegradationParams("limiter_attribute", 500, true, 1.0, 0.2, 1.0), + DiversityParams("category", 10, 13.1, strategy)); MaybeMatchPhaseLimiter &limiter = yes_limiter; SearchIterator::UP search = limiter.maybe_limit(prepare(new MockSearch("search")), 0.1, 100000); limiter.updateDocIdSpaceEstimate(1000, 9000); diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp index 0d474fc57cf..c7ea75f50d6 100644 --- a/searchcore/src/tests/proton/matching/matching_test.cpp +++ b/searchcore/src/tests/proton/matching/matching_test.cpp @@ -171,6 +171,7 @@ struct MyWorld { schema.addIndexField(Schema::IndexField("tensor_field", DataType::TENSOR)); schema.addAttributeField(Schema::AttributeField("a1", DataType::INT32)); schema.addAttributeField(Schema::AttributeField("a2", DataType::INT32)); + schema.addAttributeField(Schema::AttributeField("a3", DataType::INT32)); schema.addAttributeField(Schema::AttributeField("predicate_field", DataType::BOOLEANTREE)); // config @@ -211,6 +212,16 @@ struct MyWorld { assert(docid + 1 == NUM_DOCS); attributeContext.add(attr); } + { + SingleInt32ExtAttribute *attr = new SingleInt32ExtAttribute("a3"); + AttributeVector::DocId docid; + for (uint32_t i = 0; i < NUM_DOCS; ++i) { + attr->addDoc(docid); + attr->add(i%10, docid); + } + assert(docid + 1 == NUM_DOCS); + attributeContext.add(attr); + } // grouping sessionManager = SessionManager::SP(new SessionManager(100)); @@ -321,14 +332,20 @@ struct MyWorld { MySearchHandler(Matcher::SP matcher) : _matcher(matcher) {} - virtual DocsumReply::UP getDocsums(const DocsumRequest &) override - { return DocsumReply::UP(); } - virtual SearchReply::UP match(const ISearchHandler::SP &, - const SearchRequest &, - vespalib::ThreadBundle &) const override - { return SearchReply::UP(); } + DocsumReply::UP getDocsums(const DocsumRequest &) override { + return DocsumReply::UP(); + } + SearchReply::UP match(const ISearchHandler::SP &, const SearchRequest &, vespalib::ThreadBundle &) const override { + return SearchReply::UP(); + } }; + MatchToolsFactory::UP create_mtf(SearchRequest::SP req) { + Matcher::SP matcher = createMatcher(); + search::fef::Properties overrides; + return matcher->create_match_tools_factory(*req, searchContext, attributeContext, metaStore, overrides); + } + double get_first_phase_termwise_limit() { Matcher::SP matcher = createMatcher(); SearchRequest::SP request = createSimpleRequest("f1", "spread"); @@ -407,7 +424,7 @@ MyWorld::MyWorld() clock(), queryLimiter() {} -MyWorld::~MyWorld() {} +MyWorld::~MyWorld() = default; //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- @@ -528,6 +545,87 @@ TEST("require that re-ranking is performed (multi-threaded)") { } } +TEST("require that re-ranking is not diverse when not requested to be.") { + MyWorld world; + world.basicSetup(); + world.setupSecondPhaseRanking(); + world.basicResults(); + SearchRequest::SP request = world.createSimpleRequest("f1", "spread"); + auto mtf = world.create_mtf(request); + auto diversity = mtf->createDiversifier(); + EXPECT_FALSE(diversity); +} + +using namespace search::fef::indexproperties::matchphase; +TEST("require that re-ranking is diverse with diversity = 1/1") { + MyWorld world; + world.basicSetup(); + world.setupSecondPhaseRanking(); + world.basicResults(); + SearchRequest::SP request = world.createSimpleRequest("f1", "spread"); + auto mtf = world.create_mtf(request); + auto & rankProperies = request->propertiesMap.lookupCreate(MapNames::RANK); + rankProperies.add(DiversityAttribute::NAME, "a2") + .add(DiversityMinGroups::NAME, "3") + .add(DiversityCutoffStrategy::NAME, "strict"); + mtf = world.create_mtf(request); + auto diversity = mtf->createDiversifier(); + EXPECT_TRUE(diversity); + SearchReply::UP reply = world.performSearch(request, 1); + EXPECT_EQUAL(9u, world.matchingStats.docsMatched()); + EXPECT_EQUAL(9u, world.matchingStats.docsRanked()); + EXPECT_EQUAL(3u, world.matchingStats.docsReRanked()); + ASSERT_TRUE(reply->hits.size() == 9u); + EXPECT_EQUAL(document::DocumentId("doc::900").getGlobalId(), reply->hits[0].gid); + EXPECT_EQUAL(1800.0, reply->hits[0].metric); + EXPECT_EQUAL(document::DocumentId("doc::800").getGlobalId(), reply->hits[1].gid); + EXPECT_EQUAL(1600.0, reply->hits[1].metric); + EXPECT_EQUAL(document::DocumentId("doc::700").getGlobalId(), reply->hits[2].gid); + EXPECT_EQUAL(1400.0, reply->hits[2].metric); + EXPECT_EQUAL(document::DocumentId("doc::600").getGlobalId(), reply->hits[3].gid); + EXPECT_EQUAL(600.0, reply->hits[3].metric); + EXPECT_EQUAL(document::DocumentId("doc::500").getGlobalId(), reply->hits[4].gid); + EXPECT_EQUAL(500.0, reply->hits[4].metric); + EXPECT_GREATER(world.matchingStats.matchTimeAvg(), 0.0000001); + EXPECT_GREATER(world.matchingStats.rerankTimeAvg(), 0.0000001); +} + +TEST("require that re-ranking is diverse with diversity = 1/10") { + MyWorld world; + world.basicSetup(); + world.setupSecondPhaseRanking(); + world.basicResults(); + SearchRequest::SP request = world.createSimpleRequest("f1", "spread"); + auto mtf = world.create_mtf(request); + auto diversity = mtf->createDiversifier(); + EXPECT_FALSE(diversity); + auto & rankProperies = request->propertiesMap.lookupCreate(MapNames::RANK); + rankProperies.add(DiversityAttribute::NAME, "a3") + .add(DiversityMinGroups::NAME, "3") + .add(DiversityCutoffStrategy::NAME, "strict"); + mtf = world.create_mtf(request); + diversity = mtf->createDiversifier(); + EXPECT_TRUE(diversity); + SearchReply::UP reply = world.performSearch(request, 1); + EXPECT_EQUAL(9u, world.matchingStats.docsMatched()); + EXPECT_EQUAL(9u, world.matchingStats.docsRanked()); + EXPECT_EQUAL(1u, world.matchingStats.docsReRanked()); + ASSERT_TRUE(reply->hits.size() == 9u); + EXPECT_EQUAL(document::DocumentId("doc::900").getGlobalId(), reply->hits[0].gid); + EXPECT_EQUAL(1800.0, reply->hits[0].metric); + //TODO This is of course incorrect until the selectBest method sees everything. + EXPECT_EQUAL(document::DocumentId("doc::800").getGlobalId(), reply->hits[1].gid); + EXPECT_EQUAL(800.0, reply->hits[1].metric); + EXPECT_EQUAL(document::DocumentId("doc::700").getGlobalId(), reply->hits[2].gid); + EXPECT_EQUAL(700.0, reply->hits[2].metric); + EXPECT_EQUAL(document::DocumentId("doc::600").getGlobalId(), reply->hits[3].gid); + EXPECT_EQUAL(600.0, reply->hits[3].metric); + EXPECT_EQUAL(document::DocumentId("doc::500").getGlobalId(), reply->hits[4].gid); + EXPECT_EQUAL(500.0, reply->hits[4].metric); + EXPECT_GREATER(world.matchingStats.matchTimeAvg(), 0.0000001); + EXPECT_GREATER(world.matchingStats.rerankTimeAvg(), 0.0000001); +} + TEST("require that sortspec can be used (multi-threaded)") { for (bool drop_sort_data: {false, true}) { for (size_t threads = 1; threads <= 16; ++threads) { @@ -659,8 +757,7 @@ TEST("require that getSummaryFeatures can use cached query setup") { DocsumRequest::SP docsum_request(new DocsumRequest); // no stack dump docsum_request->sessionId = request->sessionId; - docsum_request-> - propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true"); + docsum_request->propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true"); docsum_request->hits.push_back(DocsumRequest::Hit()); docsum_request->hits.back().docid = 30; diff --git a/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h b/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h index c50a6e0dcb8..0c23ea05fbd 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h +++ b/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h @@ -11,7 +11,7 @@ #include <mutex> namespace proton::matching { - + /** * This class is responsible for creating attribute-based search * iterators that are used to limit the search space. Each search diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp index 920f84a21b0..b37f2c002b6 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp @@ -62,7 +62,7 @@ MatchMaster::match(const MatchParams ¶ms, fastos::StopWatch query_latency_time; query_latency_time.start(); vespalib::DualMergeDirector mergeDirector(threadBundle.size()); - MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize); + MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize, matchToolsFactory.createDiversifier()); TimedMatchLoopCommunicator timedCommunicator(communicator); DocidRangeScheduler::UP scheduler = createScheduler(threadBundle.size(), numSearchPartitions, params.numDocs); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp index d6eb62c3d3e..5e965084a2d 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp @@ -12,8 +12,7 @@ using search::queryeval::IRequestContext; using search::queryeval::AndSearchStrict; using search::queryeval::NoUnpack; -namespace proton { -namespace matching { +namespace proton::matching { namespace { @@ -68,25 +67,16 @@ LimitedSearch::visitMembers(vespalib::ObjectVisitor &visitor) const visit(visitor, "second", getSecond()); } -MatchPhaseLimiter::MatchPhaseLimiter(uint32_t docIdLimit, - Searchable &searchable_attributes, +MatchPhaseLimiter::MatchPhaseLimiter(uint32_t docIdLimit, Searchable &searchable_attributes, IRequestContext & requestContext, - const vespalib::string &attribute_name, - size_t max_hits, bool descending, - double max_filter_coverage, - double samplePercentage, double postFilterMultiplier, - const vespalib::string &diversity_attribute, - uint32_t diversity_min_groups, - double diversify_cutoff_factor, - AttributeLimiter::DiversityCutoffStrategy diversity_cutoff_strategy) - : _postFilterMultiplier(postFilterMultiplier), - _maxFilterCoverage(max_filter_coverage), - _calculator(max_hits, diversity_min_groups, samplePercentage), - _limiter_factory(searchable_attributes, requestContext, attribute_name, descending, - diversity_attribute, diversify_cutoff_factor, diversity_cutoff_strategy), + DegradationParams degradation, DiversityParams diversity) + : _postFilterMultiplier(degradation.post_filter_multiplier), + _maxFilterCoverage(degradation.max_filter_coverage), + _calculator(degradation.max_hits, diversity.min_groups, degradation.sample_percentage), + _limiter_factory(searchable_attributes, requestContext, degradation.attribute, degradation.descending, + diversity.attribute, diversity.cutoff_factor, diversity.cutoff_strategy), _coverage(docIdLimit) -{ -} +{ } namespace { @@ -108,8 +98,7 @@ do_limit(AttributeLimiter &limiter_factory, SearchIterator::UP search, } // namespace proton::matching::<unnamed> SearchIterator::UP -MatchPhaseLimiter::maybe_limit(SearchIterator::UP search, - double match_freq, size_t num_docs) +MatchPhaseLimiter::maybe_limit(SearchIterator::UP search, double match_freq, size_t num_docs) { size_t wanted_num_docs = _calculator.wanted_num_docs(match_freq); size_t max_filter_docs = static_cast<size_t>(num_docs * _maxFilterCoverage); @@ -145,5 +134,4 @@ MatchPhaseLimiter::getDocIdSpaceEstimate() const return _coverage.getEstimate(); } -} // namespace proton::matching -} // namespace proton +} diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h index 165762d5356..b39b6695b7f 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h @@ -11,8 +11,7 @@ #include <vespa/searchlib/queryeval/blueprint.h> #include <atomic> -namespace proton { -namespace matching { +namespace proton::matching { class LimitedSearch : public search::queryeval::SearchIterator { public: @@ -69,6 +68,42 @@ struct NoMatchPhaseLimiter : MaybeMatchPhaseLimiter { size_t getDocIdSpaceEstimate() const override { return std::numeric_limits<size_t>::max(); } }; +struct DiversityParams { + using CutoffStrategy = AttributeLimiter::DiversityCutoffStrategy; + DiversityParams() : DiversityParams("", 0, 0, CutoffStrategy::LOOSE) { } + DiversityParams(const vespalib::string & attribute_, uint32_t min_groups_, + double cutoff_factor_, CutoffStrategy cutoff_strategy_) + : attribute(attribute_), + min_groups(min_groups_), + cutoff_factor(cutoff_factor_), + cutoff_strategy(cutoff_strategy_) + { } + bool enabled() const { return !attribute.empty() && (min_groups > 0); } + vespalib::string attribute; + uint32_t min_groups; + double cutoff_factor; + CutoffStrategy cutoff_strategy; +}; + +struct DegradationParams { + DegradationParams(const vespalib::string &attribute_, size_t max_hits_, bool descending_, + double max_filter_coverage_, double sample_percentage_, double post_filter_multiplier_) + : attribute(attribute_), + max_hits(max_hits_), + descending(descending_), + max_filter_coverage(max_filter_coverage_), + sample_percentage(sample_percentage_), + post_filter_multiplier(post_filter_multiplier_) + { } + bool enabled() const { return !attribute.empty() && (max_hits > 0); } + vespalib::string attribute; + size_t max_hits; + bool descending; + double max_filter_coverage; + double sample_percentage; + double post_filter_multiplier; +}; + /** * This class is is used when rank phase limiting is configured. **/ @@ -103,14 +138,7 @@ public: MatchPhaseLimiter(uint32_t docIdLimit, search::queryeval::Searchable &searchable_attributes, search::queryeval::IRequestContext & requestContext, - const vespalib::string &attribute_name, - size_t max_hits, bool descending, - double max_filter_coverage, - double samplePercentage, double postFilterMultiplier, - const vespalib::string &diversity_attribute, - uint32_t diversity_min_groups, - double diversify_cutoff_factor, - AttributeLimiter::DiversityCutoffStrategy diversity_cutoff_strategy); + DegradationParams degradation, DiversityParams diversity); bool is_enabled() const override { return true; } bool was_limited() const override { return _limiter_factory.was_used(); } size_t sample_hits_per_thread(size_t num_threads) const override { @@ -121,6 +149,4 @@ public: size_t getDocIdSpaceEstimate() const override; }; -} // namespace proton::matching -} // namespace proton - +} diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index e7773c94d72..fe10c1d51c3 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -3,12 +3,16 @@ #include "match_tools.h" #include "querynodes.h" #include <vespa/searchlib/parsequery/stackdumpiterator.h> +#include <vespa/searchlib/attribute/diversity.h> #include <vespa/log/log.h> LOG_SETUP(".proton.matching.match_tools"); #include <vespa/searchlib/query/tree/querytreecreator.h> using search::attribute::IAttributeContext; using search::queryeval::IRequestContext; +using search::queryeval::IDiversifier; +using search::attribute::diversity::DiversityFilter; + using namespace search::fef; using namespace search::fef::indexproperties::matchphase; using namespace search::fef::indexproperties::matching; @@ -37,6 +41,27 @@ void tag_match_data(const HandleRecorder::HandleSet &handles, MatchData &match_d } } +DegradationParams +extractDegradationParams(const RankSetup &rankSetup, const Properties &rankProperties) +{ + return DegradationParams(DegradationAttribute::lookup(rankProperties, rankSetup.getDegradationAttribute()), + DegradationMaxHits::lookup(rankProperties, rankSetup.getDegradationMaxHits()), + !DegradationAscendingOrder::lookup(rankProperties, rankSetup.isDegradationOrderAscending()), + DegradationMaxFilterCoverage::lookup(rankProperties, rankSetup.getDegradationMaxFilterCoverage()), + DegradationSamplePercentage::lookup(rankProperties, rankSetup.getDegradationSamplePercentage()), + DegradationPostFilterMultiplier::lookup(rankProperties, rankSetup.getDegradationPostFilterMultiplier())); + +} + +DiversityParams +extractDiversityParams(const RankSetup &rankSetup, const Properties &rankProperties) +{ + return DiversityParams(DiversityAttribute::lookup(rankProperties, rankSetup.getDiversityAttribute()), + DiversityMinGroups::lookup(rankProperties, rankSetup.getDiversityMinGroups()), + DiversityCutoffFactor::lookup(rankProperties, rankSetup.getDiversityCutoffFactor()), + AttributeLimiter::toDiversityCutoffStrategy(DiversityCutoffStrategy::lookup(rankProperties, rankSetup.getDiversityCutoffStrategy()))); +} + } // namespace proton::matching::<unnamed> void @@ -138,9 +163,10 @@ MatchToolsFactory(QueryLimiter & queryLimiter, _queryEnv(indexEnv, attributeContext, rankProperties), _mdl(), _rankSetup(rankSetup), - _featureOverrides(featureOverrides) + _featureOverrides(featureOverrides), + _diversityParams(), + _valid(_query.buildTree(queryStack, location, viewResolver, indexEnv)) { - _valid = _query.buildTree(queryStack, location, viewResolver, indexEnv); if (_valid) { _query.extractTerms(_queryEnv.terms()); _query.extractLocations(_queryEnv.locations()); @@ -150,30 +176,12 @@ MatchToolsFactory(QueryLimiter & queryLimiter, _query.fetchPostings(); _query.freeze(); _rankSetup.prepareSharedState(_queryEnv, _queryEnv.getObjectStore()); - vespalib::string limit_attribute = DegradationAttribute::lookup(rankProperties); - size_t limit_maxhits = DegradationMaxHits::lookup(rankProperties); - bool limit_ascending = DegradationAscendingOrder::lookup(rankProperties); - double limit_max_filter_coverage = DegradationMaxFilterCoverage::lookup(rankProperties); - double samplePercentage = DegradationSamplePercentage::lookup(rankProperties); - double postFilterMultiplier = DegradationPostFilterMultiplier::lookup(rankProperties); - vespalib::string diversity_attribute = DiversityAttribute::lookup(rankProperties); - uint32_t diversity_min_groups = DiversityMinGroups::lookup(rankProperties); - double diversity_cutoff_factor = DiversityCutoffFactor::lookup(rankProperties); - vespalib::string diversity_cutoff_strategy = DiversityCutoffStrategy::lookup(rankProperties); - if (!limit_attribute.empty() && limit_maxhits > 0) { - _match_limiter = std::make_unique<MatchPhaseLimiter>(metaStore.getCommittedDocIdLimit(), searchContext.getAttributes(), _requestContext, - limit_attribute, limit_maxhits, !limit_ascending, limit_max_filter_coverage, - samplePercentage, postFilterMultiplier, - diversity_attribute, diversity_min_groups, diversity_cutoff_factor, - AttributeLimiter::toDiversityCutoffStrategy(diversity_cutoff_strategy)); - } else if (_rankSetup.hasMatchPhaseDegradation()) { - _match_limiter = std::make_unique<MatchPhaseLimiter>(metaStore.getCommittedDocIdLimit(), searchContext.getAttributes(), _requestContext, - _rankSetup.getDegradationAttribute(), _rankSetup.getDegradationMaxHits(), !_rankSetup.isDegradationOrderAscending(), - _rankSetup.getDegradationMaxFilterCoverage(), - _rankSetup.getDegradationSamplePercentage(), _rankSetup.getDegradationPostFilterMultiplier(), - _rankSetup.getDiversityAttribute(), _rankSetup.getDiversityMinGroups(), - _rankSetup.getDiversityCutoffFactor(), - AttributeLimiter::toDiversityCutoffStrategy(_rankSetup.getDiversityCutoffStrategy())); + _diversityParams = extractDiversityParams(_rankSetup, rankProperties); + DegradationParams degradationParams = extractDegradationParams(_rankSetup, rankProperties); + + if (degradationParams.enabled()) { + _match_limiter = std::make_unique<MatchPhaseLimiter>(metaStore.getCommittedDocIdLimit(), searchContext.getAttributes(), + _requestContext, degradationParams, _diversityParams); } } if ( ! _match_limiter) { @@ -191,4 +199,19 @@ MatchToolsFactory::createMatchTools() const *_match_limiter, _queryEnv, _mdl, _rankSetup, _featureOverrides); } +std::unique_ptr<IDiversifier> MatchToolsFactory::createDiversifier() const +{ + if ( !_diversityParams.enabled() ) { + return std::unique_ptr<IDiversifier>(); + } + auto attr = _requestContext.getAttribute(_diversityParams.attribute); + if ( !attr) { + LOG(warning, "Skipping diversity due to no %s attribute.", _diversityParams.attribute.c_str()); + return std::unique_ptr<IDiversifier>(); + } + size_t max_per_group = _rankSetup.getHeapSize()/_diversityParams.min_groups; + return DiversityFilter::create(*attr, _rankSetup.getHeapSize(), max_per_group, _diversityParams.min_groups, + _diversityParams.cutoff_strategy == DiversityParams::CutoffStrategy::STRICT); +} + } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h index f47eda16cc1..97baafb8bc3 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h @@ -16,6 +16,8 @@ #include <vespa/searchlib/queryeval/blueprint.h> #include <vespa/searchlib/fef/fef.h> #include <vespa/searchlib/common/idocumentmetastore.h> +#include <vespa/searchlib/queryeval/idiversifier.h> + namespace proton::matching { @@ -71,16 +73,17 @@ public: class MatchToolsFactory : public vespalib::noncopyable { private: - QueryLimiter & _queryLimiter; - RequestContext _requestContext; - const vespalib::Doom _hardDoom; - Query _query; - MaybeMatchPhaseLimiter::UP _match_limiter; - QueryEnvironment _queryEnv; - search::fef::MatchDataLayout _mdl; - const search::fef::RankSetup & _rankSetup; - const search::fef::Properties & _featureOverrides; - bool _valid; + QueryLimiter & _queryLimiter; + RequestContext _requestContext; + const vespalib::Doom _hardDoom; + Query _query; + MaybeMatchPhaseLimiter::UP _match_limiter; + QueryEnvironment _queryEnv; + search::fef::MatchDataLayout _mdl; + const search::fef::RankSetup & _rankSetup; + const search::fef::Properties & _featureOverrides; + DiversityParams _diversityParams; + bool _valid; public: typedef std::unique_ptr<MatchToolsFactory> UP; @@ -101,6 +104,7 @@ public: bool valid() const { return _valid; } const MaybeMatchPhaseLimiter &match_limiter() const { return *_match_limiter; } MatchTools::UP createMatchTools() const; + std::unique_ptr<search::queryeval::IDiversifier> createDiversifier() const; search::queryeval::Blueprint::HitEstimate estimate() const { return _query.estimate(); } bool has_first_phase_rank() const { return !_rankSetup.getFirstPhaseRank().empty(); } }; diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp index 493b49be0b0..69504c4cc71 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp @@ -272,63 +272,63 @@ const vespalib::string DiversityCutoffStrategy::NAME("vespa.matchphase.diversity const vespalib::string DiversityCutoffStrategy::DEFAULT_VALUE("loose"); vespalib::string -DegradationAttribute::lookup(const Properties &props) +DegradationAttribute::lookup(const Properties &props, const vespalib::string & defaultValue) { - return lookupString(props, NAME, DEFAULT_VALUE); + return lookupString(props, NAME, defaultValue); } bool -DegradationAscendingOrder::lookup(const Properties &props) +DegradationAscendingOrder::lookup(const Properties &props, bool defaultValue) { - return lookupBool(props, NAME, DEFAULT_VALUE); + return lookupBool(props, NAME, defaultValue); } uint32_t -DegradationMaxHits::lookup(const Properties &props) +DegradationMaxHits::lookup(const Properties &props, uint32_t defaultValue) { - return lookupUint32(props, NAME, DEFAULT_VALUE); + return lookupUint32(props, NAME, defaultValue); } double -DegradationSamplePercentage::lookup(const Properties &props) +DegradationSamplePercentage::lookup(const Properties &props, double defaultValue) { - return lookupDouble(props, NAME, DEFAULT_VALUE); + return lookupDouble(props, NAME, defaultValue); } double -DegradationMaxFilterCoverage::lookup(const Properties &props) +DegradationMaxFilterCoverage::lookup(const Properties &props, double defaultValue) { - return lookupDouble(props, NAME, DEFAULT_VALUE); + return lookupDouble(props, NAME, defaultValue); } double -DegradationPostFilterMultiplier::lookup(const Properties &props) +DegradationPostFilterMultiplier::lookup(const Properties &props, double defaultValue) { - return lookupDouble(props, NAME, DEFAULT_VALUE); + return lookupDouble(props, NAME, defaultValue); } vespalib::string -DiversityAttribute::lookup(const Properties &props) +DiversityAttribute::lookup(const Properties &props, const vespalib::string & defaultValue) { - return lookupString(props, NAME, DEFAULT_VALUE); + return lookupString(props, NAME, defaultValue); } uint32_t -DiversityMinGroups::lookup(const Properties &props) +DiversityMinGroups::lookup(const Properties &props, uint32_t defaultValue) { - return lookupUint32(props, NAME, DEFAULT_VALUE); + return lookupUint32(props, NAME, defaultValue); } double -DiversityCutoffFactor::lookup(const Properties &props) +DiversityCutoffFactor::lookup(const Properties &props, double defaultValue) { - return lookupDouble(props, NAME, DEFAULT_VALUE); + return lookupDouble(props, NAME, defaultValue); } vespalib::string -DiversityCutoffStrategy::lookup(const Properties &props) +DiversityCutoffStrategy::lookup(const Properties &props, const vespalib::string & defaultValue) { - return lookupString(props, NAME, DEFAULT_VALUE); + return lookupString(props, NAME, defaultValue); } diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h index 38e0eca7548..5140e811e1c 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.h +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h @@ -174,7 +174,8 @@ namespace matchphase { struct DegradationAttribute { static const vespalib::string NAME; static const vespalib::string DEFAULT_VALUE; - static vespalib::string lookup(const Properties &props); + static vespalib::string lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); } + static vespalib::string lookup(const Properties &props, const vespalib::string & defaultValue); }; /** @@ -183,7 +184,8 @@ namespace matchphase { struct DegradationAscendingOrder { static const vespalib::string NAME; static const bool DEFAULT_VALUE; - static bool lookup(const Properties &props); + static bool lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); } + static bool lookup(const Properties &props, bool defaultValue); }; /** @@ -192,7 +194,8 @@ namespace matchphase { struct DegradationMaxHits { static const vespalib::string NAME; static const uint32_t DEFAULT_VALUE; - static uint32_t lookup(const Properties &props); + static uint32_t lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); } + static uint32_t lookup(const Properties &props, uint32_t defaultValue); }; /** @@ -201,13 +204,15 @@ namespace matchphase { struct DegradationSamplePercentage { static const vespalib::string NAME; static const double DEFAULT_VALUE; - static double lookup(const Properties &props); + static double lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); } + static double lookup(const Properties &props, double defaultValue); }; struct DegradationMaxFilterCoverage { static const vespalib::string NAME; static const double DEFAULT_VALUE; - static double lookup(const Properties &props); + static double lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); } + static double lookup(const Properties &props, double defaultValue); }; /** @@ -217,7 +222,8 @@ namespace matchphase { struct DegradationPostFilterMultiplier { static const vespalib::string NAME; static const double DEFAULT_VALUE; - static double lookup(const Properties &props); + static double lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); } + static double lookup(const Properties &props, double defaultValue); }; /** @@ -228,7 +234,8 @@ namespace matchphase { struct DiversityAttribute { static const vespalib::string NAME; static const vespalib::string DEFAULT_VALUE; - static vespalib::string lookup(const Properties &props); + static vespalib::string lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); } + static vespalib::string lookup(const Properties &props, const vespalib::string & defaultValue); }; /** @@ -239,18 +246,21 @@ namespace matchphase { struct DiversityMinGroups { static const vespalib::string NAME; static const uint32_t DEFAULT_VALUE; - static uint32_t lookup(const Properties &props); + static uint32_t lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); } + static uint32_t lookup(const Properties &props, uint32_t defaultValue); }; struct DiversityCutoffFactor { static const vespalib::string NAME; static const double DEFAULT_VALUE; - static double lookup(const Properties &props); + static double lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); } + static double lookup(const Properties &props, double defaultValue); }; struct DiversityCutoffStrategy { static const vespalib::string NAME; static const vespalib::string DEFAULT_VALUE; - static vespalib::string lookup(const Properties &props); + static vespalib::string lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); } + static vespalib::string lookup(const Properties &props, const vespalib::string & defaultValue); }; } // namespace matchphase |