diff options
author | Henning Baldersheim <balder@oath.com> | 2018-08-09 15:37:04 +0200 |
---|---|---|
committer | Henning Baldersheim <balder@oath.com> | 2018-08-09 15:37:46 +0200 |
commit | 49b98a062990920ef905832ac85d277269a07361 (patch) | |
tree | 74946595135a3ba8c0e1b849adb61668a3915e18 | |
parent | a6f9ea782e9921498c6a890697824d7d16065f24 (diff) |
Create diversity filter on demand if diversity is specified.
7 files changed, 114 insertions, 67 deletions
diff --git a/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp index b153b2ca5e0..d1e7adfedb8 100644 --- a/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp +++ b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp @@ -209,7 +209,9 @@ TEST("require that no limiter has no behavior") { TEST("require that the match phase limiter may chose not to limit the query") { FakeRequestContext requestContext; MockSearchable searchable; - MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, "limiter_attribute", 1000, true, 1.0, 0.2, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE); + MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, + DegradationParams("limiter_attribute", 1000, true, 1.0, 0.2, 1.0), + DiversityParams("", 1, 10.0, AttributeLimiter::LOOSE)); MaybeMatchPhaseLimiter &limiter = yes_limiter; EXPECT_TRUE(limiter.is_enabled()); EXPECT_EQUAL(20u, limiter.sample_hits_per_thread(10)); @@ -229,7 +231,9 @@ struct MaxFilterCoverageLimiterFixture { MockSearchable searchable; MatchPhaseLimiter::UP getMaxFilterCoverageLimiter() { - MatchPhaseLimiter::UP yes_limiter(new MatchPhaseLimiter(10000, searchable, requestContext, "limiter_attribute", 10000, true, 0.05, 1.0, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE)); + auto yes_limiter = std::make_unique<MatchPhaseLimiter>(10000, searchable, requestContext, + DegradationParams("limiter_attribute", 10000, true, 0.05, 1.0, 1.0), + DiversityParams("", 1, 10.0, AttributeLimiter::LOOSE)); MaybeMatchPhaseLimiter &limiter = *yes_limiter; EXPECT_TRUE(limiter.is_enabled()); EXPECT_EQUAL(1000u, limiter.sample_hits_per_thread(10)); @@ -271,7 +275,9 @@ TEST_F("require that the match phase limiter may chose to limit the query even w TEST("require that the match phase limiter is able to pre-limit the query") { FakeRequestContext requestContext; MockSearchable searchable; - MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, "limiter_attribute", 500, true, 1.0, 0.2, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE); + MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, + DegradationParams("limiter_attribute", 500, true, 1.0, 0.2, 1.0), + DiversityParams("", 1, 10.0, AttributeLimiter::LOOSE)); MaybeMatchPhaseLimiter &limiter = yes_limiter; EXPECT_TRUE(limiter.is_enabled()); EXPECT_EQUAL(12u, limiter.sample_hits_per_thread(10)); @@ -301,7 +307,9 @@ TEST("require that the match phase limiter is able to pre-limit the query") { TEST("require that the match phase limiter is able to post-limit the query") { MockSearchable searchable; FakeRequestContext requestContext; - MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,"limiter_attribute", 1500, true, 1.0, 0.2, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE); + MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, + DegradationParams("limiter_attribute", 1500, true, 1.0, 0.2, 1.0), + DiversityParams("", 1, 10.0, AttributeLimiter::LOOSE)); MaybeMatchPhaseLimiter &limiter = yes_limiter; EXPECT_TRUE(limiter.is_enabled()); EXPECT_EQUAL(30u, limiter.sample_hits_per_thread(10)); @@ -331,7 +339,9 @@ void verifyDiversity(AttributeLimiter::DiversityCutoffStrategy strategy) { MockSearchable searchable; FakeRequestContext requestContext; - MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,"limiter_attribute", 500, true, 1.0, 0.2, 1.0, "category", 10, 13.1, strategy); + MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, + DegradationParams("limiter_attribute", 500, true, 1.0, 0.2, 1.0), + DiversityParams("category", 10, 13.1, strategy)); MaybeMatchPhaseLimiter &limiter = yes_limiter; SearchIterator::UP search = limiter.maybe_limit(prepare(new MockSearch("search")), 0.1, 100000); limiter.updateDocIdSpaceEstimate(1000, 9000); diff --git a/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h b/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h index c50a6e0dcb8..0c23ea05fbd 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h +++ b/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h @@ -11,7 +11,7 @@ #include <mutex> namespace proton::matching { - + /** * This class is responsible for creating attribute-based search * iterators that are used to limit the search space. Each search diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp index 920f84a21b0..b37f2c002b6 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp @@ -62,7 +62,7 @@ MatchMaster::match(const MatchParams ¶ms, fastos::StopWatch query_latency_time; query_latency_time.start(); vespalib::DualMergeDirector mergeDirector(threadBundle.size()); - MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize); + MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize, matchToolsFactory.createDiversifier()); TimedMatchLoopCommunicator timedCommunicator(communicator); DocidRangeScheduler::UP scheduler = createScheduler(threadBundle.size(), numSearchPartitions, params.numDocs); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp index d6eb62c3d3e..9834432fecd 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp @@ -12,8 +12,7 @@ using search::queryeval::IRequestContext; using search::queryeval::AndSearchStrict; using search::queryeval::NoUnpack; -namespace proton { -namespace matching { +namespace proton::matching { namespace { @@ -68,25 +67,16 @@ LimitedSearch::visitMembers(vespalib::ObjectVisitor &visitor) const visit(visitor, "second", getSecond()); } -MatchPhaseLimiter::MatchPhaseLimiter(uint32_t docIdLimit, - Searchable &searchable_attributes, +MatchPhaseLimiter::MatchPhaseLimiter(uint32_t docIdLimit, Searchable &searchable_attributes, IRequestContext & requestContext, - const vespalib::string &attribute_name, - size_t max_hits, bool descending, - double max_filter_coverage, - double samplePercentage, double postFilterMultiplier, - const vespalib::string &diversity_attribute, - uint32_t diversity_min_groups, - double diversify_cutoff_factor, - AttributeLimiter::DiversityCutoffStrategy diversity_cutoff_strategy) - : _postFilterMultiplier(postFilterMultiplier), - _maxFilterCoverage(max_filter_coverage), - _calculator(max_hits, diversity_min_groups, samplePercentage), - _limiter_factory(searchable_attributes, requestContext, attribute_name, descending, - diversity_attribute, diversify_cutoff_factor, diversity_cutoff_strategy), + DegradationParams degradation, DiversityParams diversity) + : _postFilterMultiplier(degradation._post_filter_multiplier), + _maxFilterCoverage(degradation._max_filter_coverage), + _calculator(degradation._max_hits, diversity._min_groups, degradation._sample_percentage), + _limiter_factory(searchable_attributes, requestContext, degradation._attribute, degradation._descending, + diversity._attribute, diversity._cutoff_factor, diversity._cutoff_strategy), _coverage(docIdLimit) -{ -} +{ } namespace { @@ -108,8 +98,7 @@ do_limit(AttributeLimiter &limiter_factory, SearchIterator::UP search, } // namespace proton::matching::<unnamed> SearchIterator::UP -MatchPhaseLimiter::maybe_limit(SearchIterator::UP search, - double match_freq, size_t num_docs) +MatchPhaseLimiter::maybe_limit(SearchIterator::UP search, double match_freq, size_t num_docs) { size_t wanted_num_docs = _calculator.wanted_num_docs(match_freq); size_t max_filter_docs = static_cast<size_t>(num_docs * _maxFilterCoverage); @@ -145,5 +134,4 @@ MatchPhaseLimiter::getDocIdSpaceEstimate() const return _coverage.getEstimate(); } -} // namespace proton::matching -} // namespace proton +} diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h index 165762d5356..b0d62cb1281 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h @@ -11,8 +11,7 @@ #include <vespa/searchlib/queryeval/blueprint.h> #include <atomic> -namespace proton { -namespace matching { +namespace proton::matching { class LimitedSearch : public search::queryeval::SearchIterator { public: @@ -69,6 +68,40 @@ struct NoMatchPhaseLimiter : MaybeMatchPhaseLimiter { size_t getDocIdSpaceEstimate() const override { return std::numeric_limits<size_t>::max(); } }; +struct DiversityParams { + using CutoffStrategy = AttributeLimiter::DiversityCutoffStrategy; + DiversityParams(const vespalib::string & attribute, uint32_t min_groups, + double cutoff_factor, CutoffStrategy cutoff_strategy) + : _attribute(attribute), + _min_groups(min_groups), + _cutoff_factor(cutoff_factor), + _cutoff_strategy(cutoff_strategy) + { } + + vespalib::string _attribute; + uint32_t _min_groups; + double _cutoff_factor; + CutoffStrategy _cutoff_strategy; +}; + +struct DegradationParams { + DegradationParams(const vespalib::string &attribute, size_t max_hits, bool descending, double max_filter_coverage, + double sample_percentage, double post_filter_multiplier) + : _attribute(attribute), + _max_hits(max_hits), + _descending(descending), + _max_filter_coverage(max_filter_coverage), + _sample_percentage(sample_percentage), + _post_filter_multiplier(post_filter_multiplier) + { } + vespalib::string _attribute; + size_t _max_hits; + bool _descending; + double _max_filter_coverage; + double _sample_percentage; + double _post_filter_multiplier; +}; + /** * This class is is used when rank phase limiting is configured. **/ @@ -103,14 +136,7 @@ public: MatchPhaseLimiter(uint32_t docIdLimit, search::queryeval::Searchable &searchable_attributes, search::queryeval::IRequestContext & requestContext, - const vespalib::string &attribute_name, - size_t max_hits, bool descending, - double max_filter_coverage, - double samplePercentage, double postFilterMultiplier, - const vespalib::string &diversity_attribute, - uint32_t diversity_min_groups, - double diversify_cutoff_factor, - AttributeLimiter::DiversityCutoffStrategy diversity_cutoff_strategy); + DegradationParams degradation, DiversityParams diversity); bool is_enabled() const override { return true; } bool was_limited() const override { return _limiter_factory.was_used(); } size_t sample_hits_per_thread(size_t num_threads) const override { @@ -121,6 +147,4 @@ public: size_t getDocIdSpaceEstimate() const override; }; -} // namespace proton::matching -} // namespace proton - +} diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index e7773c94d72..b536144acf6 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -3,12 +3,16 @@ #include "match_tools.h" #include "querynodes.h" #include <vespa/searchlib/parsequery/stackdumpiterator.h> +#include <vespa/searchlib/attribute/diversity.h> #include <vespa/log/log.h> LOG_SETUP(".proton.matching.match_tools"); #include <vespa/searchlib/query/tree/querytreecreator.h> using search::attribute::IAttributeContext; using search::queryeval::IRequestContext; +using search::queryeval::IDiversifier; +using search::attribute::diversity::DiversityFilter; + using namespace search::fef; using namespace search::fef::indexproperties::matchphase; using namespace search::fef::indexproperties::matching; @@ -138,9 +142,10 @@ MatchToolsFactory(QueryLimiter & queryLimiter, _queryEnv(indexEnv, attributeContext, rankProperties), _mdl(), _rankSetup(rankSetup), - _featureOverrides(featureOverrides) + _featureOverrides(featureOverrides), + _diversityParams(), + _valid(_query.buildTree(queryStack, location, viewResolver, indexEnv)) { - _valid = _query.buildTree(queryStack, location, viewResolver, indexEnv); if (_valid) { _query.extractTerms(_queryEnv.terms()); _query.extractLocations(_queryEnv.locations()); @@ -161,19 +166,21 @@ MatchToolsFactory(QueryLimiter & queryLimiter, double diversity_cutoff_factor = DiversityCutoffFactor::lookup(rankProperties); vespalib::string diversity_cutoff_strategy = DiversityCutoffStrategy::lookup(rankProperties); if (!limit_attribute.empty() && limit_maxhits > 0) { - _match_limiter = std::make_unique<MatchPhaseLimiter>(metaStore.getCommittedDocIdLimit(), searchContext.getAttributes(), _requestContext, - limit_attribute, limit_maxhits, !limit_ascending, limit_max_filter_coverage, - samplePercentage, postFilterMultiplier, - diversity_attribute, diversity_min_groups, diversity_cutoff_factor, - AttributeLimiter::toDiversityCutoffStrategy(diversity_cutoff_strategy)); + _diversityParams = std::make_unique<DiversityParams>(diversity_attribute, diversity_min_groups, diversity_cutoff_factor, + AttributeLimiter::toDiversityCutoffStrategy(diversity_cutoff_strategy)); + DegradationParams degradationParams(limit_attribute, limit_maxhits, !limit_ascending, limit_max_filter_coverage, + samplePercentage, postFilterMultiplier); + _match_limiter = std::make_unique<MatchPhaseLimiter>(metaStore.getCommittedDocIdLimit(), searchContext.getAttributes(), + _requestContext, degradationParams, *_diversityParams); } else if (_rankSetup.hasMatchPhaseDegradation()) { - _match_limiter = std::make_unique<MatchPhaseLimiter>(metaStore.getCommittedDocIdLimit(), searchContext.getAttributes(), _requestContext, - _rankSetup.getDegradationAttribute(), _rankSetup.getDegradationMaxHits(), !_rankSetup.isDegradationOrderAscending(), - _rankSetup.getDegradationMaxFilterCoverage(), - _rankSetup.getDegradationSamplePercentage(), _rankSetup.getDegradationPostFilterMultiplier(), - _rankSetup.getDiversityAttribute(), _rankSetup.getDiversityMinGroups(), - _rankSetup.getDiversityCutoffFactor(), - AttributeLimiter::toDiversityCutoffStrategy(_rankSetup.getDiversityCutoffStrategy())); + _diversityParams = std::make_unique<DiversityParams>(_rankSetup.getDiversityAttribute(), _rankSetup.getDiversityMinGroups(), + _rankSetup.getDiversityCutoffFactor(), + AttributeLimiter::toDiversityCutoffStrategy(_rankSetup.getDiversityCutoffStrategy())); + DegradationParams degradationParams(_rankSetup.getDegradationAttribute(), _rankSetup.getDegradationMaxHits(), !_rankSetup.isDegradationOrderAscending(), + _rankSetup.getDegradationMaxFilterCoverage(), _rankSetup.getDegradationSamplePercentage(), + _rankSetup.getDegradationPostFilterMultiplier()); + _match_limiter = std::make_unique<MatchPhaseLimiter>(metaStore.getCommittedDocIdLimit(), searchContext.getAttributes(), + _requestContext, degradationParams, *_diversityParams); } } if ( ! _match_limiter) { @@ -191,4 +198,18 @@ MatchToolsFactory::createMatchTools() const *_match_limiter, _queryEnv, _mdl, _rankSetup, _featureOverrides); } +std::unique_ptr<IDiversifier> MatchToolsFactory::createDiversifier() const +{ + if (!_diversityParams || _diversityParams->_attribute.empty()) { + return std::unique_ptr<IDiversifier>(); + } + auto attr = _requestContext.getAttribute(_diversityParams->_attribute); + if ( !attr) { + LOG(warning, "Skipping diversity due to no %s attribute.", _diversityParams->_attribute.c_str()); + return std::unique_ptr<IDiversifier>(); + } + size_t max_per_group = _rankSetup.getHeapSize()/_diversityParams->_min_groups; + return DiversityFilter::create(*attr, _rankSetup.getHeapSize(), max_per_group, _diversityParams->_min_groups, true); +} + } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h index f47eda16cc1..33239f5b57f 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h @@ -16,6 +16,8 @@ #include <vespa/searchlib/queryeval/blueprint.h> #include <vespa/searchlib/fef/fef.h> #include <vespa/searchlib/common/idocumentmetastore.h> +#include <vespa/searchlib/queryeval/idiversifier.h> + namespace proton::matching { @@ -71,16 +73,17 @@ public: class MatchToolsFactory : public vespalib::noncopyable { private: - QueryLimiter & _queryLimiter; - RequestContext _requestContext; - const vespalib::Doom _hardDoom; - Query _query; - MaybeMatchPhaseLimiter::UP _match_limiter; - QueryEnvironment _queryEnv; - search::fef::MatchDataLayout _mdl; - const search::fef::RankSetup & _rankSetup; - const search::fef::Properties & _featureOverrides; - bool _valid; + QueryLimiter & _queryLimiter; + RequestContext _requestContext; + const vespalib::Doom _hardDoom; + Query _query; + MaybeMatchPhaseLimiter::UP _match_limiter; + QueryEnvironment _queryEnv; + search::fef::MatchDataLayout _mdl; + const search::fef::RankSetup & _rankSetup; + const search::fef::Properties & _featureOverrides; + std::unique_ptr<DiversityParams> _diversityParams; + bool _valid; public: typedef std::unique_ptr<MatchToolsFactory> UP; @@ -101,6 +104,7 @@ public: bool valid() const { return _valid; } const MaybeMatchPhaseLimiter &match_limiter() const { return *_match_limiter; } MatchTools::UP createMatchTools() const; + std::unique_ptr<search::queryeval::IDiversifier> createDiversifier() const; search::queryeval::Blueprint::HitEstimate estimate() const { return _query.estimate(); } bool has_first_phase_rank() const { return !_rankSetup.getFirstPhaseRank().empty(); } }; |