summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2018-08-10 13:33:17 +0200
committer GitHub <noreply@github.com> 2018-08-10 13:33:17 +0200
commit 1cd6b0060677ff81135493ce09d371c8160a482c (patch)
tree f2ce2dba211a926811a1e5f560e3b7d35ed24faa
parent 852108790beef2df7707ebcfa1706afd326dd9ad (diff)
parent 77368ff9a425b836f14c587d4ebe44b347a260af (diff)
Merge pull request #6535 from vespa-engine/balder/create-diversity-on-demand
Balder/create diversity on demand
-rw-r--r-- searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp 20
-rw-r--r-- searchcore/src/tests/proton/matching/matching_test.cpp 115
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h 2
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_master.cpp 2
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp 34
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h 52
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp 75
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_tools.h 24
-rw-r--r-- searchlib/src/vespa/searchlib/fef/indexproperties.cpp 40
-rw-r--r-- searchlib/src/vespa/searchlib/fef/indexproperties.h 30
10 files changed, 276 insertions, 118 deletions
diff --git a/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp
index b153b2ca5e0..d1e7adfedb8 100644
--- a/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp
+++ b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp
@@ -209,7 +209,9 @@ TEST("require that no limiter has no behavior") {
TEST("require that the match phase limiter may chose not to limit the query") {
FakeRequestContext requestContext;
MockSearchable searchable;
- MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, "limiter_attribute", 1000, true, 1.0, 0.2, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE);
+ MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,
+ DegradationParams("limiter_attribute", 1000, true, 1.0, 0.2, 1.0),
+ DiversityParams("", 1, 10.0, AttributeLimiter::LOOSE));
MaybeMatchPhaseLimiter &limiter = yes_limiter;
EXPECT_TRUE(limiter.is_enabled());
EXPECT_EQUAL(20u, limiter.sample_hits_per_thread(10));
@@ -229,7 +231,9 @@ struct MaxFilterCoverageLimiterFixture {
MockSearchable searchable;
MatchPhaseLimiter::UP getMaxFilterCoverageLimiter() {
- MatchPhaseLimiter::UP yes_limiter(new MatchPhaseLimiter(10000, searchable, requestContext, "limiter_attribute", 10000, true, 0.05, 1.0, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE));
+ auto yes_limiter = std::make_unique<MatchPhaseLimiter>(10000, searchable, requestContext,
+ DegradationParams("limiter_attribute", 10000, true, 0.05, 1.0, 1.0),
+ DiversityParams("", 1, 10.0, AttributeLimiter::LOOSE));
MaybeMatchPhaseLimiter &limiter = *yes_limiter;
EXPECT_TRUE(limiter.is_enabled());
EXPECT_EQUAL(1000u, limiter.sample_hits_per_thread(10));
@@ -271,7 +275,9 @@ TEST_F("require that the match phase limiter may chose to limit the query even w
TEST("require that the match phase limiter is able to pre-limit the query") {
FakeRequestContext requestContext;
MockSearchable searchable;
- MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, "limiter_attribute", 500, true, 1.0, 0.2, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE);
+ MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,
+ DegradationParams("limiter_attribute", 500, true, 1.0, 0.2, 1.0),
+ DiversityParams("", 1, 10.0, AttributeLimiter::LOOSE));
MaybeMatchPhaseLimiter &limiter = yes_limiter;
EXPECT_TRUE(limiter.is_enabled());
EXPECT_EQUAL(12u, limiter.sample_hits_per_thread(10));
@@ -301,7 +307,9 @@ TEST("require that the match phase limiter is able to pre-limit the query") {
TEST("require that the match phase limiter is able to post-limit the query") {
MockSearchable searchable;
FakeRequestContext requestContext;
- MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,"limiter_attribute", 1500, true, 1.0, 0.2, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE);
+ MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,
+ DegradationParams("limiter_attribute", 1500, true, 1.0, 0.2, 1.0),
+ DiversityParams("", 1, 10.0, AttributeLimiter::LOOSE));
MaybeMatchPhaseLimiter &limiter = yes_limiter;
EXPECT_TRUE(limiter.is_enabled());
EXPECT_EQUAL(30u, limiter.sample_hits_per_thread(10));
@@ -331,7 +339,9 @@ void verifyDiversity(AttributeLimiter::DiversityCutoffStrategy strategy)
{
MockSearchable searchable;
FakeRequestContext requestContext;
- MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,"limiter_attribute", 500, true, 1.0, 0.2, 1.0, "category", 10, 13.1, strategy);
+ MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,
+ DegradationParams("limiter_attribute", 500, true, 1.0, 0.2, 1.0),
+ DiversityParams("category", 10, 13.1, strategy));
MaybeMatchPhaseLimiter &limiter = yes_limiter;
SearchIterator::UP search = limiter.maybe_limit(prepare(new MockSearch("search")), 0.1, 100000);
limiter.updateDocIdSpaceEstimate(1000, 9000);
diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp
index 0d474fc57cf..c7ea75f50d6 100644
--- a/searchcore/src/tests/proton/matching/matching_test.cpp
+++ b/searchcore/src/tests/proton/matching/matching_test.cpp
@@ -171,6 +171,7 @@ struct MyWorld {
schema.addIndexField(Schema::IndexField("tensor_field", DataType::TENSOR));
schema.addAttributeField(Schema::AttributeField("a1", DataType::INT32));
schema.addAttributeField(Schema::AttributeField("a2", DataType::INT32));
+ schema.addAttributeField(Schema::AttributeField("a3", DataType::INT32));
schema.addAttributeField(Schema::AttributeField("predicate_field", DataType::BOOLEANTREE));
// config
@@ -211,6 +212,16 @@ struct MyWorld {
assert(docid + 1 == NUM_DOCS);
attributeContext.add(attr);
}
+ {
+ SingleInt32ExtAttribute *attr = new SingleInt32ExtAttribute("a3");
+ AttributeVector::DocId docid;
+ for (uint32_t i = 0; i < NUM_DOCS; ++i) {
+ attr->addDoc(docid);
+ attr->add(i%10, docid);
+ }
+ assert(docid + 1 == NUM_DOCS);
+ attributeContext.add(attr);
+ }
// grouping
sessionManager = SessionManager::SP(new SessionManager(100));
@@ -321,14 +332,20 @@ struct MyWorld {
MySearchHandler(Matcher::SP matcher) : _matcher(matcher) {}
- virtual DocsumReply::UP getDocsums(const DocsumRequest &) override
- { return DocsumReply::UP(); }
- virtual SearchReply::UP match(const ISearchHandler::SP &,
- const SearchRequest &,
- vespalib::ThreadBundle &) const override
- { return SearchReply::UP(); }
+ DocsumReply::UP getDocsums(const DocsumRequest &) override { // Stub override: ignores the request.
+ return DocsumReply::UP(); // Always returns an empty (null) reply pointer.
+ }
+ SearchReply::UP match(const ISearchHandler::SP &, const SearchRequest &, vespalib::ThreadBundle &) const override { // Stub override: all arguments are ignored.
+ return SearchReply::UP(); // Always returns an empty (null) reply pointer.
+ }
};
+ MatchToolsFactory::UP create_mtf(SearchRequest::SP req) { // Test helper: builds a MatchToolsFactory for req from a freshly created matcher.
+ Matcher::SP matcher = createMatcher();
+ search::fef::Properties overrides; // Default-constructed feature overrides handed to the factory.
+ return matcher->create_match_tools_factory(*req, searchContext, attributeContext, metaStore, overrides); // NOTE(review): MatchToolsFactory keeps its feature overrides as a const reference member; if this local is bound to it, the returned factory outlives it - confirm callers never touch the overrides.
+ }
+
double get_first_phase_termwise_limit() {
Matcher::SP matcher = createMatcher();
SearchRequest::SP request = createSimpleRequest("f1", "spread");
@@ -407,7 +424,7 @@ MyWorld::MyWorld()
clock(),
queryLimiter()
{}
-MyWorld::~MyWorld() {}
+MyWorld::~MyWorld() = default;
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
@@ -528,6 +545,87 @@ TEST("require that re-ranking is performed (multi-threaded)") {
}
}
+TEST("require that re-ranking is not diverse when not requested to be.") { // No diversity properties are added to the request in this test.
+ MyWorld world;
+ world.basicSetup();
+ world.setupSecondPhaseRanking();
+ world.basicResults();
+ SearchRequest::SP request = world.createSimpleRequest("f1", "spread");
+ auto mtf = world.create_mtf(request);
+ auto diversity = mtf->createDiversifier(); // Expected to be an empty pointer, checked on the next line.
+ EXPECT_FALSE(diversity);
+}
+
+using namespace search::fef::indexproperties::matchphase;
+TEST("require that re-ranking is diverse with diversity = 1/1") { // Requests diversity on attribute a2 with min groups 3 and the strict cutoff strategy.
+ MyWorld world;
+ world.basicSetup();
+ world.setupSecondPhaseRanking();
+ world.basicResults();
+ SearchRequest::SP request = world.createSimpleRequest("f1", "spread");
+ auto mtf = world.create_mtf(request); // NOTE(review): this factory is replaced below before being used; looks redundant.
+ auto & rankProperies = request->propertiesMap.lookupCreate(MapNames::RANK); // Rank properties of the request; the diversity settings are added to these.
+ rankProperies.add(DiversityAttribute::NAME, "a2")
+ .add(DiversityMinGroups::NAME, "3")
+ .add(DiversityCutoffStrategy::NAME, "strict");
+ mtf = world.create_mtf(request); // Rebuilt after the properties were added, since the factory reads them at construction.
+ auto diversity = mtf->createDiversifier();
+ EXPECT_TRUE(diversity); // A diversifier must now be produced.
+ SearchReply::UP reply = world.performSearch(request, 1);
+ EXPECT_EQUAL(9u, world.matchingStats.docsMatched());
+ EXPECT_EQUAL(9u, world.matchingStats.docsRanked());
+ EXPECT_EQUAL(3u, world.matchingStats.docsReRanked()); // Three documents are expected to be re-ranked.
+ ASSERT_TRUE(reply->hits.size() == 9u);
+ EXPECT_EQUAL(document::DocumentId("doc::900").getGlobalId(), reply->hits[0].gid); // Hits 0-2 are expected with metrics 1800/1600/1400.
+ EXPECT_EQUAL(1800.0, reply->hits[0].metric);
+ EXPECT_EQUAL(document::DocumentId("doc::800").getGlobalId(), reply->hits[1].gid);
+ EXPECT_EQUAL(1600.0, reply->hits[1].metric);
+ EXPECT_EQUAL(document::DocumentId("doc::700").getGlobalId(), reply->hits[2].gid);
+ EXPECT_EQUAL(1400.0, reply->hits[2].metric);
+ EXPECT_EQUAL(document::DocumentId("doc::600").getGlobalId(), reply->hits[3].gid); // Hits 3-4 are expected with metrics 600/500.
+ EXPECT_EQUAL(600.0, reply->hits[3].metric);
+ EXPECT_EQUAL(document::DocumentId("doc::500").getGlobalId(), reply->hits[4].gid);
+ EXPECT_EQUAL(500.0, reply->hits[4].metric);
+ EXPECT_GREATER(world.matchingStats.matchTimeAvg(), 0.0000001); // Both average timings must be recorded as non-zero.
+ EXPECT_GREATER(world.matchingStats.rerankTimeAvg(), 0.0000001);
+}
+
+TEST("require that re-ranking is diverse with diversity = 1/10") { // Requests diversity on attribute a3 (filled with i%10 in the test setup) with min groups 3 and the strict cutoff strategy.
+ MyWorld world;
+ world.basicSetup();
+ world.setupSecondPhaseRanking();
+ world.basicResults();
+ SearchRequest::SP request = world.createSimpleRequest("f1", "spread");
+ auto mtf = world.create_mtf(request);
+ auto diversity = mtf->createDiversifier(); // Before any diversity properties are set: no diversifier expected.
+ EXPECT_FALSE(diversity);
+ auto & rankProperies = request->propertiesMap.lookupCreate(MapNames::RANK); // Rank properties of the request; the diversity settings are added to these.
+ rankProperies.add(DiversityAttribute::NAME, "a3")
+ .add(DiversityMinGroups::NAME, "3")
+ .add(DiversityCutoffStrategy::NAME, "strict");
+ mtf = world.create_mtf(request); // Rebuilt after the properties were added, since the factory reads them at construction.
+ diversity = mtf->createDiversifier();
+ EXPECT_TRUE(diversity); // A diversifier must now be produced.
+ SearchReply::UP reply = world.performSearch(request, 1);
+ EXPECT_EQUAL(9u, world.matchingStats.docsMatched());
+ EXPECT_EQUAL(9u, world.matchingStats.docsRanked());
+ EXPECT_EQUAL(1u, world.matchingStats.docsReRanked()); // Only one document is expected to be re-ranked here.
+ ASSERT_TRUE(reply->hits.size() == 9u);
+ EXPECT_EQUAL(document::DocumentId("doc::900").getGlobalId(), reply->hits[0].gid); // Hit 0 is expected with metric 1800.
+ EXPECT_EQUAL(1800.0, reply->hits[0].metric);
+ //TODO This is of course incorrect until the selectBest method sees everything.
+ EXPECT_EQUAL(document::DocumentId("doc::800").getGlobalId(), reply->hits[1].gid); // Hits 1-4 are expected with metrics 800/700/600/500.
+ EXPECT_EQUAL(800.0, reply->hits[1].metric);
+ EXPECT_EQUAL(document::DocumentId("doc::700").getGlobalId(), reply->hits[2].gid);
+ EXPECT_EQUAL(700.0, reply->hits[2].metric);
+ EXPECT_EQUAL(document::DocumentId("doc::600").getGlobalId(), reply->hits[3].gid);
+ EXPECT_EQUAL(600.0, reply->hits[3].metric);
+ EXPECT_EQUAL(document::DocumentId("doc::500").getGlobalId(), reply->hits[4].gid);
+ EXPECT_EQUAL(500.0, reply->hits[4].metric);
+ EXPECT_GREATER(world.matchingStats.matchTimeAvg(), 0.0000001); // Both average timings must be recorded as non-zero.
+ EXPECT_GREATER(world.matchingStats.rerankTimeAvg(), 0.0000001);
+}
+
TEST("require that sortspec can be used (multi-threaded)") {
for (bool drop_sort_data: {false, true}) {
for (size_t threads = 1; threads <= 16; ++threads) {
@@ -659,8 +757,7 @@ TEST("require that getSummaryFeatures can use cached query setup") {
DocsumRequest::SP docsum_request(new DocsumRequest); // no stack dump
docsum_request->sessionId = request->sessionId;
- docsum_request->
- propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true");
+ docsum_request->propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true");
docsum_request->hits.push_back(DocsumRequest::Hit());
docsum_request->hits.back().docid = 30;
diff --git a/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h b/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h
index c50a6e0dcb8..0c23ea05fbd 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h
@@ -11,7 +11,7 @@
#include <mutex>
namespace proton::matching {
-
+
/**
* This class is responsible for creating attribute-based search
* iterators that are used to limit the search space. Each search
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
index 920f84a21b0..b37f2c002b6 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
@@ -62,7 +62,7 @@ MatchMaster::match(const MatchParams &params,
fastos::StopWatch query_latency_time;
query_latency_time.start();
vespalib::DualMergeDirector mergeDirector(threadBundle.size());
- MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize);
+ MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize, matchToolsFactory.createDiversifier());
TimedMatchLoopCommunicator timedCommunicator(communicator);
DocidRangeScheduler::UP scheduler = createScheduler(threadBundle.size(), numSearchPartitions, params.numDocs);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp
index d6eb62c3d3e..5e965084a2d 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp
@@ -12,8 +12,7 @@ using search::queryeval::IRequestContext;
using search::queryeval::AndSearchStrict;
using search::queryeval::NoUnpack;
-namespace proton {
-namespace matching {
+namespace proton::matching {
namespace {
@@ -68,25 +67,16 @@ LimitedSearch::visitMembers(vespalib::ObjectVisitor &visitor) const
visit(visitor, "second", getSecond());
}
-MatchPhaseLimiter::MatchPhaseLimiter(uint32_t docIdLimit,
- Searchable &searchable_attributes,
+MatchPhaseLimiter::MatchPhaseLimiter(uint32_t docIdLimit, Searchable &searchable_attributes,
IRequestContext & requestContext,
- const vespalib::string &attribute_name,
- size_t max_hits, bool descending,
- double max_filter_coverage,
- double samplePercentage, double postFilterMultiplier,
- const vespalib::string &diversity_attribute,
- uint32_t diversity_min_groups,
- double diversify_cutoff_factor,
- AttributeLimiter::DiversityCutoffStrategy diversity_cutoff_strategy)
- : _postFilterMultiplier(postFilterMultiplier),
- _maxFilterCoverage(max_filter_coverage),
- _calculator(max_hits, diversity_min_groups, samplePercentage),
- _limiter_factory(searchable_attributes, requestContext, attribute_name, descending,
- diversity_attribute, diversify_cutoff_factor, diversity_cutoff_strategy),
+ DegradationParams degradation, DiversityParams diversity)
+ : _postFilterMultiplier(degradation.post_filter_multiplier),
+ _maxFilterCoverage(degradation.max_filter_coverage),
+ _calculator(degradation.max_hits, diversity.min_groups, degradation.sample_percentage),
+ _limiter_factory(searchable_attributes, requestContext, degradation.attribute, degradation.descending,
+ diversity.attribute, diversity.cutoff_factor, diversity.cutoff_strategy),
_coverage(docIdLimit)
-{
-}
+{ }
namespace {
@@ -108,8 +98,7 @@ do_limit(AttributeLimiter &limiter_factory, SearchIterator::UP search,
} // namespace proton::matching::<unnamed>
SearchIterator::UP
-MatchPhaseLimiter::maybe_limit(SearchIterator::UP search,
- double match_freq, size_t num_docs)
+MatchPhaseLimiter::maybe_limit(SearchIterator::UP search, double match_freq, size_t num_docs)
{
size_t wanted_num_docs = _calculator.wanted_num_docs(match_freq);
size_t max_filter_docs = static_cast<size_t>(num_docs * _maxFilterCoverage);
@@ -145,5 +134,4 @@ MatchPhaseLimiter::getDocIdSpaceEstimate() const
return _coverage.getEstimate();
}
-} // namespace proton::matching
-} // namespace proton
+}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h
index 165762d5356..b39b6695b7f 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h
@@ -11,8 +11,7 @@
#include <vespa/searchlib/queryeval/blueprint.h>
#include <atomic>
-namespace proton {
-namespace matching {
+namespace proton::matching {
class LimitedSearch : public search::queryeval::SearchIterator {
public:
@@ -69,6 +68,42 @@ struct NoMatchPhaseLimiter : MaybeMatchPhaseLimiter {
size_t getDocIdSpaceEstimate() const override { return std::numeric_limits<size_t>::max(); }
};
+struct DiversityParams { // Value bundle of the diversity settings taken by the MatchPhaseLimiter constructor.
+ using CutoffStrategy = AttributeLimiter::DiversityCutoffStrategy;
+ DiversityParams() : DiversityParams("", 0, 0, CutoffStrategy::LOOSE) { } // Default: empty attribute and zero groups, so enabled() is false.
+ DiversityParams(const vespalib::string & attribute_, uint32_t min_groups_,
+ double cutoff_factor_, CutoffStrategy cutoff_strategy_)
+ : attribute(attribute_),
+ min_groups(min_groups_),
+ cutoff_factor(cutoff_factor_),
+ cutoff_strategy(cutoff_strategy_)
+ { }
+ bool enabled() const { return !attribute.empty() && (min_groups > 0); } // True only with a non-empty attribute name and a non-zero group count.
+ vespalib::string attribute; // Name of the diversity attribute.
+ uint32_t min_groups; // Minimum number of groups; zero disables diversity (see enabled()).
+ double cutoff_factor; // Cutoff factor forwarded to the attribute limiter.
+ CutoffStrategy cutoff_strategy; // Cutoff strategy forwarded to the attribute limiter.
+};
+
+struct DegradationParams { // Value bundle of the match-phase degradation settings taken by the MatchPhaseLimiter constructor.
+ DegradationParams(const vespalib::string &attribute_, size_t max_hits_, bool descending_,
+ double max_filter_coverage_, double sample_percentage_, double post_filter_multiplier_)
+ : attribute(attribute_),
+ max_hits(max_hits_),
+ descending(descending_),
+ max_filter_coverage(max_filter_coverage_),
+ sample_percentage(sample_percentage_),
+ post_filter_multiplier(post_filter_multiplier_)
+ { }
+ bool enabled() const { return !attribute.empty() && (max_hits > 0); } // True only with a non-empty attribute name and a non-zero hit limit.
+ vespalib::string attribute; // Name of the degradation (limiter) attribute.
+ size_t max_hits; // Hit limit; zero disables degradation (see enabled()).
+ bool descending; // Ordering flag forwarded to the attribute limiter.
+ double max_filter_coverage; // Stored by MatchPhaseLimiter as its max filter coverage.
+ double sample_percentage; // Forwarded to the limiter's calculator.
+ double post_filter_multiplier; // Stored by MatchPhaseLimiter as its post filter multiplier.
+};
+
/**
* This class is used when rank phase limiting is configured.
**/
@@ -103,14 +138,7 @@ public:
MatchPhaseLimiter(uint32_t docIdLimit,
search::queryeval::Searchable &searchable_attributes,
search::queryeval::IRequestContext & requestContext,
- const vespalib::string &attribute_name,
- size_t max_hits, bool descending,
- double max_filter_coverage,
- double samplePercentage, double postFilterMultiplier,
- const vespalib::string &diversity_attribute,
- uint32_t diversity_min_groups,
- double diversify_cutoff_factor,
- AttributeLimiter::DiversityCutoffStrategy diversity_cutoff_strategy);
+ DegradationParams degradation, DiversityParams diversity);
bool is_enabled() const override { return true; }
bool was_limited() const override { return _limiter_factory.was_used(); }
size_t sample_hits_per_thread(size_t num_threads) const override {
@@ -121,6 +149,4 @@ public:
size_t getDocIdSpaceEstimate() const override;
};
-} // namespace proton::matching
-} // namespace proton
-
+}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
index e7773c94d72..fe10c1d51c3 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
@@ -3,12 +3,16 @@
#include "match_tools.h"
#include "querynodes.h"
#include <vespa/searchlib/parsequery/stackdumpiterator.h>
+#include <vespa/searchlib/attribute/diversity.h>
#include <vespa/log/log.h>
LOG_SETUP(".proton.matching.match_tools");
#include <vespa/searchlib/query/tree/querytreecreator.h>
using search::attribute::IAttributeContext;
using search::queryeval::IRequestContext;
+using search::queryeval::IDiversifier;
+using search::attribute::diversity::DiversityFilter;
+
using namespace search::fef;
using namespace search::fef::indexproperties::matchphase;
using namespace search::fef::indexproperties::matching;
@@ -37,6 +41,27 @@ void tag_match_data(const HandleRecorder::HandleSet &handles, MatchData &match_d
}
}
+DegradationParams
+extractDegradationParams(const RankSetup &rankSetup, const Properties &rankProperties) // Builds DegradationParams by looking each setting up in rankProperties, passing the rankSetup value as the default.
+{
+ return DegradationParams(DegradationAttribute::lookup(rankProperties, rankSetup.getDegradationAttribute()),
+ DegradationMaxHits::lookup(rankProperties, rankSetup.getDegradationMaxHits()),
+ !DegradationAscendingOrder::lookup(rankProperties, rankSetup.isDegradationOrderAscending()), // Negated: the params store descending, the property expresses ascending.
+ DegradationMaxFilterCoverage::lookup(rankProperties, rankSetup.getDegradationMaxFilterCoverage()),
+ DegradationSamplePercentage::lookup(rankProperties, rankSetup.getDegradationSamplePercentage()),
+ DegradationPostFilterMultiplier::lookup(rankProperties, rankSetup.getDegradationPostFilterMultiplier()));
+
+}
+
+DiversityParams
+extractDiversityParams(const RankSetup &rankSetup, const Properties &rankProperties) // Builds DiversityParams by looking each setting up in rankProperties, passing the rankSetup value as the default.
+{
+ return DiversityParams(DiversityAttribute::lookup(rankProperties, rankSetup.getDiversityAttribute()),
+ DiversityMinGroups::lookup(rankProperties, rankSetup.getDiversityMinGroups()),
+ DiversityCutoffFactor::lookup(rankProperties, rankSetup.getDiversityCutoffFactor()),
+ AttributeLimiter::toDiversityCutoffStrategy(DiversityCutoffStrategy::lookup(rankProperties, rankSetup.getDiversityCutoffStrategy()))); // The strategy is looked up as a string and converted to the enum.
+}
+
} // namespace proton::matching::<unnamed>
void
@@ -138,9 +163,10 @@ MatchToolsFactory(QueryLimiter & queryLimiter,
_queryEnv(indexEnv, attributeContext, rankProperties),
_mdl(),
_rankSetup(rankSetup),
- _featureOverrides(featureOverrides)
+ _featureOverrides(featureOverrides),
+ _diversityParams(),
+ _valid(_query.buildTree(queryStack, location, viewResolver, indexEnv))
{
- _valid = _query.buildTree(queryStack, location, viewResolver, indexEnv);
if (_valid) {
_query.extractTerms(_queryEnv.terms());
_query.extractLocations(_queryEnv.locations());
@@ -150,30 +176,12 @@ MatchToolsFactory(QueryLimiter & queryLimiter,
_query.fetchPostings();
_query.freeze();
_rankSetup.prepareSharedState(_queryEnv, _queryEnv.getObjectStore());
- vespalib::string limit_attribute = DegradationAttribute::lookup(rankProperties);
- size_t limit_maxhits = DegradationMaxHits::lookup(rankProperties);
- bool limit_ascending = DegradationAscendingOrder::lookup(rankProperties);
- double limit_max_filter_coverage = DegradationMaxFilterCoverage::lookup(rankProperties);
- double samplePercentage = DegradationSamplePercentage::lookup(rankProperties);
- double postFilterMultiplier = DegradationPostFilterMultiplier::lookup(rankProperties);
- vespalib::string diversity_attribute = DiversityAttribute::lookup(rankProperties);
- uint32_t diversity_min_groups = DiversityMinGroups::lookup(rankProperties);
- double diversity_cutoff_factor = DiversityCutoffFactor::lookup(rankProperties);
- vespalib::string diversity_cutoff_strategy = DiversityCutoffStrategy::lookup(rankProperties);
- if (!limit_attribute.empty() && limit_maxhits > 0) {
- _match_limiter = std::make_unique<MatchPhaseLimiter>(metaStore.getCommittedDocIdLimit(), searchContext.getAttributes(), _requestContext,
- limit_attribute, limit_maxhits, !limit_ascending, limit_max_filter_coverage,
- samplePercentage, postFilterMultiplier,
- diversity_attribute, diversity_min_groups, diversity_cutoff_factor,
- AttributeLimiter::toDiversityCutoffStrategy(diversity_cutoff_strategy));
- } else if (_rankSetup.hasMatchPhaseDegradation()) {
- _match_limiter = std::make_unique<MatchPhaseLimiter>(metaStore.getCommittedDocIdLimit(), searchContext.getAttributes(), _requestContext,
- _rankSetup.getDegradationAttribute(), _rankSetup.getDegradationMaxHits(), !_rankSetup.isDegradationOrderAscending(),
- _rankSetup.getDegradationMaxFilterCoverage(),
- _rankSetup.getDegradationSamplePercentage(), _rankSetup.getDegradationPostFilterMultiplier(),
- _rankSetup.getDiversityAttribute(), _rankSetup.getDiversityMinGroups(),
- _rankSetup.getDiversityCutoffFactor(),
- AttributeLimiter::toDiversityCutoffStrategy(_rankSetup.getDiversityCutoffStrategy()));
+ _diversityParams = extractDiversityParams(_rankSetup, rankProperties);
+ DegradationParams degradationParams = extractDegradationParams(_rankSetup, rankProperties);
+
+ if (degradationParams.enabled()) {
+ _match_limiter = std::make_unique<MatchPhaseLimiter>(metaStore.getCommittedDocIdLimit(), searchContext.getAttributes(),
+ _requestContext, degradationParams, _diversityParams);
}
}
if ( ! _match_limiter) {
@@ -191,4 +199,19 @@ MatchToolsFactory::createMatchTools() const
*_match_limiter, _queryEnv, _mdl, _rankSetup, _featureOverrides);
}
+std::unique_ptr<IDiversifier> MatchToolsFactory::createDiversifier() const // Returns a diversifier built from the stored diversity params, or an empty pointer when diversity is off or the attribute is unavailable.
+{
+ if ( !_diversityParams.enabled() ) { // Empty attribute name or zero min groups: no diversifier.
+ return std::unique_ptr<IDiversifier>();
+ }
+ auto attr = _requestContext.getAttribute(_diversityParams.attribute);
+ if ( !attr) { // Attribute lookup failed: warn and fall back to no diversifier.
+ LOG(warning, "Skipping diversity due to no %s attribute.", _diversityParams.attribute.c_str());
+ return std::unique_ptr<IDiversifier>();
+ }
+ size_t max_per_group = _rankSetup.getHeapSize()/_diversityParams.min_groups; // Divisor is non-zero thanks to the enabled() check above. NOTE(review): presumably integer division, giving 0 when the heap size is below min_groups - confirm this is acceptable.
+ return DiversityFilter::create(*attr, _rankSetup.getHeapSize(), max_per_group, _diversityParams.min_groups,
+ _diversityParams.cutoff_strategy == DiversityParams::CutoffStrategy::STRICT); // Last argument is true only for the STRICT cutoff strategy.
+}
+
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
index f47eda16cc1..97baafb8bc3 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
@@ -16,6 +16,8 @@
#include <vespa/searchlib/queryeval/blueprint.h>
#include <vespa/searchlib/fef/fef.h>
#include <vespa/searchlib/common/idocumentmetastore.h>
+#include <vespa/searchlib/queryeval/idiversifier.h>
+
namespace proton::matching {
@@ -71,16 +73,17 @@ public:
class MatchToolsFactory : public vespalib::noncopyable
{
private:
- QueryLimiter & _queryLimiter;
- RequestContext _requestContext;
- const vespalib::Doom _hardDoom;
- Query _query;
- MaybeMatchPhaseLimiter::UP _match_limiter;
- QueryEnvironment _queryEnv;
- search::fef::MatchDataLayout _mdl;
- const search::fef::RankSetup & _rankSetup;
- const search::fef::Properties & _featureOverrides;
- bool _valid;
+ QueryLimiter & _queryLimiter;
+ RequestContext _requestContext;
+ const vespalib::Doom _hardDoom;
+ Query _query;
+ MaybeMatchPhaseLimiter::UP _match_limiter;
+ QueryEnvironment _queryEnv;
+ search::fef::MatchDataLayout _mdl;
+ const search::fef::RankSetup & _rankSetup;
+ const search::fef::Properties & _featureOverrides;
+ DiversityParams _diversityParams;
+ bool _valid;
public:
typedef std::unique_ptr<MatchToolsFactory> UP;
@@ -101,6 +104,7 @@ public:
bool valid() const { return _valid; }
const MaybeMatchPhaseLimiter &match_limiter() const { return *_match_limiter; }
MatchTools::UP createMatchTools() const;
+ std::unique_ptr<search::queryeval::IDiversifier> createDiversifier() const;
search::queryeval::Blueprint::HitEstimate estimate() const { return _query.estimate(); }
bool has_first_phase_rank() const { return !_rankSetup.getFirstPhaseRank().empty(); }
};
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
index 493b49be0b0..69504c4cc71 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
@@ -272,63 +272,63 @@ const vespalib::string DiversityCutoffStrategy::NAME("vespa.matchphase.diversity
const vespalib::string DiversityCutoffStrategy::DEFAULT_VALUE("loose");
vespalib::string
-DegradationAttribute::lookup(const Properties &props)
+DegradationAttribute::lookup(const Properties &props, const vespalib::string & defaultValue)
{
- return lookupString(props, NAME, DEFAULT_VALUE);
+ return lookupString(props, NAME, defaultValue);
}
bool
-DegradationAscendingOrder::lookup(const Properties &props)
+DegradationAscendingOrder::lookup(const Properties &props, bool defaultValue)
{
- return lookupBool(props, NAME, DEFAULT_VALUE);
+ return lookupBool(props, NAME, defaultValue);
}
uint32_t
-DegradationMaxHits::lookup(const Properties &props)
+DegradationMaxHits::lookup(const Properties &props, uint32_t defaultValue)
{
- return lookupUint32(props, NAME, DEFAULT_VALUE);
+ return lookupUint32(props, NAME, defaultValue);
}
double
-DegradationSamplePercentage::lookup(const Properties &props)
+DegradationSamplePercentage::lookup(const Properties &props, double defaultValue)
{
- return lookupDouble(props, NAME, DEFAULT_VALUE);
+ return lookupDouble(props, NAME, defaultValue);
}
double
-DegradationMaxFilterCoverage::lookup(const Properties &props)
+DegradationMaxFilterCoverage::lookup(const Properties &props, double defaultValue)
{
- return lookupDouble(props, NAME, DEFAULT_VALUE);
+ return lookupDouble(props, NAME, defaultValue);
}
double
-DegradationPostFilterMultiplier::lookup(const Properties &props)
+DegradationPostFilterMultiplier::lookup(const Properties &props, double defaultValue)
{
- return lookupDouble(props, NAME, DEFAULT_VALUE);
+ return lookupDouble(props, NAME, defaultValue);
}
vespalib::string
-DiversityAttribute::lookup(const Properties &props)
+DiversityAttribute::lookup(const Properties &props, const vespalib::string & defaultValue)
{
- return lookupString(props, NAME, DEFAULT_VALUE);
+ return lookupString(props, NAME, defaultValue);
}
uint32_t
-DiversityMinGroups::lookup(const Properties &props)
+DiversityMinGroups::lookup(const Properties &props, uint32_t defaultValue)
{
- return lookupUint32(props, NAME, DEFAULT_VALUE);
+ return lookupUint32(props, NAME, defaultValue);
}
double
-DiversityCutoffFactor::lookup(const Properties &props)
+DiversityCutoffFactor::lookup(const Properties &props, double defaultValue)
{
- return lookupDouble(props, NAME, DEFAULT_VALUE);
+ return lookupDouble(props, NAME, defaultValue);
}
vespalib::string
-DiversityCutoffStrategy::lookup(const Properties &props)
+DiversityCutoffStrategy::lookup(const Properties &props, const vespalib::string & defaultValue)
{
- return lookupString(props, NAME, DEFAULT_VALUE);
+ return lookupString(props, NAME, defaultValue);
}
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h
index 38e0eca7548..5140e811e1c 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.h
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h
@@ -174,7 +174,8 @@ namespace matchphase {
struct DegradationAttribute {
static const vespalib::string NAME;
static const vespalib::string DEFAULT_VALUE;
- static vespalib::string lookup(const Properties &props);
+ static vespalib::string lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); }
+ static vespalib::string lookup(const Properties &props, const vespalib::string & defaultValue);
};
/**
@@ -183,7 +184,8 @@ namespace matchphase {
struct DegradationAscendingOrder {
static const vespalib::string NAME;
static const bool DEFAULT_VALUE;
- static bool lookup(const Properties &props);
+ static bool lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); }
+ static bool lookup(const Properties &props, bool defaultValue);
};
/**
@@ -192,7 +194,8 @@ namespace matchphase {
struct DegradationMaxHits {
static const vespalib::string NAME;
static const uint32_t DEFAULT_VALUE;
- static uint32_t lookup(const Properties &props);
+ static uint32_t lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); }
+ static uint32_t lookup(const Properties &props, uint32_t defaultValue);
};
/**
@@ -201,13 +204,15 @@ namespace matchphase {
struct DegradationSamplePercentage {
static const vespalib::string NAME;
static const double DEFAULT_VALUE;
- static double lookup(const Properties &props);
+ static double lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); }
+ static double lookup(const Properties &props, double defaultValue);
};
struct DegradationMaxFilterCoverage {
static const vespalib::string NAME;
static const double DEFAULT_VALUE;
- static double lookup(const Properties &props);
+ static double lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); }
+ static double lookup(const Properties &props, double defaultValue);
};
/**
@@ -217,7 +222,8 @@ namespace matchphase {
struct DegradationPostFilterMultiplier {
static const vespalib::string NAME;
static const double DEFAULT_VALUE;
- static double lookup(const Properties &props);
+ static double lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); }
+ static double lookup(const Properties &props, double defaultValue);
};
/**
@@ -228,7 +234,8 @@ namespace matchphase {
struct DiversityAttribute {
static const vespalib::string NAME;
static const vespalib::string DEFAULT_VALUE;
- static vespalib::string lookup(const Properties &props);
+ static vespalib::string lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); }
+ static vespalib::string lookup(const Properties &props, const vespalib::string & defaultValue);
};
/**
@@ -239,18 +246,21 @@ namespace matchphase {
struct DiversityMinGroups {
static const vespalib::string NAME;
static const uint32_t DEFAULT_VALUE;
- static uint32_t lookup(const Properties &props);
+ static uint32_t lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); }
+ static uint32_t lookup(const Properties &props, uint32_t defaultValue);
};
struct DiversityCutoffFactor {
static const vespalib::string NAME;
static const double DEFAULT_VALUE;
- static double lookup(const Properties &props);
+ static double lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); }
+ static double lookup(const Properties &props, double defaultValue);
};
struct DiversityCutoffStrategy {
static const vespalib::string NAME;
static const vespalib::string DEFAULT_VALUE;
- static vespalib::string lookup(const Properties &props);
+ static vespalib::string lookup(const Properties &props) { return lookup(props, DEFAULT_VALUE); }
+ static vespalib::string lookup(const Properties &props, const vespalib::string & defaultValue);
};
} // namespace matchphase