aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@oath.com>2018-08-09 15:37:04 +0200
committerHenning Baldersheim <balder@oath.com>2018-08-09 15:37:46 +0200
commit49b98a062990920ef905832ac85d277269a07361 (patch)
tree74946595135a3ba8c0e1b849adb61668a3915e18
parenta6f9ea782e9921498c6a890697824d7d16065f24 (diff)
Create diversity filter on demand if diversity is specified.
-rw-r--r--searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp20
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h2
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_master.cpp2
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp34
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h50
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp49
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_tools.h24
7 files changed, 114 insertions, 67 deletions
diff --git a/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp
index b153b2ca5e0..d1e7adfedb8 100644
--- a/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp
+++ b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp
@@ -209,7 +209,9 @@ TEST("require that no limiter has no behavior") {
TEST("require that the match phase limiter may chose not to limit the query") {
FakeRequestContext requestContext;
MockSearchable searchable;
- MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, "limiter_attribute", 1000, true, 1.0, 0.2, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE);
+ MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,
+ DegradationParams("limiter_attribute", 1000, true, 1.0, 0.2, 1.0),
+ DiversityParams("", 1, 10.0, AttributeLimiter::LOOSE));
MaybeMatchPhaseLimiter &limiter = yes_limiter;
EXPECT_TRUE(limiter.is_enabled());
EXPECT_EQUAL(20u, limiter.sample_hits_per_thread(10));
@@ -229,7 +231,9 @@ struct MaxFilterCoverageLimiterFixture {
MockSearchable searchable;
MatchPhaseLimiter::UP getMaxFilterCoverageLimiter() {
- MatchPhaseLimiter::UP yes_limiter(new MatchPhaseLimiter(10000, searchable, requestContext, "limiter_attribute", 10000, true, 0.05, 1.0, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE));
+ auto yes_limiter = std::make_unique<MatchPhaseLimiter>(10000, searchable, requestContext,
+ DegradationParams("limiter_attribute", 10000, true, 0.05, 1.0, 1.0),
+ DiversityParams("", 1, 10.0, AttributeLimiter::LOOSE));
MaybeMatchPhaseLimiter &limiter = *yes_limiter;
EXPECT_TRUE(limiter.is_enabled());
EXPECT_EQUAL(1000u, limiter.sample_hits_per_thread(10));
@@ -271,7 +275,9 @@ TEST_F("require that the match phase limiter may chose to limit the query even w
TEST("require that the match phase limiter is able to pre-limit the query") {
FakeRequestContext requestContext;
MockSearchable searchable;
- MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, "limiter_attribute", 500, true, 1.0, 0.2, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE);
+ MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,
+ DegradationParams("limiter_attribute", 500, true, 1.0, 0.2, 1.0),
+ DiversityParams("", 1, 10.0, AttributeLimiter::LOOSE));
MaybeMatchPhaseLimiter &limiter = yes_limiter;
EXPECT_TRUE(limiter.is_enabled());
EXPECT_EQUAL(12u, limiter.sample_hits_per_thread(10));
@@ -301,7 +307,9 @@ TEST("require that the match phase limiter is able to pre-limit the query") {
TEST("require that the match phase limiter is able to post-limit the query") {
MockSearchable searchable;
FakeRequestContext requestContext;
- MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,"limiter_attribute", 1500, true, 1.0, 0.2, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE);
+ MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,
+ DegradationParams("limiter_attribute", 1500, true, 1.0, 0.2, 1.0),
+ DiversityParams("", 1, 10.0, AttributeLimiter::LOOSE));
MaybeMatchPhaseLimiter &limiter = yes_limiter;
EXPECT_TRUE(limiter.is_enabled());
EXPECT_EQUAL(30u, limiter.sample_hits_per_thread(10));
@@ -331,7 +339,9 @@ void verifyDiversity(AttributeLimiter::DiversityCutoffStrategy strategy)
{
MockSearchable searchable;
FakeRequestContext requestContext;
- MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,"limiter_attribute", 500, true, 1.0, 0.2, 1.0, "category", 10, 13.1, strategy);
+ MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,
+ DegradationParams("limiter_attribute", 500, true, 1.0, 0.2, 1.0),
+ DiversityParams("category", 10, 13.1, strategy));
MaybeMatchPhaseLimiter &limiter = yes_limiter;
SearchIterator::UP search = limiter.maybe_limit(prepare(new MockSearch("search")), 0.1, 100000);
limiter.updateDocIdSpaceEstimate(1000, 9000);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h b/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h
index c50a6e0dcb8..0c23ea05fbd 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/attribute_limiter.h
@@ -11,7 +11,7 @@
#include <mutex>
namespace proton::matching {
-
+
/**
* This class is responsible for creating attribute-based search
* iterators that are used to limit the search space. Each search
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
index 920f84a21b0..b37f2c002b6 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
@@ -62,7 +62,7 @@ MatchMaster::match(const MatchParams &params,
fastos::StopWatch query_latency_time;
query_latency_time.start();
vespalib::DualMergeDirector mergeDirector(threadBundle.size());
- MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize);
+ MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize, matchToolsFactory.createDiversifier());
TimedMatchLoopCommunicator timedCommunicator(communicator);
DocidRangeScheduler::UP scheduler = createScheduler(threadBundle.size(), numSearchPartitions, params.numDocs);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp
index d6eb62c3d3e..9834432fecd 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.cpp
@@ -12,8 +12,7 @@ using search::queryeval::IRequestContext;
using search::queryeval::AndSearchStrict;
using search::queryeval::NoUnpack;
-namespace proton {
-namespace matching {
+namespace proton::matching {
namespace {
@@ -68,25 +67,16 @@ LimitedSearch::visitMembers(vespalib::ObjectVisitor &visitor) const
visit(visitor, "second", getSecond());
}
-MatchPhaseLimiter::MatchPhaseLimiter(uint32_t docIdLimit,
- Searchable &searchable_attributes,
+MatchPhaseLimiter::MatchPhaseLimiter(uint32_t docIdLimit, Searchable &searchable_attributes,
IRequestContext & requestContext,
- const vespalib::string &attribute_name,
- size_t max_hits, bool descending,
- double max_filter_coverage,
- double samplePercentage, double postFilterMultiplier,
- const vespalib::string &diversity_attribute,
- uint32_t diversity_min_groups,
- double diversify_cutoff_factor,
- AttributeLimiter::DiversityCutoffStrategy diversity_cutoff_strategy)
- : _postFilterMultiplier(postFilterMultiplier),
- _maxFilterCoverage(max_filter_coverage),
- _calculator(max_hits, diversity_min_groups, samplePercentage),
- _limiter_factory(searchable_attributes, requestContext, attribute_name, descending,
- diversity_attribute, diversify_cutoff_factor, diversity_cutoff_strategy),
+ DegradationParams degradation, DiversityParams diversity)
+ : _postFilterMultiplier(degradation._post_filter_multiplier),
+ _maxFilterCoverage(degradation._max_filter_coverage),
+ _calculator(degradation._max_hits, diversity._min_groups, degradation._sample_percentage),
+ _limiter_factory(searchable_attributes, requestContext, degradation._attribute, degradation._descending,
+ diversity._attribute, diversity._cutoff_factor, diversity._cutoff_strategy),
_coverage(docIdLimit)
-{
-}
+{ }
namespace {
@@ -108,8 +98,7 @@ do_limit(AttributeLimiter &limiter_factory, SearchIterator::UP search,
} // namespace proton::matching::<unnamed>
SearchIterator::UP
-MatchPhaseLimiter::maybe_limit(SearchIterator::UP search,
- double match_freq, size_t num_docs)
+MatchPhaseLimiter::maybe_limit(SearchIterator::UP search, double match_freq, size_t num_docs)
{
size_t wanted_num_docs = _calculator.wanted_num_docs(match_freq);
size_t max_filter_docs = static_cast<size_t>(num_docs * _maxFilterCoverage);
@@ -145,5 +134,4 @@ MatchPhaseLimiter::getDocIdSpaceEstimate() const
return _coverage.getEstimate();
}
-} // namespace proton::matching
-} // namespace proton
+}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h
index 165762d5356..b0d62cb1281 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h
@@ -11,8 +11,7 @@
#include <vespa/searchlib/queryeval/blueprint.h>
#include <atomic>
-namespace proton {
-namespace matching {
+namespace proton::matching {
class LimitedSearch : public search::queryeval::SearchIterator {
public:
@@ -69,6 +68,40 @@ struct NoMatchPhaseLimiter : MaybeMatchPhaseLimiter {
size_t getDocIdSpaceEstimate() const override { return std::numeric_limits<size_t>::max(); }
};
+struct DiversityParams {
+ using CutoffStrategy = AttributeLimiter::DiversityCutoffStrategy;
+ DiversityParams(const vespalib::string & attribute, uint32_t min_groups,
+ double cutoff_factor, CutoffStrategy cutoff_strategy)
+ : _attribute(attribute),
+ _min_groups(min_groups),
+ _cutoff_factor(cutoff_factor),
+ _cutoff_strategy(cutoff_strategy)
+ { }
+
+ vespalib::string _attribute;
+ uint32_t _min_groups;
+ double _cutoff_factor;
+ CutoffStrategy _cutoff_strategy;
+};
+
+struct DegradationParams {
+ DegradationParams(const vespalib::string &attribute, size_t max_hits, bool descending, double max_filter_coverage,
+ double sample_percentage, double post_filter_multiplier)
+ : _attribute(attribute),
+ _max_hits(max_hits),
+ _descending(descending),
+ _max_filter_coverage(max_filter_coverage),
+ _sample_percentage(sample_percentage),
+ _post_filter_multiplier(post_filter_multiplier)
+ { }
+ vespalib::string _attribute;
+ size_t _max_hits;
+ bool _descending;
+ double _max_filter_coverage;
+ double _sample_percentage;
+ double _post_filter_multiplier;
+};
+
/**
* This class is is used when rank phase limiting is configured.
**/
@@ -103,14 +136,7 @@ public:
MatchPhaseLimiter(uint32_t docIdLimit,
search::queryeval::Searchable &searchable_attributes,
search::queryeval::IRequestContext & requestContext,
- const vespalib::string &attribute_name,
- size_t max_hits, bool descending,
- double max_filter_coverage,
- double samplePercentage, double postFilterMultiplier,
- const vespalib::string &diversity_attribute,
- uint32_t diversity_min_groups,
- double diversify_cutoff_factor,
- AttributeLimiter::DiversityCutoffStrategy diversity_cutoff_strategy);
+ DegradationParams degradation, DiversityParams diversity);
bool is_enabled() const override { return true; }
bool was_limited() const override { return _limiter_factory.was_used(); }
size_t sample_hits_per_thread(size_t num_threads) const override {
@@ -121,6 +147,4 @@ public:
size_t getDocIdSpaceEstimate() const override;
};
-} // namespace proton::matching
-} // namespace proton
-
+}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
index e7773c94d72..b536144acf6 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
@@ -3,12 +3,16 @@
#include "match_tools.h"
#include "querynodes.h"
#include <vespa/searchlib/parsequery/stackdumpiterator.h>
+#include <vespa/searchlib/attribute/diversity.h>
#include <vespa/log/log.h>
LOG_SETUP(".proton.matching.match_tools");
#include <vespa/searchlib/query/tree/querytreecreator.h>
using search::attribute::IAttributeContext;
using search::queryeval::IRequestContext;
+using search::queryeval::IDiversifier;
+using search::attribute::diversity::DiversityFilter;
+
using namespace search::fef;
using namespace search::fef::indexproperties::matchphase;
using namespace search::fef::indexproperties::matching;
@@ -138,9 +142,10 @@ MatchToolsFactory(QueryLimiter & queryLimiter,
_queryEnv(indexEnv, attributeContext, rankProperties),
_mdl(),
_rankSetup(rankSetup),
- _featureOverrides(featureOverrides)
+ _featureOverrides(featureOverrides),
+ _diversityParams(),
+ _valid(_query.buildTree(queryStack, location, viewResolver, indexEnv))
{
- _valid = _query.buildTree(queryStack, location, viewResolver, indexEnv);
if (_valid) {
_query.extractTerms(_queryEnv.terms());
_query.extractLocations(_queryEnv.locations());
@@ -161,19 +166,21 @@ MatchToolsFactory(QueryLimiter & queryLimiter,
double diversity_cutoff_factor = DiversityCutoffFactor::lookup(rankProperties);
vespalib::string diversity_cutoff_strategy = DiversityCutoffStrategy::lookup(rankProperties);
if (!limit_attribute.empty() && limit_maxhits > 0) {
- _match_limiter = std::make_unique<MatchPhaseLimiter>(metaStore.getCommittedDocIdLimit(), searchContext.getAttributes(), _requestContext,
- limit_attribute, limit_maxhits, !limit_ascending, limit_max_filter_coverage,
- samplePercentage, postFilterMultiplier,
- diversity_attribute, diversity_min_groups, diversity_cutoff_factor,
- AttributeLimiter::toDiversityCutoffStrategy(diversity_cutoff_strategy));
+ _diversityParams = std::make_unique<DiversityParams>(diversity_attribute, diversity_min_groups, diversity_cutoff_factor,
+ AttributeLimiter::toDiversityCutoffStrategy(diversity_cutoff_strategy));
+ DegradationParams degradationParams(limit_attribute, limit_maxhits, !limit_ascending, limit_max_filter_coverage,
+ samplePercentage, postFilterMultiplier);
+ _match_limiter = std::make_unique<MatchPhaseLimiter>(metaStore.getCommittedDocIdLimit(), searchContext.getAttributes(),
+ _requestContext, degradationParams, *_diversityParams);
} else if (_rankSetup.hasMatchPhaseDegradation()) {
- _match_limiter = std::make_unique<MatchPhaseLimiter>(metaStore.getCommittedDocIdLimit(), searchContext.getAttributes(), _requestContext,
- _rankSetup.getDegradationAttribute(), _rankSetup.getDegradationMaxHits(), !_rankSetup.isDegradationOrderAscending(),
- _rankSetup.getDegradationMaxFilterCoverage(),
- _rankSetup.getDegradationSamplePercentage(), _rankSetup.getDegradationPostFilterMultiplier(),
- _rankSetup.getDiversityAttribute(), _rankSetup.getDiversityMinGroups(),
- _rankSetup.getDiversityCutoffFactor(),
- AttributeLimiter::toDiversityCutoffStrategy(_rankSetup.getDiversityCutoffStrategy()));
+ _diversityParams = std::make_unique<DiversityParams>(_rankSetup.getDiversityAttribute(), _rankSetup.getDiversityMinGroups(),
+ _rankSetup.getDiversityCutoffFactor(),
+ AttributeLimiter::toDiversityCutoffStrategy(_rankSetup.getDiversityCutoffStrategy()));
+ DegradationParams degradationParams(_rankSetup.getDegradationAttribute(), _rankSetup.getDegradationMaxHits(), !_rankSetup.isDegradationOrderAscending(),
+ _rankSetup.getDegradationMaxFilterCoverage(), _rankSetup.getDegradationSamplePercentage(),
+ _rankSetup.getDegradationPostFilterMultiplier());
+ _match_limiter = std::make_unique<MatchPhaseLimiter>(metaStore.getCommittedDocIdLimit(), searchContext.getAttributes(),
+ _requestContext, degradationParams, *_diversityParams);
}
}
if ( ! _match_limiter) {
@@ -191,4 +198,18 @@ MatchToolsFactory::createMatchTools() const
*_match_limiter, _queryEnv, _mdl, _rankSetup, _featureOverrides);
}
+std::unique_ptr<IDiversifier> MatchToolsFactory::createDiversifier() const
+{
+ if (!_diversityParams || _diversityParams->_attribute.empty()) {
+ return std::unique_ptr<IDiversifier>();
+ }
+ auto attr = _requestContext.getAttribute(_diversityParams->_attribute);
+ if ( !attr) {
+ LOG(warning, "Skipping diversity due to no %s attribute.", _diversityParams->_attribute.c_str());
+ return std::unique_ptr<IDiversifier>();
+ }
+ size_t max_per_group = _rankSetup.getHeapSize()/_diversityParams->_min_groups;
+ return DiversityFilter::create(*attr, _rankSetup.getHeapSize(), max_per_group, _diversityParams->_min_groups, true);
+}
+
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
index f47eda16cc1..33239f5b57f 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
@@ -16,6 +16,8 @@
#include <vespa/searchlib/queryeval/blueprint.h>
#include <vespa/searchlib/fef/fef.h>
#include <vespa/searchlib/common/idocumentmetastore.h>
+#include <vespa/searchlib/queryeval/idiversifier.h>
+
namespace proton::matching {
@@ -71,16 +73,17 @@ public:
class MatchToolsFactory : public vespalib::noncopyable
{
private:
- QueryLimiter & _queryLimiter;
- RequestContext _requestContext;
- const vespalib::Doom _hardDoom;
- Query _query;
- MaybeMatchPhaseLimiter::UP _match_limiter;
- QueryEnvironment _queryEnv;
- search::fef::MatchDataLayout _mdl;
- const search::fef::RankSetup & _rankSetup;
- const search::fef::Properties & _featureOverrides;
- bool _valid;
+ QueryLimiter & _queryLimiter;
+ RequestContext _requestContext;
+ const vespalib::Doom _hardDoom;
+ Query _query;
+ MaybeMatchPhaseLimiter::UP _match_limiter;
+ QueryEnvironment _queryEnv;
+ search::fef::MatchDataLayout _mdl;
+ const search::fef::RankSetup & _rankSetup;
+ const search::fef::Properties & _featureOverrides;
+ std::unique_ptr<DiversityParams> _diversityParams;
+ bool _valid;
public:
typedef std::unique_ptr<MatchToolsFactory> UP;
@@ -101,6 +104,7 @@ public:
bool valid() const { return _valid; }
const MaybeMatchPhaseLimiter &match_limiter() const { return *_match_limiter; }
MatchTools::UP createMatchTools() const;
+ std::unique_ptr<search::queryeval::IDiversifier> createDiversifier() const;
search::queryeval::Blueprint::HitEstimate estimate() const { return _query.estimate(); }
bool has_first_phase_rank() const { return !_rankSetup.getFirstPhaseRank().empty(); }
};