aboutsummaryrefslogtreecommitdiffstats
path: root/searchcore/src/vespa/searchcore/proton/matching/match_phase_limiter.h
blob: 7f61723c0ce340fa1040782f5426f13762c125de (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include "match_phase_limit_calculator.h"
#include "attribute_limiter.h"
#include <vespa/searchlib/queryeval/searchiterator.h>
#include <atomic>

namespace proton::matching {

class RangeQueryLocator;

class LimitedSearch : public search::queryeval::SearchIterator {
public:
    LimitedSearch(SearchIterator::UP first, SearchIterator::UP second) :
        _first(std::move(first)),
        _second(std::move(second))
    {
    }
    void doSeek(uint32_t docId) override;
    void initRange(uint32_t begin, uint32_t end) override;
    void visitMembers(vespalib::ObjectVisitor &visitor) const override;
    const SearchIterator &  getFirst() const { return *_first; }
    const SearchIterator & getSecond() const { return *_second; }
    SearchIterator &  getFirst() { return *_first; }
    SearchIterator & getSecond() { return *_second; }
private:
    SearchIterator::UP _first;
    SearchIterator::UP _second;
};

/**
 * Interface defining how we intend to use the match phase limiter
 * functionality. The first step is to check whether we should enable
 * this functionality at all. If enabled; we need to match some hits
 * in each match thread for estimation purposes. The total number of
 * matches (hits) and the total document space searched (docs) are
 * aggregated across all match threads and each match thread will use
 * the maybe_limit function to possibly augment its iterator tree to
 * limit the number of matches.
 **/
struct MaybeMatchPhaseLimiter {
    using Cursor = vespalib::slime::Cursor;
    using SearchIterator = search::queryeval::SearchIterator;
    using UP = std::unique_ptr<MaybeMatchPhaseLimiter>;
    virtual bool is_enabled() const = 0;
    virtual bool was_limited() const = 0;
    virtual size_t sample_hits_per_thread(size_t num_threads) const = 0;
    virtual SearchIterator::UP maybe_limit(SearchIterator::UP search, double match_freq, size_t num_docs, Cursor * trace) = 0;
    virtual void updateDocIdSpaceEstimate(size_t searchedDocIdSpace, size_t remainingDocIdSpace) = 0;
    virtual size_t getDocIdSpaceEstimate() const = 0;
    virtual ~MaybeMatchPhaseLimiter() = default;
};

/**
 * This class is used when match phase limiting is not configured.
 **/
struct NoMatchPhaseLimiter : MaybeMatchPhaseLimiter {
    bool is_enabled() const override { return false; }
    bool was_limited() const override { return false; }
    size_t sample_hits_per_thread(size_t) const override { return 0; }
    SearchIterator::UP maybe_limit(SearchIterator::UP search, double, size_t, Cursor *) override {
        return search;
    }
    void updateDocIdSpaceEstimate(size_t, size_t) override { }
    size_t getDocIdSpaceEstimate() const override { return std::numeric_limits<size_t>::max(); }
};

struct DiversityParams {
    using CutoffStrategy = AttributeLimiter::DiversityCutoffStrategy;
    DiversityParams() : DiversityParams("", 0, 0, CutoffStrategy::LOOSE) { }
    DiversityParams(const vespalib::string & attribute_, uint32_t min_groups_,
                    double cutoff_factor_, CutoffStrategy cutoff_strategy_)
        : attribute(attribute_),
          min_groups(min_groups_),
          cutoff_factor(cutoff_factor_),
          cutoff_strategy(cutoff_strategy_)
    { }
    bool enabled() const { return !attribute.empty() && (min_groups > 0); }
    vespalib::string  attribute;
    uint32_t          min_groups;
    double            cutoff_factor;
    CutoffStrategy    cutoff_strategy;
};

struct DegradationParams {
    DegradationParams(const vespalib::string &attribute_, size_t max_hits_, bool descending_,
                      double max_filter_coverage_, double sample_percentage_, double post_filter_multiplier_)
        : attribute(attribute_),
          descending(descending_),
          max_hits(max_hits_),
          max_filter_coverage(max_filter_coverage_),
          sample_percentage(sample_percentage_),
          post_filter_multiplier(post_filter_multiplier_)
    { }
    bool enabled() const { return !attribute.empty() && (max_hits > 0); }
    vespalib::string attribute;
    bool             descending;
    size_t           max_hits;
    double           max_filter_coverage;
    double           sample_percentage;
    double           post_filter_multiplier;
};

/**
 * This class is is used when rank phase limiting is configured.
 **/
class MatchPhaseLimiter : public MaybeMatchPhaseLimiter
{
private:
    class Coverage {
    public:
        explicit Coverage(uint32_t docIdLimit) :
            _docIdLimit(docIdLimit),
            _searched(0)
        { }
        void update(size_t searched, size_t remaining, ssize_t hits) {
            if (hits >= 0) {
                _searched += (searched + (hits*remaining)/_docIdLimit);
            } else {
                _searched += (searched + remaining);
            }
        }
        uint32_t getEstimate() const { return _searched; }
    private:
        const uint32_t        _docIdLimit;
        std::atomic<uint32_t> _searched;
    };
    const double              _postFilterMultiplier;
    const double              _maxFilterCoverage;
    MatchPhaseLimitCalculator _calculator;
    AttributeLimiter          _limiter_factory;
    Coverage                  _coverage;


public:
    MatchPhaseLimiter(uint32_t docIdLimit,
                      const RangeQueryLocator & rangeQueryLocator,
                      search::queryeval::Searchable &searchable_attributes,
                      search::queryeval::IRequestContext & requestContext,
                      const DegradationParams & degradation,
                      const DiversityParams & diversity);
    ~MatchPhaseLimiter() override;
    bool is_enabled() const override { return true; }
    bool was_limited() const override { return _limiter_factory.was_used(); }
    size_t sample_hits_per_thread(size_t num_threads) const override {
        return _calculator.sample_hits_per_thread(num_threads);
    }
    SearchIterator::UP maybe_limit(SearchIterator::UP search, double match_freq, size_t num_docs, Cursor * trace) override;
    void updateDocIdSpaceEstimate(size_t searchedDocIdSpace, size_t remainingDocIdSpace) override;
    size_t getDocIdSpaceEstimate() const override;
};

}