aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/queryeval/nearsearch.h
blob: 3a2f98a448d5cbbf433bee339f82139a29841443 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include "andsearch.h"

namespace search::queryeval {

/**
 * Shared base class for the near and o-near search iterators; it carries the
 * logic that is common to both.
 */
class NearSearchBase : public AndSearch
{
protected:
    // NOTE(review): these are initialized by the out-of-line constructor; presumably
    // _window and _strict mirror the constructor arguments of the same name - confirm in the .cpp.
    uint32_t _data_size;
    uint32_t _window;
    bool     _strict;

    using TermFieldMatchDataArray = search::fef::TermFieldMatchDataArray;

    /**
     * Common state for the matchers declared by the subclasses: a window size and
     * the term field match data entries that belong to one particular field.
     */
    class MatcherBase
    {
        uint32_t                _window;
        TermFieldMatchDataArray _inputs;
    protected:
        /** @return The match data entries that were kept by the constructor. */
        const TermFieldMatchDataArray &inputs() const { return _inputs; }
        /** @return The window size given at construction. */
        uint32_t window() const { return _window; }
    public:
        /**
         * Stores the window size and keeps only those entries of the given array
         * whose field id equals the given field id.
         *
         * @param win     The window size to store.
         * @param fieldId The field id an entry must report to be kept.
         * @param in      The candidate term field match data entries.
         */
        MatcherBase(uint32_t win, uint32_t fieldId, const TermFieldMatchDataArray &in)
            : _window(win),
              _inputs()
        {
            const size_t count = in.size();
            for (size_t idx = 0; idx != count; ++idx) {
                // Entries for other fields are not relevant to this matcher.
                if (in[idx]->getFieldId() != fieldId) {
                    continue;
                }
                _inputs.add(in[idx]);
            }
        }
    };

    /**
     * Shorthand for a list of position iterators; the fully spelled-out type is unwieldy.
     */
    using PositionsIteratorList = std::vector<search::fef::TermFieldMatchData::PositionsIterator>;

    /**
     * Decides whether the given document is a match. Only call this once every
     * child term is positioned at the same document.
     *
     * @param docId The document to check.
     * @return True if the document matches.
     */
    virtual bool match(uint32_t docId) = 0;

    /**
     * Seeks all child terms forward until a match is found, and reports the
     * matching document through setDocId().
     *
     * @param docId The document id to start seeking from.
     */
    void seekNext(uint32_t docId);

public:
    /**
     * Creates a new search over the given term match data.
     *
     * @param terms  The iterators for all child terms.
     * @param data   The term match data objects for all child terms.
     * @param window The size of the window in which all terms must occur.
     * @param strict Whether or not to skip to the next matching document if a seek fails.
     */
    NearSearchBase(Children terms, const TermFieldMatchDataArray &data, uint32_t window, bool strict);

    void visitMembers(vespalib::ObjectVisitor &visitor) const override;
    void doSeek(uint32_t docId) override;
};

/**
 * Near search: a document is a hit only when every child term occurs inside a
 * window of the given size.
 */
class NearSearch : public NearSearchBase
{
    /**
     * Matcher built on MatcherBase; match() is defined out of line.
     */
    struct Matcher : NearSearchBase::MatcherBase
    {
        Matcher(uint32_t win, uint32_t fieldId, const TermFieldMatchDataArray &in)
            : MatcherBase(win, fieldId, in)
        {
        }
        bool match(uint32_t docId);
    };

    std::vector<Matcher> _matchers;
    bool match(uint32_t docId) override;

public:
    /**
     * Creates a new near search over the given term match data.
     *
     * @param terms  The iterators for all child terms.
     * @param data   The term match data objects for all child terms.
     * @param window The size of the window in which all terms must occur.
     * @param strict Whether or not to skip to the next matching document if a seek fails.
     */
    NearSearch(Children terms, const TermFieldMatchDataArray &data, uint32_t window, bool strict = true);
};

/**
 * O-near (ordered near) search: a document is a hit only when every child term
 * occurs inside a window of the given size, in the same order as the terms
 * appear as children of this search.
 */
class ONearSearch : public NearSearchBase
{
    /**
     * Matcher built on MatcherBase; match() is defined out of line.
     */
    struct Matcher : NearSearchBase::MatcherBase
    {
        Matcher(uint32_t win, uint32_t fieldId, const TermFieldMatchDataArray &in)
            : MatcherBase(win, fieldId, in)
        {
        }
        bool match(uint32_t docId);
    };

    std::vector<Matcher> _matchers;
    bool match(uint32_t docId) override;

public:
    /**
     * Creates a new o-near search over the given term match data.
     *
     * @param terms  The iterators for all child terms.
     * @param data   The term match data objects for all child terms.
     * @param window The size of the window in which all terms must occur.
     * @param strict Whether or not to skip to the next matching document if a seek fails.
     */
    ONearSearch(Children terms, const TermFieldMatchDataArray &data, uint32_t window, bool strict = true);
};

}