aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/queryeval/searchiterator.h
blob: f8124a161e2fb287dc6bc379499dea6d949fac01 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include "posting_info.h"
#include "begin_and_end_id.h"
#include <vespa/vespalib/stllike/string.h>
#include <vespa/vespalib/util/trinary.h>
#include <memory>
#include <vector>

namespace vespalib { class ObjectVisitor; }
namespace vespalib::slime {
    struct Cursor;
    struct Inserter;
}

namespace search { class BitVector; }
namespace search::attribute { class ISearchContext; }

namespace search::queryeval {

/**
 * This is the abstract superclass of all search objects. Each search
 * object act as an iterator over documents that are results for the
 * subquery represented by that search object. Search objects will be
 * combined into a tree structure to perform query evaluation in
 * parallel. The unpack method is used to unpack match information for
 * a document. The placement and format of this match data is a
 * contract between the application and the leaf search objects and is
 * of no concern to the interface defined by this class.
 **/
class SearchIterator
{
private:
    using BitVectorUP = std::unique_ptr<BitVector>;
    /**
     * The current document id for this search object. This variable
     * will have a value that is either @ref beginId, @ref endId or a
     * document id representing a hit for this search object.
     **/
    uint32_t _docid;

    /**
     * This is the end of the the lidspace this iterator shall consider.
     */
    uint32_t _endid;

    void and_hits_into_strict(BitVector &result, uint32_t begin_id);
    void and_hits_into_non_strict(BitVector &result, uint32_t begin_id);
protected:
    /**
     * This method is used by the @ref doSeek method to indicate that
     * a document is a hit. This method is also used to indicate that
     * no more hits are available by using the @ref endId value.
     *
     * @param id docid for hit
     **/
    void setDocId(uint32_t id) noexcept { _docid = id; }

    /**
     * Used to adjust the end of the legal docid range.
     * Used by subclasses instead of a full initRange call.
     *
     * @param end_id the first docid outside the legal iterator range
     */
    void setEndId(uint32_t end_id) noexcept { _endid = end_id; }

    /**
     * Will terminate the iterator by setting it past the end.
     * Further calls to isAtEnd() will then return true.
     */
    void setAtEnd() noexcept { _docid = search::endDocId; }

public:
    using Trinary=vespalib::Trinary;
    // doSeek and doUnpack are called by templated classes, so making
    // them public to avoid complicated friend requests. Note that if
    // you call doSeek and doUnpack directly instead of using
    // seek/unpack, you are bypassing docid checks and need to know
    // what you are doing.

    /**
     * This method must be overridden to perform the actual seeking
     * for the concrete search class. The task of this method is to
     * check whether the given document id is a hit for this search
     * object. The current document id is changed with the @ref
     * setDocId method. When this method returns, the current document
     * id must have been updated as follows: if the candidate document
     * id was in fact a hit, this is now the new current document
     * id. If the candidate document id was not a hit, the method may
     * choose to either leave the current document id as is, or
     * increase it to indicate the next hit for this search object
     * (@ref endId being a valid value).
     *
     * @param docid hit candidate
     **/
    virtual void doSeek(uint32_t docid) = 0;

    /**
     * This method must be overridden to perform the actual unpacking
     * for the concrete search class. The task of this method is to
     * unpack match information for the given docid. This method can
     * assume that the given document is also the current position of
     * the iterator. This is checked by the @ref unpack method which
     * invokes this method.
     *
     * @param docid what docid to unpack match information for.
     **/
    virtual void doUnpack(uint32_t docid) = 0;

    /**
     * This sets the range the iterator shall work.
     * As soon as it reaches its limit it can stop.
     * Iterators can overload this one and do what it needs to do.
     * It must also rewind if instructed to do so.
     *
     * @param beginId This is the first valid docId and the lowest that will be given to doSeek.
     * @param endId This is the first docid after the valid range.
     */
    virtual void initRange(uint32_t begin_id, uint32_t end_id) {
        _docid = begin_id - 1;
        _endid = end_id;
    }

    /**
     * Will initialize the full range.
     **/
    void initFullRange() { initRange(1, search::endDocId); }

    /**
     * Find all hits in the currently searched range (specified by
     * initRange) and return them as a bitvector. This function will
     * perform term-at-a-time evaluation and should only be used for
     * terms not needed for ranking. Calling this function will
     * exhaust this iterator and no more results will be available in
     * the currently searched range after this function returns.
     *
     * @return bitvector with hits for this iterator
     * @param begin_id the lowest document id that may be a hit
     *                 (we do not remember beginId from initRange)
     **/
    virtual BitVectorUP get_hits(uint32_t begin_id);

    /**
     * Find all hits in the currently searched range (specified by
     * initRange) and OR them into the given temporary result. This
     * function will perform term-at-a-time evaluation and should only
     * be used for terms not needed for ranking. Calling this function
     * will exhaust this iterator and no more results will be
     * available in the currently searched range after this function
     * returns.
     *
     * @param result result to be augmented by adding hits from this
     *               iterator.
     * @param begin_id the lowest document id that may be a hit
     *                 (we might not remember beginId from initRange)
     **/
    virtual void or_hits_into(BitVector &result, uint32_t begin_id);

    /**
     * Find all hits in the currently searched range (specified by
     * initRange) and AND them into the given temporary result. This
     * function will perform term-at-a-time evaluation and should only
     * be used for terms not needed for ranking. Calling this function
     * will exhaust this iterator and no more results will be
     * available in the currently searched range after this function
     * returns.
     *
     * @param result result to be augmented by clearing non-hits from this
     *               iterator.
     * @param begin_id the lowest document id that may be a hit
     *                 (we might not remember beginId from initRange)
     **/
    virtual void and_hits_into(BitVector &result, uint32_t begin_id);

public:
    using UP = std::unique_ptr<SearchIterator>;

    /**
     * The constructor sets the current document id to @ref beginId.
     **/
    SearchIterator() noexcept : _docid(0), _endid(0) { }
    SearchIterator(const SearchIterator &) = delete;
    SearchIterator &operator=(const SearchIterator &) = delete;


    /**
     * Special value indicating that this searcher has not yet started
     * seeking through documents.  This must match beginId() in
     * search::fef::TermFieldMatchData class.
     *
     * @return constant
     **/
    static uint32_t beginId() noexcept { return beginDocId; }

    /**
     * Tell if the iterator has reached the end.
     *
     * @return true if the iterator has reached its end.
     **/
    bool isAtEnd() const noexcept { return isAtEnd(_docid); }
    bool isAtEnd(uint32_t docid) const noexcept {
        if (__builtin_expect(docid >= _endid, false)) {
            return true;
        }
        return false;
    }

    /**
     * Obtain the current document id for this search object. The
     * value is either @ref beginId, @ref endId or a document id
     * representing a hit for this search object.
     *
     * @return current document id
     **/
    uint32_t getDocId() const noexcept { return _docid; }

    uint32_t getEndId() const noexcept { return _endid; }

    /**
     * Check if the given document id is a hit. If it is a hit, the
     * current document id of this search object is set to the given
     * document id. If it is not a hit, the current document id is
     * either unchanged, set to the next hit, or set to @ref endId.
     *
     * @return true if the given document id is a hit.
     * @param docid hit candidate
     **/
    bool seek(uint32_t docid) {
        if (__builtin_expect(docid > _docid, true)) {
            doSeek(docid);
        }
        return (docid == _docid);
    }

    /**
     * Seek to the next docid and return it. Start with the one given.
     * With protection for going backWards.
     * Note that this requires the iterator to be strict.
     *
     * @return the first matching docid
     * @param docid hit candidate
     **/
    uint32_t seekFirst(uint32_t docid) {
        if (__builtin_expect(docid > _docid, true)) {
            doSeek(docid);
        }
        return _docid;
    }

    /**
     * Seek to the next docid and return it. Start with the one given.
     * Without protection for going backWards.
     * Note that this requires the iterator to be strict.
     *
     * @return the first matching docid
     * @param docid hit candidate
     **/
    uint32_t seekNext(uint32_t docid) {
        doSeek(docid);
        return _docid;
    }

    /**
     * Unpack hit information for the given docid if available. This
     * method may also change the current docid for this iterator.
     *
     * @param docid what docid to unpack match information for.
     **/
    void unpack(uint32_t docid) {
        if (__builtin_expect(seek(docid), true)) {
            doUnpack(docid);
        }
    }

    /**
     * Return global posting info associated with this search iterator.
     *
     * @return global posting info or NULL if no info is available.
     **/
    virtual const PostingInfo *getPostingInfo() const { return nullptr; }

    /**
     * Create a human-readable representation of this object. This
     * method will use object visitation internally to capture the
     * full structure of this object.
     *
     * @return structured human-readable representation of this object
     **/
    vespalib::string asString() const;

    /**
    * Create a slime representation of this object. This
    * method will use object visitation internally to capture the
    * full structure of this object.
    *
    * @return structured slime representation of this object
    **/
    vespalib::slime::Cursor & asSlime(const vespalib::slime::Inserter & cursor) const;

    /**
     * Obtain the fully qualified name of the concrete class for this
     * object. The default implementation will perform automatic name
     * resolving. There is only a need to override this function if
     * you want to impersonate another class.
     *
     * @return fully qualified class name
     **/
    virtual vespalib::string getClassName() const;

    /**
     * Visit each of the members of this object. This method should be
     * overridden by subclasses and should present all appropriate
     * internal structure of this object to the given visitor. Note
     * that while each level of a class hierarchy may cooperate to
     * visit all object members (invoking superclass method within
     * method), this method, as implemented in the SearchIterator class
     * should not be invoked, since its default implementation is
     * there to signal about the method not being overridden.
     *
     * @param visitor the visitor of this object
     **/
    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;

    /**
     * Empty, just defined to make it virtual.
     **/
    virtual ~SearchIterator() = default;

    /**
     * @return true if it is a bitvector
     */
    class BitVectorMeta {
    public:
        BitVectorMeta() noexcept : BitVectorMeta(nullptr, 0, false) {}
        BitVectorMeta(const BitVector * bv, uint32_t docidLimit, bool inverted_in) noexcept
            : _bv(bv), _docidLimit(docidLimit), _inverted(inverted_in)
        {}
        const BitVector * vector() const noexcept { return _bv; }
        bool inverted () const noexcept { return _inverted; }
        uint32_t getDocidLimit() const noexcept { return _docidLimit; }
        bool valid() const noexcept { return _bv != nullptr; }
    private:
        const BitVector * _bv;
        uint32_t          _docidLimit;
        bool              _inverted;
    };
    bool isBitVector() const noexcept { return asBitVector().valid(); }
    virtual BitVectorMeta asBitVector() const noexcept { return {}; }
    /**
     * @return true if it is a source blender
     */
    virtual bool isSourceBlender() const { return false; }
    /**
     * @return true if it is a multi search
     */
    virtual bool isMultiSearch() const { return false; }

    /**
     * This is used for adding an extra filter. If it is accepted it will return an empty UP.
     * If not you will get in in return. Currently it will only be accepted by a
     * MultiBitVector<And> with a pure 'and' path down if it is an BitVector,
     * or by a strict AND with a pure 'and' path. Be careful if you you plan to steal the filter.
     *
     * @param filter the searchiterator that is an extra filter.
     * @param estimate is the number of hits this filter is expected to produce.
     * @return the given filter or empty if it has been consumed.
     **/
    virtual UP andWith(UP filter, uint32_t estimate);

    virtual Trinary is_strict() const { return Trinary::Undefined; }

    // will this iterator match any document?
    // (True -> all documents match)
    // (False -> no documents match)
    // (Undefined -> use seek to find out)
    // number of matches: (False <= Undefined <= True)
    virtual Trinary matches_any() const { return Trinary::Undefined; }

    // Disclose children by giving out references to owning
    // pointers. This allows re-wiring from the outside, which is
    // needed for deep decoration used by match profiling. Only
    // disclose children that are treated as generic SearchIterators.
    virtual void disclose_children(std::vector<UP*> &dst);
};

}

void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
           const search::queryeval::SearchIterator &obj);
void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
           const search::queryeval::SearchIterator *obj);