summaryrefslogtreecommitdiffstats
path: root/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
blob: e64c41f814ff9240f8a7aa1c2fa2c959810633f2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <vespa/document/fieldvalue/iteratorhandler.h>
#include <vespa/searchlib/query/streaming/query.h>
#include <vespa/vsm/common/document.h>
#include <vespa/vsm/common/storagedocument.h>
#include <vespa/vespalib/util/array.h>

// Forward declaration only: this header refers to IQueryEnvironment solely by
// reference (in the prepare() signatures below), so the full definition is not needed here.
namespace search::fef { class IQueryEnvironment; }

namespace vsm {

// Both aliases are plain size_t; the names suggest "number of terms" and
// "size of a term" respectively.
using termcount_t = size_t;
using termsize_t = size_t;

// 32-bit unsigned character unit (name suggests a UCS-4 code point — TODO confirm).
using ucs4_t = uint32_t;
// Element type stored in searcher buffers; currently identical to ucs4_t.
using cmptype_t = ucs4_t;
// Buffer of cmptype_t elements.
using SearcherBuf = vespalib::Array<cmptype_t>;
// Shared-ownership handle to a SearcherBuf; passed by const reference to the prepare() functions below.
using SharedSearcherBuf = std::shared_ptr<SearcherBuf>;

/**
 * Base class holding the list of query terms a searcher works on.
 *
 * Every member is protected, so the class can only be constructed, copied,
 * destroyed and prepared from within derived classes. Copy construction is
 * available (defined out of line); copy assignment is disabled.
 */
class FieldSearcherBase
{
protected:
    FieldSearcherBase() noexcept;
    FieldSearcherBase(const FieldSearcherBase& org);
    virtual ~FieldSearcherBase();
    FieldSearcherBase& operator=(const FieldSearcherBase& org) = delete;

    /// Prepares this object for the given query terms (defined out of line;
    /// presumably stores them in _qtl — confirm against the implementation).
    void prepare(const search::streaming::QueryTermList& qtl);

    /// Query terms associated with this searcher.
    search::streaming::QueryTermList _qtl;
};

/**
 * Abstract searcher bound to a single field id.
 *
 * Subclasses implement onValue() (and may override onStructValue()) to inspect
 * field values, and report matches through addHit(). The nested
 * IteratorHandler is the bridge that calls back into onValue().
 *
 * Note that the four-argument prepare() declared here hides the one-argument
 * FieldSearcherBase::prepare(); derived code that wants the base version must
 * qualify the call.
 */
class FieldSearcher : public FieldSearcherBase
{
public:
    /// Matching mode of this searcher; queried through prefix(), substring(),
    /// suffix() and exact().
    enum MatchType {
        REGULAR,
        PREFIX,
        SUBSTRING,
        SUFFIX,
        EXACT
    };

    /// Equivalent to FieldSearcher(fId, false).
    explicit FieldSearcher(FieldIdT fId) noexcept : FieldSearcher(fId, false) {}
    /// @param fId           field id this searcher is bound to.
    /// @param defaultPrefix presumably selects PREFIX as the initial match type — confirm in the .cpp.
    FieldSearcher(FieldIdT fId, bool defaultPrefix) noexcept;
    ~FieldSearcher() override;
    /// Returns a newly allocated searcher owned by the caller (presumably a
    /// polymorphic copy of this one — confirm in the subclasses).
    virtual std::unique_ptr<FieldSearcher> duplicate() const = 0;
    /// Runs this searcher on the given document (defined out of line).
    bool search(const StorageDocument & doc);
    /// Prepares the searcher for a query (defined out of line). Hides
    /// FieldSearcherBase::prepare(const QueryTermList &).
    virtual void prepare(search::streaming::QueryTermList& qtl, const SharedSearcherBuf& buf,
                         const vsm::FieldPathMapT& field_paths, search::fef::IQueryEnvironment& query_env);

    /// Field id this searcher is bound to.
    FieldIdT field()                 const { return _field; }
    /// Rebinds the searcher to another field id and re-runs prepareFieldId().
    void field(FieldIdT v)                 { _field = v; prepareFieldId(); }
    bool prefix()                    const { return _matchType == PREFIX; }
    bool substring()                 const { return _matchType == SUBSTRING; }
    bool suffix()                    const { return _matchType == SUFFIX; }
    bool exact()                     const { return _matchType == EXACT; }
    void setMatchType(MatchType mt)        { _matchType = mt; }
    /// Static initialisation hook (defined out of line; presumably fills the
    /// _foldLowCase and _wordChar tables — confirm).
    static void init();
    /// Table lookup in _foldLowCase for a single byte.
    static search::byte fold(search::byte c)               { return _foldLowCase[c]; }
    /// Table lookup in _wordChar for a single byte; non-zero means "word character".
    static search::byte iswordchar(search::byte c)         { return _wordChar[c]; }
    /// Logical negation of iswordchar(): 1 for every byte that is not a word
    /// character, which is broader than whitespace in the <cctype> sense.
    static search::byte isspace(search::byte c)            { return ! iswordchar(c); }
    static size_t countWords(const FieldRef & f);
    /// Weight last set through setCurrentWeight().
    int32_t getCurrentWeight()       const { return _currentElementWeight; }
    /// Defined out of line; presumably resets the statistics counters below — confirm.
    void zeroStat();
    /// Sets the maximum field length and returns *this so calls can be chained.
    FieldSearcher & maxFieldLength(uint32_t maxFieldLength_) { _maxFieldLength = maxFieldLength_; return *this; }
    size_t maxFieldLength() const { return _maxFieldLength; }

private:
    /// Adapter that receives field-value iteration callbacks and forwards them
    /// to the owning searcher. It stores a reference, so it must not outlive
    /// the FieldSearcher it was created for.
    class IteratorHandler : public document::fieldvalue::IteratorHandler {
    private:
        FieldSearcher & _searcher;

        void onPrimitive(uint32_t fid, const Content & c) override;
        void onCollectionStart(const Content & c) override;
        void onStructStart(const Content & c) override;

    public:
        explicit IteratorHandler(FieldSearcher & searcher) noexcept : _searcher(searcher) {}
    };
    friend class IteratorHandler; // to allow calls to onValue();

    void prepareFieldId();
    void setCurrentWeight(int32_t weight) { _currentElementWeight = weight; }
    // Takes uint32_t so the parameter matches _currentElementId; it was
    // previously a signed value misleadingly named "weight".
    void setCurrentElementId(uint32_t elementId) { _currentElementId = elementId; }
    bool onSearch(const StorageDocument & doc);
    /// Called for each field value to be searched; subclasses do the matching here.
    virtual void onValue(const document::FieldValue & fv) = 0;
    /// Optional hook for struct values; the default does nothing.
    virtual void onStructValue(const document::StructFieldValue &) { }
    FieldIdT      _field;
    MatchType     _matchType;
    unsigned      _maxFieldLength;
    uint32_t      _currentElementId;
    int32_t       _currentElementWeight; // Contains the weight of the current item being evaluated.
protected:
    /// Number of terms searched. Also used by addHit() as the base offset
    /// added to the batch-local position.
    unsigned _words;
    /// Number of utf8 bytes by utf8 size.
    /// NOTE(review): that description does not fit a single scalar counter; the
    /// name suggests a count of malformed UTF-8 sequences — confirm in the .cpp.
    unsigned _badUtf8Count;
    unsigned _zeroCount;
protected:
    /**
     * Adds a hit to the given query term.
     * For each call to onValue() a batch of words are processed, and the position is local to this batch.
     *
     * @param qt  query term that receives the hit.
     * @param pos position within the current batch; _words is added to it before it is recorded.
     **/
    void addHit(search::streaming::QueryTerm & qt, uint32_t pos) const {
        qt.add(_words + pos, field(), _currentElementId, getCurrentWeight());
    }
public:
    /// Per-byte lookup table backing fold().
    static search::byte _foldLowCase[256];
    /// Per-byte lookup table backing iswordchar() and isspace().
    static search::byte _wordChar[256];
};

// Owning handle to a single polymorphic FieldSearcher.
using FieldSearcherContainer = std::unique_ptr<FieldSearcher>;
// Sequence of owned searchers (the name suggests it is indexed by field id — TODO confirm).
using FieldIdTSearcherMapT = std::vector<FieldSearcherContainer>;

/**
 * Vector of owned FieldSearcher instances with an added prepare() step.
 *
 * The class derives publicly from std::vector, whose destructor is not
 * virtual, so objects must not be destroyed through a pointer to the vector base.
 */
class FieldIdTSearcherMap : public FieldIdTSearcherMapT
{
public:
    /**
     * Prepares the contained searchers for a query. Defined out of line;
     * presumably forwards to each searcher's prepare() — confirm against the
     * implementation.
     */
    void prepare(const DocumentTypeIndexFieldMapT& difm,
                 const SharedSearcherBuf& searcherBuf,
                 search::streaming::Query& query,
                 const vsm::FieldPathMapT& field_paths,
                 search::fef::IQueryEnvironment& query_env);
};

}