aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/query/tree/termnodes.h
blob: 578213a15ddb5e2ce9bd6685d805c7ec78229179 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include "location.h"
#include "predicate_query_term.h"
#include "querynodemixin.h"
#include "range.h"
#include "term.h"
#include "const_bool_nodes.h"

namespace search::query {

// Shorthand for the TermBase instantiation whose term value is a
// vespalib::string; used as base by the string valued term classes below.
using StringBase = TermBase<vespalib::string>;

/**
 * Query tree node for a number term. The term value is carried as a string
 * (StringBase).
 *
 * All constructor arguments are forwarded unchanged to the base class.
 * The destructor is pure virtual, so this class can only be used as a base
 * class.
 */
class NumberTerm : public QueryNodeMixin<NumberTerm, StringBase >
{
public:
    // The term is taken by const reference, like the other string valued
    // term classes in this file, so that no extra copy of the string is made
    // merely to forward it to the base class.
    NumberTerm(const Type &term, vespalib::stringref view, int32_t id, Weight weight)
        : QueryNodeMixinType(term, view, id, weight) {}
    virtual ~NumberTerm() = 0;
};

//-----------------------------------------------------------------------------

/**
 * Query tree node for a prefix term. The term value is a string (StringBase).
 *
 * The constructor hands all of its arguments straight to the base class.
 * The destructor is pure virtual, so this class is usable only as a base
 * class.
 */
class PrefixTerm : public QueryNodeMixin<PrefixTerm, StringBase>
{
public:
    PrefixTerm(const Type &term, vespalib::stringref view, int32_t id, Weight weight)
        : QueryNodeMixinType(term, view, id, weight)
    {
    }

    virtual ~PrefixTerm() = 0;
};

//-----------------------------------------------------------------------------

/**
 * Query tree node for a range term. The term value is a Range
 * (TermBase<Range>).
 *
 * The constructor hands all of its arguments straight to the base class.
 * The destructor is pure virtual, so this class is usable only as a base
 * class.
 */
class RangeTerm : public QueryNodeMixin<RangeTerm, TermBase<Range>>
{
public:
    RangeTerm(const Type &term, vespalib::stringref view, int32_t id, Weight weight)
        : QueryNodeMixinType(term, view, id, weight)
    {
    }

    virtual ~RangeTerm() = 0;
};

//-----------------------------------------------------------------------------

/**
 * Query tree node for a string term. The term value is a string (StringBase).
 *
 * The destructor is pure virtual, so this class is usable only as a base
 * class.
 */
class StringTerm : public QueryNodeMixin<StringTerm, StringBase>
{
public:
    // Declared only; the constructor body is not part of this class definition.
    StringTerm(const Type &term,
               vespalib::stringref view,
               int32_t id,
               Weight weight);

    virtual ~StringTerm() = 0;
};

//-----------------------------------------------------------------------------

/**
 * Query tree node for a substring term. The term value is a string
 * (StringBase).
 *
 * The constructor hands all of its arguments straight to the base class.
 * The destructor is pure virtual, so this class is usable only as a base
 * class.
 */
class SubstringTerm : public QueryNodeMixin<SubstringTerm, StringBase>
{
public:
    SubstringTerm(const Type &term, vespalib::stringref view, int32_t id, Weight weight)
        : QueryNodeMixinType(term, view, id, weight)
    {
    }

    virtual ~SubstringTerm() = 0;
};

//-----------------------------------------------------------------------------

/**
 * Query tree node for a suffix term. The term value is a string (StringBase).
 *
 * The constructor hands all of its arguments straight to the base class.
 * The destructor is pure virtual, so this class is usable only as a base
 * class.
 */
class SuffixTerm : public QueryNodeMixin<SuffixTerm, StringBase>
{
public:
    SuffixTerm(const Type &term, vespalib::stringref view, int32_t id, Weight weight)
        : QueryNodeMixinType(term, view, id, weight)
    {
    }

    virtual ~SuffixTerm() = 0;
};

//-----------------------------------------------------------------------------

/**
 * Query tree node for a location term. The term value is a Location
 * (TermBase<Location>).
 *
 * The constructor hands all of its arguments straight to the base class.
 * The destructor is pure virtual, so this class is usable only as a base
 * class.
 */
class LocationTerm : public QueryNodeMixin<LocationTerm, TermBase<Location>>
{
public:
    LocationTerm(const Type &term, vespalib::stringref view, int32_t id, Weight weight)
        : QueryNodeMixinType(term, view, id, weight)
    {
    }

    virtual ~LocationTerm() = 0;

    // Overrides the inherited query so that location terms always answer true.
    bool isLocationTerm() const override
    {
        return true;
    }
};

//-----------------------------------------------------------------------------

/**
 * Query tree node for a predicate query. The term value is a
 * PredicateQueryTerm::UP (TermBase<PredicateQueryTerm::UP>).
 *
 * Unlike most term classes in this file, no pure virtual destructor is
 * declared here.
 */
class PredicateQuery
    : public QueryNodeMixin<PredicateQuery, TermBase<PredicateQueryTerm::UP>>
{
public:
    // The term is received by value and moved on to the base class; the
    // remaining arguments are forwarded unchanged.
    PredicateQuery(PredicateQueryTerm::UP term, vespalib::stringref view, int32_t id, Weight weight)
        : QueryNodeMixinType(std::move(term), view, id, weight)
    {
    }
};

//-----------------------------------------------------------------------------

/**
 * Query tree node for a regular expression term. The term value is a string
 * (StringBase).
 *
 * The constructor hands all of its arguments straight to the base class.
 * The destructor is pure virtual, so this class is usable only as a base
 * class.
 */
class RegExpTerm : public QueryNodeMixin<RegExpTerm, StringBase>
{
public:
    RegExpTerm(const Type &term, vespalib::stringref view, int32_t id, Weight weight)
        : QueryNodeMixinType(term, view, id, weight)
    {
    }

    virtual ~RegExpTerm() = 0;
};

//-----------------------------------------------------------------------------

/**
 * Query tree node for a fuzzy term. The term value is a string (StringBase).
 *
 * In addition to the usual term arguments, which are forwarded to the base
 * class, it stores a maximum edit distance and a prefix length and exposes
 * both through getters.
 * The destructor is pure virtual, so this class is usable only as a base
 * class.
 */
class FuzzyTerm : public QueryNodeMixin<FuzzyTerm, StringBase>
{
public:
    FuzzyTerm(const Type &term, vespalib::stringref view, int32_t id, Weight weight,
              uint32_t maxEditDistance, uint32_t prefixLength)
        : QueryNodeMixinType(term, view, id, weight),
          _maxEditDistance(maxEditDistance),
          _prefixLength(prefixLength)
    {
    }

    virtual ~FuzzyTerm() = 0;

    // Returns the maximum edit distance given at construction.
    uint32_t getMaxEditDistance() const
    {
        return _maxEditDistance;
    }

    // Returns the prefix length given at construction.
    uint32_t getPrefixLength() const
    {
        return _prefixLength;
    }

private:
    uint32_t _maxEditDistance;
    uint32_t _prefixLength;
};

/**
 * Term that matches the K nearest neighbors in a multi-dimensional vector space.
 *
 * The query point is given as a dense tensor of order 1, which is looked up
 * in fef::IQueryEnvironment with the query tensor name as key.
 * The field name refers to a dense document tensor of order 1.
 * Both tensors are validated to have the same tensor type before the query is
 * sent to the backend.
 *
 * Target num hits (K) is only a hint to how many neighbors to return.
 * The number actually returned may be higher (or lower if the query returns
 * fewer hits).
 */
class NearestNeighborTerm : public QueryNodeMixin<NearestNeighborTerm, TermNode>
{
public:
    // field_name, id and weight go to the base class; every other argument is
    // stored in the corresponding member.
    NearestNeighborTerm(vespalib::stringref query_tensor_name,
                        vespalib::stringref field_name,
                        int32_t id,
                        Weight weight,
                        uint32_t target_num_hits,
                        bool allow_approximate,
                        uint32_t explore_additional_hits,
                        double distance_threshold)
        : QueryNodeMixinType(field_name, id, weight),
          _query_tensor_name(query_tensor_name),
          _target_num_hits(target_num_hits),
          _allow_approximate(allow_approximate),
          _explore_additional_hits(explore_additional_hits),
          _distance_threshold(distance_threshold)
    {
    }

    virtual ~NearestNeighborTerm() = default;

    // Plain accessors for the values stored at construction.
    const vespalib::string& get_query_tensor_name() const
    {
        return _query_tensor_name;
    }
    uint32_t get_target_num_hits() const
    {
        return _target_num_hits;
    }
    bool get_allow_approximate() const
    {
        return _allow_approximate;
    }
    uint32_t get_explore_additional_hits() const
    {
        return _explore_additional_hits;
    }
    double get_distance_threshold() const
    {
        return _distance_threshold;
    }

private:
    vespalib::string _query_tensor_name;
    uint32_t _target_num_hits;
    bool _allow_approximate;
    uint32_t _explore_additional_hits;
    double _distance_threshold;
};

/**
 * Query tree node holding a collection of weighted terms.
 *
 * Terms are added either as strings or as 64-bit integers and are kept in a
 * TermVector implementation owned by this node. Each stored term can be read
 * back by index as a string or as an integer, together with its weight.
 *
 * The constructor is protected, so instances are only created through
 * subclasses.
 */
class MultiTerm : public Node {
public:
    // Kind of terms held, as reported by getType().
    enum class Type {STRING, INTEGER, UNKNOWN};
    using StringAndWeight = std::pair<vespalib::stringref, Weight>;
    using IntegerAndWeight = std::pair<int64_t, Weight>;

    /**
     * Abstract storage interface for the weighted terms.
     * Terms can be added and read back both as strings and as integers.
     */
    struct TermVector {
        using StringAndWeight = MultiTerm::StringAndWeight;
        using IntegerAndWeight = MultiTerm::IntegerAndWeight;
        virtual ~TermVector() = default;
        virtual void addTerm(vespalib::stringref term, Weight weight) = 0;
        virtual void addTerm(int64_t term, Weight weight) = 0;
        virtual StringAndWeight getAsString(uint32_t index) const = 0;
        virtual IntegerAndWeight getAsInteger(uint32_t index) const = 0;
        virtual Weight getWeight(uint32_t index) const = 0;
        virtual uint32_t size() const = 0;
    };

    ~MultiTerm() override;

    // Add a weighted term, given either as a string or as an integer.
    // Both overloads are defined outside this class definition.
    void addTerm(vespalib::stringref term, Weight weight);
    void addTerm(int64_t term, Weight weight);

    // The accessors below delegate directly to the owned TermVector.
    // NOTE(review): they dereference _terms without a check; presumably
    // callers only use them after terms have been added - confirm against
    // the implementation file.

    // Note that the first element of the returned pair refers to a zero
    // terminated string.
    // That is required as the comparator for the enum store requires it.
    StringAndWeight getAsString(uint32_t index) const { return _terms->getAsString(index); }
    IntegerAndWeight getAsInteger(uint32_t index) const { return _terms->getAsInteger(index); }
    Weight weight(uint32_t index) const { return _terms->getWeight(index); }

    // Returns the stored term count (_num_terms).
    uint32_t getNumTerms() const { return _num_terms; }
    // Returns the stored term type (_type).
    Type getType() const { return _type; }
protected:
    // explicit: a bare integer must never convert implicitly into a MultiTerm.
    // Subclasses construct the base with direct initialization, which is
    // unaffected.
    explicit MultiTerm(uint32_t num_terms);
private:
    // Internal helper defined outside this class definition; never inlined.
    VESPA_DLL_LOCAL std::unique_ptr<TermVector> downgrade() __attribute__((noinline));
    std::unique_ptr<TermVector> _terms;
    uint32_t _num_terms;
    Type _type;
};

/**
 * Weighted set term: a MultiTerm that is also a Term.
 *
 * num_terms is passed to the MultiTerm side, while view, id and weight
 * initialize the Term side.
 * The destructor is pure virtual, so this class is usable only as a base
 * class.
 */
class WeightedSetTerm : public QueryNodeMixin<WeightedSetTerm, MultiTerm>,
                        public Term
{
public:
    WeightedSetTerm(uint32_t num_terms,
                    const vespalib::string &view,
                    int32_t id,
                    Weight weight)
        : QueryNodeMixinType(num_terms),
          Term(view, id, weight)
    {
    }

    virtual ~WeightedSetTerm() = 0;
};

/**
 * Dot product term: a MultiTerm that is also a Term.
 *
 * num_terms is passed to the MultiTerm side, while view, id and weight
 * initialize the Term side.
 * The destructor is pure virtual, so this class is usable only as a base
 * class.
 */
class DotProduct : public QueryNodeMixin<DotProduct, MultiTerm>,
                   public Term
{
public:
    DotProduct(uint32_t num_terms,
               const vespalib::string &view,
               int32_t id,
               Weight weight)
        : QueryNodeMixinType(num_terms),
          Term(view, id, weight)
    {
    }

    virtual ~DotProduct() = 0;
};

/**
 * Wand term: a MultiTerm that is also a Term.
 *
 * num_terms is passed to the MultiTerm side, while view, id and weight
 * initialize the Term side. The target number of hits, the score threshold
 * and the threshold boost factor are stored and exposed through getters.
 * The destructor is pure virtual, so this class is usable only as a base
 * class.
 */
class WandTerm : public QueryNodeMixin<WandTerm, MultiTerm>,
                 public Term
{
public:
    WandTerm(uint32_t num_terms,
             const vespalib::string &view,
             int32_t id,
             Weight weight,
             uint32_t targetNumHits,
             int64_t scoreThreshold,
             double thresholdBoostFactor)
        : QueryNodeMixinType(num_terms),
          Term(view, id, weight),
          _targetNumHits(targetNumHits),
          _scoreThreshold(scoreThreshold),
          _thresholdBoostFactor(thresholdBoostFactor)
    {
    }

    virtual ~WandTerm() = 0;

    // Plain accessors for the values stored at construction.
    uint32_t getTargetNumHits() const
    {
        return _targetNumHits;
    }
    int64_t getScoreThreshold() const
    {
        return _scoreThreshold;
    }
    double getThresholdBoostFactor() const
    {
        return _thresholdBoostFactor;
    }

private:
    uint32_t _targetNumHits;
    int64_t  _scoreThreshold;
    double   _thresholdBoostFactor;
};

}