aboutsummaryrefslogtreecommitdiffstats
path: root/searchcore/src/vespa/searchcore/proton/matching/querynodes.h
blob: f89c42e3e621435717844236889c42e3790ac019 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include <vespa/searchlib/queryeval/field_spec.h>
#include <vespa/searchlib/fef/iindexenvironment.h>
#include <vespa/searchlib/fef/itermdata.h>
#include <vespa/searchlib/fef/matchdatalayout.h>
#include <vespa/searchlib/query/tree/intermediatenodes.h>
#include <vespa/searchlib/query/tree/termnodes.h>
#include <vespa/searchlib/query/tree/simplequery.h>
#include <vespa/searchlib/query/weight.h>
#include <vespa/vespalib/stllike/hash_set.h>
#include <memory>
#include <vector>

namespace proton::matching {

class ViewResolver;

// Term data attached to proton query term nodes; implements the
// search::fef::ITermData interface. It owns one FieldEntry per field and is
// non-copyable (copy construction and copy assignment are deleted below).
class ProtonTermData : public search::fef::ITermData
{
public:
    using FieldSpec = search::queryeval::FieldSpec;
    using ITermFieldData = search::fef::ITermFieldData;
    using TermFieldHandle = search::fef::TermFieldHandle;
    using MatchDataDetails = search::fef::MatchDataDetails;

    // Per-field data for a term: an ITermFieldData that additionally carries
    // the FieldSpec for the field and an attribute_field flag.
    struct FieldEntry final : ITermFieldData {
        FieldSpec _field_spec;
        // Starts out false (see constructor).
        // NOTE(review): presumably set when the field is an attribute - confirm in the .cpp.
        bool attribute_field;

        // Creates an entry for the field with the given name and id.
        // The FieldSpec is built with search::fef::IllegalHandle
        // (presumably meaning "no match data handle allocated yet" - TODO confirm).
        FieldEntry(const vespalib::string &name, uint32_t fieldId, bool is_filter) noexcept
            : ITermFieldData(fieldId),
              _field_spec(name, fieldId, search::fef::IllegalHandle, is_filter),
              attribute_field(false)
        {}

        // Read-only access to the wrapped FieldSpec.
        [[nodiscard]] const FieldSpec & fieldSpec() const noexcept {
            return _field_spec;
        }
        // Convenience overload: same as getHandle(MatchDataDetails::Normal).
        [[nodiscard]] TermFieldHandle getHandle() const { return getHandle(MatchDataDetails::Normal); }
        // Defined out-of-line (not visible in this header).
        [[nodiscard]] TermFieldHandle getHandle(MatchDataDetails requested_details) const override;
        // Field name, as stored in the FieldSpec.
        [[nodiscard]] const vespalib::string & getName() const noexcept { return _field_spec.getName(); }
        // Filter flag, as stored in the FieldSpec.
        [[nodiscard]] bool is_filter() const noexcept { return _field_spec.isFilter(); }
    };

private:
    // The fields this term has entries for; indexed by field(), counted by numFields().
    vespalib::SmallVector<FieldEntry, 1u> _fields;

    // Defined out-of-line.
    // NOTE(review): presumably pushes the given counts to every entry in _fields - confirm in the .cpp.
    void propagate_document_frequency(uint32_t matching_count_doc, uint32_t total_doc_count);

protected:
    // Defined out-of-line. Called by ProtonTermBase::resolve(), which supplies
    // the view of the wrapped query node and the forceFilter flag.
    void resolve(const ViewResolver &resolver, const search::fef::IIndexEnvironment &idxEnv,
                 const vespalib::string &view, bool forceFilter);

public:
    ProtonTermData() noexcept;
    ProtonTermData(const ProtonTermData &) = delete;
    ProtonTermData & operator = (const ProtonTermData &) = delete;
    ~ProtonTermData() override;
    // The three functions below are defined out-of-line; their behaviour is
    // not visible in this header.
    void resolveFromChildren(const std::vector<search::query::Node *> &children);
    void allocateTerms(search::fef::MatchDataLayout &mdl);
    void setDocumentFrequency(uint32_t estHits, uint32_t numDocs);

    // ITermData interface
    // Default: no query tensor name. Not final, so derived types may override.
    [[nodiscard]] std::optional<vespalib::string> query_tensor_name() const override { return std::nullopt; }
    // Number of field entries held by this term.
    [[nodiscard]] size_t numFields() const final { return _fields.size(); }
    // Entry at index i; the index is passed straight to _fields[i] with no range check here.
    [[nodiscard]] const FieldEntry &field(size_t i) const final { return _fields[i]; }
    // Defined out-of-line; looks up an entry by field id.
    [[nodiscard]] const FieldEntry *lookupField(uint32_t fieldId) const final;
};

// Number of terms a query node stands for. The generic answer is one,
// whatever the node type; the argument itself is never inspected.
template <typename NodeType>
inline uint32_t
numTerms(const NodeType &)
{
    return 1u;
}

// Phrase specialization: the term count is the number of child nodes.
template <>
inline uint32_t
numTerms<search::query::Phrase>(const search::query::Phrase &n)
{
    const auto child_count = n.getChildren().size();
    // Same narrowing the implicit return conversion would perform.
    return static_cast<uint32_t>(child_count);
}

// Combines a query term node type (Base) with ProtonTermData, and answers
// the ITermData accessors that can be served directly by Base.
template <typename Base>
struct ProtonTermBase : public Base,
                        public ProtonTermData
{
    using Base::Base;  // inherit every constructor of the wrapped node type
    ~ProtonTermBase() override;

    // Resolves this term's view via ProtonTermData::resolve(). Filter
    // handling is forced when the node reports that it does not use
    // position data.
    void resolve(const ViewResolver &resolver, const search::fef::IIndexEnvironment &idxEnv)
    {
        const bool filter_only = !Base::usePositionData();
        ProtonTermData::resolve(resolver, idxEnv, Base::getView(), filter_only);
    }

    // ITermData interface

    // Term count as computed by numTerms() for the wrapped node type.
    [[nodiscard]] uint32_t getPhraseLength() const final {
        return numTerms<Base>(*this);
    }

    // Weight of the wrapped node.
    [[nodiscard]] search::query::Weight getWeight() const final {
        return Base::getWeight();
    }

    // The wrapped node's id serves as the unique id.
    [[nodiscard]] uint32_t getUniqueId() const final {
        return Base::getId();
    }
};

// Out-of-class, defaulted definition of the destructor declared in ProtonTermBase.
template <typename Base>
ProtonTermBase<Base>::~ProtonTermBase() = default;

// Final, concrete term node type: ProtonTermBase<Base> with all of its
// constructors inherited and nothing else added.
template <typename Base>
struct ProtonTerm final : public ProtonTermBase<Base> {
    using ProtonTermBase<Base>::ProtonTermBase;
    // Overrides the virtual destructor of ProtonTermBase<Base>. Spelled with
    // 'override' to match ~ProtonTermBase() and to let the compiler verify
    // that the base destructor really is virtual.
    ~ProtonTerm() override;
};

// Out-of-class, defaulted definition of the destructor declared in ProtonTerm.
template <typename Base>
ProtonTerm<Base>::~ProtonTerm() = default;

// Node types that proton uses unchanged: plain aliases of the
// search::query::Simple* classes, with no ProtonTermData mixed in by this header.
using ProtonAnd =         search::query::SimpleAnd;
using ProtonAndNot =      search::query::SimpleAndNot;
using ProtonNear =        search::query::SimpleNear;
using ProtonONear =       search::query::SimpleONear;
using ProtonOr =          search::query::SimpleOr;
using ProtonRank =        search::query::SimpleRank;
using ProtonWeakAnd =     search::query::SimpleWeakAnd;
using ProtonTrue =        search::query::SimpleTrue;
using ProtonFalse =       search::query::SimpleFalse;

// Equiv node carrying ProtonTermData, plus a MatchDataLayout of its own.
struct ProtonEquiv final : public ProtonTermBase<search::query::Equiv> {
    // NOTE(review): presumably the layout used for the equiv's children - confirm at the use sites.
    search::fef::MatchDataLayout children_mdl;
    using ProtonTermBase::ProtonTermBase;
};

// SameElement node carrying ProtonTermData; only inherits the base constructors.
struct ProtonSameElement final : public ProtonTermBase<search::query::SameElement> {
    using ProtonTermBase::ProtonTermBase;
};

// NearestNeighborTerm node carrying ProtonTermData. Unlike the default in
// ProtonTermData, it reports a query tensor name.
struct ProtonNearestNeighborTerm : public ProtonTermBase<search::query::NearestNeighborTerm> {
    using ProtonTermBase::ProtonTermBase;

    // Exposes the query tensor name held by the wrapped NearestNeighborTerm.
    [[nodiscard]] std::optional<vespalib::string>
    query_tensor_name() const override
    {
        return search::query::NearestNeighborTerm::get_query_tensor_name();
    }
};

// Term node types that need nothing beyond ProtonTermBase: each is the final
// ProtonTerm wrapper instantiated for the corresponding search::query node.
using ProtonLocationTerm = ProtonTerm<search::query::LocationTerm>;
using ProtonNumberTerm =   ProtonTerm<search::query::NumberTerm>;
using ProtonPhrase =       ProtonTerm<search::query::Phrase>;

using ProtonPrefixTerm =      ProtonTerm<search::query::PrefixTerm>;
using ProtonRangeTerm =       ProtonTerm<search::query::RangeTerm>;
using ProtonStringTerm =      ProtonTerm<search::query::StringTerm>;
using ProtonSubstringTerm =   ProtonTerm<search::query::SubstringTerm>;
using ProtonSuffixTerm =      ProtonTerm<search::query::SuffixTerm>;
using ProtonWeightedSetTerm = ProtonTerm<search::query::WeightedSetTerm>;
using ProtonDotProduct =      ProtonTerm<search::query::DotProduct>;
using ProtonWandTerm =        ProtonTerm<search::query::WandTerm>;
using ProtonPredicateQuery =  ProtonTerm<search::query::PredicateQuery>;
using ProtonRegExpTerm =      ProtonTerm<search::query::RegExpTerm>;
using ProtonFuzzyTerm =       ProtonTerm<search::query::FuzzyTerm>;

// Maps each generic node-kind name to the proton node type declared in this
// header, bundled as member aliases of a single struct.
// NOTE(review): presumably passed as the node-types parameter to the query
// builder / tree creator templates - confirm at the use sites.
struct ProtonNodeTypes {
    using And =                 ProtonAnd;
    using AndNot =              ProtonAndNot;
    using Equiv =               ProtonEquiv;
    using LocationTerm =        ProtonLocationTerm;
    using Near =                ProtonNear;
    using NumberTerm =          ProtonNumberTerm;
    using ONear =               ProtonONear;
    using Or =                  ProtonOr;
    using Phrase =              ProtonPhrase;
    using SameElement =         ProtonSameElement;
    using PrefixTerm =          ProtonPrefixTerm;
    using RangeTerm =           ProtonRangeTerm;
    using Rank =                ProtonRank;
    using StringTerm =          ProtonStringTerm;
    using SubstringTerm =       ProtonSubstringTerm;
    using SuffixTerm =          ProtonSuffixTerm;
    using WeakAnd =             ProtonWeakAnd;
    using WeightedSetTerm =     ProtonWeightedSetTerm;
    using DotProduct =          ProtonDotProduct;
    using WandTerm =            ProtonWandTerm;
    using PredicateQuery =      ProtonPredicateQuery;
    using RegExpTerm =          ProtonRegExpTerm;
    using NearestNeighborTerm = ProtonNearestNeighborTerm;
    using TrueQueryNode =       ProtonTrue;
    using FalseQueryNode =      ProtonFalse;
    using FuzzyTerm =           ProtonFuzzyTerm;
};

}