aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
blob: 7dfc7fe3a785d2ebdf3c4c67b3ef4e9db12771d8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include "attribute_object_visitor.h"
#include "i_direct_posting_store.h"
#include <vespa/searchcommon/attribute/iattributevector.h>
#include <vespa/searchlib/common/matching_elements_fields.h>
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/searchlib/queryeval/blueprint.h>
#include <vespa/searchlib/queryeval/flow_tuning.h>
#include <vespa/searchlib/queryeval/field_spec.h>
#include <vespa/searchlib/queryeval/matching_elements_search.h>
#include <variant>

namespace search::queryeval { class SearchIterator; }

namespace search::attribute {

/**
 * Blueprint used for multi-term query operators as InTerm, WeightedSetTerm or DotProduct
 * over an attribute which supports the IDocidPostingStore or IDocidWithWeightPostingStore interface.
 *
 * This uses access to low-level posting lists, which speeds up query execution.
 */
template <typename PostingStoreType, typename SearchType>
class DirectMultiTermBlueprint : public queryeval::ComplexLeafBlueprint
{
private:
    std::vector<int32_t>                           _weights;
    std::vector<IDirectPostingStore::LookupResult> _terms;
    const IAttributeVector                        &_iattr;
    const PostingStoreType                        &_attr;
    vespalib::datastore::EntryRef                  _dictionary_snapshot;

    using IteratorType = typename PostingStoreType::IteratorType;
    using IteratorWeights = std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>>;

    bool use_hash_filter(bool strict) const;

    IteratorWeights create_iterators(std::vector<IteratorType>& btree_iterators,
                                     std::vector<std::unique_ptr<queryeval::SearchIterator>>& bitvectors,
                                     bool use_bitvector_when_available,
                                     fef::TermFieldMatchData& tfmd, bool strict) const;

    std::unique_ptr<queryeval::SearchIterator> combine_iterators(std::unique_ptr<queryeval::SearchIterator> multi_term_iterator,
                                                                 std::vector<std::unique_ptr<queryeval::SearchIterator>>&& bitvectors,
                                                                 bool strict) const;

    template <bool filter_search>
    std::unique_ptr<queryeval::SearchIterator> create_search_helper(const fef::TermFieldMatchDataArray& tfmda,
                                                                    bool strict) const;

public:
    DirectMultiTermBlueprint(const queryeval::FieldSpec &field, const IAttributeVector &iattr, const PostingStoreType &attr, size_t size_hint);
    ~DirectMultiTermBlueprint() override;

    void addTerm(const IDirectPostingStore::LookupKey & key, int32_t weight, HitEstimate & estimate) {
        IDirectPostingStore::LookupResult result = _attr.lookup(key, _dictionary_snapshot);
        HitEstimate childEst(result.posting_size, (result.posting_size == 0));
        if (!childEst.empty) {
            if (estimate.empty) {
                estimate = childEst;
            } else {
                estimate.estHits += childEst.estHits;
            }
            _weights.push_back(weight);
            _terms.push_back(result);
        }
    }
    void complete(HitEstimate estimate) {
        setEstimate(estimate);
    }

    void sort(queryeval::InFlow in_flow, const Options &) override {
        strict(in_flow.strict());
    }

    queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override {
        using OrFlow = search::queryeval::OrFlow;
        struct MyAdapter {
            uint32_t docid_limit;
            MyAdapter(uint32_t docid_limit_in) noexcept : docid_limit(docid_limit_in) {}
            double estimate(const IDirectPostingStore::LookupResult &term) const noexcept {
                return abs_to_rel_est(term.posting_size, docid_limit);
            }
            double cost(const IDirectPostingStore::LookupResult &) const noexcept {
                return search::queryeval::flow::btree_cost();
            }
            double strict_cost(const IDirectPostingStore::LookupResult &term) const noexcept {
                double rel_est = abs_to_rel_est(term.posting_size, docid_limit);
                return search::queryeval::flow::btree_strict_cost(rel_est);
            }
        };
        double est = OrFlow::estimate_of(MyAdapter(docid_limit), _terms);
        return {est, OrFlow::cost_of(MyAdapter(docid_limit), _terms, false),
                OrFlow::cost_of(MyAdapter(docid_limit), _terms, true) + queryeval::flow::heap_cost(est, _terms.size())};
    }

    std::unique_ptr<queryeval::SearchIterator> createLeafSearch(const fef::TermFieldMatchDataArray &tfmda) const override;

    std::unique_ptr<queryeval::SearchIterator> createFilterSearch(FilterConstraint constraint) const override;
    std::unique_ptr<queryeval::MatchingElementsSearch> create_matching_elements_search(const MatchingElementsFields &fields) const override {
        if (fields.has_field(_iattr.getName())) {
            return queryeval::MatchingElementsSearch::create(_iattr, _dictionary_snapshot, vespalib::ConstArrayRef<IDirectPostingStore::LookupResult>(_terms));
        } else {
            return {};
        }
    }
    void visitMembers(vespalib::ObjectVisitor& visitor) const override {
        LeafBlueprint::visitMembers(visitor);
        visit_attribute(visitor, _iattr);
    }
};

}