aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/predicate/predicate_index.h
blob: 4f1281a649f94fe187aa686e11442acd3b9b83db (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include "common.h"
#include "document_features_store.h"
#include "predicate_interval_store.h"
#include "simple_index.h"
#include "predicate_interval.h"
#include <vespa/searchlib/common/bitvectorcache.h>
#include <vespa/vespalib/data/databuffer.h>
#include <vespa/vespalib/stllike/string.h>
#include <unordered_map>

namespace search::predicate {

// Forward declaration only: this header uses the type solely as a
// const reference parameter (PredicateIndex::indexDocument), so the
// full definition is not required here.
struct PredicateTreeAnnotations;

/**
 * PredicateIndex keeps an index of boolean constraints for use with
 * the interval algorithm. It is the central component of
 * PredicateAttribute, and PredicateBlueprint uses it to obtain
 * posting lists for matching.
 *
 * The class derives from PopulateInterface (see lookup()) and owns two
 * SimpleIndex instances, an interval store, a set of zero-constraint
 * documents, a document features store and a bit vector cache.
 * Apart from the small inline accessors, all member functions are
 * defined out of line.
 */
class PredicateIndex : public PopulateInterface {
    // Both aliases name the same underlying type; they only differ in
    // the role the index plays (_interval_index vs. _bounds_index).
    using IntervalIndex = SimpleIndex<vespalib::datastore::EntryRef>;
    using BoundsIndex = SimpleIndex<vespalib::datastore::EntryRef>;
    // Hash map from a 64-bit key to a list of IntervalT entries.
    // NOTE(review): the key is presumably the feature value passed to
    // addPosting() - confirm against the implementation.
    template <typename IntervalT>
    using FeatureMap = std::unordered_map<uint64_t, std::vector<IntervalT>>;
    using generation_t = vespalib::GenerationHandler::generation_t;
    // Class-local shorthand for std::optional.
    template <typename T>
    using optional = std::optional<T>;

public:
    using UP = std::unique_ptr<PredicateIndex>;
    using GenerationHandler = vespalib::GenerationHandler;
    using GenerationHolder = vespalib::GenerationHolder;
    // Iterator types re-exported from the underlying SimpleIndex.
    using BTreeIterator = SimpleIndex<vespalib::datastore::EntryRef>::BTreeIterator;
    using VectorIterator = SimpleIndex<vespalib::datastore::EntryRef>::VectorIterator;
private:
    // Data members are initialized in declaration order; keep that in
    // mind before reordering them.
    uint32_t                  _arity;
    // Stored as a reference, so the referenced provider has to outlive
    // this object.
    const DocIdLimitProvider &_limit_provider;
    IntervalIndex             _interval_index;
    BoundsIndex               _bounds_index;
    PredicateIntervalStore    _interval_store;
    // Backing set for getZeroConstraintDocs().
    BTreeSet                  _zero_constraint_docs;

    DocumentFeaturesStore     _features_store;
    // mutable: accessed from const member functions such as
    // requireCachePopulation().
    mutable BitVectorCache    _cache;

    // Internal helpers, templated on the interval type; defined out of line.
    template <typename IntervalT>
    void addPosting(uint64_t feature, uint32_t doc_id, vespalib::datastore::EntryRef ref);

    template <typename IntervalT>
    void indexDocumentFeatures(uint32_t doc_id, const FeatureMap<IntervalT> &interval_map);

public:
    // Constructs an index from configuration and an explicitly given arity.
    PredicateIndex(GenerationHolder &genHolder,
                   const DocIdLimitProvider &limit_provider,
                   const SimpleIndexConfig &simple_index_config, uint32_t arity);
    // Deserializes PredicateIndex from buffer.
    // The observer can be used to gain some insight into what has been added to the index.
    // NOTE(review): there is no arity parameter on this overload; presumably
    // the arity is read from the buffer - confirm in the implementation.
    PredicateIndex(GenerationHolder &genHolder,
                   const DocIdLimitProvider &limit_provider,
                   const SimpleIndexConfig &simple_index_config, vespalib::DataBuffer &buffer,
                   SimpleIndexDeserializeObserver<> & observer, uint32_t version);

    ~PredicateIndex() override;
    // Writes the index to the given buffer (counterpart of the
    // deserializing constructor above).
    void serialize(vespalib::DataBuffer &buffer) const;
    void onDeserializationCompleted();

    // Document-level mutation entry points.
    void indexEmptyDocument(uint32_t doc_id);
    void indexDocument(uint32_t doc_id, const PredicateTreeAnnotations &annotations);
    void removeDocument(uint32_t doc_id);
    void commit();
    // Generation bookkeeping, expressed in GenerationHandler generations.
    void reclaim_memory(generation_t oldest_used_gen);
    void assign_generation(generation_t current_gen);
    vespalib::MemoryUsage getMemoryUsage() const;

    // Returns _arity; note the implicit uint32_t -> int conversion.
    int getArity() const { return _arity; }

    // Returns, by value, the frozen view of the zero-constraint document set.
    // NOTE(review): the top-level const on a by-value return type prevents
    // callers from moving out of the result.
    const ZeroConstraintDocs getZeroConstraintDocs() const {
        return _zero_constraint_docs.getFrozenView();
    }

    // Read-only access to the interval index.
    const IntervalIndex &getIntervalIndex() const {
        return _interval_index;
    }

    // Read-only access to the bounds index.
    const BoundsIndex &getBoundsIndex() const {
        return _bounds_index;
    }

    // Read-only access to the interval store.
    const PredicateIntervalStore &getIntervalStore() const {
        return _interval_store;
    }

    // Bit vector cache related operations; defined out of line.
    void populateIfNeeded(size_t doc_id_limit);
    BitVectorCache::KeySet lookupCachedSet(const BitVectorCache::KeyAndCountSet & keys) const;
    void computeCountVector(BitVectorCache::KeySet & keys, BitVectorCache::CountVector & v) const;

    /*
     * Adjust size of structures to have space for docId.
     */
    void adjustDocIdLimit(uint32_t docId);
    // Overrides the lookup declared by the PopulateInterface base class.
    PopulateInterface::Iterator::UP lookup(uint64_t key) const override;
    // Exposed for testing
    // Forwards to BitVectorCache::requirePopulation() on the mutable _cache.
    void requireCachePopulation() const { _cache.requirePopulation(); }
};

// Explicit instantiation declaration: translation units including this
// header will not implicitly instantiate SimpleIndex<EntryRef>.
// NOTE(review): the matching explicit instantiation definition is expected
// to live in a .cpp file that is not visible from here - confirm.
extern template class SimpleIndex<vespalib::datastore::EntryRef>;

}