aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/common/idocumentmetastore.h
blob: 729bede1e7139d17299b3ddddfd7321b739330a7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include "lid_usage_stats.h"
#include <vespa/document/base/globalid.h>
#include <vespa/document/bucket/bucketid.h>
#include <vector>
#include <memory>

namespace search {

/**
 * Meta data record kept for one document: its local document id (lid),
 * timestamp, bucket id, global document id (gid) and a removed flag.
 **/
struct DocumentMetaData {
    using DocId = uint32_t;
    using Vector = std::vector<DocumentMetaData>;

    // Defaults describe an empty record; valid() is false for it since lid == 0.
    DocId lid = 0;
    uint64_t timestamp = 0;
    document::BucketId bucketId{};
    document::GlobalId gid{};
    bool removed = false;

    /** Creates an empty record; every member takes its default shown above. */
    DocumentMetaData() noexcept
    { }

    /** Creates a record for a document that is not flagged as removed. */
    DocumentMetaData(DocId lid_,
                     uint64_t timestamp_,
                     document::BucketId bucketId_,
                     const document::GlobalId &gid_) noexcept
        : lid(lid_),
          timestamp(timestamp_),
          bucketId(bucketId_),
          gid(gid_)
    { }

    /** Creates a record with every field, including the removed flag, given explicitly. */
    DocumentMetaData(DocId lid_,
                     uint64_t timestamp_,
                     document::BucketId bucketId_,
                     const document::GlobalId &gid_,
                     bool removed_) noexcept
        : lid(lid_),
          timestamp(timestamp_),
          bucketId(bucketId_),
          gid(gid_),
          removed(removed_)
    { }

    /**
     * Returns true when the lid is non-zero, the timestamp is non-zero
     * and the bucket id reports isSet().
     **/
    bool valid() const {
        if (lid == 0 || timestamp == 0) {
            return false;
        }
        return bucketId.isSet();
    }
};

namespace queryeval { class Blueprint; }

class IGidToLidMapperVisitor;
class BitVector;


/**
 * Read interface for a document meta store that provides mapping between
 * global document id (gid) and local document id (lid) with additional
 * meta data per document.
 *
 * All member functions are const and pure virtual; concrete stores
 * implement them.
 **/
struct IDocumentMetaStore {
    // Local document id (lid); same underlying type as DocumentMetaData::DocId.
    using DocId = uint32_t;
    // Global document id (gid).
    using GlobalId = document::GlobalId;
    using BucketId = document::BucketId;
    // Same underlying type as DocumentMetaData::timestamp.
    using Timestamp = uint64_t;

    // Virtual so that implementations can be destroyed through this interface.
    virtual ~IDocumentMetaStore() = default;

    /**
     * Returns a reference to a bit vector describing the valid lids.
     * NOTE(review): presumably one bit is set per lid currently in use;
     * confirm against the implementations. The lifetime of the returned
     * reference is not specified by this interface.
     **/
    virtual const BitVector & getValidLids() const = 0;

    /**
     * Retrieves the gid associated with the given lid.
     * The gid is delivered through the 'gid' output parameter.
     * Returns true if found, false otherwise. This interface does not
     * specify the content of 'gid' when false is returned.
     **/
    virtual bool getGid(DocId lid, GlobalId &gid) const = 0;
    /**
     * Retrieves the gid associated with the given lid, even if the lid has moved.
     * The gid is delivered through the 'gid' output parameter.
     * Returns true if found, false otherwise.
     **/
    virtual bool getGidEvenIfMoved(DocId lid, GlobalId &gid) const = 0;

    /**
     * Retrieves the lid associated with the given gid.
     * The lid is delivered through the 'lid' output parameter.
     * Returns true if found, false otherwise. This interface does not
     * specify the content of 'lid' when false is returned.
     **/
    virtual bool getLid(const GlobalId &gid, DocId &lid) const = 0;

    /**
     * Retrieves the meta data for the document with the given gid.
     * NOTE(review): presumably a record for which
     * DocumentMetaData::valid() is false is returned when the gid is
     * unknown; confirm against the implementations.
     **/
    virtual DocumentMetaData getMetaData(const GlobalId &gid) const = 0;

    /**
     * Retrieves meta data for all documents contained in the given bucket.
     * The records are delivered through the 'result' output parameter.
     * NOTE(review): whether 'result' is cleared first or appended to is
     * not specified here; confirm against the implementations.
     **/
    virtual void getMetaData(const BucketId &bucketId, DocumentMetaData::Vector &result) const = 0;

    /**
     * Returns the lid following the largest lid used in the store.
     *
     * As long as the reader holds a read guard on the document meta
     * store, we guarantee that the meta store info for lids that were
     * valid when calling this method will remain valid while the
     * guard is held, i.e. lids for newly removed documents are not
     * reused while the read guard is held.
     *
     * Access to lids beyond the returned limit is not safe.
     *
     * The return value can be used as lid range for queries when
     * attribute writer threads are synced, and is propagated as such
     * when visibility delay is nonzero and forceCommit() method is
     * called regularly on feed views, cf. proton::FastAccessFeedView.
     *
     * In the future, this method might be renamed to getReaderDocIdLimit().
     **/
    virtual DocId getCommittedDocIdLimit() const = 0;

    /**
     * Returns the number of used lids in this store.
     */
    virtual DocId getNumUsedLids() const = 0;

    /**
     * Returns the number of active lids in this store.
     * This should be <= getNumUsedLids().
     * Active lids correspond to documents in active buckets.
     */
    virtual DocId getNumActiveLids() const = 0;

    /**
     * Returns stats on the usage and availability of lids in this store.
     */
    virtual LidUsageStats getLidUsageStats() const = 0;

    /**
     * Creates a white list blueprint that returns a search iterator
     * that gives hits for all documents that should be visible.
     * Ownership of the blueprint is transferred to the caller.
     **/
    virtual std::unique_ptr<queryeval::Blueprint> createWhiteListBlueprint() const = 0;

    /**
     * Give read access to the current generation of the metastore.
     **/
    virtual uint64_t getCurrentGeneration() const = 0;

    /**
     * Applies the given visitor to this store.
     * NOTE(review): presumably the visitor is invoked once for every
     * gid to lid mapping held by the store; confirm against
     * IGidToLidMapperVisitor and the implementations.
     **/
    virtual void foreach(const IGidToLidMapperVisitor &visitor) const = 0;
};


}