// Origin: root/searchlib/src/vespa/searchlib/docstore/idocumentstore.h (blob bdb815d6acf57f796b2115d20251ccb9553658f0)
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include "idatastore.h"
#include <vespa/searchlib/common/i_compactable_lid_space.h>
#include <vespa/searchlib/query/base.h>
#include <future>

// Forward declarations; only pointers and references to these types
// are needed by the interfaces declared below.
namespace document {

class Document;
class DocumentTypeRepo;

} // namespace document

namespace vespalib {

struct CacheStats;
class nbostream;

} // namespace vespalib

namespace search {

/**
 * Visitor interface accepted by IDocumentStore::accept() when visiting
 * all documents found in the document store.
 **/
class IDocumentStoreReadVisitor
{
public:
    /** Shared pointer to a document handed to the visitor. */
    using DocumentSP = std::shared_ptr<document::Document>;

    virtual ~IDocumentStoreReadVisitor() = default;

    /**
     * Called with a local ID together with a document.
     * @param lid The local ID.
     * @param doc The document passed for that local ID.
     **/
    virtual void visit(uint32_t lid, const DocumentSP & doc) = 0;

    /**
     * Called with a local ID only.
     * NOTE(review): presumably used for local IDs that have no document
     * to hand over -- confirm against the implementations of accept().
     * @param lid The local ID.
     **/
    virtual void visit(uint32_t lid) = 0;
};

/**
 * Visitor interface accepted by the rewrite variant of
 * IDocumentStore::accept() when visiting all documents found in the
 * document store.
 **/
class IDocumentStoreRewriteVisitor
{
public:
    /** Shared pointer to a document handed to the visitor. */
    using DocumentSP = std::shared_ptr<document::Document>;

    virtual ~IDocumentStoreRewriteVisitor() = default;

    /**
     * Called with a local ID together with a document.
     * NOTE(review): the name suggests the visitor may modify the document
     * before it is stored again -- confirm against the implementations.
     * @param lid The local ID.
     * @param doc The document passed for that local ID.
     **/
    virtual void visit(uint32_t lid, const DocumentSP & doc) = 0;
};

/**
 * Progress callback interface passed to IDocumentStore::accept()
 * alongside the visitor itself.
 **/
class IDocumentStoreVisitorProgress
{
public:
    virtual ~IDocumentStoreVisitorProgress() = default;

    /**
     * Receive a progress report.
     * @param progress The reported progress value. Its scale is not
     *                 specified by this interface.
     **/
    virtual void updateProgress(double progress) = 0;
};

/**
 * Visitor interface used by IDocumentStore::visit() to hand over
 * documents for a selected set of local IDs.
 **/
class IDocumentVisitor
{
public:
    /** Owning pointer to a document; ownership is transferred to the visitor. */
    using DocumentUP = std::unique_ptr<document::Document>;

    virtual ~IDocumentVisitor() = default;

    /**
     * Called with a local ID and the document for it.
     * @param lid The local ID.
     * @param doc The document, passed by value so the visitor takes ownership.
     **/
    virtual void visit(uint32_t lid, DocumentUP doc) = 0;

    /**
     * Tell whether caching is allowed for this visit.
     * NOTE(review): presumably consulted by the document store to decide
     * whether visited data may go through its cache -- confirm.
     **/
    virtual bool allowVisitCaching() const = 0;
};

/**
 * Simple document store that contains serialized Document instances.
 * updates will be held in memory until flush() is called.
 * Uses a Local ID as key.
 **/
class IDocumentStore : public common::ICompactableLidSpace
{
public:
    /**
     * Convenience typedef for a shared pointer to this class.
     **/
    using SP = std::shared_ptr<IDocumentStore>;
    using LidVector = std::vector<uint32_t>;
    using DocumentUP = std::unique_ptr<document::Document>;

    /**
     * Make a Document from a stored serialized data blob.
     * @param lid The local ID associated with the document.
     * @return NULL if there is no document associated with the lid.
     **/
    virtual DocumentUP read(DocumentIdT lid, const document::DocumentTypeRepo &repo) const = 0;
    virtual void visit(const LidVector & lidVector, const document::DocumentTypeRepo &repo, IDocumentVisitor & visitor) const;

    /**
     * Serialize and store a document.
     * @param doc The document to store
     * @param lid The local ID associated with the document
     **/
    virtual void write(uint64_t syncToken, DocumentIdT lid, const document::Document& doc) = 0;
    virtual void write(uint64_t synkToken, DocumentIdT lid, const vespalib::nbostream & os) = 0;

    /**
     * Mark a document as removed. A later read() will return NULL for the given lid.
     * @param lid The local ID associated with the document
     **/
    virtual void remove(uint64_t syncToken, DocumentIdT lid) = 0;

    /**
     * Flush all in-memory updates to disk.
     **/
    virtual void flush(uint64_t syncToken) = 0;

    virtual uint64_t initFlush(uint64_t synctoken) = 0;

    /**
     * If possible compact the disk.
     **/
    virtual void compactBloat(uint64_t syncToken) = 0;
    virtual void compactSpread(uint64_t syncToken) = 0;

    /**
     * The sync token used for the last successful flush() operation,
     * or 0 if no flush() has been performed yet.
     * @return Last flushed sync token.
     **/
    virtual uint64_t lastSyncToken() const = 0;

    /*
     * The sync token used for last write operation.
     */
    virtual uint64_t tentativeLastSyncToken() const = 0;

    /**
     * The time of the last flush operation,
     * or 0 if no flush has been performed yet.
     * @return Time of last flush.
     **/
    virtual vespalib::system_time getLastFlushTime() const = 0;

    /**
     * Get the number of entries (including removed IDs
     * or gaps in the local ID sequence) in the document store.
     */
    virtual uint32_t getDocIdLimit() const = 0;

    /**
     * Calculate memory used by this instance.  During flush() actual
     * memory usage may be approximately twice the reported amount.
     * @return memory usage (in bytes)
     **/
    virtual size_t memoryUsed() const = 0;

    /**
     * Calculates memory that is used for meta data by this instance. Calling
     * flush() does not free this memory.
     * @return memory usage (in bytes)
     **/
    virtual size_t memoryMeta() const = 0;

    /**
     * Calculates how much disk is used
     * @return disk space used.
     */
    virtual size_t getDiskFootprint() const = 0;
    /**
     * Calculates how much wasted space there is.
     * @return disk bloat.
     */
    virtual size_t getDiskBloat() const = 0;

    /**
     * Calculates the gain from keeping buckets close. It is converted to diskbloat
     * so it can be prioritized accordingly.
     * @return spread as disk bloat.
     */
    virtual size_t getMaxSpreadAsBloat() const = 0;

    /**
     * Returns statistics about the cache.
     */
    virtual vespalib::CacheStats getCacheStats() const = 0;

    /**
     * Returns the base directory from which all structures are stored.
     **/
    virtual const vespalib::string & getBaseDir() const = 0;

    /**
     * Visit all documents found in document store.
     */
    virtual void
    accept(IDocumentStoreReadVisitor &visitor,
           IDocumentStoreVisitorProgress &visitorProgress,
           const document::DocumentTypeRepo &repo) = 0;

    /**
     * Visit all documents found in document store.
     */
    virtual void
    accept(IDocumentStoreRewriteVisitor &visitor,
           IDocumentStoreVisitorProgress &visitorProgress,
           const document::DocumentTypeRepo &repo) = 0;

    /**
     * Return cost of visiting all documents found in document store.
     */
    virtual double getVisitCost() const = 0;

    /*
     * Return brief stats for data store.
     */
    virtual DataStoreStorageStats getStorageStats() const = 0;

    /*
     * Return the memory usage for document store.
     */
    virtual vespalib::MemoryUsage getMemoryUsage() const = 0;

    /*
     * Return detailed stats about underlying files for data store.
     */
    virtual std::vector<DataStoreFileChunkStats> getFileChunkStats() const = 0;
};

} // namespace search