summaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/memoryindex/memoryindex.h
blob: 242f575ea1d7bbe1940d1df34ad4a9ebac72acaa (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include "dictionary.h"
#include "documentinverter.h"
#include <vespa/document/fieldvalue/document.h>
#include <vespa/searchlib/queryeval/searchable.h>
#include <vector>
#include <vespa/vespalib/stllike/hash_set.h>

namespace search {

// Forward declaration: this header only refers to index::IndexBuilder by reference.
namespace index { class IndexBuilder; }

// Forward declaration: this header only refers to ISequencedTaskExecutor by reference.
class ISequencedTaskExecutor;

namespace memoryindex {

/**
 * Lock-free implementation of a memory-based index
 * using the document inverter and dictionary classes from searchlib.
 **/
class MemoryIndex : public queryeval::Searchable
{
private:
    index::Schema     _schema;
    ISequencedTaskExecutor &_invertThreads;
    ISequencedTaskExecutor &_pushThreads;
    DocumentInverter  _inverter0;
    DocumentInverter  _inverter1;
    DocumentInverter *_inverter;
    Dictionary        _dictionary;
    bool              _frozen;
    uint32_t          _maxDocId;
    uint32_t          _numDocs;
    vespalib::Lock    _lock;
    std::vector<bool> _hiddenFields;
    index::Schema::SP _prunedSchema;
    vespalib::hash_set<uint32_t> _indexedDocs; // documents in memory index
    const uint64_t    _staticMemoryFootprint;

    MemoryIndex(const MemoryIndex &) = delete;
    MemoryIndex(MemoryIndex &&) = delete;
    MemoryIndex &operator=(const MemoryIndex &) = delete;
    MemoryIndex &operator=(MemoryIndex &&) = delete;

    void removeDocumentHelper(uint32_t docId, const document::Document &doc);
    void updateMaxDocId(uint32_t docId) {
        if (docId > _maxDocId) {
            _maxDocId = docId;
        }
    }
    void incNumDocs() {
        ++_numDocs;
    }
    void decNumDocs() {
        if (_numDocs > 0) {
            --_numDocs;
        }
    }

    void flipInverter();

public:
    /**
     * Convenience type defs.
     */
    typedef std::unique_ptr<MemoryIndex> UP;
    typedef std::shared_ptr<MemoryIndex> SP;

    /**
     * Create a new memory index based on the given schema.
     *
     * @param schema the index schema to use
     **/
    MemoryIndex(const index::Schema &schema,
                ISequencedTaskExecutor &invertThreads,
                ISequencedTaskExecutor &pushThreads);

    /**
     * Class destructor.  Clean up washlist.
     */
    ~MemoryIndex();

    /**
     * Obtain the schema used by this index.
     *
     * @return schema used by this index
     **/
    const index::Schema &getSchema() const { return _schema; }

    /**
     * Check if this index is frozen.
     *
     * @return true if this index is frozen
     **/
    bool isFrozen() const { return _frozen; }

    /**
     * Insert a document into the index. If the document is already in
     * the index, the old version will be removed first.
     *
     * @param docId local document id.
     * @param doc the document to insert.
     **/
    void insertDocument(uint32_t docId, const document::Document &doc);

    /**
     * Remove a document from the index.
     *
     * @param docId local document id.
     **/
    void removeDocument(uint32_t docId);

    /**
     * Commits the inserts and removes since the last commit, making
     * them searchable. When commit is completed, onWriteDone goes out
     * of scope, scheduling completion callback.
     *
     * Callers can call pushThreads.sync() to wait for push completion.
     **/
    void commit(const std::shared_ptr<IDestructorCallback> &onWriteDone);

    /**
     * Freeze this index. Further index updates will be
     * discarded. Extra information kept to wash the posting lists
     * will be discarded.
     **/
    void freeze();

    /**
     * Dump the contents of this index into the given index builder.
     *
     * @param indexBuilder the builder to dump into
     **/
    void dump(index::IndexBuilder &indexBuilder);

    // implements Searchable
    queryeval::Blueprint::UP
    createBlueprint(const queryeval::IRequestContext & requestContext,
                    const queryeval::FieldSpec &field,
                    const query::Node &term) override;

    queryeval::Blueprint::UP
    createBlueprint(const queryeval::IRequestContext & requestContext,
                    const queryeval::FieldSpecList &fields,
                    const query::Node &term) override {
        return queryeval::Searchable::createBlueprint(requestContext, fields, term);
    }

    virtual uint32_t getDocIdLimit() const {
        // Used to get docId range.
        return _maxDocId + 1;
    }

    virtual uint32_t getNumDocs() const {
        return _numDocs;
    }

    virtual uint64_t getNumWords() const {
        return _dictionary.getNumUniqueWords();
    }

    void pruneRemovedFields(const index::Schema &schema);

    index::Schema::SP getPrunedSchema() const;

    /**
     * Gets an approximation of how much memory the index uses.
     *
     * @return approximately how much memory is used by the index.
     **/
    MemoryUsage getMemoryUsage() const;

    uint64_t getStaticMemoryFootprint() const { return _staticMemoryFootprint; }
};

} // namespace memoryindex
} // namespace search