blob: d89bdad5bb814837fa0f8dca7640e6a627f042bb (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
|
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
#include <vespa/vespalib/util/monitored_refcount.h>
#include <cstdint>
#include <memory>
#include <vector>
namespace document {
class DataType;
class Document;
class DocumentType;
class Field;
class FieldValue;
}
namespace vespalib { class IDestructorCallback; }
namespace search::memoryindex {
class DocumentInverterContext;
class FieldInverter;
class UrlFieldInverter;
class IFieldIndexCollection;
/**
* Class used to invert the fields for a set of documents, preparing for pushing changes info field indexes.
*
* Each text and uri field in the document is handled separately by a FieldInverter and UrlFieldInverter.
*/
class DocumentInverter {
private:
DocumentInverter(const DocumentInverter &) = delete;
DocumentInverter &operator=(const DocumentInverter &) = delete;
DocumentInverterContext& _context;
using LidVector = std::vector<uint32_t>;
using OnWriteDoneType = const std::shared_ptr<vespalib::IDestructorCallback> &;
std::vector<std::unique_ptr<FieldInverter>> _inverters;
std::vector<std::unique_ptr<UrlFieldInverter>> _urlInverters;
vespalib::MonitoredRefCount _ref_count;
public:
/**
* Create a new document inverter based on the given schema.
*
* @param context A document inverter context shared between related document inverters.
*/
DocumentInverter(DocumentInverterContext& context);
~DocumentInverter();
/**
* Push the current batch of inverted documents to corresponding field indexes.
*
* This function is async:
* For each field inverter a task for pushing the inverted documents to the corresponding field index
* is added to the 'push threads' executor, then this function returns.
* All tasks hold a reference to the 'on_write_done' callback, so when the last task is completed,
* the callback is destructed.
*
* NOTE: The caller of this function should sync the 'invert threads' executor first,
* to ensure that inverting is completed before pushing starts.
*/
void pushDocuments(OnWriteDoneType on_write_done);
/**
* Invert (add) the given document.
*
* This function is async:
* For each text and uri field in the document a task for inverting and adding that
* field (using a field inverter) is added to the 'invert threads' executor, then this function returns.
**/
void invertDocument(uint32_t docId, const document::Document &doc, OnWriteDoneType on_write_done);
/**
* Remove the given document.
*
* This function is async:
* For each text and uri field in the index schema a task for removing this document
* (using a field inverter) is added to the 'invert threads' executor', then this function returns.
*/
void removeDocument(uint32_t docId);
void removeDocuments(LidVector lids);
FieldInverter *getInverter(uint32_t fieldId) const {
return _inverters[fieldId].get();
}
uint32_t getNumFields() const { return _inverters.size(); }
void wait_for_zero_ref_count() { _ref_count.waitForZeroRefCount(); }
bool has_zero_ref_count() { return _ref_count.has_zero_ref_count(); }
vespalib::MonitoredRefCount& get_ref_count() noexcept { return _ref_count; }
};
}
|