author    Tor Egge <Tor.Egge@online.no>  2021-12-04 01:22:35 +0100
committer Tor Egge <Tor.Egge@online.no>  2021-12-04 01:22:35 +0100
commit    1e00538c210421e3d774db73fe9b0eb7dbf89bce
tree      9511bae49ac482c4698d48c3fce9ea36ab198102 /searchlib
parent    76327f3cd416bbe56986061a42b2db7b95a948d5
Enable early check for buffer being compacted when compacting posting store.
Diffstat (limited to 'searchlib')
-rw-r--r--  searchlib/src/vespa/searchlib/attribute/postingstore.cpp  201
-rw-r--r--  searchlib/src/vespa/searchlib/attribute/postingstore.h      6
2 files changed, 117 insertions, 90 deletions
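
Editor's note: the core of this change is that the dictionary traversal callbacks now receive batches of posting-list references instead of one reference at a time, and a per-buffer filter lets the scan skip references early when their buffer is not of interest (the "early check" of the commit message). Below is a minimal sketch of that filtering idea; scan_posting_lists and dictionary_refs are hypothetical names, and the only assumption is that the buffer id occupies the bits above offset_bits in an EntryRef, as in vespalib::datastore.

    #include <cstdint>
    #include <functional>
    #include <vector>

    // Hypothetical sketch, not the Vespa implementation.
    struct EntryRef {
        uint32_t _ref;
        bool valid() const { return _ref != 0u; }
        uint32_t buffer_id(uint32_t offset_bits) const { return _ref >> offset_bits; }
    };

    void scan_posting_lists(const std::vector<EntryRef>& dictionary_refs,
                            const std::vector<bool>& filter,   // indexed by buffer id
                            uint32_t offset_bits,
                            const std::function<void(std::vector<EntryRef>&)>& callback)
    {
        std::vector<EntryRef> matching;
        for (EntryRef ref : dictionary_refs) {
            // Early check: collect only refs living in buffers marked in the filter.
            if (ref.valid() && filter[ref.buffer_id(offset_bits)]) {
                matching.push_back(ref);
            }
        }
        callback(matching); // batched callback; may rewrite refs in place
    }
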
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
index 3451c2b0456..2592a2889e4 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
@@ -127,45 +127,49 @@ PostingStore<DataT>::removeSparseBitVectors()
}
}
if (needscan) {
- res = _dictionary.normalize_posting_lists([this](EntryRef posting_idx) -> EntryRef
- { return consider_remove_sparse_bitvector(posting_idx); });
+ std::vector<bool> filter(RefType::numBuffers());
+ for (uint32_t buffer_id : _bvType.get_active_buffers()) {
+ filter[buffer_id] = true;
+ }
+ res = _dictionary.normalize_posting_lists([this](std::vector<EntryRef>& refs)
+ { consider_remove_sparse_bitvector(refs); },
+ filter, RefType::offset_bits);
}
return res;
}
template <typename DataT>
-typename PostingStore<DataT>::EntryRef
-PostingStore<DataT>::consider_remove_sparse_bitvector(EntryRef ref)
+void
+PostingStore<DataT>::consider_remove_sparse_bitvector(std::vector<EntryRef>& refs)
{
- if (!ref.valid() || !isBitVector(getTypeId(EntryRef(ref)))) {
- return ref;
- }
- RefType iRef(ref);
- uint32_t typeId = getTypeId(iRef);
- assert(isBitVector(typeId));
- assert(_bvs.find(ref.ref() )!= _bvs.end());
- BitVectorEntry *bve = getWBitVectorEntry(iRef);
- BitVector &bv = *bve->_bv.get();
- uint32_t docFreq = bv.countTrueBits();
- if (bve->_tree.valid()) {
- RefType iRef2(bve->_tree);
- assert(isBTree(iRef2));
- const BTreeType *tree = getTreeEntry(iRef2);
- assert(tree->size(_allocator) == docFreq);
- (void) tree;
- }
- if (docFreq < _minBvDocFreq) {
- dropBitVector(ref);
- if (ref.valid()) {
+ for (auto& ref : refs) {
+ RefType iRef(ref);
+ assert(iRef.valid());
+ uint32_t typeId = getTypeId(iRef);
+ assert(isBitVector(typeId));
+ assert(_bvs.find(iRef.ref()) != _bvs.end());
+ BitVectorEntry *bve = getWBitVectorEntry(iRef);
+ BitVector &bv = *bve->_bv.get();
+ uint32_t docFreq = bv.countTrueBits();
+ if (bve->_tree.valid()) {
+ RefType iRef2(bve->_tree);
+ assert(isBTree(iRef2));
+ const BTreeType *tree = getTreeEntry(iRef2);
+ assert(tree->size(_allocator) == docFreq);
+ (void) tree;
+ }
+ if (docFreq < _minBvDocFreq) {
+ dropBitVector(ref);
iRef = ref;
- typeId = getTypeId(iRef);
- if (isBTree(typeId)) {
- BTreeType *tree = getWTreeEntry(iRef);
- normalizeTree(ref, tree, false);
+ if (iRef.valid()) {
+ typeId = getTypeId(iRef);
+ if (isBTree(typeId)) {
+ BTreeType *tree = getWTreeEntry(iRef);
+ normalizeTree(ref, tree, false);
+ }
}
}
}
- return ref;
}
template <typename DataT>
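
Editor's note: because the filter above pre-selects bitvector buffers, the old validity and type guards in consider_remove_sparse_bitvector become asserts, and the callback's only job is to degrade a bitvector back to a btree when its document frequency falls below _minBvDocFreq. A self-contained toy of just that threshold decision (assumed semantics, not the Vespa code):

    #include <bitset>
    #include <cstdint>
    #include <iostream>

    // Toy of the degrade decision: a posting list kept as a bitvector is
    // dropped back to a btree when its number of set bits (the document
    // frequency) falls below the configured threshold.
    int main() {
        std::bitset<1024> bv;
        bv.set(3); bv.set(17); bv.set(42);
        uint32_t doc_freq = static_cast<uint32_t>(bv.count()); // cf. countTrueBits()
        uint32_t min_bv_doc_freq = 8;                          // cf. _minBvDocFreq
        if (doc_freq < min_bv_doc_freq) {
            std::cout << "drop bitvector, rebuild as btree (doc_freq="
                      << doc_freq << ")\n";
        }
        return 0;
    }
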
@@ -647,74 +651,75 @@ PostingStore<DataT>::update_stat()
template <typename DataT>
void
-PostingStore<DataT>::move_btree_nodes(EntryRef ref)
+PostingStore<DataT>::move_btree_nodes(const std::vector<EntryRef>& refs)
{
- if (ref.valid()) {
+ for (auto ref : refs) {
RefType iRef(ref);
+ assert(iRef.valid());
uint32_t typeId = getTypeId(iRef);
uint32_t clusterSize = getClusterSize(typeId);
- if (clusterSize == 0) {
- if (isBitVector(typeId)) {
- BitVectorEntry *bve = getWBitVectorEntry(iRef);
- RefType iRef2(bve->_tree);
- if (iRef2.valid()) {
- assert(isBTree(iRef2));
- BTreeType *tree = getWTreeEntry(iRef2);
- tree->move_nodes(_allocator);
- }
- } else {
- BTreeType *tree = getWTreeEntry(iRef);
+ assert(clusterSize == 0);
+ if (isBitVector(typeId)) {
+ BitVectorEntry *bve = getWBitVectorEntry(iRef);
+ RefType iRef2(bve->_tree);
+ if (iRef2.valid()) {
+ assert(isBTree(iRef2));
+ BTreeType *tree = getWTreeEntry(iRef2);
tree->move_nodes(_allocator);
}
+ } else {
+ assert(isBTree(typeId));
+ BTreeType *tree = getWTreeEntry(iRef);
+ tree->move_nodes(_allocator);
}
}
}
template <typename DataT>
-typename PostingStore<DataT>::EntryRef
-PostingStore<DataT>::move(EntryRef ref)
+void
+PostingStore<DataT>::move(std::vector<EntryRef>& refs)
{
- if (!ref.valid()) {
- return EntryRef();
- }
- RefType iRef(ref);
- uint32_t typeId = getTypeId(iRef);
- uint32_t clusterSize = getClusterSize(typeId);
- if (clusterSize == 0) {
- if (isBitVector(typeId)) {
- BitVectorEntry *bve = getWBitVectorEntry(iRef);
- RefType iRef2(bve->_tree);
- if (iRef2.valid()) {
- assert(isBTree(iRef2));
- if (_store.getCompacting(iRef2)) {
- BTreeType *tree = getWTreeEntry(iRef2);
- auto ref_and_ptr = allocBTreeCopy(*tree);
- tree->prepare_hold();
- bve->_tree = ref_and_ptr.ref;
+ for (auto& ref : refs) {
+ RefType iRef(ref);
+ assert(iRef.valid());
+ uint32_t typeId = getTypeId(iRef);
+ uint32_t clusterSize = getClusterSize(typeId);
+ if (clusterSize == 0) {
+ if (isBitVector(typeId)) {
+ BitVectorEntry *bve = getWBitVectorEntry(iRef);
+ RefType iRef2(bve->_tree);
+ if (iRef2.valid()) {
+ assert(isBTree(iRef2));
+ if (_store.getCompacting(iRef2)) {
+ BTreeType *tree = getWTreeEntry(iRef2);
+ auto ref_and_ptr = allocBTreeCopy(*tree);
+ tree->prepare_hold();
+ // Note: Needs review when porting to other platforms
+ // Assumes that other CPUs observe stores from this CPU in order
+ std::atomic_thread_fence(std::memory_order_release);
+ bve->_tree = ref_and_ptr.ref;
+ }
}
+ if (_store.getCompacting(iRef)) {
+ auto new_ref = allocBitVectorCopy(*bve).ref;
+ _bvs.erase(iRef.ref());
+ _bvs.insert(new_ref.ref());
+ ref = new_ref;
+ }
+ } else {
+ assert(isBTree(typeId));
+ assert(_store.getCompacting(iRef));
+ BTreeType *tree = getWTreeEntry(iRef);
+ auto ref_and_ptr = allocBTreeCopy(*tree);
+ tree->prepare_hold();
+ ref = ref_and_ptr.ref;
}
- if (!_store.getCompacting(ref)) {
- return ref;
- }
- auto new_ref = allocBitVectorCopy(*bve).ref;
- _bvs.erase(ref.ref());
- _bvs.insert(new_ref.ref());
- return new_ref;
} else {
- if (!_store.getCompacting(ref)) {
- return ref;
- }
- BTreeType *tree = getWTreeEntry(iRef);
- auto ref_and_ptr = allocBTreeCopy(*tree);
- tree->prepare_hold();
- return ref_and_ptr.ref;
+ assert(_store.getCompacting(iRef));
+ const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize);
+ ref = allocKeyDataCopy(shortArray, clusterSize).ref;
}
}
- if (!_store.getCompacting(ref)) {
- return ref;
- }
- const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize);
- return allocKeyDataCopy(shortArray, clusterSize).ref;
}
template <typename DataT>
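
Editor's note: the fence added in move() is the subtle part. Readers traverse posting lists without locks (under generation protection), so the copied btree must be fully written before the new root ref stored in the BitVectorEntry becomes visible. A simplified sketch of that publish pattern, with an atomic ref standing in for bve->_tree; the reader-side acquire load is an assumption for illustration, not the actual Vespa reader:

    #include <atomic>
    #include <cstdint>

    std::atomic<uint32_t> tree_root_ref{0};
    uint32_t tree_copy_payload = 0; // stands in for the copied btree nodes

    void writer_publish(uint32_t new_ref, uint32_t payload) {
        tree_copy_payload = payload; // fill the freshly allocated copy first
        // Release fence: everything written above is visible to any thread
        // that later observes the new ref.
        std::atomic_thread_fence(std::memory_order_release);
        tree_root_ref.store(new_ref, std::memory_order_relaxed);
    }

    uint32_t reader_observe() {
        uint32_t ref = tree_root_ref.load(std::memory_order_acquire);
        // If ref is the new value, the copied payload is visible as well.
        return ref;
    }
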
@@ -722,11 +727,16 @@ void
PostingStore<DataT>::compact_worst_btree_nodes()
{
auto to_hold = this->start_compact_worst_btree_nodes();
- _dictionary.normalize_posting_lists([this](EntryRef posting_idx) -> EntryRef
- {
- move_btree_nodes(posting_idx);
- return posting_idx;
- });
+ std::vector<bool> filter(RefType::numBuffers());
+ // Only look at buffers containing bitvectors and btree roots
+ for (uint32_t buffer_id : this->_treeType.get_active_buffers()) {
+ filter[buffer_id] = true;
+ }
+ for (uint32_t buffer_id : _bvType.get_active_buffers()) {
+ filter[buffer_id] = true;
+ }
+ _dictionary.foreach_posting_list([this](const std::vector<EntryRef>& refs)
+ { move_btree_nodes(refs); }, filter, RefType::offset_bits);
this->finish_compact_worst_btree_nodes(to_hold);
}
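
Editor's note: compact_worst_btree_nodes() now uses foreach_posting_list rather than normalize_posting_lists, since moving btree nodes never changes the root ref stored in the dictionary, so a const traversal suffices. A signature sketch inferred from the two call sites (simplified; the actual dictionary interface may differ):

    #include <cstdint>
    #include <functional>
    #include <vector>

    struct EntryRef { uint32_t _ref; };

    struct Dictionary {
        // Const traversal: callback cannot change the refs (node-level moves).
        void foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> fn,
                                  const std::vector<bool>& filter,
                                  uint32_t offset_bits);
        // Mutating traversal: callback rewrites refs in place (buffer-level moves).
        void normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> fn,
                                     const std::vector<bool>& filter,
                                     uint32_t offset_bits);
    };
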
@@ -735,8 +745,25 @@ void
PostingStore<DataT>::compact_worst_buffers()
{
auto to_hold = this->start_compact_worst_buffers();
- _dictionary.normalize_posting_lists([this](EntryRef posting_idx) -> EntryRef
- { return move(posting_idx); });
+ bool compact_btree_roots = false;
+ std::vector<bool> filter(RefType::numBuffers());
+ // Start with looking at buffers being compacted
+ for (uint32_t buffer_id : to_hold) {
+ if (isBTree(_store.getBufferState(buffer_id).getTypeId())) {
+ compact_btree_roots = true;
+ }
+ filter[buffer_id] = true;
+ }
+ if (compact_btree_roots) {
+ // If we are compacting btree roots then we also have to look at bitvector
+ // buffers
+ for (uint32_t buffer_id : _bvType.get_active_buffers()) {
+ filter[buffer_id] = true;
+ }
+ }
+ _dictionary.normalize_posting_lists([this](std::vector<EntryRef>& refs)
+ { return move(refs); },
+ filter, RefType::offset_bits);
this->finishCompact(to_hold);
}
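
Editor's note: the widening rule in compact_worst_buffers() is worth spelling out. A BitVectorEntry embeds a btree root ref, so when btree-root buffers are being compacted the scan must also visit bitvector buffers to find and rewrite those embedded refs. A self-contained toy of the rule (all names hypothetical):

    #include <cstdint>
    #include <vector>

    std::vector<bool> build_compaction_filter(const std::vector<uint32_t>& to_hold,
                                              const std::vector<bool>& holds_btree_roots,
                                              const std::vector<uint32_t>& bv_active_buffers,
                                              size_t num_buffers)
    {
        std::vector<bool> filter(num_buffers, false);
        bool compact_btree_roots = false;
        for (uint32_t buffer_id : to_hold) {        // buffers being compacted
            if (holds_btree_roots[buffer_id]) {
                compact_btree_roots = true;
            }
            filter[buffer_id] = true;
        }
        if (compact_btree_roots) {
            // Bitvector entries embed btree root refs, so their buffers must
            // be scanned too when btree roots can move.
            for (uint32_t buffer_id : bv_active_buffers) {
                filter[buffer_id] = true;
            }
        }
        return filter;
    }
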
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h
index a0f0be1c430..74a147b3ccb 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h
@@ -105,7 +105,7 @@ public:
~PostingStore();
bool removeSparseBitVectors() override;
- EntryRef consider_remove_sparse_bitvector(EntryRef ref);
+ void consider_remove_sparse_bitvector(std::vector<EntryRef> &refs);
static bool isBitVector(uint32_t typeId) { return typeId == BUFFERTYPE_BITVECTOR; }
static bool isBTree(uint32_t typeId) { return typeId == BUFFERTYPE_BTREE; }
bool isBTree(RefType ref) const { return isBTree(getTypeId(ref)); }
@@ -188,8 +188,8 @@ public:
vespalib::MemoryUsage getMemoryUsage() const;
vespalib::MemoryUsage update_stat();
- void move_btree_nodes(EntryRef ref);
- EntryRef move(EntryRef ref);
+ void move_btree_nodes(const std::vector<EntryRef> &refs);
+ void move(std::vector<EntryRef>& refs);
void compact_worst_btree_nodes();
void compact_worst_buffers();
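
Editor's note: the header diff above summarizes the new contract. For reference, a compilable contrast of the old and new callback shapes, with types reduced to the minimum (EntryRef simplified; the real refs carry buffer id and offset):

    #include <cstdint>
    #include <functional>
    #include <vector>

    struct EntryRef { uint32_t _ref; };

    // Old contract: visit one ref, return the (possibly updated) ref.
    using OldNormalizeFn = std::function<EntryRef(EntryRef)>;
    // New contract: receive a pre-filtered batch, update refs in place.
    using NewNormalizeFn = std::function<void(std::vector<EntryRef>&)>;

    int main() {
        OldNormalizeFn old_fn = [](EntryRef ref) { return ref; };
        NewNormalizeFn new_fn = [](std::vector<EntryRef>& refs) {
            for (auto& ref : refs) { (void)ref; /* move(refs) mutates in place */ }
        };
        (void)old_fn; (void)new_fn;
        return 0;
    }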