author    | Tor Egge <Tor.Egge@online.no> | 2021-12-04 01:22:35 +0100
committer | Tor Egge <Tor.Egge@online.no> | 2021-12-04 01:22:35 +0100
commit    | 1e00538c210421e3d774db73fe9b0eb7dbf89bce (patch)
tree      | 9511bae49ac482c4698d48c3fce9ea36ab198102 /searchlib
parent    | 76327f3cd416bbe56986061a42b2db7b95a948d5 (diff)
Enable early check for buffer being compacted when compacting posting store.
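The gist of the change: instead of asking the data store per posting list whether its buffer is being compacted, the compaction passes now build a per-buffer filter up front and hand it to the dictionary scan, which skips every entry whose buffer id is not flagged. Below is a minimal standalone sketch of that early check, assuming a simplified `EntryRef` layout where the high bits select a buffer and the low `offset_bits` select an offset; the 22-bit split and the 10-bit buffer space are assumptions for the example, not the actual Vespa constants.

```cpp
#include <cstdint>
#include <vector>

// Hypothetical, simplified EntryRef: high bits = buffer id,
// low offset_bits = offset within the buffer.
struct EntryRef {
    static constexpr uint32_t offset_bits = 22;   // assumed split
    uint32_t _ref;
    uint32_t buffer_id() const { return _ref >> offset_bits; }
    bool valid() const { return _ref != 0; }
};

// Early check: a per-buffer bitmap lets the scan skip entries whose
// buffer is not being compacted, without touching the data store.
bool should_visit(const std::vector<bool>& filter, EntryRef ref) {
    return ref.valid() && filter[ref.buffer_id()];
}

int main() {
    std::vector<bool> filter(1u << 10);   // one flag per buffer
    filter[3] = true;                     // pretend buffer 3 is being compacted
    EntryRef in_buf3{(3u << EntryRef::offset_bits) | 42};
    EntryRef in_buf5{(5u << EntryRef::offset_bits) | 7};
    return (should_visit(filter, in_buf3) && !should_visit(filter, in_buf5)) ? 0 : 1;
}
```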
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/vespa/searchlib/attribute/postingstore.cpp | 201
-rw-r--r-- | searchlib/src/vespa/searchlib/attribute/postingstore.h   |   6
2 files changed, 117 insertions, 90 deletions
```diff
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
index 3451c2b0456..2592a2889e4 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
@@ -127,45 +127,49 @@ PostingStore<DataT>::removeSparseBitVectors()
         }
     }
     if (needscan) {
-        res = _dictionary.normalize_posting_lists([this](EntryRef posting_idx) -> EntryRef
-                                                  { return consider_remove_sparse_bitvector(posting_idx); });
+        std::vector<bool> filter(RefType::numBuffers());
+        for (uint32_t buffer_id : _bvType.get_active_buffers()) {
+            filter[buffer_id] = true;
+        }
+        res = _dictionary.normalize_posting_lists([this](std::vector<EntryRef>& refs)
+                                                  { consider_remove_sparse_bitvector(refs); },
+                                                  filter, RefType::offset_bits);
     }
     return res;
 }
 
 template <typename DataT>
-typename PostingStore<DataT>::EntryRef
-PostingStore<DataT>::consider_remove_sparse_bitvector(EntryRef ref)
+void
+PostingStore<DataT>::consider_remove_sparse_bitvector(std::vector<EntryRef>& refs)
 {
-    if (!ref.valid() || !isBitVector(getTypeId(EntryRef(ref)))) {
-        return ref;
-    }
-    RefType iRef(ref);
-    uint32_t typeId = getTypeId(iRef);
-    assert(isBitVector(typeId));
-    assert(_bvs.find(ref.ref() )!= _bvs.end());
-    BitVectorEntry *bve = getWBitVectorEntry(iRef);
-    BitVector &bv = *bve->_bv.get();
-    uint32_t docFreq = bv.countTrueBits();
-    if (bve->_tree.valid()) {
-        RefType iRef2(bve->_tree);
-        assert(isBTree(iRef2));
-        const BTreeType *tree = getTreeEntry(iRef2);
-        assert(tree->size(_allocator) == docFreq);
-        (void) tree;
-    }
-    if (docFreq < _minBvDocFreq) {
-        dropBitVector(ref);
-        if (ref.valid()) {
+    for (auto& ref : refs) {
+        RefType iRef(ref);
+        assert(iRef.valid());
+        uint32_t typeId = getTypeId(iRef);
+        assert(isBitVector(typeId));
+        assert(_bvs.find(iRef.ref()) != _bvs.end());
+        BitVectorEntry *bve = getWBitVectorEntry(iRef);
+        BitVector &bv = *bve->_bv.get();
+        uint32_t docFreq = bv.countTrueBits();
+        if (bve->_tree.valid()) {
+            RefType iRef2(bve->_tree);
+            assert(isBTree(iRef2));
+            const BTreeType *tree = getTreeEntry(iRef2);
+            assert(tree->size(_allocator) == docFreq);
+            (void) tree;
+        }
+        if (docFreq < _minBvDocFreq) {
+            dropBitVector(ref);
             iRef = ref;
-            typeId = getTypeId(iRef);
-            if (isBTree(typeId)) {
-                BTreeType *tree = getWTreeEntry(iRef);
-                normalizeTree(ref, tree, false);
+            if (iRef.valid()) {
+                typeId = getTypeId(iRef);
+                if (isBTree(typeId)) {
+                    BTreeType *tree = getWTreeEntry(iRef);
+                    normalizeTree(ref, tree, false);
+                }
             }
         }
     }
-    return ref;
 }
```
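The hunk above changes the `normalize_posting_lists` contract from one-ref-at-a-time to batches of pre-filtered refs. The sketch below shows one plausible shape of that batched pass, under the assumption that the dictionary holds one `EntryRef` per posting list and writes changed refs back after the callback; the names and the write-back step are simplified stand-ins, not Vespa's dictionary API.

```cpp
#include <cstdint>
#include <functional>
#include <vector>

using EntryRef = uint32_t;   // simplified: 0 means invalid

// Hypothetical filtered, batched normalize pass: collect the posting-list
// refs whose buffer passes the filter, let the callback rewrite them in
// place, then write changed refs back into the dictionary.
bool normalize_posting_lists(std::vector<EntryRef>& dictionary,
                             const std::function<void(std::vector<EntryRef>&)>& normalize,
                             const std::vector<bool>& filter,
                             uint32_t offset_bits)
{
    std::vector<EntryRef> batch;
    std::vector<size_t> slots;            // dictionary positions of batched refs
    for (size_t i = 0; i < dictionary.size(); ++i) {
        EntryRef ref = dictionary[i];
        if (ref != 0 && filter[ref >> offset_bits]) {
            batch.push_back(ref);
            slots.push_back(i);
        }
    }
    normalize(batch);                     // callback may replace refs in place
    bool changed = false;
    for (size_t i = 0; i < batch.size(); ++i) {
        if (dictionary[slots[i]] != batch[i]) {
            dictionary[slots[i]] = batch[i];
            changed = true;
        }
    }
    return changed;
}
```

The early check is the `filter[ref >> offset_bits]` test: posting lists living in untouched buffers never reach the callback at all, which is what lets the callback assert properties (validity, type) that the old per-ref version had to re-check defensively.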
```diff
@@ -647,74 +651,75 @@ PostingStore<DataT>::update_stat()
 
 template <typename DataT>
 void
-PostingStore<DataT>::move_btree_nodes(EntryRef ref)
+PostingStore<DataT>::move_btree_nodes(const std::vector<EntryRef>& refs)
 {
-    if (ref.valid()) {
+    for (auto ref : refs) {
         RefType iRef(ref);
+        assert(iRef.valid());
         uint32_t typeId = getTypeId(iRef);
         uint32_t clusterSize = getClusterSize(typeId);
-        if (clusterSize == 0) {
-            if (isBitVector(typeId)) {
-                BitVectorEntry *bve = getWBitVectorEntry(iRef);
-                RefType iRef2(bve->_tree);
-                if (iRef2.valid()) {
-                    assert(isBTree(iRef2));
-                    BTreeType *tree = getWTreeEntry(iRef2);
-                    tree->move_nodes(_allocator);
-                }
-            } else {
-                BTreeType *tree = getWTreeEntry(iRef);
+        assert(clusterSize == 0);
+        if (isBitVector(typeId)) {
+            BitVectorEntry *bve = getWBitVectorEntry(iRef);
+            RefType iRef2(bve->_tree);
+            if (iRef2.valid()) {
+                assert(isBTree(iRef2));
+                BTreeType *tree = getWTreeEntry(iRef2);
                 tree->move_nodes(_allocator);
             }
+        } else {
+            assert(isBTree(typeId));
+            BTreeType *tree = getWTreeEntry(iRef);
+            tree->move_nodes(_allocator);
         }
     }
 }
 
 template <typename DataT>
-typename PostingStore<DataT>::EntryRef
-PostingStore<DataT>::move(EntryRef ref)
+void
+PostingStore<DataT>::move(std::vector<EntryRef>& refs)
 {
-    if (!ref.valid()) {
-        return EntryRef();
-    }
-    RefType iRef(ref);
-    uint32_t typeId = getTypeId(iRef);
-    uint32_t clusterSize = getClusterSize(typeId);
-    if (clusterSize == 0) {
-        if (isBitVector(typeId)) {
-            BitVectorEntry *bve = getWBitVectorEntry(iRef);
-            RefType iRef2(bve->_tree);
-            if (iRef2.valid()) {
-                assert(isBTree(iRef2));
-                if (_store.getCompacting(iRef2)) {
-                    BTreeType *tree = getWTreeEntry(iRef2);
-                    auto ref_and_ptr = allocBTreeCopy(*tree);
-                    tree->prepare_hold();
-                    bve->_tree = ref_and_ptr.ref;
+    for (auto& ref : refs) {
+        RefType iRef(ref);
+        assert(iRef.valid());
+        uint32_t typeId = getTypeId(iRef);
+        uint32_t clusterSize = getClusterSize(typeId);
+        if (clusterSize == 0) {
+            if (isBitVector(typeId)) {
+                BitVectorEntry *bve = getWBitVectorEntry(iRef);
+                RefType iRef2(bve->_tree);
+                if (iRef2.valid()) {
+                    assert(isBTree(iRef2));
+                    if (_store.getCompacting(iRef2)) {
+                        BTreeType *tree = getWTreeEntry(iRef2);
+                        auto ref_and_ptr = allocBTreeCopy(*tree);
+                        tree->prepare_hold();
+                        // Note: Needs review when porting to other platforms
+                        // Assumes that other CPUs observes stores from this CPU in order
+                        std::atomic_thread_fence(std::memory_order_release);
+                        bve->_tree = ref_and_ptr.ref;
+                    }
                 }
+                if (_store.getCompacting(iRef)) {
+                    auto new_ref = allocBitVectorCopy(*bve).ref;
+                    _bvs.erase(iRef.ref());
+                    _bvs.insert(new_ref.ref());
+                    ref = new_ref;
+                }
+            } else {
+                assert(isBTree(typeId));
+                assert(_store.getCompacting(iRef));
+                BTreeType *tree = getWTreeEntry(iRef);
+                auto ref_and_ptr = allocBTreeCopy(*tree);
+                tree->prepare_hold();
+                ref = ref_and_ptr.ref;
             }
-            if (!_store.getCompacting(ref)) {
-                return ref;
-            }
-            auto new_ref = allocBitVectorCopy(*bve).ref;
-            _bvs.erase(ref.ref());
-            _bvs.insert(new_ref.ref());
-            return new_ref;
         } else {
-            if (!_store.getCompacting(ref)) {
-                return ref;
-            }
-            BTreeType *tree = getWTreeEntry(iRef);
-            auto ref_and_ptr = allocBTreeCopy(*tree);
-            tree->prepare_hold();
-            return ref_and_ptr.ref;
+            assert(_store.getCompacting(iRef));
+            const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize);
+            ref = allocKeyDataCopy(shortArray, clusterSize).ref;
        }
     }
-    if (!_store.getCompacting(ref)) {
-        return ref;
-    }
-    const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize);
-    return allocKeyDataCopy(shortArray, clusterSize).ref;
 }
```
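The one genuinely new piece of synchronization in this hunk is the release fence before publishing `bve->_tree`. Below is a minimal sketch of that publish pattern with hypothetical stand-ins (`g_nodes`, `g_root`) for the store: the copy is fully built before the fence, and the fence orders those stores before the store that publishes the ref. The in-diff comment notes the code assumes other CPUs observe stores in order; a portable reader would pair the release fence with an acquire fence, as shown.

```cpp
#include <atomic>
#include <cstdint>

struct Node { uint64_t payload; };

Node g_nodes[2];                     // hypothetical node storage
std::atomic<uint32_t> g_root{0};     // hypothetical published root ref

void publish_copy(uint32_t new_ref, uint64_t payload) {
    g_nodes[new_ref].payload = payload;                   // build the copy first
    std::atomic_thread_fence(std::memory_order_release);  // order build before publish
    g_root.store(new_ref, std::memory_order_relaxed);     // publish the new ref
}

uint64_t read_root() {
    uint32_t ref = g_root.load(std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_acquire);  // pairs with the release fence
    return g_nodes[ref].payload;                          // sees the fully built copy
}

int main() {
    publish_copy(1, 42);
    return read_root() == 42 ? 0 : 1;
}
```

A reader that observes the new ref is then guaranteed to see the fully initialized copy, which is why the tree can be handed to `prepare_hold()` and published without locking out concurrent readers.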
```diff
@@ -722,11 +727,16 @@ void
 PostingStore<DataT>::compact_worst_btree_nodes()
 {
     auto to_hold = this->start_compact_worst_btree_nodes();
-    _dictionary.normalize_posting_lists([this](EntryRef posting_idx) -> EntryRef
-                                        {
-                                            move_btree_nodes(posting_idx);
-                                            return posting_idx;
-                                        });
+    std::vector<bool> filter(RefType::numBuffers());
+    // Only look at buffers containing bitvectors and btree roots
+    for (uint32_t buffer_id : this->_treeType.get_active_buffers()) {
+        filter[buffer_id] = true;
+    }
+    for (uint32_t buffer_id : _bvType.get_active_buffers()) {
+        filter[buffer_id] = true;
+    }
+    _dictionary.foreach_posting_list([this](const std::vector<EntryRef>& refs)
+                                     { move_btree_nodes(refs); }, filter, RefType::offset_bits);
     this->finish_compact_worst_btree_nodes(to_hold);
 }
 
@@ -735,8 +745,25 @@ void
 PostingStore<DataT>::compact_worst_buffers()
 {
     auto to_hold = this->start_compact_worst_buffers();
-    _dictionary.normalize_posting_lists([this](EntryRef posting_idx) -> EntryRef
-                                        { return move(posting_idx); });
+    bool compact_btree_roots = false;
+    std::vector<bool> filter(RefType::numBuffers());
+    // Start with looking at buffers being compacted
+    for (uint32_t buffer_id : to_hold) {
+        if (isBTree(_store.getBufferState(buffer_id).getTypeId())) {
+            compact_btree_roots = true;
+        }
+        filter[buffer_id] = true;
+    }
+    if (compact_btree_roots) {
+        // If we are compacting btree roots then we also have to look at bitvector
+        // buffers
+        for (uint32_t buffer_id : _bvType.get_active_buffers()) {
+            filter[buffer_id] = true;
+        }
+    }
+    _dictionary.normalize_posting_lists([this](std::vector<EntryRef>& refs)
+                                        { return move(refs); },
+                                        filter, RefType::offset_bits);
     this->finishCompact(to_hold);
 }
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h
index a0f0be1c430..74a147b3ccb 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h
@@ -105,7 +105,7 @@ public:
     ~PostingStore();
 
     bool removeSparseBitVectors() override;
-    EntryRef consider_remove_sparse_bitvector(EntryRef ref);
+    void consider_remove_sparse_bitvector(std::vector<EntryRef> &refs);
     static bool isBitVector(uint32_t typeId) { return typeId == BUFFERTYPE_BITVECTOR; }
     static bool isBTree(uint32_t typeId) { return typeId == BUFFERTYPE_BTREE; }
     bool isBTree(RefType ref) const { return isBTree(getTypeId(ref)); }
@@ -188,8 +188,8 @@ public:
     vespalib::MemoryUsage getMemoryUsage() const;
     vespalib::MemoryUsage update_stat();
 
-    void move_btree_nodes(EntryRef ref);
-    EntryRef move(EntryRef ref);
+    void move_btree_nodes(const std::vector<EntryRef> &refs);
+    void move(std::vector<EntryRef>& refs);
 
     void compact_worst_btree_nodes();
     void compact_worst_buffers();
```
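One subtlety in `compact_worst_buffers()` worth spelling out: bitvector entries embed a ref to their btree root, so whenever any buffer selected for compaction holds btree roots, the bitvector buffers must be scanned as well, or those embedded refs would keep pointing into freed buffers. A standalone sketch of that filter construction, using simplified stand-in types (`BufferState`, the type-id constant) rather than the actual datastore classes:

```cpp
#include <cstdint>
#include <vector>

constexpr uint32_t BUFFERTYPE_BTREE = 1;   // assumed type-id value

struct BufferState { uint32_t type_id; };  // stand-in for the store's buffer state

// Buffers being compacted are always scanned; if any of them holds btree
// roots, the bitvector buffers are scanned too, because each bitvector
// entry embeds a ref to its btree root that may need rewriting.
std::vector<bool>
make_filter(const std::vector<uint32_t>& to_hold,
            const std::vector<BufferState>& states,
            const std::vector<uint32_t>& bitvector_buffers,
            uint32_t num_buffers)
{
    std::vector<bool> filter(num_buffers);
    bool compact_btree_roots = false;
    for (uint32_t buffer_id : to_hold) {
        if (states[buffer_id].type_id == BUFFERTYPE_BTREE) {
            compact_btree_roots = true;
        }
        filter[buffer_id] = true;
    }
    if (compact_btree_roots) {
        for (uint32_t buffer_id : bitvector_buffers) {
            filter[buffer_id] = true;
        }
    }
    return filter;
}
```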