// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "postinglistattribute.h" #include "loadednumericvalue.h" #include "enumcomparator.h" #include "enum_store_loaders.h" #include namespace search { using attribute::LoadedNumericValue; template PostingListAttributeBase

:: PostingListAttributeBase(AttributeVector &attr, IEnumStore &enumStore) : attribute::IPostingListAttributeBase(), _posting_store(enumStore.get_dictionary(), attr.getStatus(), attr.getConfig()), _attr(attr), _dictionary(enumStore.get_dictionary()) { } template PostingListAttributeBase

::~PostingListAttributeBase() = default; template void PostingListAttributeBase

::clearAllPostings() { _posting_store.clearBuilder(); _attr.incGeneration(); // Force freeze auto clearer = [this](EntryRef posting_idx) { _posting_store.clear(posting_idx); }; _dictionary.clear_all_posting_lists(clearer); _attr.incGeneration(); // Force freeze } template void PostingListAttributeBase

::handle_load_posting_lists_and_update_enum_store(enumstore::EnumeratedPostingsLoader& loader) { clearAllPostings(); uint32_t docIdLimit = _attr.getNumDocs(); EntryRef newIndex; PostingChange

postings; const auto& loaded_enums = loader.get_loaded_enums(); if (loaded_enums.empty()) { loader.build_empty_dictionary(); return; } uint32_t preve = 0; uint32_t refCount = 0; vespalib::ConstArrayRef enum_indexes(loader.get_enum_indexes()); assert(!enum_indexes.empty()); auto posting_indexes = loader.initialize_empty_posting_indexes(); uint32_t posting_enum = preve; for (const auto& elem : loaded_enums) { if (preve != elem.getEnum()) { assert(preve < elem.getEnum()); assert(elem.getEnum() < enum_indexes.size()); loader.set_ref_count(enum_indexes[preve], refCount); refCount = 0; preve = elem.getEnum(); if (loader.is_folded_change(enum_indexes[posting_enum], enum_indexes[preve])) { postings.removeDups(); newIndex = EntryRef(); _posting_store.apply(newIndex, postings._additions.data(), postings._additions.data() + postings._additions.size(), postings._removals.data(), postings._removals.data() + postings._removals.size()); posting_indexes[posting_enum] = newIndex; postings.clear(); posting_enum = elem.getEnum(); } } assert(refCount < std::numeric_limits::max()); ++refCount; assert(elem.getDocId() < docIdLimit); (void) docIdLimit; postings.add(elem.getDocId(), elem.getWeight()); } assert(refCount != 0); loader.set_ref_count(enum_indexes[preve], refCount); postings.removeDups(); newIndex = EntryRef(); _posting_store.apply(newIndex, postings._additions.data(), postings._additions.data() + postings._additions.size(), postings._removals.data(), postings._removals.data() + postings._removals.size()); posting_indexes[posting_enum] = newIndex; loader.build_dictionary(); loader.free_unused_values(); } template void PostingListAttributeBase

::updatePostings(PostingMap &changePost, const vespalib::datastore::EntryComparator &cmp) { for (auto& elem : changePost) { EnumIndex idx = elem.first.getEnumIdx(); auto& change = elem.second; change.removeDups(); auto updater= [this, &change](EntryRef posting_idx) -> EntryRef { _posting_store.apply(posting_idx, change._additions.data(), change._additions.data() + change._additions.size(), change._removals.data(), change._removals.data() + change._removals.size()); return posting_idx; }; _dictionary.update_posting_list(idx, cmp, updater); } } template bool PostingListAttributeBase

::forwardedOnAddDoc(DocId doc, size_t wantSize, size_t wantCapacity) { if (doc >= wantSize) { wantSize = doc + 1; } if (doc >= wantCapacity) { wantCapacity = doc + 1; } return _posting_store.resizeBitVectors(wantSize, wantCapacity); } template void PostingListAttributeBase

:: clearPostings(attribute::IAttributeVector::EnumHandle eidx, uint32_t fromLid, uint32_t toLid, const vespalib::datastore::EntryComparator &cmp) { PostingChange

postings; for (uint32_t lid = fromLid; lid < toLid; ++lid) { postings.remove(lid); } EntryRef er(eidx); auto updater = [this, &postings](EntryRef posting_idx) -> EntryRef { _posting_store.apply(posting_idx, postings._additions.data(), postings._additions.data() + postings._additions.size(), postings._removals.data(), postings._removals.data() + postings._removals.size()); return posting_idx; }; _dictionary.update_posting_list(er, cmp, updater); } template void PostingListAttributeBase

::forwardedShrinkLidSpace(uint32_t newSize) { (void) _posting_store.resizeBitVectors(newSize, newSize); } template attribute::PostingStoreMemoryUsage PostingListAttributeBase

::getMemoryUsage() const { return _posting_store.getMemoryUsage(); } template bool PostingListAttributeBase

::consider_compact_worst_btree_nodes(const CompactionStrategy& compaction_strategy) { return _posting_store.consider_compact_worst_btree_nodes(compaction_strategy); } template bool PostingListAttributeBase

::consider_compact_worst_buffers(const CompactionStrategy& compaction_strategy) { return _posting_store.consider_compact_worst_buffers(compaction_strategy); } template PostingListAttributeSubBase:: PostingListAttributeSubBase(AttributeVector &attr, EnumStore &enumStore) : Parent(attr, enumStore), _es(enumStore) { } template PostingListAttributeSubBase:: ~PostingListAttributeSubBase() = default; template void PostingListAttributeSubBase:: handle_load_posting_lists(LoadedVector& loaded) { if constexpr (!std::is_same_v) { clearAllPostings(); EntryRef newIndex; PostingChange

postings; uint32_t docIdLimit = _attr.getNumDocs(); _posting_store.resizeBitVectors(docIdLimit, docIdLimit); if ( ! loaded.empty() ) { vespalib::Array similarValues; auto value = loaded.read(); LoadedValueType prev = value.getValue(); for (size_t i(0), m(loaded.size()); i < m; i++, loaded.next()) { value = loaded.read(); if (ComparatorType::equal_helper(prev, value.getValue())) { // for single value attributes loaded[numDocs] is used // for default value but we don't want to add an // invalid docId to the posting list. if (value._docId < docIdLimit) { postings.add(value._docId, value.getWeight()); similarValues.push_back(value); } } else { postings.removeDups(); newIndex = EntryRef(); _posting_store.apply(newIndex, postings._additions.data(), postings._additions.data() + postings._additions.size(), postings._removals.data(), postings._removals.data() + postings._removals.size()); postings.clear(); if (value._docId < docIdLimit) { postings.add(value._docId, value.getWeight()); } similarValues[0]._pidx = newIndex; for (size_t j(0), k(similarValues.size()); j < k; j++) { loaded.write(similarValues[j]); } similarValues.clear(); similarValues.push_back(value); prev = value.getValue(); } } postings.removeDups(); newIndex = EntryRef(); _posting_store.apply(newIndex, postings._additions.data(), postings._additions.data() + postings._additions.size(), postings._removals.data(), postings._removals.data() + postings._removals.size()); similarValues[0]._pidx = newIndex; for (size_t i(0), m(similarValues.size()); i < m; i++) { loaded.write(similarValues[i]); } } } } template void PostingListAttributeSubBase:: updatePostings(PostingMap &changePost) { updatePostings(changePost, _es.get_folded_comparator()); } template void PostingListAttributeSubBase:: clearPostings(attribute::IAttributeVector::EnumHandle eidx, uint32_t fromLid, uint32_t toLid) { clearPostings(eidx, fromLid, toLid, _es.get_folded_comparator()); } template class PostingListAttributeBase; template class PostingListAttributeBase; using LoadedInt8Vector = SequentialReadModifyWriteInterface >; using LoadedInt16Vector = SequentialReadModifyWriteInterface >; using LoadedInt32Vector = SequentialReadModifyWriteInterface >; using LoadedInt64Vector = SequentialReadModifyWriteInterface >; using LoadedFloatVector = SequentialReadModifyWriteInterface >; using LoadedDoubleVector = SequentialReadModifyWriteInterface >; template class PostingListAttributeSubBase>; template class PostingListAttributeSubBase>; template class PostingListAttributeSubBase>; template class PostingListAttributeSubBase>; template class PostingListAttributeSubBase>; template class PostingListAttributeSubBase>; template class PostingListAttributeSubBase>; template class PostingListAttributeSubBase>; template class PostingListAttributeSubBase>; template class PostingListAttributeSubBase>; template class PostingListAttributeSubBase>; template class PostingListAttributeSubBase>; template class PostingListAttributeSubBase>; template class PostingListAttributeSubBase>; }