// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once #include "singleenumattribute.h" #include "enumattribute.hpp" #include "ipostinglistattributebase.h" #include "singleenumattributesaver.h" #include "load_utils.h" #include "enum_store_loaders.h" #include "valuemodifier.h" #include namespace search { template SingleValueEnumAttribute:: SingleValueEnumAttribute(const vespalib::string &baseFileName, const AttributeVector::Config &cfg) : B(baseFileName, cfg), SingleValueEnumAttributeBase(cfg, getGenerationHolder(), this->get_initial_alloc()) { } template SingleValueEnumAttribute::~SingleValueEnumAttribute() { getGenerationHolder().reclaim_all(); } template bool SingleValueEnumAttribute::onAddDoc(DocId doc) { if (doc < _enumIndices.capacity()) { _enumIndices.reserve(doc+1); return true; } return false; } template void SingleValueEnumAttribute::onAddDocs(DocId limit) { _enumIndices.reserve(limit); } template bool SingleValueEnumAttribute::addDoc(DocId & doc) { bool incGen = false; doc = SingleValueEnumAttributeBase::addDoc(incGen); if (doc > 0u) { // Make sure that a valid value(magic default) is referenced, // even between addDoc and commit(). if (_enumIndices[0].load_relaxed().valid()) { _enumIndices[doc] = _enumIndices[0]; this->_enumStore.inc_ref_count(_enumIndices[0].load_relaxed()); } } this->incNumDocs(); this->updateUncommittedDocIdLimit(doc); incGen |= onAddDoc(doc); if (incGen) { this->incGeneration(); } else this->reclaim_unused_memory(); return true; } template uint32_t SingleValueEnumAttribute::getValueCount(DocId doc) const { if (doc >= this->getNumDocs()) { return 0; } return 1; } template void SingleValueEnumAttribute::onCommit() { this->checkSetMaxValueCount(1); // update enum store auto updater = this->_enumStore.make_batch_updater(); this->insertNewUniqueValues(updater); // apply updates applyValueChanges(updater); this->_changes.clear(); updater.commit(); freezeEnumDictionary(); std::atomic_thread_fence(std::memory_order_release); this->reclaim_unused_memory(); auto remapper = this->_enumStore.consider_compact_values(this->getConfig().getCompactionStrategy()); if (remapper) { remap_enum_store_refs(*remapper, *this); remapper->done(); remapper.reset(); this->incGeneration(); this->updateStat(true); } if (this->_enumStore.consider_compact_dictionary(this->getConfig().getCompactionStrategy())) { this->incGeneration(); this->updateStat(true); } auto *pab = this->getIPostingListAttributeBase(); if (pab != nullptr) { if (pab->consider_compact_worst_btree_nodes(this->getConfig().getCompactionStrategy())) { this->incGeneration(); this->updateStat(true); } if (pab->consider_compact_worst_buffers(this->getConfig().getCompactionStrategy())) { this->incGeneration(); this->updateStat(true); } } } template void SingleValueEnumAttribute::onUpdateStat() { // update statistics vespalib::MemoryUsage total = _enumIndices.getMemoryUsage(); auto& compaction_strategy = this->getConfig().getCompactionStrategy(); total.mergeGenerationHeldBytes(getGenerationHolder().get_held_bytes()); total.merge(this->_enumStore.update_stat(compaction_strategy)); total.merge(this->getChangeVectorMemoryUsage()); mergeMemoryStats(total); this->updateStatistics(_enumIndices.size(), this->_enumStore.get_num_uniques(), total.allocatedBytes(), total.usedBytes(), total.deadBytes(), total.allocatedBytesOnHold()); } template void SingleValueEnumAttribute::considerUpdateAttributeChange(const Change & c, EnumStoreBatchUpdater & inserter) { EnumIndex idx; if (!this->_enumStore.find_index(c._data.raw(), idx)) { c.set_entry_ref(inserter.insert(c._data.raw()).ref()); } else { c.set_entry_ref(idx.ref()); } considerUpdateAttributeChange(c._doc, c); // for numeric } template void SingleValueEnumAttribute::considerAttributeChange(const Change & c, EnumStoreBatchUpdater & inserter) { if (c._type == ChangeBase::UPDATE) { considerUpdateAttributeChange(c, inserter); } else if (c._type >= ChangeBase::ADD && c._type <= ChangeBase::DIV) { considerArithmeticAttributeChange(c, inserter); // for numeric } else if (c._type == ChangeBase::CLEARDOC) { considerUpdateAttributeChange(c._doc, this->_defaultValue); } } template void SingleValueEnumAttribute::applyUpdateValueChange(const Change& c, EnumStoreBatchUpdater& updater) { EnumIndex oldIdx = _enumIndices[c._doc].load_relaxed(); EnumIndex newIdx; if (c.has_entry_ref()) { newIdx = EnumIndex(vespalib::datastore::EntryRef(c.get_entry_ref())); } else { this->_enumStore.find_index(c._data.raw(), newIdx); } updateEnumRefCounts(c._doc, newIdx, oldIdx, updater); } template void SingleValueEnumAttribute::applyValueChanges(EnumStoreBatchUpdater& updater) { ValueModifier valueGuard(this->getValueModifier()); for (const auto& change : this->_changes.getInsertOrder()) { if (change._type == ChangeBase::UPDATE) { applyUpdateValueChange(change, updater); } else if (change._type >= ChangeBase::ADD && change._type <= ChangeBase::DIV) { applyArithmeticValueChange(change, updater); } else if (change._type == ChangeBase::CLEARDOC) { EnumIndex oldIdx = _enumIndices[change._doc].load_relaxed(); EnumIndex newIdx = this->_enumStore.get_default_value_ref().load_relaxed(); updateEnumRefCounts(change._doc, newIdx, oldIdx, updater); } } } template void SingleValueEnumAttribute::updateEnumRefCounts(DocId doc, EnumIndex newIdx, EnumIndex oldIdx, EnumStoreBatchUpdater& updater) { updater.inc_ref_count(newIdx); _enumIndices[doc].store_release(newIdx); if (oldIdx.valid()) { updater.dec_ref_count(oldIdx); } } template void SingleValueEnumAttribute::fillValues(LoadedVector & loaded) { if constexpr (!std::is_same_v) { uint32_t numDocs = this->getNumDocs(); getGenerationHolder().reclaim_all(); _enumIndices.reset(); _enumIndices.unsafe_reserve(numDocs); for (DocId doc = 0; doc < numDocs; ++doc, loaded.next()) { _enumIndices.push_back(AtomicEntryRef(loaded.read().getEidx())); } } } template void SingleValueEnumAttribute::load_enumerated_data(ReaderBase& attrReader, enumstore::EnumeratedPostingsLoader& loader, size_t num_values) { loader.reserve_loaded_enums(num_values); attribute::loadFromEnumeratedSingleValue(_enumIndices, getGenerationHolder(), attrReader, loader.get_enum_indexes(), loader.get_enum_value_remapping(), attribute::SaveLoadedEnum(loader.get_loaded_enums())); loader.free_enum_value_remapping(); loader.sort_loaded_enums(); } template void SingleValueEnumAttribute::load_enumerated_data(ReaderBase& attrReader, enumstore::EnumeratedLoader& loader) { loader.allocate_enums_histogram(); attribute::loadFromEnumeratedSingleValue(_enumIndices, getGenerationHolder(), attrReader, loader.get_enum_indexes(), loader.get_enum_value_remapping(), attribute::SaveEnumHist(loader.get_enums_histogram())); loader.free_enum_value_remapping(); loader.set_ref_counts(); loader.build_dictionary(); loader.free_unused_values(); } template void SingleValueEnumAttribute::reclaim_memory(generation_t oldest_used_gen) { this->_enumStore.reclaim_memory(oldest_used_gen); getGenerationHolder().reclaim(oldest_used_gen); } template void SingleValueEnumAttribute::before_inc_generation(generation_t current_gen) { /* * Freeze tree before generation is increased in attribute vector * but after generation is increased in tree. This ensures that * unlocked readers accessing a frozen tree will access a * sufficiently new frozen tree. */ freezeEnumDictionary(); getGenerationHolder().assign_generation(current_gen); this->_enumStore.assign_generation(current_gen); } template void SingleValueEnumAttribute::clearDocs(DocId lidLow, DocId lidLimit, bool) { EnumHandle e(0); bool findDefaultEnumRes(this->findEnum(this->getDefaultEnumTypeValue(), e)); if (!findDefaultEnumRes) { e = EnumHandle(); } assert(lidLow <= lidLimit); assert(lidLimit <= this->getNumDocs()); for (DocId lid = lidLow; lid < lidLimit; ++lid) { if (_enumIndices[lid].load_relaxed() != vespalib::datastore::EntryRef(e)) { this->clearDoc(lid); } } } template void SingleValueEnumAttribute::onShrinkLidSpace() { EnumHandle e(0); bool findDefaultEnumRes(this->findEnum(this->getDefaultEnumTypeValue(), e)); assert(findDefaultEnumRes); uint32_t committedDocIdLimit = this->getCommittedDocIdLimit(); assert(_enumIndices.size() >= committedDocIdLimit); attribute::IPostingListAttributeBase *pab = this->getIPostingListAttributeBase(); if (pab != nullptr) { pab->clearPostings(e, committedDocIdLimit, _enumIndices.size()); } uint32_t shrink_docs = _enumIndices.size() - committedDocIdLimit; if (shrink_docs > 0u) { vespalib::datastore::EntryRef default_value_ref(e); assert(default_value_ref.valid()); uint32_t default_value_ref_count = this->_enumStore.get_ref_count(default_value_ref); assert(default_value_ref_count >= shrink_docs); this->_enumStore.set_ref_count(default_value_ref, default_value_ref_count - shrink_docs); IEnumStore::IndexList possibly_unused; possibly_unused.push_back(default_value_ref); this->_enumStore.free_unused_values(std::move(possibly_unused)); } _enumIndices.shrink(committedDocIdLimit); this->setNumDocs(committedDocIdLimit); } template std::unique_ptr SingleValueEnumAttribute::onInitSave(vespalib::stringref fileName) { auto guard = this->getGenerationHandler().takeGuard(); return std::make_unique (std::move(guard), this->createAttributeHeader(fileName), attribute::make_entry_ref_vector_snapshot(this->_enumIndices, this->getCommittedDocIdLimit()), this->_enumStore); } } // namespace search