diff options
25 files changed, 121 insertions, 28 deletions
diff --git a/searchlib/src/tests/attribute/.gitignore b/searchlib/src/tests/attribute/.gitignore index 732912ab981..d5747ffc3ff 100644 --- a/searchlib/src/tests/attribute/.gitignore +++ b/searchlib/src/tests/attribute/.gitignore @@ -1,5 +1,6 @@ *.dat *.idx +*.udat *.weight .depend Makefile diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp index 61d5b3795e4..7677a71ba3a 100644 --- a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp +++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp @@ -9,6 +9,7 @@ #include <vespa/searchlib/attribute/attributememoryfilebufferwriter.h> #include <vespa/searchlib/attribute/attributememorysavetarget.h> #include <vespa/searchlib/attribute/attributesaver.h> +#include <vespa/searchlib/attribute/i_enum_store_dictionary.h> #include <vespa/searchlib/queryeval/executeinfo.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> @@ -530,9 +531,28 @@ EnumeratedSaveTest::saveMemDuringCompaction(AttributeVector &v) void EnumeratedSaveTest::checkMem(AttributeVector &v, const MemAttr &e) { - MemAttr m; - EXPECT_TRUE(v.save(m, v.getBaseFileName())); - ASSERT_TRUE(m == e); + auto *esb = v.getEnumStoreBase(); + if (esb == nullptr || esb->get_dictionary().get_has_btree_dictionary()) { + MemAttr m; + EXPECT_TRUE(v.save(m, v.getBaseFileName())); + ASSERT_TRUE(m == e); + } else { + // Save without sorting unique values, load into temporary + // attribute vector with sorted dictionary and save again + // to verify data. + search::AttributeMemorySaveTarget ms; + search::TuneFileAttributes tune; + search::index::DummyFileHeaderContext fileHeaderContext; + EXPECT_TRUE(v.save(ms, "convert")); + EXPECT_TRUE(ms.writeToFile(tune, fileHeaderContext)); + auto cfg = v.getConfig(); + cfg.set_dictionary_config(search::DictionaryConfig(search::DictionaryConfig::Type::BTREE)); + auto v2 = AttributeFactory::createAttribute("convert", cfg); + EXPECT_TRUE(v2->load()); + MemAttr m2; + EXPECT_TRUE(v2->save(m2, v.getBaseFileName())); + ASSERT_TRUE(m2 == e); + } } diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.cpp index c335c2064f1..56cca654b96 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.cpp @@ -9,10 +9,13 @@ namespace search::enumstore { EnumeratedLoaderBase::EnumeratedLoaderBase(IEnumStore& store) : _store(store), - _indexes() + _indexes(), + _enum_value_remapping() { } +EnumeratedLoaderBase::~EnumeratedLoaderBase() = default; + void EnumeratedLoaderBase::load_unique_values(const void* src, size_t available) { @@ -32,6 +35,41 @@ EnumeratedLoaderBase::free_unused_values() _store.free_unused_values(); } +void +EnumeratedLoaderBase::build_enum_value_remapping() +{ + if (!_store.get_dictionary().get_has_btree_dictionary() || _indexes.size() < 2u) { + return; // No need for unique values to be sorted + } + auto comp_up = _store.allocate_comparator(); + auto& comp = *comp_up; + if (std::is_sorted(_indexes.begin(), _indexes.end(), [&comp](Index lhs, Index rhs) { return !comp.less(rhs, lhs); })) { + return; // Unique values are already sorted + } + vespalib::Array<std::pair<Index, uint32_t>> sortdata; + uint32_t enum_value = 0; + sortdata.reserve(_indexes.size()); + for (auto index : _indexes) { + sortdata.push_back(std::make_pair(index, enum_value)); + ++enum_value; + } + std::sort(sortdata.begin(), sortdata.end(), [&comp](auto lhs, auto rhs) { return comp.less(lhs.first, rhs.first); }); + _enum_value_remapping.resize(_indexes.size()); + enum_value = 0; + for (auto &entry : sortdata) { + _indexes[enum_value] = entry.first; + _enum_value_remapping[entry.second] = enum_value; + ++enum_value; + } + assert(std::is_sorted(_indexes.begin(), _indexes.end(), [&comp](Index lhs, Index rhs) { return !comp.less(rhs, lhs); })); +} + +void +EnumeratedLoaderBase::free_enum_value_remapping() +{ + EnumVector().swap(_enum_value_remapping); +} + EnumeratedLoader::EnumeratedLoader(IEnumStore& store) : EnumeratedLoaderBase(store), _enums_histogram() diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.h b/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.h index 87705681dcf..4fa63646ed3 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.h +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.h @@ -16,12 +16,17 @@ class EnumeratedLoaderBase { protected: IEnumStore& _store; IndexVector _indexes; + EnumVector _enum_value_remapping; // Empty if saved unique values are sorted. void release_enum_indexes(); public: EnumeratedLoaderBase(IEnumStore& store); + ~EnumeratedLoaderBase(); const IndexVector& get_enum_indexes() const { return _indexes; } + const EnumVector& get_enum_value_remapping() const noexcept { return _enum_value_remapping; } void load_unique_values(const void* src, size_t available); + void build_enum_value_remapping(); + void free_enum_value_remapping(); void free_unused_values(); }; diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h index 6d77295db08..05f7b6d0fe0 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h @@ -208,6 +208,7 @@ public: _store.get_allocator().get_data_store().inc_compaction_count(); } std::unique_ptr<Enumerator> make_enumerator() const override; + std::unique_ptr<vespalib::datastore::EntryComparator> allocate_comparator() const override; }; std::unique_ptr<vespalib::datastore::IUniqueStoreDictionary> diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index 357026ab944..8ec61df5ac8 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -61,13 +61,7 @@ EnumStoreT<EntryT>::load_unique_value(const void* src, size_t available, Index& return -1; } const auto* value = static_cast<const EntryType*>(src); - Index prev_idx = idx; idx = _store.get_allocator().allocate(*value); - - if (prev_idx.valid()) { - auto cmp = make_comparator(*value); - assert(cmp.less(prev_idx, Index())); - } return sizeof(EntryType); } @@ -261,7 +255,14 @@ template <typename EntryT> std::unique_ptr<IEnumStore::Enumerator> EnumStoreT<EntryT>::make_enumerator() const { - return std::make_unique<Enumerator>(*_dict, _store.get_data_store()); + return std::make_unique<Enumerator>(*_dict, _store.get_data_store(), false); +} + +template <typename EntryT> +std::unique_ptr<vespalib::datastore::EntryComparator> +EnumStoreT<EntryT>::allocate_comparator() const +{ + return std::make_unique<ComparatorType>(_store.get_data_store()); } } diff --git a/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp b/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp index 895e6a6f4c0..ab2a588a53d 100644 --- a/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp @@ -94,7 +94,7 @@ FlagAttributeT<B>::onLoadEnumerated(ReaderBase &attrReader) vespalib::ConstArrayRef<TT> map(reinterpret_cast<const TT *>(udatBuffer->buffer()), udatBuffer->size() / sizeof(TT)); SaveBits<FlagAttributeT<B>, TT> saver(map, *this); - uint32_t maxvc = attribute::loadFromEnumeratedMultiValue(this->_mvMapping, attrReader, map, saver); + uint32_t maxvc = attribute::loadFromEnumeratedMultiValue(this->_mvMapping, attrReader, map, vespalib::ConstArrayRef<uint32_t>(), saver); this->checkSetMaxValueCount(maxvc); return true; diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h index 321459078b9..55cd4f88c25 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h +++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h @@ -79,6 +79,7 @@ public: } virtual std::unique_ptr<Enumerator> make_enumerator() const = 0; + virtual std::unique_ptr<vespalib::datastore::EntryComparator> allocate_comparator() const = 0; }; } diff --git a/searchlib/src/vespa/searchlib/attribute/load_utils.cpp b/searchlib/src/vespa/searchlib/attribute/load_utils.cpp index 701c8eaf702..5e9bc80f46a 100644 --- a/searchlib/src/vespa/searchlib/attribute/load_utils.cpp +++ b/searchlib/src/vespa/searchlib/attribute/load_utils.cpp @@ -82,11 +82,11 @@ LoadUtils::loadUDAT(const AttributeVector& attr) #define INSTANTIATE_ARRAY(ValueType, Saver) \ -template uint32_t loadFromEnumeratedMultiValue(MultiValueMapping<Value<ValueType>> &, ReaderBase &, vespalib::ConstArrayRef<ValueType>, Saver) +template uint32_t loadFromEnumeratedMultiValue(MultiValueMapping<Value<ValueType>> &, ReaderBase &, vespalib::ConstArrayRef<ValueType>, vespalib::ConstArrayRef<uint32_t>, Saver) #define INSTANTIATE_WSET(ValueType, Saver) \ -template uint32_t loadFromEnumeratedMultiValue(MultiValueMapping<WeightedValue<ValueType>> &, ReaderBase &, vespalib::ConstArrayRef<ValueType>, Saver) +template uint32_t loadFromEnumeratedMultiValue(MultiValueMapping<WeightedValue<ValueType>> &, ReaderBase &, vespalib::ConstArrayRef<ValueType>, vespalib::ConstArrayRef<uint32_t>, Saver) #define INSTANTIATE_SINGLE(ValueType, Saver) \ -template void loadFromEnumeratedSingleValue(vespalib::RcuVectorBase<ValueType> &, vespalib::GenerationHolder &, ReaderBase &, vespalib::ConstArrayRef<ValueType>, Saver) +template void loadFromEnumeratedSingleValue(vespalib::RcuVectorBase<ValueType> &, vespalib::GenerationHolder &, ReaderBase &, vespalib::ConstArrayRef<ValueType>, vespalib::ConstArrayRef<uint32_t>, Saver) #define INSTANTIATE_SINGLE_ARRAY_WSET(ValueType, Saver) \ INSTANTIATE_SINGLE(ValueType, Saver); \ diff --git a/searchlib/src/vespa/searchlib/attribute/load_utils.h b/searchlib/src/vespa/searchlib/attribute/load_utils.h index cd9d98084d5..6833ab6b0b7 100644 --- a/searchlib/src/vespa/searchlib/attribute/load_utils.h +++ b/searchlib/src/vespa/searchlib/attribute/load_utils.h @@ -42,6 +42,7 @@ uint32_t loadFromEnumeratedMultiValue(MvMapping &mapping, ReaderBase &attrReader, vespalib::ConstArrayRef<typename MvMapping::MultiValueType::ValueType> enumValueToValueMap, + vespalib::ConstArrayRef<uint32_t> enum_value_remapping, Saver saver) __attribute((noinline)); /** @@ -54,6 +55,7 @@ loadFromEnumeratedSingleValue(Vector &vector, vespalib::GenerationHolder &genHolder, ReaderBase &attrReader, vespalib::ConstArrayRef<typename Vector::ValueType> enumValueToValueMap, + vespalib::ConstArrayRef<uint32_t> enum_value_remapping, Saver saver) __attribute((noinline)); } diff --git a/searchlib/src/vespa/searchlib/attribute/load_utils.hpp b/searchlib/src/vespa/searchlib/attribute/load_utils.hpp index 61d56cfa4d9..4f856314997 100644 --- a/searchlib/src/vespa/searchlib/attribute/load_utils.hpp +++ b/searchlib/src/vespa/searchlib/attribute/load_utils.hpp @@ -12,6 +12,7 @@ uint32_t loadFromEnumeratedMultiValue(MvMapping & mapping, ReaderBase & attrReader, vespalib::ConstArrayRef<typename MvMapping::MultiValueType::ValueType> enumValueToValueMap, + vespalib::ConstArrayRef<uint32_t> enum_value_remapping, Saver saver) { mapping.prepareLoadFromMultiValue(); @@ -30,6 +31,9 @@ loadFromEnumeratedMultiValue(MvMapping & mapping, for (uint32_t vci = 0; vci < valueCount; ++vci) { uint32_t enumValue = attrReader.getNextEnum(); assert(enumValue < enumValueToValueMap.size()); + if (!enum_value_remapping.empty()) { + enumValue = enum_value_remapping[enumValue]; + } int32_t weight = MultiValueType::_hasWeight ? attrReader.getNextWeight() : 1; indices.emplace_back(enumValueToValueMap[enumValue], weight); saver.save(enumValue, doc, weight); @@ -51,6 +55,7 @@ loadFromEnumeratedSingleValue(Vector &vector, vespalib::GenerationHolder &genHolder, ReaderBase &attrReader, vespalib::ConstArrayRef<typename Vector::ValueType> enumValueToValueMap, + vespalib::ConstArrayRef<uint32_t> enum_value_remapping, Saver saver) { uint32_t numDocs = attrReader.getEnumCount(); @@ -60,6 +65,9 @@ loadFromEnumeratedSingleValue(Vector &vector, for (uint32_t doc = 0; doc < numDocs; ++doc) { uint32_t enumValue = attrReader.getNextEnum(); assert(enumValue < enumValueToValueMap.size()); + if (!enum_value_remapping.empty()) { + enumValue = enum_value_remapping[enumValue]; + } vector.push_back(enumValueToValueMap[enumValue]); saver.save(enumValue, doc, 1); } diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp index bd3a8adb56e..d320ecfaa85 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp @@ -99,7 +99,9 @@ MultiValueEnumAttribute<B, M>::load_enumerated_data(ReaderBase& attrReader, loader.reserve_loaded_enums(num_values); uint32_t maxvc = attribute::loadFromEnumeratedMultiValue(this->_mvMapping, attrReader, vespalib::ConstArrayRef<EnumIndex>(loader.get_enum_indexes()), + loader.get_enum_value_remapping(), attribute::SaveLoadedEnum(loader.get_loaded_enums())); + loader.free_enum_value_remapping(); loader.sort_loaded_enums(); this->checkSetMaxValueCount(maxvc); } @@ -112,7 +114,9 @@ MultiValueEnumAttribute<B, M>::load_enumerated_data(ReaderBase& attrReader, loader.allocate_enums_histogram(); uint32_t maxvc = attribute::loadFromEnumeratedMultiValue(this->_mvMapping, attrReader, vespalib::ConstArrayRef<EnumIndex>(loader.get_enum_indexes()), + loader.get_enum_value_remapping(), attribute::SaveEnumHist(loader.get_enums_histogram())); + loader.free_enum_value_remapping(); loader.set_ref_counts(); loader.build_dictionary(); loader.free_unused_values(); diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp index 3ca7423c38c..b0aa2dcb6c0 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp @@ -120,7 +120,7 @@ MultiValueNumericAttribute<B, M>::onLoadEnumerated(ReaderBase & attrReader) auto udatBuffer = attribute::LoadUtils::loadUDAT(*this); assert((udatBuffer->size() % sizeof(T)) == 0); vespalib::ConstArrayRef<T> map(reinterpret_cast<const T *>(udatBuffer->buffer()), udatBuffer->size() / sizeof(T)); - uint32_t maxvc = attribute::loadFromEnumeratedMultiValue(this->_mvMapping, attrReader, map, attribute::NoSaveLoadedEnum()); + uint32_t maxvc = attribute::loadFromEnumeratedMultiValue(this->_mvMapping, attrReader, map, vespalib::ConstArrayRef<uint32_t>(), attribute::NoSaveLoadedEnum()); this->checkSetMaxValueCount(maxvc); return true; diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp index e17d41a5521..fffdbcde1bb 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp @@ -68,6 +68,7 @@ MultiValueNumericEnumAttribute<B, M>::onLoadEnumerated(ReaderBase &attrReader) if (this->hasPostings()) { auto loader = this->getEnumStore().make_enumerated_postings_loader(); loader.load_unique_values(udatBuffer->buffer(), udatBuffer->size()); + loader.build_enum_value_remapping(); this->load_enumerated_data(attrReader, loader, numValues); if (numDocs > 0) { this->onAddDoc(numDocs - 1); @@ -76,6 +77,7 @@ MultiValueNumericEnumAttribute<B, M>::onLoadEnumerated(ReaderBase &attrReader) } else { auto loader = this->getEnumStore().make_enumerated_loader(); loader.load_unique_values(udatBuffer->buffer(), udatBuffer->size()); + loader.build_enum_value_remapping(); this->load_enumerated_data(attrReader, loader); } return true; diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp index fd4b0365ca1..2af5bfdf225 100644 --- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp @@ -57,7 +57,7 @@ ReferenceAttribute::~ReferenceAttribute() _referenceMappings.clearBuilder(); incGeneration(); // Force freeze const auto &store = _store; - const auto enumerator = _store.getEnumerator(); + const auto enumerator = _store.getEnumerator(true); enumerator.foreach_key([&store,this](EntryRef ref) { const Reference &entry = store.get(ref); _referenceMappings.clearMapping(entry); diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute_saver.cpp b/searchlib/src/vespa/searchlib/attribute/reference_attribute_saver.cpp index d3c71796838..aa76967e4ed 100644 --- a/searchlib/src/vespa/searchlib/attribute/reference_attribute_saver.cpp +++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute_saver.cpp @@ -21,7 +21,7 @@ ReferenceAttributeSaver(GenerationHandler::Guard &&guard, : AttributeSaver(std::move(guard), header), _indices(std::move(indices)), _store(store), - _enumerator(store.getEnumerator()) + _enumerator(store.getEnumerator(true)) { } diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp index 4d91c60ef4e..96dda48c043 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp @@ -208,7 +208,9 @@ SingleValueEnumAttribute<B>::load_enumerated_data(ReaderBase& attrReader, getGenerationHolder(), attrReader, loader.get_enum_indexes(), + loader.get_enum_value_remapping(), attribute::SaveLoadedEnum(loader.get_loaded_enums())); + loader.free_enum_value_remapping(); loader.sort_loaded_enums(); } @@ -222,7 +224,9 @@ SingleValueEnumAttribute<B>::load_enumerated_data(ReaderBase& attrReader, getGenerationHolder(), attrReader, loader.get_enum_indexes(), + loader.get_enum_value_remapping(), attribute::SaveEnumHist(loader.get_enums_histogram())); + loader.free_enum_value_remapping(); loader.set_ref_counts(); loader.build_dictionary(); loader.free_unused_values(); diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp index 681c2af1f07..fd913f34c3a 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp @@ -119,7 +119,7 @@ SingleValueNumericAttribute<B>::onLoadEnumerated(ReaderBase &attrReader) vespalib::ConstArrayRef<T> map(reinterpret_cast<const T *>(udatBuffer->buffer()), udatBuffer->size() / sizeof(T)); attribute::loadFromEnumeratedSingleValue(_data, getGenerationHolder(), attrReader, - map, attribute::NoSaveLoadedEnum()); + map, vespalib::ConstArrayRef<uint32_t>(), attribute::NoSaveLoadedEnum()); return true; } diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp index 5fb587c908e..dc1a6b8f278 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp @@ -90,6 +90,7 @@ SingleValueNumericEnumAttribute<B>::onLoadEnumerated(ReaderBase &attrReader) if (this->hasPostings()) { auto loader = this->getEnumStore().make_enumerated_postings_loader(); loader.load_unique_values(udatBuffer->buffer(), udatBuffer->size()); + loader.build_enum_value_remapping(); this->load_enumerated_data(attrReader, loader, numValues); if (numDocs > 0) { this->onAddDoc(numDocs - 1); @@ -98,6 +99,7 @@ SingleValueNumericEnumAttribute<B>::onLoadEnumerated(ReaderBase &attrReader) } else { auto loader = this->getEnumStore().make_enumerated_loader(); loader.load_unique_values(udatBuffer->buffer(), udatBuffer->size()); + loader.build_enum_value_remapping(); this->load_enumerated_data(attrReader, loader); } return true; diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp index 56a644a68b1..a308fc06af0 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp @@ -308,6 +308,7 @@ StringAttribute::onLoadEnumerated(ReaderBase &attrReader) if (hasPostings()) { auto loader = this->getEnumStoreBase()->make_enumerated_postings_loader(); loader.load_unique_values(udatBuffer->buffer(), udatBuffer->size()); + loader.build_enum_value_remapping(); load_enumerated_data(attrReader, loader, numValues); if (numDocs > 0) { onAddDoc(numDocs - 1); @@ -316,6 +317,7 @@ StringAttribute::onLoadEnumerated(ReaderBase &attrReader) } else { auto loader = this->getEnumStoreBase()->make_enumerated_loader(); loader.load_unique_values(udatBuffer->buffer(), udatBuffer->size()); + loader.build_enum_value_remapping(); load_enumerated_data(attrReader, loader); } return true; diff --git a/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp b/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp index e61713e40d8..d9b3d25a908 100644 --- a/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp +++ b/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp @@ -133,7 +133,7 @@ struct TestBase : public ::testing::Test { } size_t entrySize() const { return sizeof(ValueType); } auto getBuilder(uint32_t uniqueValuesHint) { return store.getBuilder(uniqueValuesHint); } - auto getEnumerator() { return store.getEnumerator(); } + auto getEnumerator(bool sort_unique_values) { return store.getEnumerator(sort_unique_values); } size_t get_reserved(EntryRef ref) { return store.bufferState(ref).getTypeHandler()->getReservedElements(getBufferId(ref)); } @@ -404,7 +404,7 @@ TYPED_TEST(TestBase, store_can_be_enumerated) this->remove(this->add(this->values()[2])); this->trimHoldLists(); - auto enumerator = this->getEnumerator(); + auto enumerator = this->getEnumerator(true); std::vector<uint32_t> refs; enumerator.foreach_key([&](EntryRef ref) { refs.push_back(ref.ref()); }); std::vector<uint32_t> expRefs; @@ -445,7 +445,7 @@ TEST_F(DoubleTest, nan_is_handled) EXPECT_FALSE(std::signbit(store.get(refs[2]))); EXPECT_TRUE(std::isinf(store.get(refs[3]))); EXPECT_TRUE(std::signbit(store.get(refs[3]))); - auto enumerator = getEnumerator(); + auto enumerator = getEnumerator(true); enumerator.enumerateValues(); std::vector<uint32_t> enumerated; for (auto &ref : refs) { diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store.h b/vespalib/src/vespa/vespalib/datastore/unique_store.h index 565c1ceee61..d0a12ddd290 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store.h @@ -76,7 +76,7 @@ public: uint32_t getNumUniques() const; Builder getBuilder(uint32_t uniqueValuesHint); - Enumerator getEnumerator() const; + Enumerator getEnumerator(bool sort_unique_values) const; // Should only be used for unit testing const BufferState &bufferState(EntryRef ref) const; diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp index c5e4eb0d6fd..a883b2351de 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp @@ -229,9 +229,9 @@ UniqueStore<EntryT, RefT, Compare, Allocator>::getBuilder(uint32_t uniqueValuesH template <typename EntryT, typename RefT, typename Compare, typename Allocator> typename UniqueStore<EntryT, RefT, Compare, Allocator>::Enumerator -UniqueStore<EntryT, RefT, Compare, Allocator>::getEnumerator() const +UniqueStore<EntryT, RefT, Compare, Allocator>::getEnumerator(bool sort_unique_values) const { - return Enumerator(*_dict, _store); + return Enumerator(*_dict, _store, sort_unique_values); } template <typename EntryT, typename RefT, typename Compare, typename Allocator> diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h index be591649310..d7ea449754c 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h @@ -31,7 +31,7 @@ private: void allocate_enum_values(); public: - UniqueStoreEnumerator(const IUniqueStoreDictionary &dict, const DataStoreBase &store); + UniqueStoreEnumerator(const IUniqueStoreDictionary &dict, const DataStoreBase &store, bool sort_unique_values); ~UniqueStoreEnumerator(); void enumerateValue(EntryRef ref); void enumerateValues(); diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp index 7a43b16e66a..378fc54750d 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp @@ -9,14 +9,16 @@ namespace vespalib::datastore { template <typename RefT> -UniqueStoreEnumerator<RefT>::UniqueStoreEnumerator(const IUniqueStoreDictionary &dict, const DataStoreBase &store) +UniqueStoreEnumerator<RefT>::UniqueStoreEnumerator(const IUniqueStoreDictionary &dict, const DataStoreBase &store, bool sort_unique_values) : _dict_snapshot(dict.get_read_snapshot()), _store(store), _enumValues(), _next_enum_val(1) { _dict_snapshot->fill(); - _dict_snapshot->sort(); + if (sort_unique_values) { + _dict_snapshot->sort(); + } allocate_enum_values(); } |