diff options
author | Tor Egge <Tor.Egge@online.no> | 2024-02-15 11:12:54 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2024-02-15 11:12:54 +0100 |
commit | 2b368f3d23273add4cd042536b0de1beb6d86276 (patch) | |
tree | ddea9df57de01ad3ca24fb0a8db3723f7fdeb212 /searchlib/src | |
parent | d494044b85acd33bc9ecd4db56611e03032556d3 (diff) |
Add search::predicate::PredicateIndexSaver.
Diffstat (limited to 'searchlib/src')
15 files changed, 133 insertions, 37 deletions
diff --git a/searchlib/src/tests/predicate/document_features_store_test.cpp b/searchlib/src/tests/predicate/document_features_store_test.cpp index 11ca20349c3..0e77a1be878 100644 --- a/searchlib/src/tests/predicate/document_features_store_test.cpp +++ b/searchlib/src/tests/predicate/document_features_store_test.cpp @@ -26,6 +26,7 @@ const uint32_t doc_id = 42; void save_document_features_store(DocumentFeaturesStore& store, vespalib::DataBuffer& buffer) { + store.commit(); DataBufferWriter writer(buffer); store.make_saver()->save(writer); writer.flush(); diff --git a/searchlib/src/tests/predicate/predicate_index_test.cpp b/searchlib/src/tests/predicate/predicate_index_test.cpp index 40b650e489a..6351807f4fc 100644 --- a/searchlib/src/tests/predicate/predicate_index_test.cpp +++ b/searchlib/src/tests/predicate/predicate_index_test.cpp @@ -4,6 +4,7 @@ #include <vespa/searchlib/predicate/predicate_index.h> #include <vespa/searchlib/predicate/simple_index.hpp> #include <vespa/searchlib/predicate/predicate_tree_annotator.h> +#include <vespa/searchlib/util/data_buffer_writer.h> #include <vespa/vespalib/testkit/testapp.h> #include <vespa/searchlib/attribute/predicate_attribute.h> #include <vespa/vespalib/util/stringfmt.h> @@ -32,6 +33,15 @@ vespalib::GenerationHolder generation_holder; DummyDocIdLimitProvider dummy_provider; SimpleIndexConfig simple_index_config; +void +save_predicate_index(PredicateIndex& index, vespalib::DataBuffer& buffer) +{ + index.commit(); + DataBufferWriter writer(buffer); + index.make_saver()->save(writer); + writer.flush(); +} + TEST("require that PredicateIndex can index empty documents") { PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10); EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size()); @@ -292,7 +302,7 @@ TEST("require that PredicateIndex can be (de)serialized") { index.commit(); vespalib::DataBuffer buffer; - index.serialize(buffer); + save_predicate_index(index, buffer); uint32_t doc_id_limit; DocIdLimitFinder finder(doc_id_limit); PredicateIndex index2(generation_holder, dummy_provider, simple_index_config, @@ -336,7 +346,7 @@ TEST("require that DocumentFeaturesStore is restored on deserialization") { EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); indexFeature(index, doc_id, min_feature, {{hash, interval}}, {{hash2, bounds}}); vespalib::DataBuffer buffer; - index.serialize(buffer); + save_predicate_index(index, buffer); uint32_t doc_id_limit; DocIdLimitFinder finder(doc_id_limit); PredicateIndex index2(generation_holder, dummy_provider, simple_index_config, diff --git a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp index ddf71063306..1c273f17176 100644 --- a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp @@ -6,7 +6,9 @@ #include "load_utils.h" #include <vespa/document/fieldvalue/predicatefieldvalue.h> #include <vespa/document/predicate/predicate.h> +#include <vespa/searchlib/predicate/i_saver.h> #include <vespa/searchlib/predicate/predicate_index.h> +#include <vespa/searchlib/util/data_buffer_writer.h> #include <vespa/searchlib/util/fileutil.h> #include <vespa/searchcommon/attribute/config.h> #include <vespa/vespalib/data/slime/slime.h> @@ -140,9 +142,14 @@ PredicateAttribute::before_inc_generation(generation_t current_gen) void PredicateAttribute::onSave(IAttributeSaveTarget &saveTarget) { - LOG(info, "Saving predicate attribute version %d", getVersion()); + LOG(info, "Saving predicate attribute version %d name '%s'", getVersion(), getName().c_str()); IAttributeSaveTarget::Buffer buffer(saveTarget.datWriter().allocBuf(4_Ki)); - _index->serialize(*buffer); + { + DataBufferWriter writer(*buffer); + auto saver = _index->make_saver(); + saver->save(writer); + writer.flush(); + } uint32_t highest_doc_id = static_cast<uint32_t>(_min_feature.size() - 1); buffer->writeInt32(highest_doc_id); for (size_t i = 1; i <= highest_doc_id; ++i) { diff --git a/searchlib/src/vespa/searchlib/predicate/CMakeLists.txt b/searchlib/src/vespa/searchlib/predicate/CMakeLists.txt index 48e79648675..4dc227433e1 100644 --- a/searchlib/src/vespa/searchlib/predicate/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/predicate/CMakeLists.txt @@ -4,6 +4,7 @@ vespa_add_library(searchlib_predicate OBJECT document_features_store.cpp document_features_store_saver.cpp predicate_index.cpp + predicate_index_saver.cpp predicate_interval.cpp predicate_interval_store.cpp predicate_range_expander.cpp diff --git a/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp b/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp index a4e415501b7..3237dc84fc7 100644 --- a/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp +++ b/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp @@ -274,7 +274,7 @@ DocumentFeaturesStore::getMemoryUsage() const { return usage; } -std::unique_ptr<DocumentFeaturesStoreSaver> +std::unique_ptr<ISaver> DocumentFeaturesStore::make_saver() const { return std::make_unique<DocumentFeaturesStoreSaver>(*this); diff --git a/searchlib/src/vespa/searchlib/predicate/document_features_store.h b/searchlib/src/vespa/searchlib/predicate/document_features_store.h index d12e703957a..9b268c375e6 100644 --- a/searchlib/src/vespa/searchlib/predicate/document_features_store.h +++ b/searchlib/src/vespa/searchlib/predicate/document_features_store.h @@ -15,6 +15,7 @@ namespace search::predicate { class DocumentFeaturesStoreSaver; +class ISaver; /** * Class used to track the {featureId, docId} pairs that are inserted @@ -95,7 +96,7 @@ public: void assign_generation(generation_t current_gen); vespalib::MemoryUsage getMemoryUsage() const; - std::unique_ptr<DocumentFeaturesStoreSaver> make_saver() const; + std::unique_ptr<ISaver> make_saver() const; }; } diff --git a/searchlib/src/vespa/searchlib/predicate/document_features_store_saver.h b/searchlib/src/vespa/searchlib/predicate/document_features_store_saver.h index 630ac3900f5..289b1fc076f 100644 --- a/searchlib/src/vespa/searchlib/predicate/document_features_store_saver.h +++ b/searchlib/src/vespa/searchlib/predicate/document_features_store_saver.h @@ -2,6 +2,7 @@ #pragma once +#include "i_saver.h" #include "document_features_store.h" namespace search { class BufferWriter; } @@ -12,7 +13,7 @@ namespace search::predicate { * Class used to save a DocumentFeaturesStore instance, streaming the * serialized data via a BufferWriter. */ -class DocumentFeaturesStoreSaver { +class DocumentFeaturesStoreSaver : public ISaver { using RefsVector = DocumentFeaturesStore::RefsVector; using FeaturesStore = DocumentFeaturesStore::FeaturesStore; using RangesStore = DocumentFeaturesStore::RangesStore; @@ -26,8 +27,8 @@ class DocumentFeaturesStoreSaver { public: DocumentFeaturesStoreSaver(const DocumentFeaturesStore& store); - ~DocumentFeaturesStoreSaver(); - void save(BufferWriter& writer) const; + ~DocumentFeaturesStoreSaver() override; + void save(BufferWriter& writer) const override; }; } diff --git a/searchlib/src/vespa/searchlib/predicate/i_saver.h b/searchlib/src/vespa/searchlib/predicate/i_saver.h new file mode 100644 index 00000000000..82c97d5ceb9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/i_saver.h @@ -0,0 +1,18 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace search { class BufferWriter; } + +namespace search::predicate { + +/* + * Interface class for saving (parts of) predicate index. + */ +class ISaver { +public: + virtual ~ISaver() = default; + virtual void save(BufferWriter& writer) const = 0; +}; + +} diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp index 296a6ff5c2e..6038f0da68f 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp +++ b/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp @@ -3,6 +3,7 @@ #include "predicate_index.h" #include "document_features_store_saver.h" #include "predicate_hash.h" +#include "predicate_index_saver.h" #include "simple_index_saver.h" #include <vespa/searchlib/util/data_buffer_writer.h> #include <vespa/vespalib/datastore/buffer_type.hpp> @@ -127,27 +128,14 @@ PredicateIndex::PredicateIndex(GenerationHolder &genHolder, PredicateIndex::~PredicateIndex() = default; -void -PredicateIndex::serialize(DataBuffer &buffer) const { - { - auto saver = _features_store.make_saver(); - DataBufferWriter writer(buffer); - saver->save(writer); - writer.flush(); - } - buffer.writeInt16(_arity); - buffer.writeInt32(_zero_constraint_docs.size()); - for (auto it = _zero_constraint_docs.begin(); it.valid(); ++it) { - buffer.writeInt32(it.getKey()); - } - { - DataBufferWriter writer(buffer); - auto interval_saver = _interval_index.make_saver(std::make_unique<IntervalSaver<Interval>>(_interval_store)); - interval_saver->save(writer); - auto bounds_saver = _bounds_index.make_saver(std::make_unique<IntervalSaver<IntervalWithBounds>>(_interval_store)); - bounds_saver->save(writer); - writer.flush(); - } +std::unique_ptr<ISaver> +PredicateIndex::make_saver() const +{ + return std::make_unique<PredicateIndexSaver>(_features_store.make_saver(), + _arity, + _zero_constraint_docs.getFrozenView(), + _interval_index.make_saver(std::make_unique<IntervalSaver<Interval>>(_interval_store)), + _bounds_index.make_saver(std::make_unique<IntervalSaver<IntervalWithBounds>>(_interval_store))); } void diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index.h b/searchlib/src/vespa/searchlib/predicate/predicate_index.h index 439187bccd7..a5e0e5d0509 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_index.h +++ b/searchlib/src/vespa/searchlib/predicate/predicate_index.h @@ -14,6 +14,7 @@ namespace search::predicate { +class ISaver; struct PredicateTreeAnnotations; /** @@ -66,7 +67,7 @@ public: SimpleIndexDeserializeObserver<> & observer, uint32_t version); ~PredicateIndex() override; - void serialize(vespalib::DataBuffer &buffer) const; + std::unique_ptr<ISaver> make_saver() const; void onDeserializationCompleted(); void indexEmptyDocument(uint32_t doc_id); diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index_saver.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_index_saver.cpp new file mode 100644 index 00000000000..3b704b6082f --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_index_saver.cpp @@ -0,0 +1,37 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "predicate_index_saver.h" +#include "nbo_write.h" + +namespace search::predicate { + +PredicateIndexSaver::PredicateIndexSaver(std::unique_ptr<ISaver> features_store_saver, + uint32_t arity, + ZeroConstraintDocs zero_constraint_docs, + std::unique_ptr<ISaver> interval_index_saver, + std::unique_ptr<ISaver> bounds_index_saver) + : ISaver(), + _features_store_saver(std::move(features_store_saver)), + _arity(arity), + _zero_constraint_docs(std::move(zero_constraint_docs)), + _interval_index_saver(std::move(interval_index_saver)), + _bounds_index_saver(std::move(bounds_index_saver)) +{ +} + +PredicateIndexSaver::~PredicateIndexSaver() = default; + +void +PredicateIndexSaver::save(BufferWriter& writer) const +{ + _features_store_saver->save(writer); + nbo_write<uint16_t>(writer, _arity); + nbo_write<uint32_t>(writer, _zero_constraint_docs.size()); + for (auto it = _zero_constraint_docs.begin(); it.valid(); ++it) { + nbo_write<uint32_t>(writer, it.getKey()); + } + _interval_index_saver->save(writer); + _bounds_index_saver->save(writer); +} + +} diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index_saver.h b/searchlib/src/vespa/searchlib/predicate/predicate_index_saver.h new file mode 100644 index 00000000000..faec9d2cef3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_index_saver.h @@ -0,0 +1,30 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "i_saver.h" +#include "common.h" + +namespace search::predicate { + +/* + * Class used to save a PredicateIndex instance, streaming the + * serialized data via a BufferWriter. + */ +class PredicateIndexSaver : public ISaver { + std::unique_ptr<ISaver> _features_store_saver; + uint32_t _arity; + ZeroConstraintDocs _zero_constraint_docs; + std::unique_ptr<ISaver> _interval_index_saver; + std::unique_ptr<ISaver> _bounds_index_saver; +public: + PredicateIndexSaver(std::unique_ptr<ISaver> features_store_saver, + uint32_t _arity, + ZeroConstraintDocs zero_constraint_docs, + std::unique_ptr<ISaver> interval_index_saver, + std::unique_ptr<ISaver> bounds_index_saver); + ~PredicateIndexSaver() override; + void save(BufferWriter& writer) const override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.h b/searchlib/src/vespa/searchlib/predicate/simple_index.h index 1acfda24eeb..0e3c9828b21 100644 --- a/searchlib/src/vespa/searchlib/predicate/simple_index.h +++ b/searchlib/src/vespa/searchlib/predicate/simple_index.h @@ -12,7 +12,7 @@ namespace search { class BufferWriter; } namespace search::predicate { -template <typename, typename, typename> class SimpleIndexSaver; +class ISaver; template <typename Key = uint64_t, typename DocId = uint32_t> struct SimpleIndexDeserializeObserver { @@ -218,7 +218,7 @@ public: } - std::unique_ptr<SimpleIndexSaver<Posting, Key, DocId>> make_saver(std::unique_ptr<PostingSaver<Posting>> subsaver) const; + std::unique_ptr<ISaver> make_saver(std::unique_ptr<PostingSaver<Posting>> subsaver) const; }; template<typename Posting, typename Key, typename DocId> diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.hpp b/searchlib/src/vespa/searchlib/predicate/simple_index.hpp index b0a65622d86..5c5afedfe90 100644 --- a/searchlib/src/vespa/searchlib/predicate/simple_index.hpp +++ b/searchlib/src/vespa/searchlib/predicate/simple_index.hpp @@ -301,7 +301,7 @@ SimpleIndex<Posting, Key, DocId>::getMemoryUsage() const { }; template <typename Posting, typename Key, typename DocId> -std::unique_ptr<SimpleIndexSaver<Posting, Key, DocId>> +std::unique_ptr<ISaver> SimpleIndex<Posting, Key, DocId>::make_saver(std::unique_ptr<PostingSaver<Posting>> subsaver) const { return std::make_unique<SimpleIndexSaver<Posting, Key, DocId>>(_dictionary, _btree_posting_lists, std::move(subsaver)); diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index_saver.h b/searchlib/src/vespa/searchlib/predicate/simple_index_saver.h index e312f42931d..eefaf7c79f4 100644 --- a/searchlib/src/vespa/searchlib/predicate/simple_index_saver.h +++ b/searchlib/src/vespa/searchlib/predicate/simple_index_saver.h @@ -2,6 +2,7 @@ #pragma once +#include "i_saver.h" #include "simple_index.h" namespace search::predicate { @@ -12,7 +13,7 @@ namespace search::predicate { */ template <typename Posting, typename Key = uint64_t, typename DocId = uint32_t> -class SimpleIndexSaver +class SimpleIndexSaver : public ISaver { using Source = SimpleIndex<Posting,Key,DocId>; using Dictionary = Source::Dictionary; @@ -23,8 +24,8 @@ class SimpleIndexSaver std::unique_ptr<PostingSaver<Posting>> _subsaver; public: SimpleIndexSaver(const Dictionary& dictionary, const BTreeStore& btree_posting_lists, std::unique_ptr<PostingSaver<Posting>> _subsaver); - ~SimpleIndexSaver(); - void save(BufferWriter& writer) const; + ~SimpleIndexSaver() override; + void save(BufferWriter& writer) const override; }; } |