summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@online.no> 2024-02-15 11:12:54 +0100
committer Tor Egge <Tor.Egge@online.no> 2024-02-15 11:12:54 +0100
commit 2b368f3d23273add4cd042536b0de1beb6d86276 (patch)
tree ddea9df57de01ad3ca24fb0a8db3723f7fdeb212 /searchlib
parent d494044b85acd33bc9ecd4db56611e03032556d3 (diff)
Add search::predicate::PredicateIndexSaver.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/predicate/document_features_store_test.cpp 1
-rw-r--r-- searchlib/src/tests/predicate/predicate_index_test.cpp 14
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp 11
-rw-r--r-- searchlib/src/vespa/searchlib/predicate/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/predicate/document_features_store.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/predicate/document_features_store.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/predicate/document_features_store_saver.h 7
-rw-r--r-- searchlib/src/vespa/searchlib/predicate/i_saver.h 18
-rw-r--r-- searchlib/src/vespa/searchlib/predicate/predicate_index.cpp 30
-rw-r--r-- searchlib/src/vespa/searchlib/predicate/predicate_index.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/predicate/predicate_index_saver.cpp 37
-rw-r--r-- searchlib/src/vespa/searchlib/predicate/predicate_index_saver.h 30
-rw-r--r-- searchlib/src/vespa/searchlib/predicate/simple_index.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/predicate/simple_index.hpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/predicate/simple_index_saver.h 7
15 files changed, 133 insertions, 37 deletions
diff --git a/searchlib/src/tests/predicate/document_features_store_test.cpp b/searchlib/src/tests/predicate/document_features_store_test.cpp
index 11ca20349c3..0e77a1be878 100644
--- a/searchlib/src/tests/predicate/document_features_store_test.cpp
+++ b/searchlib/src/tests/predicate/document_features_store_test.cpp
@@ -26,6 +26,7 @@ const uint32_t doc_id = 42;
void
save_document_features_store(DocumentFeaturesStore& store, vespalib::DataBuffer& buffer)
{
+ store.commit();
DataBufferWriter writer(buffer);
store.make_saver()->save(writer);
writer.flush();
diff --git a/searchlib/src/tests/predicate/predicate_index_test.cpp b/searchlib/src/tests/predicate/predicate_index_test.cpp
index 40b650e489a..6351807f4fc 100644
--- a/searchlib/src/tests/predicate/predicate_index_test.cpp
+++ b/searchlib/src/tests/predicate/predicate_index_test.cpp
@@ -4,6 +4,7 @@
#include <vespa/searchlib/predicate/predicate_index.h>
#include <vespa/searchlib/predicate/simple_index.hpp>
#include <vespa/searchlib/predicate/predicate_tree_annotator.h>
+#include <vespa/searchlib/util/data_buffer_writer.h>
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/searchlib/attribute/predicate_attribute.h>
#include <vespa/vespalib/util/stringfmt.h>
@@ -32,6 +33,15 @@ vespalib::GenerationHolder generation_holder;
DummyDocIdLimitProvider dummy_provider;
SimpleIndexConfig simple_index_config;
+void // Test helper: serializes `index` into `buffer` via the saver returned by make_saver().
+save_predicate_index(PredicateIndex& index, vespalib::DataBuffer& buffer)
+{
+ index.commit(); // NOTE(review): presumably required so the saver sees committed (frozen) state - confirm
+ DataBufferWriter writer(buffer); // wraps `buffer` so it can be passed where a BufferWriter is expected
+ index.make_saver()->save(writer); // the saver is a temporary that only lives for this call
+ writer.flush(); // NOTE(review): presumably pushes data still held by the writer into `buffer` - confirm
+}
+
TEST("require that PredicateIndex can index empty documents") {
PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10);
EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size());
@@ -292,7 +302,7 @@ TEST("require that PredicateIndex can be (de)serialized") {
index.commit();
vespalib::DataBuffer buffer;
- index.serialize(buffer);
+ save_predicate_index(index, buffer);
uint32_t doc_id_limit;
DocIdLimitFinder finder(doc_id_limit);
PredicateIndex index2(generation_holder, dummy_provider, simple_index_config,
@@ -336,7 +346,7 @@ TEST("require that DocumentFeaturesStore is restored on deserialization") {
EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid());
indexFeature(index, doc_id, min_feature, {{hash, interval}}, {{hash2, bounds}});
vespalib::DataBuffer buffer;
- index.serialize(buffer);
+ save_predicate_index(index, buffer);
uint32_t doc_id_limit;
DocIdLimitFinder finder(doc_id_limit);
PredicateIndex index2(generation_holder, dummy_provider, simple_index_config,
diff --git a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp
index ddf71063306..1c273f17176 100644
--- a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp
@@ -6,7 +6,9 @@
#include "load_utils.h"
#include <vespa/document/fieldvalue/predicatefieldvalue.h>
#include <vespa/document/predicate/predicate.h>
+#include <vespa/searchlib/predicate/i_saver.h>
#include <vespa/searchlib/predicate/predicate_index.h>
+#include <vespa/searchlib/util/data_buffer_writer.h>
#include <vespa/searchlib/util/fileutil.h>
#include <vespa/searchcommon/attribute/config.h>
#include <vespa/vespalib/data/slime/slime.h>
@@ -140,9 +142,14 @@ PredicateAttribute::before_inc_generation(generation_t current_gen)
void
PredicateAttribute::onSave(IAttributeSaveTarget &saveTarget) {
- LOG(info, "Saving predicate attribute version %d", getVersion());
+ LOG(info, "Saving predicate attribute version %d name '%s'", getVersion(), getName().c_str());
IAttributeSaveTarget::Buffer buffer(saveTarget.datWriter().allocBuf(4_Ki));
- _index->serialize(*buffer);
+ {
+ DataBufferWriter writer(*buffer);
+ auto saver = _index->make_saver();
+ saver->save(writer);
+ writer.flush();
+ }
uint32_t highest_doc_id = static_cast<uint32_t>(_min_feature.size() - 1);
buffer->writeInt32(highest_doc_id);
for (size_t i = 1; i <= highest_doc_id; ++i) {
diff --git a/searchlib/src/vespa/searchlib/predicate/CMakeLists.txt b/searchlib/src/vespa/searchlib/predicate/CMakeLists.txt
index 48e79648675..4dc227433e1 100644
--- a/searchlib/src/vespa/searchlib/predicate/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/predicate/CMakeLists.txt
@@ -4,6 +4,7 @@ vespa_add_library(searchlib_predicate OBJECT
document_features_store.cpp
document_features_store_saver.cpp
predicate_index.cpp
+ predicate_index_saver.cpp
predicate_interval.cpp
predicate_interval_store.cpp
predicate_range_expander.cpp
diff --git a/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp b/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp
index a4e415501b7..3237dc84fc7 100644
--- a/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp
+++ b/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp
@@ -274,7 +274,7 @@ DocumentFeaturesStore::getMemoryUsage() const {
return usage;
}
-std::unique_ptr<DocumentFeaturesStoreSaver>
+std::unique_ptr<ISaver>
DocumentFeaturesStore::make_saver() const
{
return std::make_unique<DocumentFeaturesStoreSaver>(*this);
diff --git a/searchlib/src/vespa/searchlib/predicate/document_features_store.h b/searchlib/src/vespa/searchlib/predicate/document_features_store.h
index d12e703957a..9b268c375e6 100644
--- a/searchlib/src/vespa/searchlib/predicate/document_features_store.h
+++ b/searchlib/src/vespa/searchlib/predicate/document_features_store.h
@@ -15,6 +15,7 @@
namespace search::predicate {
class DocumentFeaturesStoreSaver;
+class ISaver;
/**
* Class used to track the {featureId, docId} pairs that are inserted
@@ -95,7 +96,7 @@ public:
void assign_generation(generation_t current_gen);
vespalib::MemoryUsage getMemoryUsage() const;
- std::unique_ptr<DocumentFeaturesStoreSaver> make_saver() const;
+ std::unique_ptr<ISaver> make_saver() const;
};
}
diff --git a/searchlib/src/vespa/searchlib/predicate/document_features_store_saver.h b/searchlib/src/vespa/searchlib/predicate/document_features_store_saver.h
index 630ac3900f5..289b1fc076f 100644
--- a/searchlib/src/vespa/searchlib/predicate/document_features_store_saver.h
+++ b/searchlib/src/vespa/searchlib/predicate/document_features_store_saver.h
@@ -2,6 +2,7 @@
#pragma once
+#include "i_saver.h"
#include "document_features_store.h"
namespace search { class BufferWriter; }
@@ -12,7 +13,7 @@ namespace search::predicate {
* Class used to save a DocumentFeaturesStore instance, streaming the
* serialized data via a BufferWriter.
*/
-class DocumentFeaturesStoreSaver {
+class DocumentFeaturesStoreSaver : public ISaver {
using RefsVector = DocumentFeaturesStore::RefsVector;
using FeaturesStore = DocumentFeaturesStore::FeaturesStore;
using RangesStore = DocumentFeaturesStore::RangesStore;
@@ -26,8 +27,8 @@ class DocumentFeaturesStoreSaver {
public:
DocumentFeaturesStoreSaver(const DocumentFeaturesStore& store);
- ~DocumentFeaturesStoreSaver();
- void save(BufferWriter& writer) const;
+ ~DocumentFeaturesStoreSaver() override;
+ void save(BufferWriter& writer) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/predicate/i_saver.h b/searchlib/src/vespa/searchlib/predicate/i_saver.h
new file mode 100644
index 00000000000..82c97d5ceb9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/i_saver.h
@@ -0,0 +1,18 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search { class BufferWriter; }
+
+namespace search::predicate {
+
+/**
+ * Interface for objects that save (parts of) a predicate index by writing serialized data to a BufferWriter.
+ */
+class ISaver {
+public:
+ virtual ~ISaver() = default; // virtual: savers are owned and destroyed through std::unique_ptr<ISaver>
+ virtual void save(BufferWriter& writer) const = 0; // write this saver's serialized data to `writer`
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp
index 296a6ff5c2e..6038f0da68f 100644
--- a/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp
@@ -3,6 +3,7 @@
#include "predicate_index.h"
#include "document_features_store_saver.h"
#include "predicate_hash.h"
+#include "predicate_index_saver.h"
#include "simple_index_saver.h"
#include <vespa/searchlib/util/data_buffer_writer.h>
#include <vespa/vespalib/datastore/buffer_type.hpp>
@@ -127,27 +128,14 @@ PredicateIndex::PredicateIndex(GenerationHolder &genHolder,
PredicateIndex::~PredicateIndex() = default;
-void
-PredicateIndex::serialize(DataBuffer &buffer) const {
- {
- auto saver = _features_store.make_saver();
- DataBufferWriter writer(buffer);
- saver->save(writer);
- writer.flush();
- }
- buffer.writeInt16(_arity);
- buffer.writeInt32(_zero_constraint_docs.size());
- for (auto it = _zero_constraint_docs.begin(); it.valid(); ++it) {
- buffer.writeInt32(it.getKey());
- }
- {
- DataBufferWriter writer(buffer);
- auto interval_saver = _interval_index.make_saver(std::make_unique<IntervalSaver<Interval>>(_interval_store));
- interval_saver->save(writer);
- auto bounds_saver = _bounds_index.make_saver(std::make_unique<IntervalSaver<IntervalWithBounds>>(_interval_store));
- bounds_saver->save(writer);
- writer.flush();
- }
+std::unique_ptr<ISaver> // Builds a PredicateIndexSaver from the sub-savers and values it needs to serialize this index.
+PredicateIndex::make_saver() const
+{
+ return std::make_unique<PredicateIndexSaver>(_features_store.make_saver(), // argument order mirrors the write order in PredicateIndexSaver::save()
+ _arity,
+ _zero_constraint_docs.getFrozenView(), // passes the result of getFrozenView(), not _zero_constraint_docs itself
+ _interval_index.make_saver(std::make_unique<IntervalSaver<Interval>>(_interval_store)),
+ _bounds_index.make_saver(std::make_unique<IntervalSaver<IntervalWithBounds>>(_interval_store)));
}
void
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index.h b/searchlib/src/vespa/searchlib/predicate/predicate_index.h
index 439187bccd7..a5e0e5d0509 100644
--- a/searchlib/src/vespa/searchlib/predicate/predicate_index.h
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_index.h
@@ -14,6 +14,7 @@
namespace search::predicate {
+class ISaver;
struct PredicateTreeAnnotations;
/**
@@ -66,7 +67,7 @@ public:
SimpleIndexDeserializeObserver<> & observer, uint32_t version);
~PredicateIndex() override;
- void serialize(vespalib::DataBuffer &buffer) const;
+ std::unique_ptr<ISaver> make_saver() const;
void onDeserializationCompleted();
void indexEmptyDocument(uint32_t doc_id);
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index_saver.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_index_saver.cpp
new file mode 100644
index 00000000000..3b704b6082f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_index_saver.cpp
@@ -0,0 +1,37 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "predicate_index_saver.h"
+#include "nbo_write.h"
+
+namespace search::predicate {
+
+PredicateIndexSaver::PredicateIndexSaver(std::unique_ptr<ISaver> features_store_saver,
+ uint32_t arity,
+ ZeroConstraintDocs zero_constraint_docs,
+ std::unique_ptr<ISaver> interval_index_saver,
+ std::unique_ptr<ISaver> bounds_index_saver) // stores the parts that save() later writes
+ : ISaver(),
+ _features_store_saver(std::move(features_store_saver)), // takes ownership of each sub-saver
+ _arity(arity),
+ _zero_constraint_docs(std::move(zero_constraint_docs)),
+ _interval_index_saver(std::move(interval_index_saver)),
+ _bounds_index_saver(std::move(bounds_index_saver))
+{ // nothing else to do: all state is set in the initializer list
+}
+
+PredicateIndexSaver::~PredicateIndexSaver() = default; // defaulted here, out of line, rather than in the header
+
+void // Writes, in order: features store, arity, zero-constraint-docs keys, interval index, bounds index.
+PredicateIndexSaver::save(BufferWriter& writer) const
+{
+ _features_store_saver->save(writer);
+ nbo_write<uint16_t>(writer, _arity); // 16 bits although the member is uint32_t: the replaced serialize() used writeInt16(_arity)
+ nbo_write<uint32_t>(writer, _zero_constraint_docs.size()); // entry count first, then one key per entry
+ for (auto it = _zero_constraint_docs.begin(); it.valid(); ++it) {
+ nbo_write<uint32_t>(writer, it.getKey());
+ }
+ _interval_index_saver->save(writer);
+ _bounds_index_saver->save(writer);
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index_saver.h b/searchlib/src/vespa/searchlib/predicate/predicate_index_saver.h
new file mode 100644
index 00000000000..faec9d2cef3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_index_saver.h
@@ -0,0 +1,30 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "i_saver.h"
+#include "common.h"
+
+namespace search::predicate {
+
+/**
+ * Class used to save a PredicateIndex instance, streaming the
+ * serialized data via a BufferWriter.
+ */
+class PredicateIndexSaver : public ISaver {
+ std::unique_ptr<ISaver> _features_store_saver; // owned sub-saver, written first by save()
+ uint32_t _arity; // written as a 16-bit value by save()
+ ZeroConstraintDocs _zero_constraint_docs; // save() writes its size followed by each key
+ std::unique_ptr<ISaver> _interval_index_saver; // owned sub-saver, written after the zero-constraint keys
+ std::unique_ptr<ISaver> _bounds_index_saver; // owned sub-saver, written last
+public:
+ PredicateIndexSaver(std::unique_ptr<ISaver> features_store_saver,
+ uint32_t arity,
+ ZeroConstraintDocs zero_constraint_docs,
+ std::unique_ptr<ISaver> interval_index_saver,
+ std::unique_ptr<ISaver> bounds_index_saver);
+ ~PredicateIndexSaver() override;
+ void save(BufferWriter& writer) const override; // streams all parts to `writer`
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.h b/searchlib/src/vespa/searchlib/predicate/simple_index.h
index 1acfda24eeb..0e3c9828b21 100644
--- a/searchlib/src/vespa/searchlib/predicate/simple_index.h
+++ b/searchlib/src/vespa/searchlib/predicate/simple_index.h
@@ -12,7 +12,7 @@ namespace search { class BufferWriter; }
namespace search::predicate {
-template <typename, typename, typename> class SimpleIndexSaver;
+class ISaver;
template <typename Key = uint64_t, typename DocId = uint32_t>
struct SimpleIndexDeserializeObserver {
@@ -218,7 +218,7 @@ public:
}
- std::unique_ptr<SimpleIndexSaver<Posting, Key, DocId>> make_saver(std::unique_ptr<PostingSaver<Posting>> subsaver) const;
+ std::unique_ptr<ISaver> make_saver(std::unique_ptr<PostingSaver<Posting>> subsaver) const;
};
template<typename Posting, typename Key, typename DocId>
diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.hpp b/searchlib/src/vespa/searchlib/predicate/simple_index.hpp
index b0a65622d86..5c5afedfe90 100644
--- a/searchlib/src/vespa/searchlib/predicate/simple_index.hpp
+++ b/searchlib/src/vespa/searchlib/predicate/simple_index.hpp
@@ -301,7 +301,7 @@ SimpleIndex<Posting, Key, DocId>::getMemoryUsage() const {
};
template <typename Posting, typename Key, typename DocId>
-std::unique_ptr<SimpleIndexSaver<Posting, Key, DocId>>
+std::unique_ptr<ISaver>
SimpleIndex<Posting, Key, DocId>::make_saver(std::unique_ptr<PostingSaver<Posting>> subsaver) const
{
return std::make_unique<SimpleIndexSaver<Posting, Key, DocId>>(_dictionary, _btree_posting_lists, std::move(subsaver));
diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index_saver.h b/searchlib/src/vespa/searchlib/predicate/simple_index_saver.h
index e312f42931d..eefaf7c79f4 100644
--- a/searchlib/src/vespa/searchlib/predicate/simple_index_saver.h
+++ b/searchlib/src/vespa/searchlib/predicate/simple_index_saver.h
@@ -2,6 +2,7 @@
#pragma once
+#include "i_saver.h"
#include "simple_index.h"
namespace search::predicate {
@@ -12,7 +13,7 @@ namespace search::predicate {
*/
template <typename Posting,
typename Key = uint64_t, typename DocId = uint32_t>
-class SimpleIndexSaver
+class SimpleIndexSaver : public ISaver
{
using Source = SimpleIndex<Posting,Key,DocId>;
using Dictionary = Source::Dictionary;
@@ -23,8 +24,8 @@ class SimpleIndexSaver
std::unique_ptr<PostingSaver<Posting>> _subsaver;
public:
SimpleIndexSaver(const Dictionary& dictionary, const BTreeStore& btree_posting_lists, std::unique_ptr<PostingSaver<Posting>> _subsaver);
- ~SimpleIndexSaver();
- void save(BufferWriter& writer) const;
+ ~SimpleIndexSaver() override;
+ void save(BufferWriter& writer) const override;
};
}