summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2023-03-09 14:25:13 +0100
committer GitHub <noreply@github.com> 2023-03-09 14:25:13 +0100
commit c96886fe1c28babb62b2e837c6352d77edf3ee7e (patch)
tree b11a4f5f9e203d317e0f38e8ac4b615d70cc7542
parent c61498eb1beac92dc2ca32659b3234f8a87c8a72 (diff)
parent fc689e6c8ca15644d04197a908a2fb0d2f9eb74b (diff)
Merge pull request #26384 from vespa-engine/toregge/add-saver-and-loader-for-single-raw-attribute-vector
Add saver and loader for SingleRawAttribute.
-rw-r--r-- searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp 44
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/CMakeLists.txt 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/raw_buffer_store.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/raw_buffer_store_reader.cpp 34
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/raw_buffer_store_reader.h 29
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/raw_buffer_store_writer.cpp 33
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/raw_buffer_store_writer.h 26
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp 20
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_raw_attribute_loader.cpp 51
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_raw_attribute_loader.h 36
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_raw_attribute_saver.cpp 42
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_raw_attribute_saver.h 32
13 files changed, 350 insertions, 4 deletions
diff --git a/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp b/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp
index 82e4fd065cf..9f728cc0482 100644
--- a/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp
+++ b/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp
@@ -4,6 +4,7 @@
#include <vespa/searchlib/attribute/attributefactory.h>
#include <vespa/searchcommon/attribute/config.h>
#include <vespa/vespalib/gtest/gtest.h>
+#include <filesystem>
#include <memory>
using search::AttributeFactory;
@@ -19,6 +20,8 @@ std::vector<char> empty;
vespalib::string hello("hello");
vespalib::ConstArrayRef<char> raw_hello(hello.c_str(), hello.size());
+std::filesystem::path attr_path("raw.dat");
+
std::vector<char> as_vector(vespalib::stringref value) {
return {value.data(), value.data() + value.size()};
}
@@ -27,6 +30,10 @@ std::vector<char> as_vector(vespalib::ConstArrayRef<char> value) {
return {value.data(), value.data() + value.size()};
}
+/*
+ * Removes the file named by attr_path. The save_and_load test calls this
+ * before saving and again after the loaded attribute has been checked.
+ */
+void
+remove_saved_attr()
+{
+    static_cast<void>(std::filesystem::remove(attr_path));
+}
+
class RawAttributeTest : public ::testing::Test
{
protected:
@@ -36,6 +43,7 @@ protected:
RawAttributeTest();
~RawAttributeTest() override;
std::vector<char> get_raw(uint32_t docid);
+ void reset_attr(bool add_reserved);
};
@@ -44,10 +52,7 @@ RawAttributeTest::RawAttributeTest()
_attr(),
_raw(nullptr)
{
- Config cfg(BasicType::RAW, CollectionType::SINGLE);
- _attr = AttributeFactory::createAttribute("raw", cfg);
- _raw = &dynamic_cast<SingleRawAttribute&>(*_attr);
- _attr->addReservedDoc();
+ reset_attr(true);
}
RawAttributeTest::~RawAttributeTest() = default;
@@ -58,6 +63,17 @@ RawAttributeTest::get_raw(uint32_t docid)
return as_vector(_raw->get_raw(docid));
}
+/*
+ * Replaces the attribute under test with a newly created single-value raw
+ * attribute named "raw" and points _raw at it. The reserved document is
+ * added only when add_reserved is true.
+ */
+void
+RawAttributeTest::reset_attr(bool add_reserved)
+{
+    Config config(BasicType::RAW, CollectionType::SINGLE);
+    _attr = AttributeFactory::createAttribute("raw", config);
+    _raw = &dynamic_cast<SingleRawAttribute&>(*_attr);
+    if (!add_reserved) {
+        return;
+    }
+    _attr->addReservedDoc();
+}
+
TEST_F(RawAttributeTest, can_set_and_clear_value)
{
EXPECT_TRUE(_attr->addDocs(10));
@@ -89,4 +105,24 @@ TEST_F(RawAttributeTest, implements_serialize_for_sort) {
EXPECT_EQ(-1, _attr->serializeForDescendingSort(1, buf, sizeof(buf)));
}
+// Saves a populated attribute, recreates the attribute without the reserved
+// document, loads it back and checks that the doc id limit, document count,
+// create serial number and stored values match what was saved.
+TEST_F(RawAttributeTest, save_and_load)
+{
+    auto mini_test = as_vector("mini test");
+    remove_saved_attr();
+    EXPECT_TRUE(_attr->addDocs(10));
+    _attr->commit();
+    _raw->set_raw(1, raw_hello);
+    _raw->set_raw(2, mini_test);
+    _attr->setCreateSerialNum(20);
+    EXPECT_TRUE(_attr->save());
+    reset_attr(false);
+    // Only inspect the attribute when loading succeeded. After a failed load
+    // the value checks below would read from an attribute that was never
+    // populated. The saved file is still removed at the end either way.
+    const bool loaded = _attr->load();
+    EXPECT_TRUE(loaded);
+    if (loaded) {
+        EXPECT_EQ(11, _attr->getCommittedDocIdLimit());
+        EXPECT_EQ(11, _attr->getStatus().getNumDocs());
+        EXPECT_EQ(20, _attr->getCreateSerialNum());
+        EXPECT_EQ(as_vector("hello"), as_vector(_raw->get_raw(1)));
+        EXPECT_EQ(mini_test, as_vector(_raw->get_raw(2)));
+    }
+    remove_saved_attr();
+}
+
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
index ebd4dc6998c..c966c4f81b6 100644
--- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
@@ -106,6 +106,8 @@ vespa_add_library(searchlib_attribute OBJECT
postingstore.cpp
predicate_attribute.cpp
raw_buffer_store.cpp
+ raw_buffer_store_reader.cpp
+ raw_buffer_store_writer.cpp
raw_buffer_type_mapper.cpp
raw_multi_value_read_view.cpp
readerbase.cpp
@@ -128,6 +130,8 @@ vespa_add_library(searchlib_attribute OBJECT
single_numeric_enum_search_context.cpp
single_numeric_search_context.cpp
single_raw_attribute.cpp
+ single_raw_attribute_loader.cpp
+ single_raw_attribute_saver.cpp
single_small_numeric_search_context.cpp
single_string_enum_search_context.cpp
single_string_enum_hint_search_context.cpp
diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.h b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.h
index 60132c70852..2731157b13c 100644
--- a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.h
+++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.h
@@ -30,6 +30,7 @@ public:
std::unique_ptr<vespalib::datastore::ICompactionContext> start_compact(const vespalib::datastore::CompactionStrategy& compaction_strategy);
void reclaim_memory(generation_t oldest_used_gen) { _array_store.reclaim_memory(oldest_used_gen); }
void assign_generation(generation_t current_gen) { _array_store.assign_generation(current_gen); }
+ void set_initializing(bool initializing) { _array_store.setInitializing(initializing); }
};
}
diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_reader.cpp b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_reader.cpp
new file mode 100644
index 00000000000..8fbf6fa6ea8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_reader.cpp
@@ -0,0 +1,34 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "raw_buffer_store_reader.h"
+#include "raw_buffer_store.h"
+#include "blob_sequence_reader.h"
+
+using vespalib::datastore::EntryRef;
+
+namespace search::attribute {
+
+// Stores references to the destination store and the blob source, and
+// creates the scratch buffer with an initial size of 1024 bytes.
+RawBufferStoreReader::RawBufferStoreReader(RawBufferStore& store, BlobSequenceReader& reader)
+    : _store{store},
+      _reader{reader},
+      _buffer(1024) // parentheses on purpose: an element count, not an initializer list
+{
+}
+
+// Defaulted in the source file rather than in the header.
+RawBufferStoreReader::~RawBufferStoreReader() = default;
+
+/**
+ * Reads the next blob from the sequence reader and adds it to the raw
+ * buffer store.
+ *
+ * @return the entry ref returned by the store for the blob, or a
+ *         default-constructed EntryRef when the next size is 0.
+ */
+EntryRef
+RawBufferStoreReader::read()
+{
+    uint32_t size = _reader.getNextSize();
+    if (size == 0) {
+        return EntryRef();
+    }
+    if (size > _buffer.size()) {
+        // Widen before adding the slack. Evaluated in 32 bits, size + 1024
+        // wraps around for sizes close to UINT32_MAX, which would leave the
+        // buffer smaller than the blob that is read into it below.
+        _buffer.resize(static_cast<size_t>(size) + 1024);
+    }
+    _reader.readBlob(_buffer.data(), size);
+    return _store.set({_buffer.data(), size});
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_reader.h b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_reader.h
new file mode 100644
index 00000000000..e58713ed0b2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_reader.h
@@ -0,0 +1,29 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/datastore/entryref.h>
+#include <vespa/vespalib/stllike/allocator.h>
+#include <vector>
+
+namespace search::attribute {
+
+class BlobSequenceReader;
+class RawBufferStore;
+
+/**
+ * Class for reading raw values into a raw buffer store from a
+ * BlobSequenceReader.
+ *
+ * The store and the reader are held by reference; this class does not own
+ * either of them.
+ */
+class RawBufferStoreReader
+{
+ // Store that the values are added to.
+ RawBufferStore& _store;
+ // Source that sizes and blobs are read from.
+ BlobSequenceReader& _reader;
+ // Scratch space a blob is read into before it is handed to the store.
+ std::vector<char, vespalib::allocator_large<char>> _buffer;
+public:
+ RawBufferStoreReader(RawBufferStore& store, BlobSequenceReader& reader);
+ ~RawBufferStoreReader();
+ // Reads the next value and returns the entry ref the store gave it.
+ // A value with size 0 yields a default-constructed EntryRef.
+ vespalib::datastore::EntryRef read();
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_writer.cpp b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_writer.cpp
new file mode 100644
index 00000000000..78aa3ddd2eb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_writer.cpp
@@ -0,0 +1,33 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "raw_buffer_store_writer.h"
+#include "raw_buffer_store.h"
+#include <vespa/searchlib/util/bufferwriter.h>
+
+using vespalib::datastore::EntryRef;
+
+namespace search::attribute {
+
+// Stores references to the store that values are fetched from and to the
+// writer that receives them.
+RawBufferStoreWriter::RawBufferStoreWriter(const RawBufferStore& store, BufferWriter& writer)
+    : _store{store},
+      _writer{writer}
+{
+}
+
+// Defaulted in the source file rather than in the header.
+RawBufferStoreWriter::~RawBufferStoreWriter() = default;
+
+/**
+ * Writes the value referenced by ref as a 32-bit size followed by the
+ * value's bytes. An invalid ref is written as a size of 0 with no bytes
+ * after it.
+ */
+void
+RawBufferStoreWriter::write(EntryRef ref)
+{
+    if (!ref.valid()) {
+        uint32_t no_data = 0;
+        _writer.write(&no_data, sizeof(no_data));
+        return;
+    }
+    auto value = _store.get(ref);
+    uint32_t length = value.size();
+    _writer.write(&length, sizeof(length));
+    _writer.write(value.data(), value.size());
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_writer.h b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_writer.h
new file mode 100644
index 00000000000..cfcd6fa9093
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_writer.h
@@ -0,0 +1,26 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/datastore/entryref.h>
+
+namespace search { class BufferWriter; }
+
+namespace search::attribute {
+
+class RawBufferStore;
+
+/**
+ * Class for writing raw values from a raw buffer store to a BufferWriter.
+ *
+ * The store and the writer are held by reference; this class does not own
+ * either of them.
+ */
+class RawBufferStoreWriter
+{
+ // Store that values are looked up in.
+ const RawBufferStore& _store;
+ // Destination for the size and data bytes.
+ BufferWriter& _writer;
+public:
+ RawBufferStoreWriter(const RawBufferStore& store, BufferWriter& writer);
+ ~RawBufferStoreWriter();
+ // Writes a 32-bit size followed by the bytes of the referenced value.
+ // An invalid ref is written as size 0 with no data.
+ void write(vespalib::datastore::EntryRef ref);
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp
index 9746929c666..5b78304d6c1 100644
--- a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp
@@ -1,6 +1,8 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "single_raw_attribute.h"
+#include "single_raw_attribute_loader.h"
+#include "single_raw_attribute_saver.h"
#include <vespa/searchcommon/attribute/config.h>
#include <vespa/vespalib/datastore/array_store.hpp>
@@ -171,4 +173,22 @@ SingleRawAttribute::onSerializeForDescendingSort(DocId doc, void * serTo, long a
return buf.size();
}
+/*
+ * Creates the saver for this attribute. A generation guard is taken first
+ * and passed to the saver together with the attribute header for fileName,
+ * a snapshot of the entry refs up to the committed doc id limit, and the
+ * raw store.
+ */
+std::unique_ptr<AttributeSaver>
+SingleRawAttribute::onInitSave(vespalib::stringref fileName)
+{
+    auto guard = getGenerationHandler().takeGuard();
+    const auto& header = createAttributeHeader(fileName);
+    auto ref_snapshot = make_entry_ref_vector_snapshot(_ref_vector, getCommittedDocIdLimit());
+    return std::make_unique<SingleRawAttributeSaver>(std::move(guard),
+                                                     header,
+                                                     std::move(ref_snapshot),
+                                                     _raw_store);
+}
+
+/*
+ * Loads this attribute by handing the ref vector and the raw store to a
+ * temporary SingleRawAttributeLoader and returning its result.
+ */
+bool
+SingleRawAttribute::onLoad(vespalib::Executor* executor)
+{
+    return SingleRawAttributeLoader(*this, _ref_vector, _raw_store).on_load(executor);
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h
index d7ea321a3d4..316f19d19ce 100644
--- a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h
@@ -22,6 +22,8 @@ class SingleRawAttribute : public NotImplementedAttribute
vespalib::MemoryUsage update_stat();
EntryRef acquire_entry_ref(DocId docid) const noexcept { return _ref_vector.acquire_elem_ref(docid).load_acquire(); }
+ bool onLoad(vespalib::Executor *executor) override;
+ std::unique_ptr<AttributeSaver> onInitSave(vespalib::stringref fileName) override;
public:
SingleRawAttribute(const vespalib::string& name, const Config& config);
~SingleRawAttribute() override;
diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_loader.cpp b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_loader.cpp
new file mode 100644
index 00000000000..9ccb6c9ef26
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_loader.cpp
@@ -0,0 +1,51 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "single_raw_attribute_loader.h"
+#include "attributevector.h"
+#include "blob_sequence_reader.h"
+#include "raw_buffer_store.h"
+#include "raw_buffer_store_reader.h"
+
+using vespalib::datastore::EntryRef;
+
+namespace search::attribute {
+
+// Stores references to the attribute being loaded, its ref vector and its
+// raw store.
+SingleRawAttributeLoader::SingleRawAttributeLoader(AttributeVector& attr,
+                                                   RefVector& ref_vector,
+                                                   RawBufferStore& raw_store)
+    : _attr{attr},
+      _ref_vector{ref_vector},
+      _raw_store{raw_store}
+{
+}
+
+// Defaulted in the source file rather than in the header.
+SingleRawAttributeLoader::~SingleRawAttributeLoader() = default;
+
+/*
+ * Reads docid_limit values from reader into the raw store and appends the
+ * resulting entry refs to the ref vector, one per local document id. The
+ * store is flagged as initializing for the duration of the loop.
+ */
+void
+SingleRawAttributeLoader::load_raw_store(BlobSequenceReader& reader, uint32_t docid_limit)
+{
+    RawBufferStoreReader value_reader(_raw_store, reader);
+    _raw_store.set_initializing(true);
+    for (uint32_t docs_left = docid_limit; docs_left != 0; --docs_left) {
+        EntryRef ref = value_reader.read();
+        _ref_vector.push_back(AtomicEntryRef(ref));
+    }
+    _raw_store.set_initializing(false);
+}
+
+/*
+ * Loads the attribute from a BlobSequenceReader created for it. Returns
+ * false without touching the attribute when the reader has no data.
+ * Otherwise the create serial number is restored, the ref vector is rebuilt
+ * from the saved values, the attribute is committed, and the document count
+ * and committed doc id limit are set to the saved doc id limit.
+ * The executor argument is not used.
+ */
+bool
+SingleRawAttributeLoader::on_load(vespalib::Executor*)
+{
+    BlobSequenceReader blob_reader(_attr);
+    if (!blob_reader.hasData()) {
+        return false;
+    }
+    _attr.setCreateSerialNum(blob_reader.getCreateSerialNum());
+    const uint32_t saved_limit = blob_reader.getDocIdLimit();
+    _ref_vector.reset();
+    _ref_vector.unsafe_reserve(saved_limit);
+    load_raw_store(blob_reader, saved_limit);
+    _attr.commit();
+    _attr.getStatus().setNumDocs(saved_limit);
+    _attr.setCommittedDocIdLimit(saved_limit);
+    return true;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_loader.h b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_loader.h
new file mode 100644
index 00000000000..1ed2fd05b2d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_loader.h
@@ -0,0 +1,36 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/datastore/atomic_entry_ref.h>
+#include <vespa/vespalib/util/rcuvector.h>
+
+namespace search { class AttributeVector; }
+
+namespace vespalib { class Executor; }
+
+namespace search::attribute {
+
+class BlobSequenceReader;
+class RawBufferStore;
+
+/**
+ * Class for loading a single raw attribute.
+ *
+ * Holds references to the attribute, its ref vector and its raw store;
+ * none of them are owned by the loader.
+ */
+class SingleRawAttributeLoader
+{
+ using AtomicEntryRef = vespalib::datastore::AtomicEntryRef;
+ using RefVector = vespalib::RcuVectorBase<AtomicEntryRef>;
+
+ // Attribute that receives serial number, doc count and doc id limit.
+ AttributeVector& _attr;
+ // Per-document entry refs, rebuilt during load.
+ RefVector& _ref_vector;
+ // Store that the loaded values are added to.
+ RawBufferStore& _raw_store;
+
+ // Reads docid_limit values from reader and appends their refs to _ref_vector.
+ void load_raw_store(BlobSequenceReader& reader, uint32_t docid_limit);
+public:
+ SingleRawAttributeLoader(AttributeVector& attr, RefVector& ref_vector, RawBufferStore& raw_store);
+ ~SingleRawAttributeLoader();
+ // Returns false when there is no saved data to load, true after a load.
+ // The executor argument is unused by the implementation.
+ bool on_load(vespalib::Executor*);
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_saver.cpp b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_saver.cpp
new file mode 100644
index 00000000000..260010a85a0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_saver.cpp
@@ -0,0 +1,42 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "single_raw_attribute_saver.h"
+#include "raw_buffer_store.h"
+#include "raw_buffer_store_writer.h"
+#include <vespa/searchlib/attribute/iattributesavetarget.h>
+#include <vespa/searchlib/util/bufferwriter.h>
+#include <cassert>
+
+namespace search::attribute {
+
+// Passes the generation guard and header on to AttributeSaver, takes over
+// the entry ref snapshot and keeps a reference to the raw store.
+SingleRawAttributeSaver::SingleRawAttributeSaver(vespalib::GenerationHandler::Guard &&guard,
+                                                 const attribute::AttributeHeader &header,
+                                                 EntryRefVector&& ref_vector,
+                                                 const RawBufferStore& raw_store)
+    : AttributeSaver(std::move(guard), header),
+      _ref_vector(std::move(ref_vector)),
+      _raw_store{raw_store}
+{
+}
+
+// Defaulted in the source file rather than in the header.
+SingleRawAttributeSaver::~SingleRawAttributeSaver() = default;
+
+/*
+ * Writes the value for every entry ref in the snapshot, in snapshot order,
+ * and then flushes the writer.
+ */
+void
+SingleRawAttributeSaver::save_raw_store(BufferWriter& writer) const
+{
+    RawBufferStoreWriter value_writer(_raw_store, writer);
+    for (const auto entry_ref : _ref_vector) {
+        value_writer.write(entry_ref);
+    }
+    writer.flush();
+}
+
+/*
+ * Saves the attribute values to the dat writer of saveTarget.
+ * Asserts that the save target is not enumerated. Always returns true.
+ */
+bool
+SingleRawAttributeSaver::onSave(IAttributeSaveTarget &saveTarget)
+{
+ // The buffer writer is held in a unique_ptr for the duration of the save.
+ std::unique_ptr<search::BufferWriter> writer(saveTarget.datWriter().allocBufferWriter());
+ assert(!saveTarget.getEnumerated());
+ save_raw_store(*writer);
+ return true;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_saver.h b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_saver.h
new file mode 100644
index 00000000000..ebcdc504231
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_saver.h
@@ -0,0 +1,32 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "attributesaver.h"
+#include "save_utils.h"
+
+namespace search { class BufferWriter; }
+
+namespace search::attribute {
+
+class RawBufferStore;
+
+/**
+ * Class for saving a single raw attribute.
+ *
+ * Owns a snapshot of the attribute's entry refs and holds a reference to the
+ * raw store the refs point into; the store itself is not owned.
+ */
+class SingleRawAttributeSaver : public AttributeSaver
+{
+    // Snapshot of the per-document entry refs, written in order.
+    EntryRefVector _ref_vector;
+    // Store the snapshot's refs are resolved against.
+    const RawBufferStore& _raw_store;
+
+    // Writes every value in the snapshot to writer and flushes it.
+    void save_raw_store(BufferWriter& writer) const;
+    bool onSave(IAttributeSaveTarget &saveTarget) override;
+public:
+    SingleRawAttributeSaver(vespalib::GenerationHandler::Guard &&guard,
+                            const attribute::AttributeHeader &header,
+                            EntryRefVector&& ref_vector,
+                            const RawBufferStore& raw_store);
+    // Marked override so the compiler checks that the base destructor is
+    // virtual; savers are handed out as std::unique_ptr<AttributeSaver>.
+    ~SingleRawAttributeSaver() override;
+};
+
+}