diff options
author | Tor Egge <Tor.Egge@online.no> | 2023-03-09 12:54:02 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2023-03-09 12:54:02 +0100 |
commit | fc689e6c8ca15644d04197a908a2fb0d2f9eb74b (patch) | |
tree | 57bf720a5b6d5853df1d176c9ec14a3222fb21c3 | |
parent | 5f4bf637a8ede90fa66ecc18ed2864c0c826f80b (diff) |
Add saver and loader for SingleRawAttribute.
13 files changed, 350 insertions, 4 deletions
diff --git a/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp b/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp index 82e4fd065cf..9f728cc0482 100644 --- a/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp +++ b/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp @@ -4,6 +4,7 @@ #include <vespa/searchlib/attribute/attributefactory.h> #include <vespa/searchcommon/attribute/config.h> #include <vespa/vespalib/gtest/gtest.h> +#include <filesystem> #include <memory> using search::AttributeFactory; @@ -19,6 +20,8 @@ std::vector<char> empty; vespalib::string hello("hello"); vespalib::ConstArrayRef<char> raw_hello(hello.c_str(), hello.size()); +std::filesystem::path attr_path("raw.dat"); + std::vector<char> as_vector(vespalib::stringref value) { return {value.data(), value.data() + value.size()}; } @@ -27,6 +30,10 @@ std::vector<char> as_vector(vespalib::ConstArrayRef<char> value) { return {value.data(), value.data() + value.size()}; } +void remove_saved_attr() { + std::filesystem::remove(attr_path); +} + class RawAttributeTest : public ::testing::Test { protected: @@ -36,6 +43,7 @@ protected: RawAttributeTest(); ~RawAttributeTest() override; std::vector<char> get_raw(uint32_t docid); + void reset_attr(bool add_reserved); }; @@ -44,10 +52,7 @@ RawAttributeTest::RawAttributeTest() _attr(), _raw(nullptr) { - Config cfg(BasicType::RAW, CollectionType::SINGLE); - _attr = AttributeFactory::createAttribute("raw", cfg); - _raw = &dynamic_cast<SingleRawAttribute&>(*_attr); - _attr->addReservedDoc(); + reset_attr(true); } RawAttributeTest::~RawAttributeTest() = default; @@ -58,6 +63,17 @@ RawAttributeTest::get_raw(uint32_t docid) return as_vector(_raw->get_raw(docid)); } +void +RawAttributeTest::reset_attr(bool add_reserved) +{ + Config cfg(BasicType::RAW, CollectionType::SINGLE); + _attr = AttributeFactory::createAttribute("raw", cfg); + _raw = &dynamic_cast<SingleRawAttribute&>(*_attr); + if (add_reserved) { + _attr->addReservedDoc(); + } +} + TEST_F(RawAttributeTest, can_set_and_clear_value) { EXPECT_TRUE(_attr->addDocs(10)); @@ -89,4 +105,24 @@ TEST_F(RawAttributeTest, implements_serialize_for_sort) { EXPECT_EQ(-1, _attr->serializeForDescendingSort(1, buf, sizeof(buf))); } +TEST_F(RawAttributeTest, save_and_load) +{ + auto mini_test = as_vector("mini test"); + remove_saved_attr(); + _attr->addDocs(10); + _attr->commit(); + _raw->set_raw(1, raw_hello); + _raw->set_raw(2, mini_test); + _attr->setCreateSerialNum(20); + _attr->save(); + reset_attr(false); + _attr->load(); + EXPECT_EQ(11, _attr->getCommittedDocIdLimit()); + EXPECT_EQ(11, _attr->getStatus().getNumDocs()); + EXPECT_EQ(20, _attr->getCreateSerialNum()); + EXPECT_EQ(as_vector("hello"), as_vector(_raw->get_raw(1))); + EXPECT_EQ(mini_test, as_vector(_raw->get_raw(2))); + remove_saved_attr(); +} + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index ebd4dc6998c..c966c4f81b6 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -106,6 +106,8 @@ vespa_add_library(searchlib_attribute OBJECT postingstore.cpp predicate_attribute.cpp raw_buffer_store.cpp + raw_buffer_store_reader.cpp + raw_buffer_store_writer.cpp raw_buffer_type_mapper.cpp raw_multi_value_read_view.cpp readerbase.cpp @@ -128,6 +130,8 @@ vespa_add_library(searchlib_attribute OBJECT single_numeric_enum_search_context.cpp single_numeric_search_context.cpp single_raw_attribute.cpp + single_raw_attribute_loader.cpp + single_raw_attribute_saver.cpp single_small_numeric_search_context.cpp single_string_enum_search_context.cpp single_string_enum_hint_search_context.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.h b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.h index 60132c70852..2731157b13c 100644 --- a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.h +++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.h @@ -30,6 +30,7 @@ public: std::unique_ptr<vespalib::datastore::ICompactionContext> start_compact(const vespalib::datastore::CompactionStrategy& compaction_strategy); void reclaim_memory(generation_t oldest_used_gen) { _array_store.reclaim_memory(oldest_used_gen); } void assign_generation(generation_t current_gen) { _array_store.assign_generation(current_gen); } + void set_initializing(bool initializing) { _array_store.setInitializing(initializing); } }; } diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_reader.cpp b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_reader.cpp new file mode 100644 index 00000000000..8fbf6fa6ea8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_reader.cpp @@ -0,0 +1,34 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "raw_buffer_store_reader.h" +#include "raw_buffer_store.h" +#include "blob_sequence_reader.h" + +using vespalib::datastore::EntryRef; + +namespace search::attribute { + +RawBufferStoreReader::RawBufferStoreReader(RawBufferStore& store, BlobSequenceReader& reader) + : _store(store), + _reader(reader), + _buffer(1024) +{ +} + +RawBufferStoreReader::~RawBufferStoreReader() = default; + +EntryRef +RawBufferStoreReader::read() +{ + uint32_t size = _reader.getNextSize(); + if (size == 0) { + return EntryRef(); + } + if (size > _buffer.size()) { + _buffer.resize(size + 1024); + } + _reader.readBlob(_buffer.data(), size); + return _store.set({_buffer.data(), size}); +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_reader.h b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_reader.h new file mode 100644 index 00000000000..e58713ed0b2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_reader.h @@ -0,0 +1,29 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/datastore/entryref.h> +#include <vespa/vespalib/stllike/allocator.h> +#include <vector> + +namespace search::attribute { + +class BlobSequenceReader; +class RawBufferStore; + +/** + * Class for reading raw values into a raw buffer store from a + * BlobSequenceReader. + */ +class RawBufferStoreReader +{ + RawBufferStore& _store; + BlobSequenceReader& _reader; + std::vector<char, vespalib::allocator_large<char>> _buffer; +public: + RawBufferStoreReader(RawBufferStore& store, BlobSequenceReader& reader); + ~RawBufferStoreReader(); + vespalib::datastore::EntryRef read(); +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_writer.cpp b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_writer.cpp new file mode 100644 index 00000000000..78aa3ddd2eb --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_writer.cpp @@ -0,0 +1,33 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "raw_buffer_store_writer.h" +#include "raw_buffer_store.h" +#include <vespa/searchlib/util/bufferwriter.h> + +using vespalib::datastore::EntryRef; + +namespace search::attribute { + +RawBufferStoreWriter::RawBufferStoreWriter(const RawBufferStore& store, BufferWriter& writer) + : _store(store), + _writer(writer) +{ +} + +RawBufferStoreWriter::~RawBufferStoreWriter() = default; + +void +RawBufferStoreWriter::write(EntryRef ref) +{ + if (ref.valid()) { + auto raw = _store.get(ref); + uint32_t size = raw.size(); + _writer.write(&size, sizeof(size)); + _writer.write(raw.data(), raw.size()); + } else { + uint32_t size = 0; + _writer.write(&size, sizeof(size)); + } +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_writer.h b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_writer.h new file mode 100644 index 00000000000..cfcd6fa9093 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store_writer.h @@ -0,0 +1,26 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/datastore/entryref.h> + +namespace search { class BufferWriter; } + +namespace search::attribute { + +class RawBufferStore; + +/** + * Class for writing raw values from a raw buffer store to a BufferWriter. + */ +class RawBufferStoreWriter +{ + const RawBufferStore& _store; + BufferWriter& _writer; +public: + RawBufferStoreWriter(const RawBufferStore& store, BufferWriter& writer); + ~RawBufferStoreWriter(); + void write(vespalib::datastore::EntryRef ref); +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp index 9746929c666..5b78304d6c1 100644 --- a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp @@ -1,6 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "single_raw_attribute.h" +#include "single_raw_attribute_loader.h" +#include "single_raw_attribute_saver.h" #include <vespa/searchcommon/attribute/config.h> #include <vespa/vespalib/datastore/array_store.hpp> @@ -171,4 +173,22 @@ SingleRawAttribute::onSerializeForDescendingSort(DocId doc, void * serTo, long a return buf.size(); } +std::unique_ptr<AttributeSaver> +SingleRawAttribute::onInitSave(vespalib::stringref fileName) +{ + vespalib::GenerationHandler::Guard guard(getGenerationHandler().takeGuard()); + return std::make_unique<SingleRawAttributeSaver> + (std::move(guard), + this->createAttributeHeader(fileName), + make_entry_ref_vector_snapshot(_ref_vector, getCommittedDocIdLimit()), + _raw_store); +} + +bool +SingleRawAttribute::onLoad(vespalib::Executor* executor) +{ + SingleRawAttributeLoader loader(*this, _ref_vector, _raw_store); + return loader.on_load(executor); +} + } diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h index d7ea321a3d4..316f19d19ce 100644 --- a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h +++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h @@ -22,6 +22,8 @@ class SingleRawAttribute : public NotImplementedAttribute vespalib::MemoryUsage update_stat(); EntryRef acquire_entry_ref(DocId docid) const noexcept { return _ref_vector.acquire_elem_ref(docid).load_acquire(); } + bool onLoad(vespalib::Executor *executor) override; + std::unique_ptr<AttributeSaver> onInitSave(vespalib::stringref fileName) override; public: SingleRawAttribute(const vespalib::string& name, const Config& config); ~SingleRawAttribute() override; diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_loader.cpp b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_loader.cpp new file mode 100644 index 00000000000..9ccb6c9ef26 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_loader.cpp @@ -0,0 +1,51 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "single_raw_attribute_loader.h" +#include "attributevector.h" +#include "blob_sequence_reader.h" +#include "raw_buffer_store.h" +#include "raw_buffer_store_reader.h" + +using vespalib::datastore::EntryRef; + +namespace search::attribute { + +SingleRawAttributeLoader::SingleRawAttributeLoader(AttributeVector& attr, RefVector& ref_vector, RawBufferStore& raw_store) + : _attr(attr), + _ref_vector(ref_vector), + _raw_store(raw_store) +{ +} + +SingleRawAttributeLoader::~SingleRawAttributeLoader() = default; + +void +SingleRawAttributeLoader::load_raw_store(BlobSequenceReader& reader, uint32_t docid_limit) +{ + RawBufferStoreReader raw_reader(_raw_store, reader); + _raw_store.set_initializing(true); + for (uint32_t lid = 0; lid < docid_limit; ++lid) { + _ref_vector.push_back(AtomicEntryRef(raw_reader.read())); + } + _raw_store.set_initializing(false); +} + +bool +SingleRawAttributeLoader::on_load(vespalib::Executor*) +{ + BlobSequenceReader reader(_attr); + if (!reader.hasData()) { + return false; + } + _attr.setCreateSerialNum(reader.getCreateSerialNum()); + uint32_t docid_limit(reader.getDocIdLimit()); + _ref_vector.reset(); + _ref_vector.unsafe_reserve(docid_limit); + load_raw_store(reader, docid_limit); + _attr.commit(); + _attr.getStatus().setNumDocs(docid_limit); + _attr.setCommittedDocIdLimit(docid_limit); + return true; +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_loader.h b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_loader.h new file mode 100644 index 00000000000..1ed2fd05b2d --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_loader.h @@ -0,0 +1,36 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/datastore/atomic_entry_ref.h> +#include <vespa/vespalib/util/rcuvector.h> + +namespace search { class AttributeVector; } + +namespace vespalib { class Executor; } + +namespace search::attribute { + +class BlobSequenceReader; +class RawBufferStore; + +/** + * Class for loading a single raw attribute. + */ +class SingleRawAttributeLoader +{ + using AtomicEntryRef = vespalib::datastore::AtomicEntryRef; + using RefVector = vespalib::RcuVectorBase<AtomicEntryRef>; + + AttributeVector& _attr; + RefVector& _ref_vector; + RawBufferStore& _raw_store; + + void load_raw_store(BlobSequenceReader& reader, uint32_t docid_limit); +public: + SingleRawAttributeLoader(AttributeVector& attr, RefVector& ref_vector, RawBufferStore& raw_store); + ~SingleRawAttributeLoader(); + bool on_load(vespalib::Executor*); +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_saver.cpp b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_saver.cpp new file mode 100644 index 00000000000..260010a85a0 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_saver.cpp @@ -0,0 +1,42 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "single_raw_attribute_saver.h" +#include "raw_buffer_store.h" +#include "raw_buffer_store_writer.h" +#include <vespa/searchlib/attribute/iattributesavetarget.h> +#include <vespa/searchlib/util/bufferwriter.h> + +namespace search::attribute { + +SingleRawAttributeSaver::SingleRawAttributeSaver(vespalib::GenerationHandler::Guard &&guard, + const attribute::AttributeHeader &header, + EntryRefVector&& ref_vector, + const RawBufferStore& raw_store) + : AttributeSaver(std::move(guard), header), + _ref_vector(std::move(ref_vector)), + _raw_store(raw_store) +{ +} + +SingleRawAttributeSaver::~SingleRawAttributeSaver() = default; + +void +SingleRawAttributeSaver::save_raw_store(BufferWriter& writer) const +{ + RawBufferStoreWriter raw_writer(_raw_store, writer); + for (auto ref : _ref_vector) { + raw_writer.write(ref); + } + writer.flush(); +} + +bool +SingleRawAttributeSaver::onSave(IAttributeSaveTarget &saveTarget) +{ + std::unique_ptr<search::BufferWriter> writer(saveTarget.datWriter().allocBufferWriter()); + assert(!saveTarget.getEnumerated()); + save_raw_store(*writer); + return true; +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_saver.h b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_saver.h new file mode 100644 index 00000000000..ebcdc504231 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute_saver.h @@ -0,0 +1,32 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "attributesaver.h" +#include "save_utils.h" + +namespace search { class BufferWriter; } + +namespace search::attribute { + +class RawBufferStore; + +/** + * Class for saving a single raw attribute. + */ +class SingleRawAttributeSaver : public AttributeSaver +{ + EntryRefVector _ref_vector; + const RawBufferStore& _raw_store; + + void save_raw_store(BufferWriter& writer) const; + bool onSave(IAttributeSaveTarget &saveTarget) override; +public: + SingleRawAttributeSaver(vespalib::GenerationHandler::Guard &&guard, + const attribute::AttributeHeader &header, + EntryRefVector&& ref_vector, + const RawBufferStore& raw_store); + ~SingleRawAttributeSaver(); +}; + +} |