From 1da71b42663246f87702f63d6c78b152cc294b86 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Mon, 6 Mar 2023 22:03:29 +0100 Subject: Add search::attribute::SingleRawAttribute. --- searchlib/CMakeLists.txt | 1 + .../tests/attribute/raw_attribute/CMakeLists.txt | 9 ++ .../attribute/raw_attribute/raw_attribute_test.cpp | 72 +++++++++ .../src/vespa/searchlib/attribute/CMakeLists.txt | 1 + .../vespa/searchlib/attribute/createsinglestd.cpp | 3 + .../searchlib/attribute/single_raw_attribute.cpp | 170 +++++++++++++++++++++ .../searchlib/attribute/single_raw_attribute.h | 40 +++++ 7 files changed, 296 insertions(+) create mode 100644 searchlib/src/tests/attribute/raw_attribute/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 44051a96578..07045684d6e 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -91,6 +91,7 @@ vespa_define_module( src/tests/attribute/posting_store src/tests/attribute/postinglist src/tests/attribute/postinglistattribute + src/tests/attribute/raw_attribute src/tests/attribute/reference_attribute src/tests/attribute/save_target src/tests/attribute/searchable diff --git a/searchlib/src/tests/attribute/raw_attribute/CMakeLists.txt b/searchlib/src/tests/attribute/raw_attribute/CMakeLists.txt new file mode 100644 index 00000000000..21e34f42193 --- /dev/null +++ b/searchlib/src/tests/attribute/raw_attribute/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_raw_attribute_test_app TEST + SOURCES + raw_attribute_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_raw_attribute_test_app COMMAND searchlib_raw_attribute_test_app) diff --git a/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp b/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp new file mode 100644 index 00000000000..bc9d361e29a --- /dev/null +++ b/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp @@ -0,0 +1,72 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include +#include +#include + +using search::AttributeFactory; +using search::AttributeVector; +using search::attribute::BasicType; +using search::attribute::CollectionType; +using search::attribute::Config; +using search::attribute::SingleRawAttribute; +using vespalib::ConstArrayRef; + + +std::vector empty; +vespalib::string hello("hello"); +vespalib::ConstArrayRef raw_hello(hello.c_str(), hello.size()); + +std::vector as_vector(vespalib::stringref value) { + return {value.data(), value.data() + value.size()}; +} + +std::vector as_vector(vespalib::ConstArrayRef value) { + return {value.data(), value.data() + value.size()}; +} + +class RawAttributeTest : public ::testing::Test +{ +protected: + std::shared_ptr _attr; + SingleRawAttribute* _raw; + + RawAttributeTest(); + ~RawAttributeTest() override; + std::vector get_raw(uint32_t docid); +}; + + +RawAttributeTest::RawAttributeTest() + : ::testing::Test(), + _attr(), + _raw(nullptr) +{ + Config cfg(BasicType::RAW, CollectionType::SINGLE); + _attr = AttributeFactory::createAttribute("raw", cfg); + _raw = &dynamic_cast(*_attr); + _attr->addReservedDoc(); +} + +RawAttributeTest::~RawAttributeTest() = default; + +std::vector +RawAttributeTest::get_raw(uint32_t docid) +{ + return as_vector(_raw->get_raw(docid)); +} + +TEST_F(RawAttributeTest, can_set_and_clear_value) +{ + EXPECT_TRUE(_attr->addDocs(10)); + _attr->commit(); + EXPECT_EQ(empty, get_raw(1)); + _raw->set_raw(1, raw_hello); + EXPECT_EQ(as_vector(hello), get_raw(1)); + _attr->clearDoc(1); + EXPECT_EQ(empty, get_raw(1)); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index 72c7efe3094..594afc31795 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -123,6 +123,7 @@ vespa_add_library(searchlib_attribute OBJECT single_enum_search_context.cpp single_numeric_enum_search_context.cpp single_numeric_search_context.cpp + single_raw_attribute.cpp single_small_numeric_search_context.cpp single_string_enum_search_context.cpp single_string_enum_hint_search_context.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp b/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp index fe2b0c9f989..1a0d24b0595 100644 --- a/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp +++ b/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp @@ -7,6 +7,7 @@ #include "singlestringattribute.h" #include "singleboolattribute.h" #include "singlenumericattribute.hpp" +#include "single_raw_attribute.h" #include #include #include @@ -51,6 +52,8 @@ AttributeFactory::createSingleStd(stringref name, const Config & info) } case BasicType::REFERENCE: return std::make_shared(name, info); + case BasicType::RAW: + return std::make_shared(name, info); default: break; } diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp new file mode 100644 index 00000000000..9bd3a81482a --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp @@ -0,0 +1,170 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "single_raw_attribute.h" +#include +#include + +using vespalib::alloc::MemoryAllocator; +using vespalib::datastore::EntryRef; + +namespace { + +constexpr float ALLOC_GROW_FACTOR = 0.2; + +} + +namespace search::attribute { + +SingleRawAttribute::SingleRawAttribute(const vespalib::string& name, const Config& config) + : NotImplementedAttribute(name, config), + _ref_vector(config.getGrowStrategy(), getGenerationHolder()), + _array_store(ArrayStoreType::optimizedConfigForHugePage(1000u, + MemoryAllocator::HUGEPAGE_SIZE, + MemoryAllocator::PAGE_SIZE, + 8_Ki, ALLOC_GROW_FACTOR), + get_memory_allocator()) +{ +} + +SingleRawAttribute::~SingleRawAttribute() +{ + getGenerationHolder().reclaim_all(); +} + +void +SingleRawAttribute::reclaim_memory(generation_t oldest_used_gen) +{ + _array_store.reclaim_memory(oldest_used_gen); + getGenerationHolder().reclaim(oldest_used_gen); +} + +void +SingleRawAttribute::before_inc_generation(generation_t current_gen) +{ + getGenerationHolder().assign_generation(current_gen); + _array_store.assign_generation(current_gen); +} + +bool +SingleRawAttribute::addDoc(DocId &docId) +{ + bool incGen = _ref_vector.isFull(); + _ref_vector.push_back(AtomicEntryRef()); + AttributeVector::incNumDocs(); + docId = AttributeVector::getNumDocs() - 1; + updateUncommittedDocIdLimit(docId); + if (incGen) { + incGeneration(); + } else { + reclaim_unused_memory(); + } + return true; +} + +void +SingleRawAttribute::onCommit() +{ + incGeneration(); + if (_array_store.consider_compact()) { + auto context = _array_store.compact_worst(getConfig().getCompactionStrategy()); + if (context) { + context->compact(vespalib::ArrayRef(&_ref_vector[0], _ref_vector.size())); + } + incGeneration(); + updateStat(true); + } +} + +void +SingleRawAttribute::onUpdateStat() +{ + vespalib::MemoryUsage total = update_stat(); + this->updateStatistics(_ref_vector.size(), + _ref_vector.size(), + total.allocatedBytes(), + total.usedBytes(), + total.deadBytes(), + total.allocatedBytesOnHold()); +} + +vespalib::MemoryUsage +SingleRawAttribute::update_stat() +{ + vespalib::MemoryUsage result = _ref_vector.getMemoryUsage(); + result.merge(_array_store.update_stat(getConfig().getCompactionStrategy())); + result.mergeGenerationHeldBytes(getGenerationHolder().get_held_bytes()); + return result; +} + +vespalib::ConstArrayRef +SingleRawAttribute::get_raw(EntryRef ref) const +{ + auto array = _array_store.get(ref); + uint32_t size = 0; + assert(array.size() >= sizeof(size)); + memcpy(&size, array.data(), sizeof(size)); + assert(array.size() >= sizeof(size) + size); + return {array.data() + sizeof(size), size}; +} + +vespalib::ConstArrayRef +SingleRawAttribute::get_raw(DocId docid) const +{ + EntryRef ref; + if (docid < getCommittedDocIdLimit()) { + ref = acquire_entry_ref(docid); + } + if (!ref.valid()) { + return {}; + } + return get_raw(ref); +} + +EntryRef +SingleRawAttribute::set_raw(vespalib::ConstArrayRef raw) +{ + uint32_t size = raw.size(); + size_t buffer_size = raw.size() + sizeof(size); + auto& mapper = _array_store.get_mapper(); + auto type_id = mapper.get_type_id(buffer_size); + auto array_size = (type_id != 0) ? mapper.get_array_size(type_id) : buffer_size; + assert(array_size >= buffer_size); + auto ref = _array_store.allocate(array_size); + auto buf = _array_store.get_writable(ref); + memcpy(buf.data(), &size, sizeof(size)); + memcpy(buf.data() + sizeof(size), raw.data(), size); + if (array_size > buffer_size) { + memset(buf.data() + buffer_size, 0, array_size - buffer_size); + } + return ref; +} + +void +SingleRawAttribute::set_raw(DocId docid, vespalib::ConstArrayRef raw) +{ + auto ref = set_raw(raw); + assert(docid < _ref_vector.size()); + updateUncommittedDocIdLimit(docid); + auto& elem_ref = _ref_vector[docid]; + EntryRef old_ref(elem_ref.load_relaxed()); + elem_ref.store_release(ref); + if (old_ref.valid()) { + _array_store.remove(old_ref); + } +} + +uint32_t +SingleRawAttribute::clearDoc(DocId docId) +{ + updateUncommittedDocIdLimit(docId); + auto& elem_ref = _ref_vector[docId]; + EntryRef old_ref(elem_ref.load_relaxed()); + elem_ref.store_relaxed(EntryRef()); + if (old_ref.valid()) { + _array_store.remove(old_ref); + return 1u; + } + return 0u; +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h new file mode 100644 index 00000000000..429cd5bef31 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h @@ -0,0 +1,40 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "not_implemented_attribute.h" +#include +#include + +namespace search::attribute { + +class SingleRawAttribute: public NotImplementedAttribute +{ + using AtomicEntryRef = vespalib::datastore::AtomicEntryRef; + using EntryRef = vespalib::datastore::EntryRef; + using RefVector = vespalib::RcuVectorBase; + using RefType = vespalib::datastore::EntryRefT<19>; + using ArrayStoreType = vespalib::datastore::ArrayStore; + + RefVector _ref_vector; + ArrayStoreType _array_store; + vespalib::datastore::CompactionSpec _compaction_spec; + + vespalib::MemoryUsage update_stat(); + EntryRef acquire_entry_ref(DocId docid) const noexcept { return _ref_vector.acquire_elem_ref(docid).load_acquire(); } + EntryRef set_raw(vespalib::ConstArrayRef raw); + vespalib::ConstArrayRef get_raw(EntryRef ref) const; +public: + SingleRawAttribute(const vespalib::string& name, const Config& config); + ~SingleRawAttribute() override; + void onCommit() override; + void onUpdateStat() override; + void reclaim_memory(generation_t oldest_used_gen) override; + void before_inc_generation(generation_t current_gen) override; + bool addDoc(DocId &docId) override; + vespalib::ConstArrayRef get_raw(DocId docid) const override; + void set_raw(DocId docid, vespalib::ConstArrayRef raw); + uint32_t clearDoc(DocId docId) override; +}; + +} -- cgit v1.2.3 From c4f6f9dc8ae146e90f96b28882090f975e44b55a Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Tue, 7 Mar 2023 11:09:19 +0100 Subject: Add class comment. --- searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h index 429cd5bef31..7477b13bc5a 100644 --- a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h +++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h @@ -8,7 +8,10 @@ namespace search::attribute { -class SingleRawAttribute: public NotImplementedAttribute +/** + * Attribute vector storing a single raw value per document. + */ +class SingleRawAttribute : public NotImplementedAttribute { using AtomicEntryRef = vespalib::datastore::AtomicEntryRef; using EntryRef = vespalib::datastore::EntryRef; -- cgit v1.2.3