summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2023-03-07 11:23:36 +0100
committer GitHub <noreply@github.com> 2023-03-07 11:23:36 +0100
commit f1474b050a571d97cd67bbb9240813e2d8b35bbd (patch)
tree 112f12a14af19b95b18c8932bd71dc1bc889fe07 /searchlib
parent 028316d562f645540fb27c9be5b8c8b9249d9d11 (diff)
parent c4f6f9dc8ae146e90f96b28882090f975e44b55a (diff)
Merge pull request #26323 from vespa-engine/toregge/add-single-raw-attribute
Add search::attribute::SingleRawAttribute.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/CMakeLists.txt 1
-rw-r--r-- searchlib/src/tests/attribute/raw_attribute/CMakeLists.txt 9
-rw-r--r-- searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp 72
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp 170
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h 43
7 files changed, 299 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 44051a96578..07045684d6e 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -91,6 +91,7 @@ vespa_define_module(
src/tests/attribute/posting_store
src/tests/attribute/postinglist
src/tests/attribute/postinglistattribute
+ src/tests/attribute/raw_attribute
src/tests/attribute/reference_attribute
src/tests/attribute/save_target
src/tests/attribute/searchable
diff --git a/searchlib/src/tests/attribute/raw_attribute/CMakeLists.txt b/searchlib/src/tests/attribute/raw_attribute/CMakeLists.txt
new file mode 100644
index 00000000000..21e34f42193
--- /dev/null
+++ b/searchlib/src/tests/attribute/raw_attribute/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Test executable built from raw_attribute_test.cpp, with searchlib and
+# GTest::GTest declared as its dependencies.
+vespa_add_executable(searchlib_raw_attribute_test_app TEST
+ SOURCES
+ raw_attribute_test.cpp
+ DEPENDS
+ searchlib
+ GTest::GTest
+)
+# Registers a test of the same name whose command is the executable itself.
+vespa_add_test(NAME searchlib_raw_attribute_test_app COMMAND searchlib_raw_attribute_test_app)
diff --git a/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp b/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp
new file mode 100644
index 00000000000..bc9d361e29a
--- /dev/null
+++ b/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp
@@ -0,0 +1,72 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/attribute/single_raw_attribute.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchcommon/attribute/config.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <memory>
+
+using search::AttributeFactory;
+using search::AttributeVector;
+using search::attribute::BasicType;
+using search::attribute::CollectionType;
+using search::attribute::Config;
+using search::attribute::SingleRawAttribute;
+using vespalib::ConstArrayRef;
+
+
+// Expected result when a document has no raw value.
+std::vector<char> empty;
+// Sample payload used by the tests.
+vespalib::string hello("hello");
+// Built from hello's character pointer and size, so hello must be initialized first.
+vespalib::ConstArrayRef<char> raw_hello(hello.c_str(), hello.size());
+
+// Copies the characters referenced by value into a freshly allocated vector.
+std::vector<char> as_vector(vespalib::stringref value) {
+    const char* first = value.data();
+    const char* last = first + value.size();
+    return std::vector<char>(first, last);
+}
+
+// Copies the elements referenced by value into a freshly allocated vector.
+std::vector<char> as_vector(vespalib::ConstArrayRef<char> value) {
+    std::vector<char> copy(value.data(), value.data() + value.size());
+    return copy;
+}
+
+/**
+ * Test fixture holding one attribute, reachable both through the generic
+ * AttributeVector interface (_attr) and as a SingleRawAttribute (_raw).
+ */
+class RawAttributeTest : public ::testing::Test
+{
+protected:
+ // Owns the attribute under test.
+ std::shared_ptr<AttributeVector> _attr;
+ // Typed pointer to the object owned by _attr; not separately owned.
+ SingleRawAttribute* _raw;
+
+ RawAttributeTest();
+ ~RawAttributeTest() override;
+ // Returns a copy of the raw value that _raw reports for docid.
+ std::vector<char> get_raw(uint32_t docid);
+};
+
+
+// Creates a single-value raw attribute named "raw" through AttributeFactory,
+// keeps a typed pointer to it and adds the reserved document.
+RawAttributeTest::RawAttributeTest()
+    : ::testing::Test(),
+      _attr(),
+      _raw(nullptr)
+{
+    Config raw_config(BasicType::RAW, CollectionType::SINGLE);
+    _attr = AttributeFactory::createAttribute("raw", raw_config);
+    // The reference form of dynamic_cast throws instead of yielding nullptr
+    // if the factory returned some other attribute type.
+    auto& typed_attr = dynamic_cast<SingleRawAttribute&>(*_attr);
+    _raw = &typed_attr;
+    _attr->addReservedDoc();
+}
+
+// Defaulted: _attr is a shared_ptr and _raw is a plain pointer, so nothing is released by hand.
+RawAttributeTest::~RawAttributeTest() = default;
+
+// Reads the raw value for docid from the attribute and returns it as an owned copy.
+std::vector<char>
+RawAttributeTest::get_raw(uint32_t docid)
+{
+    vespalib::ConstArrayRef<char> stored = _raw->get_raw(docid);
+    return as_vector(stored);
+}
+
+// Checks that a document starts out empty, that set_raw() makes the value
+// readable through get_raw(), and that clearDoc() makes it empty again.
+TEST_F(RawAttributeTest, can_set_and_clear_value)
+{
+ EXPECT_TRUE(_attr->addDocs(10));
+ // get_raw() only reads documents below the committed docid limit;
+ // presumably commit() is what raises that limit to cover the new documents.
+ _attr->commit();
+ EXPECT_EQ(empty, get_raw(1));
+ _raw->set_raw(1, raw_hello);
+ // No commit between set_raw() and the read: the value is expected to be visible right away.
+ EXPECT_EQ(as_vector(hello), get_raw(1));
+ _attr->clearDoc(1);
+ EXPECT_EQ(empty, get_raw(1));
+}
+
+// No main() is written in this file; presumably this macro expands to one that runs all tests.
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
index 72c7efe3094..594afc31795 100644
--- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
@@ -123,6 +123,7 @@ vespa_add_library(searchlib_attribute OBJECT
single_enum_search_context.cpp
single_numeric_enum_search_context.cpp
single_numeric_search_context.cpp
+ single_raw_attribute.cpp
single_small_numeric_search_context.cpp
single_string_enum_search_context.cpp
single_string_enum_hint_search_context.cpp
diff --git a/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp b/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp
index fe2b0c9f989..1a0d24b0595 100644
--- a/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp
@@ -7,6 +7,7 @@
#include "singlestringattribute.h"
#include "singleboolattribute.h"
#include "singlenumericattribute.hpp"
+#include "single_raw_attribute.h"
#include <vespa/eval/eval/fast_value.h>
#include <vespa/searchlib/tensor/dense_tensor_attribute.h>
#include <vespa/searchlib/tensor/serialized_fast_value_attribute.h>
@@ -51,6 +52,8 @@ AttributeFactory::createSingleStd(stringref name, const Config & info)
}
case BasicType::REFERENCE:
return std::make_shared<attribute::ReferenceAttribute>(name, info);
+ case BasicType::RAW:
+ return std::make_shared<attribute::SingleRawAttribute>(name, info);
default:
break;
}
diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp
new file mode 100644
index 00000000000..9bd3a81482a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp
@@ -0,0 +1,170 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "single_raw_attribute.h"
+#include <vespa/searchcommon/attribute/config.h>
+#include <vespa/vespalib/datastore/array_store.hpp>
+
+using vespalib::alloc::MemoryAllocator;
+using vespalib::datastore::EntryRef;
+
+namespace {
+
+// Passed as the last argument to ArrayStoreType::optimizedConfigForHugePage()
+// when the array store is configured in the constructor.
+constexpr float ALLOC_GROW_FACTOR = 0.2;
+
+}
+
+namespace search::attribute {
+
+/**
+ * Sets up the two stores backing the attribute.
+ *
+ * _ref_vector is built from the config's grow strategy and this attribute's
+ * generation holder. _array_store takes its configuration from
+ * ArrayStoreType::optimizedConfigForHugePage() and is handed the result of
+ * get_memory_allocator().
+ */
+SingleRawAttribute::SingleRawAttribute(const vespalib::string& name, const Config& config)
+ : NotImplementedAttribute(name, config),
+ _ref_vector(config.getGrowStrategy(), getGenerationHolder()),
+ // NOTE(review): 1000u and 8_Ki are positional tuning arguments whose meaning is
+ // defined by optimizedConfigForHugePage() - confirm against its declaration.
+ _array_store(ArrayStoreType::optimizedConfigForHugePage(1000u,
+ MemoryAllocator::HUGEPAGE_SIZE,
+ MemoryAllocator::PAGE_SIZE,
+ 8_Ki, ALLOC_GROW_FACTOR),
+ get_memory_allocator())
+{
+}
+
+// Asks the generation holder to reclaim everything it still holds before the
+// members are destroyed.
+SingleRawAttribute::~SingleRawAttribute()
+{
+ getGenerationHolder().reclaim_all();
+}
+
+/**
+ * Forwards oldest_used_gen to the array store and then to the generation
+ * holder, so each can reclaim according to its own reclaim contract.
+ *
+ * @param oldest_used_gen generation value passed through unchanged.
+ */
+void
+SingleRawAttribute::reclaim_memory(generation_t oldest_used_gen)
+{
+ _array_store.reclaim_memory(oldest_used_gen);
+ getGenerationHolder().reclaim(oldest_used_gen);
+}
+
+/**
+ * Passes current_gen to the generation holder and then to the array store via
+ * their assign_generation() calls.
+ * NOTE(review): presumably this tags data currently on hold with the generation
+ * that is about to be left - confirm against GenerationHolder / ArrayStore.
+ *
+ * @param current_gen generation value passed through unchanged.
+ */
+void
+SingleRawAttribute::before_inc_generation(generation_t current_gen)
+{
+ getGenerationHolder().assign_generation(current_gen);
+ _array_store.assign_generation(current_gen);
+}
+
+/**
+ * Appends one document whose entry is a default-constructed AtomicEntryRef.
+ *
+ * @param docId set to the id of the new document (the document count minus one).
+ * @return always true.
+ */
+bool
+SingleRawAttribute::addDoc(DocId &docId)
+{
+ // Fullness is sampled before push_back(); it decides below whether the
+ // generation is incremented or unused memory is reclaimed.
+ // NOTE(review): presumably push_back() on a full vector replaces its buffer and
+ // puts the old one on hold - confirm in RcuVectorBase.
+ bool incGen = _ref_vector.isFull();
+ _ref_vector.push_back(AtomicEntryRef());
+ AttributeVector::incNumDocs();
+ docId = AttributeVector::getNumDocs() - 1;
+ updateUncommittedDocIdLimit(docId);
+ if (incGen) {
+ incGeneration();
+ } else {
+ reclaim_unused_memory();
+ }
+ return true;
+}
+
+/**
+ * Commit hook.
+ *
+ * The generation is always incremented once. If _array_store.consider_compact()
+ * returns true, compact_worst() is run with the configured compaction strategy,
+ * the generation is incremented a second time and statistics are refreshed.
+ */
+void
+SingleRawAttribute::onCommit()
+{
+ incGeneration();
+ if (_array_store.consider_compact()) {
+ auto context = _array_store.compact_worst(getConfig().getCompactionStrategy());
+ // compact_worst() may yield an empty context, in which case no refs are touched.
+ if (context) {
+ // The context gets a view of every entry in _ref_vector; presumably it
+ // rewrites refs that pointed into the compacted buffers - confirm.
+ context->compact(vespalib::ArrayRef<AtomicEntryRef>(&_ref_vector[0], _ref_vector.size()));
+ }
+ incGeneration();
+ updateStat(true);
+ }
+}
+
+/**
+ * Recomputes memory usage and reports it through updateStatistics().
+ *
+ * The size of the reference vector is passed as both of the first two
+ * arguments, followed by the allocated, used, dead and on-hold byte counts.
+ */
+void
+SingleRawAttribute::onUpdateStat()
+{
+    vespalib::MemoryUsage usage = update_stat();
+    const auto entry_count = _ref_vector.size();
+    this->updateStatistics(entry_count, entry_count,
+                           usage.allocatedBytes(), usage.usedBytes(),
+                           usage.deadBytes(), usage.allocatedBytesOnHold());
+}
+
+/**
+ * Combines the memory usage of the reference vector, the array store and the
+ * bytes held by the generation holder.
+ *
+ * @return the merged memory usage.
+ */
+vespalib::MemoryUsage
+SingleRawAttribute::update_stat()
+{
+    vespalib::MemoryUsage usage = _ref_vector.getMemoryUsage();
+    const auto& compaction_strategy = getConfig().getCompactionStrategy();
+    usage.merge(_array_store.update_stat(compaction_strategy));
+    const auto held_bytes = getGenerationHolder().get_held_bytes();
+    usage.mergeGenerationHeldBytes(held_bytes);
+    return usage;
+}
+
+/**
+ * Decodes the buffer referenced by ref.
+ *
+ * The buffer starts with a uint32_t length, read with memcpy (so in host byte
+ * order), followed by that many value bytes. The buffer may be longer than
+ * header + value; any trailing bytes are not part of the result.
+ *
+ * @param ref reference into _array_store.
+ * @return the value bytes following the length header.
+ */
+vespalib::ConstArrayRef<char>
+SingleRawAttribute::get_raw(EntryRef ref) const
+{
+ auto array = _array_store.get(ref);
+ uint32_t size = 0;
+ // The buffer must at least hold the length header ...
+ assert(array.size() >= sizeof(size));
+ memcpy(&size, array.data(), sizeof(size));
+ // ... and the number of value bytes the header announces.
+ assert(array.size() >= sizeof(size) + size);
+ return {array.data() + sizeof(size), size};
+}
+
+/**
+ * Returns the raw value stored for docid.
+ *
+ * An empty array is returned when docid is at or above the committed docid
+ * limit, or when the document's entry reference is not valid.
+ */
+vespalib::ConstArrayRef<char>
+SingleRawAttribute::get_raw(DocId docid) const
+{
+    if (docid >= getCommittedDocIdLimit()) {
+        return {};
+    }
+    const EntryRef stored = acquire_entry_ref(docid);
+    if (stored.valid()) {
+        return get_raw(stored);
+    }
+    return {};
+}
+
+/**
+ * Stores a copy of raw in the array store and returns a reference to it.
+ *
+ * The stored buffer starts with a uint32_t length header, written with memcpy
+ * (so in host byte order), followed by the value bytes. If the store's type
+ * mapper selects an array larger than header + value, the remaining bytes are
+ * zero filled.
+ *
+ * @param raw value to store; its size must fit in the 32-bit length header.
+ * @return reference to the newly allocated buffer.
+ */
+EntryRef
+SingleRawAttribute::set_raw(vespalib::ConstArrayRef<char> raw)
+{
+    uint32_t size = raw.size();
+    // The header can only describe 2^32 - 1 bytes. Without this check a larger
+    // value would be stored truncated, with a header that disagrees with the input.
+    assert(size == raw.size());
+    size_t buffer_size = raw.size() + sizeof(size);
+    auto& mapper = _array_store.get_mapper();
+    auto type_id = mapper.get_type_id(buffer_size);
+    // When the mapper reports type id 0, buffer_size itself is used as the array size.
+    auto array_size = (type_id != 0) ? mapper.get_array_size(type_id) : buffer_size;
+    assert(array_size >= buffer_size);
+    auto ref = _array_store.allocate(array_size);
+    auto buf = _array_store.get_writable(ref);
+    memcpy(buf.data(), &size, sizeof(size));
+    // An empty value may come with a null data pointer, which must not be handed to memcpy.
+    if (size != 0) {
+        memcpy(buf.data() + sizeof(size), raw.data(), size);
+    }
+    if (array_size > buffer_size) {
+        memset(buf.data() + buffer_size, 0, array_size - buffer_size);
+    }
+    return ref;
+}
+
+/**
+ * Replaces the value of docid with a copy of raw.
+ *
+ * The new buffer is written first, its reference is then stored with
+ * store_release(), and only afterwards is the previous buffer (if any)
+ * removed from the array store.
+ *
+ * @param docid document to update; asserted to be below _ref_vector.size().
+ * @param raw   value to store.
+ */
+void
+SingleRawAttribute::set_raw(DocId docid, vespalib::ConstArrayRef<char> raw)
+{
+ auto ref = set_raw(raw);
+ assert(docid < _ref_vector.size());
+ updateUncommittedDocIdLimit(docid);
+ auto& elem_ref = _ref_vector[docid];
+ EntryRef old_ref(elem_ref.load_relaxed());
+ // Release store pairs with the load_acquire() in acquire_entry_ref().
+ elem_ref.store_release(ref);
+ if (old_ref.valid()) {
+ _array_store.remove(old_ref);
+ }
+}
+
+/**
+ * Removes the value of docId, if it has one.
+ *
+ * @param docId document to clear.
+ * @return 1 if a value was removed, 0 if the document had no value.
+ */
+uint32_t
+SingleRawAttribute::clearDoc(DocId docId)
+{
+ // NOTE(review): unlike set_raw(), docId is not asserted to be below
+ // _ref_vector.size() here - confirm callers guarantee it.
+ updateUncommittedDocIdLimit(docId);
+ auto& elem_ref = _ref_vector[docId];
+ EntryRef old_ref(elem_ref.load_relaxed());
+ // Relaxed store, where set_raw() uses release; presumably sufficient because a
+ // default EntryRef refers to no buffer - confirm.
+ elem_ref.store_relaxed(EntryRef());
+ if (old_ref.valid()) {
+ _array_store.remove(old_ref);
+ return 1u;
+ }
+ return 0u;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h
new file mode 100644
index 00000000000..7477b13bc5a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h
@@ -0,0 +1,43 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "not_implemented_attribute.h"
+#include <vespa/vespalib/datastore/array_store.h>
+#include <vespa/vespalib/util/rcuvector.h>
+
+namespace search::attribute {
+
+/**
+ * Attribute vector storing a single raw value per document.
+ *
+ * Each document has one entry in _ref_vector. A valid entry refers to a
+ * buffer in _array_store that holds a uint32_t length header followed by the
+ * value bytes; an invalid entry means the document has no value.
+ */
+class SingleRawAttribute : public NotImplementedAttribute
+{
+    using AtomicEntryRef = vespalib::datastore::AtomicEntryRef;
+    using EntryRef = vespalib::datastore::EntryRef;
+    using RefVector = vespalib::RcuVectorBase<AtomicEntryRef>;
+    using RefType = vespalib::datastore::EntryRefT<19>;
+    using ArrayStoreType = vespalib::datastore::ArrayStore<char, RefType>;
+
+    // Declaration order is also the initialization order used by the constructor.
+    RefVector      _ref_vector;   // one entry reference per document
+    ArrayStoreType _array_store;  // buffers holding length header + value bytes
+
+    // Merges memory usage of both stores and the generation holder.
+    vespalib::MemoryUsage update_stat();
+    // Loads the entry reference for docid with acquire semantics.
+    EntryRef acquire_entry_ref(DocId docid) const noexcept { return _ref_vector.acquire_elem_ref(docid).load_acquire(); }
+    // Copies raw into a new buffer in the array store and returns its reference.
+    EntryRef set_raw(vespalib::ConstArrayRef<char> raw);
+    // Decodes the buffer referenced by ref into the stored value bytes.
+    vespalib::ConstArrayRef<char> get_raw(EntryRef ref) const;
+public:
+    SingleRawAttribute(const vespalib::string& name, const Config& config);
+    ~SingleRawAttribute() override;
+    void onCommit() override;
+    void onUpdateStat() override;
+    void reclaim_memory(generation_t oldest_used_gen) override;
+    void before_inc_generation(generation_t current_gen) override;
+    bool addDoc(DocId &docId) override;
+    // Returns the value of docid, or an empty array if it has none or is not yet committed.
+    vespalib::ConstArrayRef<char> get_raw(DocId docid) const override;
+    // Replaces the value of docid with a copy of raw.
+    void set_raw(DocId docid, vespalib::ConstArrayRef<char> raw);
+    // Removes the value of docId; returns 1 if a value was removed, otherwise 0.
+    uint32_t clearDoc(DocId docId) override;
+};
+
+}