From a50527a2adb0da7080fabd297e804dcee7140b30 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Tue, 21 Mar 2023 16:50:03 +0100 Subject: Add SingleRawExtAttribute, used by streaming search. --- .../attribute/extendattributes/extendattribute.cpp | 49 ++++++++++++++++ .../src/vespa/searchlib/attribute/CMakeLists.txt | 1 + .../vespa/searchlib/attribute/attributevector.h | 1 + .../attribute/single_raw_ext_attribute.cpp | 67 ++++++++++++++++++++++ .../searchlib/attribute/single_raw_ext_attribute.h | 28 +++++++++ 5 files changed, 146 insertions(+) create mode 100644 searchlib/src/vespa/searchlib/attribute/single_raw_ext_attribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/single_raw_ext_attribute.h (limited to 'searchlib/src') diff --git a/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp index a44965ffb31..98d79ebd50c 100644 --- a/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp +++ b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp @@ -1,9 +1,20 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include #include +#include + +using search::attribute::SingleRawExtAttribute; namespace search { +std::vector as_vector(vespalib::stringref value) { + return {value.data(), value.data() + value.size()}; +} + +std::vector as_vector(vespalib::ConstArrayRef value) { + return {value.data(), value.data() + value.size()}; +} + class ExtendAttributeTest : public ::testing::Test { protected: @@ -15,6 +26,8 @@ protected: void testExtendFloat(Attribute & attr); template void testExtendString(Attribute & attr); + template + void testExtendRaw(Attribute & attr); }; template @@ -132,6 +145,35 @@ void ExtendAttributeTest::testExtendString(Attribute & attr) } } +template +void ExtendAttributeTest::testExtendRaw(Attribute & attr) +{ + std::vector zeros{10, 0, 0, 11}; + uint32_t docId(0); + EXPECT_EQ(0u, attr.getNumDocs()); + attr.addDoc(docId); + EXPECT_EQ(0u, docId); + EXPECT_EQ(1u, attr.getNumDocs()); + attr.add(as_vector("1.7"), 10); + auto buf = attr.get_raw(0); + EXPECT_EQ(as_vector("1.7"), as_vector(buf)); + attr.add(as_vector("2.3"), 20); + buf = attr.get_raw(0); + EXPECT_EQ(as_vector("2.3"), as_vector(buf)); + attr.addDoc(docId); + EXPECT_EQ(1u, docId); + EXPECT_EQ(attr.getNumDocs(), 2u); + attr.add(as_vector("3.6"), 30); + buf = attr.get_raw(1); + EXPECT_EQ(as_vector("3.6"), as_vector(buf)); + buf = attr.get_raw(0); + EXPECT_EQ(as_vector("2.3"), as_vector(buf)); + attr.addDoc(docId); + EXPECT_EQ(2u, docId); + attr.add(zeros, 40); + buf = attr.get_raw(2); + EXPECT_EQ(zeros, as_vector(buf)); +} TEST_F(ExtendAttributeTest, single_integer_ext_attribute) { @@ -196,6 +238,13 @@ TEST_F(ExtendAttributeTest, weighted_set_string_ext_attribute) testExtendString(wssattr); } +TEST_F(ExtendAttributeTest, single_raw_ext_attribute) +{ + SingleRawExtAttribute srattr("sr1"); + EXPECT_TRUE(! srattr.hasMultiValue()); + testExtendRaw(srattr); +} + } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index db5cf43050e..7634e954510 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -133,6 +133,7 @@ vespa_add_library(searchlib_attribute OBJECT single_raw_attribute.cpp single_raw_attribute_loader.cpp single_raw_attribute_saver.cpp + single_raw_ext_attribute.cpp single_small_numeric_search_context.cpp single_string_enum_search_context.cpp single_string_enum_hint_search_context.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h index b94f49c073f..3d14622ca02 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.h +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h @@ -85,6 +85,7 @@ public: virtual bool add(int64_t, int32_t = 1) { return false; } virtual bool add(double, int32_t = 1) { return false; } virtual bool add(const char *, int32_t = 1) { return false; } + virtual bool add(vespalib::ConstArrayRef, int32_t = 1) { return false; } virtual ~IExtendAttribute() = default; }; diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_ext_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/single_raw_ext_attribute.cpp new file mode 100644 index 00000000000..46b275d2ac5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_raw_ext_attribute.cpp @@ -0,0 +1,67 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "single_raw_ext_attribute.h" +#include + +#include +LOG_SETUP(".searchlib.attribute.single_raw_ext_attribute"); + +namespace search::attribute { + +SingleRawExtAttribute::SingleRawExtAttribute(const vespalib::string& name) + : RawAttribute(name, Config(BasicType::RAW, CollectionType::SINGLE)), + IExtendAttribute(), + _buffer(), + _offsets() +{ +} + +SingleRawExtAttribute::~SingleRawExtAttribute() = default; + +void +SingleRawExtAttribute::onCommit() +{ + LOG_ABORT("should not be reached"); +} + +void +SingleRawExtAttribute::onUpdateStat() +{ +} + +bool +SingleRawExtAttribute::addDoc(DocId& docId) +{ + size_t offset(_buffer.size()); + docId = _offsets.size(); + _offsets.push_back(offset); + incNumDocs(); + setCommittedDocIdLimit(getNumDocs()); + return true; +} + +bool +SingleRawExtAttribute::add(vespalib::ConstArrayRef v, int32_t) +{ + const size_t start(_offsets.back()); + const size_t sz(v.size()); + _buffer.resize(start + sz); + memcpy(&_buffer[start], v.data(), sz); + return true; +} + +vespalib::ConstArrayRef +SingleRawExtAttribute::get_raw(DocId docid) const +{ + if (docid >= _offsets.size()) { + return {}; + } + auto offset = _offsets[docid]; + auto size = ((docid + 1 >= _offsets.size()) ? _buffer.size() : _offsets[docid + 1]) - offset; + if (size == 0) { + return {}; + } + return {_buffer.data() + offset, size}; +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_ext_attribute.h b/searchlib/src/vespa/searchlib/attribute/single_raw_ext_attribute.h new file mode 100644 index 00000000000..e3beb21c1a5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_raw_ext_attribute.h @@ -0,0 +1,28 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "raw_attribute.h" +#include + +namespace search::attribute { + +/** + * Attribute vector storing a single raw value per document in streaming search. + */ +class SingleRawExtAttribute : public RawAttribute, + public IExtendAttribute +{ + std::vector> _buffer; + std::vector> _offsets; +public: + SingleRawExtAttribute(const vespalib::string& name); + ~SingleRawExtAttribute() override; + void onCommit() override; + void onUpdateStat() override; + bool addDoc(DocId& docId) override; + bool add(vespalib::ConstArrayRef v, int32_t) override; + vespalib::ConstArrayRef get_raw(DocId docid) const override; +}; + +} -- cgit v1.2.3