diff options
author | Geir Storli <geirst@yahooinc.com> | 2022-04-19 16:45:29 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-19 16:45:29 +0200 |
commit | 47620a414231f5b84568e59a1b641a2ae0811ad5 (patch) | |
tree | 0a12323a43754a9170ca1011bbc71f1deb1fdc44 | |
parent | 753018a168de87c2051135b6ee75b49d0a666f30 (diff) | |
parent | a6ae0e36bb0d5fdfd276b7576b2591334003fbf6 (diff) |
Merge pull request #22152 from vespa-engine/toregge/add-copy-and-remapping-multi-value-read-views
Add multi-value read views that handle mapping from enumerated value to value
15 files changed, 513 insertions, 4 deletions
diff --git a/searchcommon/src/vespa/searchcommon/attribute/i_multi_value_attribute.h b/searchcommon/src/vespa/searchcommon/attribute/i_multi_value_attribute.h index 3ed86a076b3..641e602e522 100644 --- a/searchcommon/src/vespa/searchcommon/attribute/i_multi_value_attribute.h +++ b/searchcommon/src/vespa/searchcommon/attribute/i_multi_value_attribute.h @@ -38,6 +38,7 @@ public: virtual const IArrayReadView<int64_t>* make_read_view(ArrayTag<int64_t>, vespalib::Stash&) const { return nullptr; } virtual const IArrayReadView<float>* make_read_view(ArrayTag<float>, vespalib::Stash&) const { return nullptr; } virtual const IArrayReadView<double>* make_read_view(ArrayTag<double>, vespalib::Stash&) const { return nullptr; } + virtual const IArrayReadView<const char*>* make_read_view(ArrayTag<const char*>, vespalib::Stash&) const { return nullptr; } virtual const IWeightedSetReadView<int8_t>* make_read_view(WeightedSetTag<int8_t>, vespalib::Stash&) const { return nullptr; } virtual const IWeightedSetReadView<int16_t>* make_read_view(WeightedSetTag<int16_t>, vespalib::Stash&) const { return nullptr; } @@ -45,6 +46,7 @@ public: virtual const IWeightedSetReadView<int64_t>* make_read_view(WeightedSetTag<int64_t>, vespalib::Stash&) const { return nullptr; } virtual const IWeightedSetReadView<float>* make_read_view(WeightedSetTag<float>, vespalib::Stash&) const { return nullptr; } virtual const IWeightedSetReadView<double>* make_read_view(WeightedSetTag<double>, vespalib::Stash&) const { return nullptr; } + virtual const IWeightedSetReadView<const char*>* make_read_view(WeightedSetTag<const char*>, vespalib::Stash&) const { return nullptr; } virtual const IArrayEnumReadView* make_read_view(ArrayEnumTag, vespalib::Stash&) const { return nullptr; } virtual const IWeightedSetEnumReadView* make_read_view(WeightedSetEnumTag, vespalib::Stash&) const { return nullptr; } diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index a7886cc3c61..300900dbb77 100644 --- 
a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -88,6 +88,7 @@ vespa_define_module( src/tests/attribute/imported_attribute_vector src/tests/attribute/imported_search_context src/tests/attribute/multi_value_mapping + src/tests/attribute/multi_value_read_view src/tests/attribute/posting_list_merger src/tests/attribute/postinglist src/tests/attribute/postinglistattribute diff --git a/searchlib/src/tests/attribute/multi_value_read_view/CMakeLists.txt b/searchlib/src/tests/attribute/multi_value_read_view/CMakeLists.txt new file mode 100644 index 00000000000..32d90273623 --- /dev/null +++ b/searchlib/src/tests/attribute/multi_value_read_view/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_attribute_multi_value_read_view_test_app TEST + SOURCES + multi_value_read_view_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_attribute_multi_value_read_view_test_app COMMAND searchlib_attribute_multi_value_read_view_test_app) diff --git a/searchlib/src/tests/attribute/multi_value_read_view/multi_value_read_view_test.cpp b/searchlib/src/tests/attribute/multi_value_read_view/multi_value_read_view_test.cpp new file mode 100644 index 00000000000..150490ddd92 --- /dev/null +++ b/searchlib/src/tests/attribute/multi_value_read_view/multi_value_read_view_test.cpp @@ -0,0 +1,248 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/searchcommon/attribute/i_multi_value_attribute.h> +#include <vespa/searchcommon/attribute/multi_value_traits.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/floatbase.h> +#include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/attribute/stringbase.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <vespa/vespalib/util/stash.h> + + + +namespace search::attribute { + +class TestParam { + BasicType _basic_type; + +public: + TestParam(BasicType basic_type_in) + : _basic_type(basic_type_in) + { + } + + BasicType basic_type() const noexcept { return _basic_type; } +}; + +std::ostream& operator<<(std::ostream& os, const TestParam& param) +{ + os << param.basic_type().asString(); + return os; +} + +class MultiValueReadViewTest : public ::testing::TestWithParam<TestParam> +{ +protected: + MultiValueReadViewTest() + : ::testing::TestWithParam<TestParam>() + { + } + ~MultiValueReadViewTest() override = default; + + template <typename AttributeBaseType, typename BaseType> + void populate_helper(AttributeBaseType& attr, const std::vector<BaseType>& values); + void populate(AttributeVector& attr); + template <typename MultiValueType> + void check_values_helper(const AttributeVector &attr, const std::vector<multivalue::ValueType_t<MultiValueType>>& exp_values); + template <typename BasicType> + void check_integer_values(const AttributeVector &attr); + template <typename BasicType> + void check_floating_point_values(const AttributeVector &attr); + void check_string_values(const AttributeVector &attr); + void check_values(const AttributeVector& attr); + void test_normal_attribute_vector(CollectionType collection_type, bool fast_search); +}; + +template <typename AttributeBaseType, typename BaseType> +void +MultiValueReadViewTest::populate_helper(AttributeBaseType& attr, const std::vector<BaseType>& values) +{ + attr.addReservedDoc(); + uint32_t doc_id = 0; + attr.addDoc(doc_id); + 
EXPECT_NE(0u, doc_id); + attr.clearDoc(doc_id); + attr.addDoc(doc_id); + EXPECT_NE(0u, doc_id); + attr.clearDoc(doc_id); + attr.append(doc_id, values[0], 2); + attr.append(doc_id, values[1], 7); +} + +void +MultiValueReadViewTest::populate(AttributeVector& attr) +{ + switch (attr.getConfig().basicType().type()) { + case BasicType::Type::INT8: + case BasicType::Type::INT16: + case BasicType::Type::INT32: + case BasicType::Type::INT64: + populate_helper<IntegerAttribute, int64_t>(dynamic_cast<IntegerAttribute&>(attr), {42, 44}); + break; + case BasicType::Type::FLOAT: + case BasicType::Type::DOUBLE: + populate_helper<FloatingPointAttribute, double>(dynamic_cast<FloatingPointAttribute&>(attr), {42.0, 44.0}); + break; + case BasicType::Type::STRING: + populate_helper<StringAttribute, const char*>(dynamic_cast<StringAttribute&>(attr), {"42", "44"}); + break; + default: + FAIL() << "Cannot populate attribute vector"; + } + attr.commit(); +} + +namespace { + +template <typename BasicType> +struct CompareValues +{ + bool operator()(const BasicType &lhs, const BasicType &rhs) const { return lhs < rhs; } + bool operator()(const multivalue::WeightedValue<BasicType>& lhs, const multivalue::WeightedValue<BasicType>& rhs) const { return lhs.value() < rhs.value(); } + bool equal(const BasicType &lhs, const BasicType &rhs) const { return lhs == rhs; } + bool equal(const multivalue::WeightedValue<BasicType>& lhs, const multivalue::WeightedValue<BasicType>& rhs) const { return lhs.value() == rhs.value(); } +}; + +template <> +struct CompareValues<const char *> +{ + bool operator()(const char *lhs, const char *rhs) const { return strcmp(lhs, rhs) < 0; } + bool operator()(const multivalue::WeightedValue<const char *>& lhs, const multivalue::WeightedValue<const char *>& rhs) const { return strcmp(lhs.value(), rhs.value()) < 0; } + bool equal(const char *lhs, const char *rhs) const { return strcmp(lhs, rhs) == 0; } + bool equal(const multivalue::WeightedValue<const char *>& lhs, const 
multivalue::WeightedValue<const char *>& rhs) const { return strcmp(lhs.value(), rhs.value()) == 0; }
+};
+
+}
+
+template <typename MultiValueType>
+void
+MultiValueReadViewTest::check_values_helper(const AttributeVector &attr, const std::vector<multivalue::ValueType_t<MultiValueType>>& exp_values)
+{ // Reads docid 1 (expected empty) and docid 2 (expected exactly two values) through a read view of MultiValueType.
+    vespalib::Stash stash;
+    auto mv_attr = attr.as_multi_value_attribute();
+    ASSERT_NE(nullptr, mv_attr); // fatal: mv_attr is dereferenced on the next line
+    auto read_view = mv_attr->make_read_view(IMultiValueAttribute::Tag<MultiValueType>(), stash);
+    ASSERT_NE(nullptr, read_view); // fatal: read_view is dereferenced on the next line
+    auto values = read_view->get_values(1);
+    EXPECT_TRUE(values.empty());
+    values = read_view->get_values(2);
+    std::vector<MultiValueType> values_copy(values.begin(), values.end());
+    bool was_array = true;
+    CompareValues<multivalue::ValueType_t<MultiValueType>> compare_values;
+    if (attr.getConfig().collectionType().type() == CollectionType::Type::WSET) {
+        std::sort(values_copy.begin(), values_copy.end(), compare_values); // sort by value so the comparison below is order independent
+        was_array = false;
+    }
+    ASSERT_EQ(2u, values_copy.size()); // fatal: elements [0] and [1] are indexed below
+    if constexpr (multivalue::is_WeightedValue_v<MultiValueType>) {
+        EXPECT_TRUE(compare_values.equal(exp_values[0], values_copy[0].value()));
+        EXPECT_EQ(was_array ? 1 : 2, values_copy[0].weight()); // weight 1 is expected for arrays, the appended weight (2) for weighted sets
+        EXPECT_TRUE(compare_values.equal(exp_values[1], values_copy[1].value()));
+        EXPECT_EQ(was_array ? 1 : 7, values_copy[1].weight()); // weight 1 is expected for arrays, the appended weight (7) for weighted sets
+    } else {
+        EXPECT_TRUE(compare_values.equal(exp_values[0], values_copy[0]));
+        EXPECT_TRUE(compare_values.equal(exp_values[1], values_copy[1]));
+    }
+}
+
+template <typename BasicType>
+void
+MultiValueReadViewTest::check_integer_values(const AttributeVector &attr)
+{
+    std::vector<BasicType> exp_values{42, 44};
+    check_values_helper<BasicType>(attr, exp_values);
+    check_values_helper<multivalue::WeightedValue<BasicType>>(attr, exp_values);
+}
+
+template <typename BasicType>
+void
+MultiValueReadViewTest::check_floating_point_values(const AttributeVector &attr)
+{
+    std::vector<BasicType> exp_values{42.0, 44.0};
+    check_values_helper<BasicType>(attr, exp_values);
+    check_values_helper<multivalue::WeightedValue<BasicType>>(attr, exp_values);
+}
+
+void
+MultiValueReadViewTest::check_string_values(const AttributeVector &attr)
+{
+    std::vector<const char *> exp_values{"42", "44"};
+    check_values_helper<const char *>(attr, exp_values);
+    check_values_helper<multivalue::WeightedValue<const char *>>(attr, exp_values);
+}
+
+void
+MultiValueReadViewTest::check_values(const AttributeVector& attr)
+{
+    switch (attr.getConfig().basicType().type()) {
+    case BasicType::Type::INT8:
+        check_integer_values<int8_t>(attr);
+        break;
+    case BasicType::Type::INT16:
+        check_integer_values<int16_t>(attr);
+        break;
+    case BasicType::Type::INT32:
+        check_integer_values<int32_t>(attr);
+        break;
+    case BasicType::Type::INT64:
+        check_integer_values<int64_t>(attr);
+        break;
+    case BasicType::Type::FLOAT:
+        check_floating_point_values<float>(attr);
+        break;
+    case BasicType::Type::DOUBLE:
+        check_floating_point_values<double>(attr);
+        break;
+    case BasicType::Type::STRING:
+        check_string_values(attr);
+        break;
+    default:
+        FAIL() << "Cannot check values in attribute vector";
+    }
+}
+
+void
+MultiValueReadViewTest::test_normal_attribute_vector(CollectionType collection_type, bool fast_search)
+{
+    auto param = GetParam();
+    Config config(param.basic_type(),
collection_type); + config.setFastSearch(fast_search); + auto attr = AttributeFactory::createAttribute("attr", config); + populate(*attr); + check_values(*attr); +} + +TEST_P(MultiValueReadViewTest, test_array) +{ + test_normal_attribute_vector(CollectionType::Type::ARRAY, false); +}; + +TEST_P(MultiValueReadViewTest, test_enumerated_array) +{ + test_normal_attribute_vector(CollectionType::Type::ARRAY, true); +}; + +TEST_P(MultiValueReadViewTest, test_weighted_set) +{ + test_normal_attribute_vector(CollectionType::Type::WSET, false); +}; + +TEST_P(MultiValueReadViewTest, test_enumerated_weighted_set) +{ + test_normal_attribute_vector(CollectionType::Type::WSET, true); +}; + +auto test_values = ::testing::Values(TestParam(BasicType::Type::INT8), + TestParam(BasicType::Type::INT16), + TestParam(BasicType::Type::INT32), + TestParam(BasicType::Type::INT64), + TestParam(BasicType::Type::FLOAT), + TestParam(BasicType::Type::DOUBLE), + TestParam(BasicType::Type::STRING)); + +VESPA_GTEST_INSTANTIATE_TEST_SUITE_P(ReadView, MultiValueReadViewTest, test_values,testing::PrintToStringParamName()); + +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index 79c68ba4fe3..8caa6299025 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -26,6 +26,7 @@ vespa_add_library(searchlib_attribute OBJECT bitvector_search_cache.cpp changevector.cpp configconverter.cpp + copy_multi_value_read_view.cpp createarrayfastsearch.cpp createarraystd.cpp createsetfastsearch.cpp @@ -45,6 +46,7 @@ vespa_add_library(searchlib_attribute OBJECT enum_store_dictionary.cpp enum_store_loaders.cpp enumstore.cpp + enumerated_multi_value_read_view.cpp extendableattributes.cpp fixedsourceselector.cpp flagattribute.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/copy_multi_value_read_view.cpp 
b/searchlib/src/vespa/searchlib/attribute/copy_multi_value_read_view.cpp new file mode 100644 index 00000000000..a2cead2e1ad --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/copy_multi_value_read_view.cpp @@ -0,0 +1,54 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "copy_multi_value_read_view.h" + +using vespalib::datastore::AtomicEntryRef; + +namespace search::attribute { + +template <typename MultiValueType, typename RawMultiValueType> +CopyMultiValueReadView<MultiValueType, RawMultiValueType>::CopyMultiValueReadView(MultiValueMappingReadView<RawMultiValueType> mv_mapping_read_view) + : _mv_mapping_read_view(mv_mapping_read_view), + _copy() +{ +} + +template <typename MultiValueType, typename RawMultiValueType> +CopyMultiValueReadView<MultiValueType, RawMultiValueType>::~CopyMultiValueReadView() = default; + +template <typename MultiValueType, typename RawMultiValueType> +vespalib::ConstArrayRef<MultiValueType> +CopyMultiValueReadView<MultiValueType, RawMultiValueType>::get_values(uint32_t docid) const +{ + auto raw = _mv_mapping_read_view.get(docid); + if (_copy.size() < raw.size()) { + _copy.resize(raw.size()); + } + auto dst = _copy.data(); + for (auto &src : raw) { + ValueType v = multivalue::get_value_ref(src); + *dst = multivalue::ValueBuilder<MultiValueType>::build(v, multivalue::get_weight(src)); + ++dst; + } + return vespalib::ConstArrayRef(_copy.data(), raw.size()); +} + +using multivalue::WeightedValue; + +template class CopyMultiValueReadView<int8_t, WeightedValue<int8_t>>; +template class CopyMultiValueReadView<int16_t, WeightedValue<int16_t>>; +template class CopyMultiValueReadView<int32_t, WeightedValue<int32_t>>; +template class CopyMultiValueReadView<int64_t, WeightedValue<int64_t>>; +template class CopyMultiValueReadView<float, WeightedValue<float>>; +template class CopyMultiValueReadView<double, WeightedValue<double>>; +template class 
CopyMultiValueReadView<AtomicEntryRef, WeightedValue<AtomicEntryRef>>; + +template class CopyMultiValueReadView<WeightedValue<int8_t>, int8_t>; +template class CopyMultiValueReadView<WeightedValue<int16_t>, int16_t>; +template class CopyMultiValueReadView<WeightedValue<int32_t>, int32_t>; +template class CopyMultiValueReadView<WeightedValue<int64_t>, int64_t>; +template class CopyMultiValueReadView<WeightedValue<float>, float>; +template class CopyMultiValueReadView<WeightedValue<double>, double>; +template class CopyMultiValueReadView<WeightedValue<AtomicEntryRef>, AtomicEntryRef>; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/copy_multi_value_read_view.h b/searchlib/src/vespa/searchlib/attribute/copy_multi_value_read_view.h new file mode 100644 index 00000000000..e8786357738 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/copy_multi_value_read_view.h @@ -0,0 +1,31 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "multi_value_mapping_read_view.h" +#include "enumstore.h" +#include <vespa/searchcommon/attribute/i_multi_value_read_view.h> +#include <vespa/searchcommon/attribute/multi_value_traits.h> + +namespace search::attribute { + +/** + * Read view for the data stored in a multi-value attribute that handles + * addition and removal of weight. + * @tparam MultiValueType The multi-value type of the data to access. + * @tparam RawMultiValueType The multi-value type of the raw data to access. 
+ */ +template <typename MultiValueType, typename RawMultiValueType> +class CopyMultiValueReadView : public IMultiValueReadView<MultiValueType> +{ + static_assert(std::is_same_v<multivalue::ValueType_t<MultiValueType>, multivalue::ValueType_t<RawMultiValueType>>); + using ValueType = multivalue::ValueType_t<MultiValueType>; + MultiValueMappingReadView<RawMultiValueType> _mv_mapping_read_view; + mutable std::vector<MultiValueType> _copy; +public: + CopyMultiValueReadView(MultiValueMappingReadView<RawMultiValueType> mv_mapping_read_view); + ~CopyMultiValueReadView() override; + vespalib::ConstArrayRef<MultiValueType> get_values(uint32_t docid) const override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/enumerated_multi_value_read_view.cpp b/searchlib/src/vespa/searchlib/attribute/enumerated_multi_value_read_view.cpp new file mode 100644 index 00000000000..b0459d83ecf --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enumerated_multi_value_read_view.cpp @@ -0,0 +1,73 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "enumerated_multi_value_read_view.h" + +using vespalib::datastore::AtomicEntryRef; + +namespace search::attribute { + +template <typename MultiValueType, typename RawMultiValueType, typename EnumEntryType> +EnumeratedMultiValueReadView<MultiValueType, RawMultiValueType, EnumEntryType>::EnumeratedMultiValueReadView(MultiValueMappingReadView<RawMultiValueType> mv_mapping_read_view, const EnumStoreT<EnumEntryType>& enum_store) + : _mv_mapping_read_view(mv_mapping_read_view), + _enum_store(enum_store), + _copy() +{ +} + +template <typename MultiValueType, typename RawMultiValueType, typename EnumEntryType> +EnumeratedMultiValueReadView<MultiValueType, RawMultiValueType, EnumEntryType>::~EnumeratedMultiValueReadView() = default; + +template <typename MultiValueType, typename RawMultiValueType, typename EnumEntryType> +vespalib::ConstArrayRef<MultiValueType> +EnumeratedMultiValueReadView<MultiValueType, RawMultiValueType, EnumEntryType>::get_values(uint32_t docid) const +{ + auto raw = _mv_mapping_read_view.get(docid); + if (_copy.size() < raw.size()) { + _copy.resize(raw.size()); + } + auto dst = _copy.data(); + for (auto &src : raw) { + EnumEntryType v = _enum_store.get_value(multivalue::get_value_ref(src).load_acquire()); + *dst = multivalue::ValueBuilder<MultiValueType>::build(v, multivalue::get_weight(src)); + ++dst; + } + return vespalib::ConstArrayRef(_copy.data(), raw.size()); +} + +using multivalue::WeightedValue; + +using WeightedAtomicEntryRef = WeightedValue<AtomicEntryRef>; + +template class EnumeratedMultiValueReadView<int8_t, AtomicEntryRef>; +template class EnumeratedMultiValueReadView<int16_t, AtomicEntryRef>; +template class EnumeratedMultiValueReadView<int32_t, AtomicEntryRef>; +template class EnumeratedMultiValueReadView<int64_t, AtomicEntryRef>; +template class EnumeratedMultiValueReadView<float, AtomicEntryRef>; +template class EnumeratedMultiValueReadView<double, AtomicEntryRef>; +template class EnumeratedMultiValueReadView<const char*, 
AtomicEntryRef>; + +template class EnumeratedMultiValueReadView<int8_t, WeightedAtomicEntryRef>; +template class EnumeratedMultiValueReadView<int16_t, WeightedAtomicEntryRef>; +template class EnumeratedMultiValueReadView<int32_t, WeightedAtomicEntryRef>; +template class EnumeratedMultiValueReadView<int64_t, WeightedAtomicEntryRef>; +template class EnumeratedMultiValueReadView<float, WeightedAtomicEntryRef>; +template class EnumeratedMultiValueReadView<double, WeightedAtomicEntryRef>; +template class EnumeratedMultiValueReadView<const char*, WeightedAtomicEntryRef>; + +template class EnumeratedMultiValueReadView<WeightedValue<int8_t>, WeightedAtomicEntryRef>; +template class EnumeratedMultiValueReadView<WeightedValue<int16_t>, WeightedAtomicEntryRef>; +template class EnumeratedMultiValueReadView<WeightedValue<int32_t>, WeightedAtomicEntryRef>; +template class EnumeratedMultiValueReadView<WeightedValue<int64_t>, WeightedAtomicEntryRef>; +template class EnumeratedMultiValueReadView<WeightedValue<float>, WeightedAtomicEntryRef>; +template class EnumeratedMultiValueReadView<WeightedValue<double>, WeightedAtomicEntryRef>; +template class EnumeratedMultiValueReadView<WeightedValue<const char*>, WeightedAtomicEntryRef>; + +template class EnumeratedMultiValueReadView<WeightedValue<int8_t>, AtomicEntryRef>; +template class EnumeratedMultiValueReadView<WeightedValue<int16_t>, AtomicEntryRef>; +template class EnumeratedMultiValueReadView<WeightedValue<int32_t>, AtomicEntryRef>; +template class EnumeratedMultiValueReadView<WeightedValue<int64_t>, AtomicEntryRef>; +template class EnumeratedMultiValueReadView<WeightedValue<float>, AtomicEntryRef>; +template class EnumeratedMultiValueReadView<WeightedValue<double>, AtomicEntryRef>; +template class EnumeratedMultiValueReadView<WeightedValue<const char*>, AtomicEntryRef>; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/enumerated_multi_value_read_view.h 
b/searchlib/src/vespa/searchlib/attribute/enumerated_multi_value_read_view.h new file mode 100644 index 00000000000..77c279700c9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enumerated_multi_value_read_view.h @@ -0,0 +1,32 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "multi_value_mapping_read_view.h" +#include "enumstore.h" +#include <vespa/searchcommon/attribute/i_multi_value_read_view.h> +#include <vespa/searchcommon/attribute/multi_value_traits.h> + +namespace search::attribute { + +/** + * Read view for the data stored in a multi-value attribute that handles + * mapping from enumerated value to value. + * @tparam MultiValueType The multi-value type of the data to access. + * @tparam RawMultiValueType The multi-value type of the raw data to access. + * @tparam EnumEntryType The enum store entry type. + */ +template <typename MultiValueType, typename RawMultiValueType, typename EnumEntryType = multivalue::ValueType_t<MultiValueType>> +class EnumeratedMultiValueReadView : public IMultiValueReadView<MultiValueType> +{ + using AtomicEntryRef = vespalib::datastore::AtomicEntryRef; + MultiValueMappingReadView<RawMultiValueType> _mv_mapping_read_view; + const EnumStoreT<EnumEntryType>& _enum_store; + mutable std::vector<MultiValueType> _copy; +public: + EnumeratedMultiValueReadView(MultiValueMappingReadView<RawMultiValueType> mv_mapping_read_view, const EnumStoreT<EnumEntryType>& enum_store); + ~EnumeratedMultiValueReadView() override; + vespalib::ConstArrayRef<MultiValueType> get_values(uint32_t docid) const override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h index 1786429af8e..9f8506d3cb4 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h @@ 
-106,6 +106,10 @@ public: return getWeightedHelper<WeightedFloat, double>(doc, v, sz); } + // Implements attribute::IMultiValueAttribute + const attribute::IMultiValueReadView<T>* make_read_view(attribute::IMultiValueAttribute::Tag<T>, vespalib::Stash& stash) const override; + const attribute::IMultiValueReadView<multivalue::WeightedValue<T>>* make_read_view(attribute::IMultiValueAttribute::Tag<multivalue::WeightedValue<T>>, vespalib::Stash& stash) const override; + private: using AttributeReader = PrimitiveReader<typename B::LoadedValueType>; void loadAllAtOnce(AttributeReader & attrReader, size_t numDocs, size_t numValues); diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp index 3323440dd0d..81f8c1c910e 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp @@ -4,10 +4,12 @@ #include "load_utils.h" #include "loadednumericvalue.h" +#include "enumerated_multi_value_read_view.h" #include "multinumericenumattribute.h" #include "multi_numeric_enum_search_context.h" #include <vespa/searchlib/query/query_term_simple.h> #include <vespa/searchlib/util/fileutil.hpp> +#include <vespa/vespalib/util/stash.h> namespace search { @@ -114,6 +116,20 @@ MultiValueNumericEnumAttribute<B, M>::onLoad(vespalib::Executor *) } template <typename B, typename M> +const attribute::IMultiValueReadView<typename B::BaseClass::BaseType>* +MultiValueNumericEnumAttribute<B, M>::make_read_view(attribute::IMultiValueAttribute::Tag<typename B::BaseClass::BaseType>, vespalib::Stash& stash) const +{ + return &stash.create<attribute::EnumeratedMultiValueReadView<T, M>>(this->_mvMapping.make_read_view(this->getCommittedDocIdLimit()), this->_enumStore); +} + +template <typename B, typename M> +const attribute::IMultiValueReadView<multivalue::WeightedValue<typename B::BaseClass::BaseType>>* 
+MultiValueNumericEnumAttribute<B, M>::make_read_view(attribute::IMultiValueAttribute::Tag<multivalue::WeightedValue<typename B::BaseClass::BaseType>>, vespalib::Stash& stash) const +{ + return &stash.create<attribute::EnumeratedMultiValueReadView<multivalue::WeightedValue<T>, M>>(this->_mvMapping.make_read_view(this->getCommittedDocIdLimit()), this->_enumStore); +} + +template <typename B, typename M> std::unique_ptr<attribute::SearchContext> MultiValueNumericEnumAttribute<B, M>::getSearch(QueryTermSimple::UP qTerm, const attribute::SearchContextParams & params) const diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringattribute.h index 6f94843e1b8..532af930220 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.h @@ -105,6 +105,10 @@ public: std::unique_ptr<attribute::SearchContext> getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams & params) const override; + + // Implements attribute::IMultiValueAttribute + const attribute::IMultiValueReadView<const char*>* make_read_view(attribute::IMultiValueAttribute::Tag<const char*>, vespalib::Stash& stash) const override; + const attribute::IMultiValueReadView<multivalue::WeightedValue<const char*>>* make_read_view(attribute::IMultiValueAttribute::Tag<multivalue::WeightedValue<const char*>>, vespalib::Stash& stash) const override; }; diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp index 2d60887c23b..0edd459efc7 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp @@ -5,12 +5,14 @@ #include "stringattribute.h" #include "multistringattribute.h" #include "enumattribute.hpp" +#include "enumerated_multi_value_read_view.h" #include 
"multienumattribute.hpp" #include "multi_string_enum_hint_search_context.h" #include <vespa/vespalib/text/utf8.h> #include <vespa/vespalib/text/lowercase.h> #include <vespa/searchlib/util/bufferwriter.h> #include <vespa/vespalib/util/regexp.h> +#include <vespa/vespalib/util/stash.h> #include <vespa/searchlib/query/query_term_ucs4.h> namespace search { @@ -47,5 +49,19 @@ MultiValueStringAttributeT<B, M>::getSearch(QueryTermSimpleUP qTerm, return std::make_unique<attribute::MultiStringEnumHintSearchContext<M>>(std::move(qTerm), cased, *this, this->_mvMapping.make_read_view(doc_id_limit), this->_enumStore, doc_id_limit, this->getStatus().getNumValues()); } +template <typename B, typename M> +const attribute::IMultiValueReadView<const char*>* +MultiValueStringAttributeT<B, M>::make_read_view(attribute::IMultiValueAttribute::Tag<const char*>, vespalib::Stash& stash) const +{ + return &stash.create<attribute::EnumeratedMultiValueReadView<const char*, M>>(this->_mvMapping.make_read_view(this->getCommittedDocIdLimit()), this->_enumStore); +} + +template <typename B, typename M> +const attribute::IMultiValueReadView<multivalue::WeightedValue<const char*>>* +MultiValueStringAttributeT<B, M>::make_read_view(attribute::IMultiValueAttribute::Tag<multivalue::WeightedValue<const char*>>, vespalib::Stash& stash) const +{ + return &stash.create<attribute::EnumeratedMultiValueReadView<multivalue::WeightedValue<const char*>, M>>(this->_mvMapping.make_read_view(this->getCommittedDocIdLimit()), this->_enumStore); +} + } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/multivalueattribute.h b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.h index 3844c0f9b02..5a2ee5c80d9 100644 --- a/searchlib/src/vespa/searchlib/attribute/multivalueattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.h @@ -82,7 +82,8 @@ public: const IMultiValueAttribute* as_multi_value_attribute() const override; // Implements 
attribute::IMultiValueAttribute - const attribute::IMultiValueReadView<MultiValueType>* make_read_view(attribute::IMultiValueAttribute::Tag<MultiValueType>, vespalib::Stash& stash) const override; + const attribute::IMultiValueReadView<ValueType>* make_read_view(attribute::IMultiValueAttribute::Tag<ValueType>, vespalib::Stash& stash) const override; + const attribute::IMultiValueReadView<multivalue::WeightedValue<ValueType>>* make_read_view(attribute::IMultiValueAttribute::Tag<multivalue::WeightedValue<ValueType>>, vespalib::Stash& stash) const override; }; } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp index 04194f662d9..4e0e460da9c 100644 --- a/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp @@ -4,6 +4,7 @@ #include "address_space_components.h" #include "raw_multi_value_read_view.h" +#include "copy_multi_value_read_view.h" #include <vespa/searchlib/attribute/multivalueattribute.h> #include <vespa/vespalib/stllike/hash_map.h> #include <vespa/vespalib/stllike/hash_map.hpp> @@ -299,10 +300,25 @@ MultiValueAttribute<B, M>::as_multi_value_attribute() const } template <typename B, typename M> -const attribute::IMultiValueReadView<M>* -MultiValueAttribute<B, M>::make_read_view(attribute::IMultiValueAttribute::Tag<MultiValueType>, vespalib::Stash& stash) const +const attribute::IMultiValueReadView<multivalue::ValueType_t<M>>* +MultiValueAttribute<B, M>::make_read_view(attribute::IMultiValueAttribute::Tag<ValueType>, vespalib::Stash& stash) const { - return &stash.create<attribute::RawMultiValueReadView<MultiValueType>>(this->_mvMapping.make_read_view(this->getCommittedDocIdLimit())); + if constexpr (std::is_same_v<MultiValueType, ValueType>) { + return 
&stash.create<attribute::RawMultiValueReadView<MultiValueType>>(this->_mvMapping.make_read_view(this->getCommittedDocIdLimit())); + } else { + return &stash.create<attribute::CopyMultiValueReadView<ValueType, MultiValueType>>(this->_mvMapping.make_read_view(this->getCommittedDocIdLimit())); + } +} + +template <typename B, typename M> +const attribute::IMultiValueReadView<multivalue::WeightedValue<multivalue::ValueType_t<M>>>* +MultiValueAttribute<B, M>::make_read_view(attribute::IMultiValueAttribute::Tag<multivalue::WeightedValue<ValueType>>, vespalib::Stash& stash) const +{ + if constexpr (std::is_same_v<MultiValueType, multivalue::WeightedValue<ValueType>>) { + return &stash.create<attribute::RawMultiValueReadView<MultiValueType>>(this->_mvMapping.make_read_view(this->getCommittedDocIdLimit())); + } else { + return &stash.create<attribute::CopyMultiValueReadView<multivalue::WeightedValue<ValueType>, MultiValueType>>(this->_mvMapping.make_read_view(this->getCommittedDocIdLimit())); + } } } // namespace search |