From 14d0fb746f9cf23baf941f93bc5366a637b3c877 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Tue, 26 Apr 2022 13:04:12 +0200 Subject: Use MultiValueReadView to read values in multivalue attribute dynamic field writer. --- .../docsummary/attributedfw/attributedfw_test.cpp | 3 + .../searchsummary/docsummary/attributedfw.cpp | 246 ++++++++++++++------- .../vespa/searchsummary/docsummary/docsumstate.h | 2 +- 3 files changed, 165 insertions(+), 86 deletions(-) (limited to 'searchsummary') diff --git a/searchsummary/src/tests/docsummary/attributedfw/attributedfw_test.cpp b/searchsummary/src/tests/docsummary/attributedfw/attributedfw_test.cpp index 9f6ca8f6b57..67d505582d8 100644 --- a/searchsummary/src/tests/docsummary/attributedfw/attributedfw_test.cpp +++ b/searchsummary/src/tests/docsummary/attributedfw/attributedfw_test.cpp @@ -59,6 +59,8 @@ public: } _writer = AttributeDFWFactory::create(_attrs.mgr(), field_name, filter_elements, _matching_elems_fields); _writer->setIndex(0); + EXPECT_TRUE(_writer->setFieldWriterStateIndex(0)); + _state._fieldWriterStates.resize(1); _field_name = field_name; _state._attributes.resize(1); _state._attributes[0] = _state._attrCtx->getAttribute(field_name); @@ -77,6 +79,7 @@ public: _callback.clear(); _callback.add_matching_elements(docid, _field_name, matching_elems); _state._matching_elements = std::unique_ptr(); + _state._fieldWriterStates[0] = nullptr; // Force new state to pick up changed matching elements expect_field(exp_slime_as_json, docid); } }; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp index 448feedac80..eb4f1a19e06 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp @@ -3,12 +3,12 @@ #include "attributedfw.h" #include "docsumstate.h" #include "docsumwriter.h" +#include "docsum_field_writer_state.h" #include #include -#include +#include +#include #include -#include -#include #include #include #include @@ -23,6 +23,8 @@ using namespace search; using search::attribute::BasicType; using search::attribute::IAttributeContext; using search::attribute::IAttributeVector; +using search::attribute::IMultiValueAttribute; +using search::attribute::IMultiValueReadView; using vespalib::Memory; using vespalib::slime::Cursor; using vespalib::slime::Inserter; @@ -140,145 +142,219 @@ SingleAttrDFW::insertField(uint32_t docid, GetDocsumsState * state, ResType type //----------------------------------------------------------------------------- -template -class MultiAttrDFW : public AttrDFW { -private: - bool _is_weighted_set; - bool _filter_elements; - std::shared_ptr _matching_elems_fields; +template +const IMultiValueReadView* +make_read_view(const IAttributeVector& attribute, vespalib::Stash& stash) +{ + auto multi_value_attribute = attribute.as_multi_value_attribute(); + if (multi_value_attribute != nullptr) { + return multi_value_attribute->make_read_view(IMultiValueAttribute::MultiValueTag(), stash); + } + return nullptr; +} +class EmptyWriterState : public DocsumFieldWriterState +{ public: - explicit MultiAttrDFW(const vespalib::string& attr_name, bool is_weighted_set, - bool filter_elements, std::shared_ptr matching_elems_fields) - : AttrDFW(attr_name), - _is_weighted_set(is_weighted_set), - _filter_elements(filter_elements), - _matching_elems_fields(std::move(matching_elems_fields)) - { - if (filter_elements && _matching_elems_fields) { - _matching_elems_fields->add_field(attr_name); - } - } - void insertField(uint32_t docid, GetDocsumsState* state, ResType type, Inserter& target) override; + EmptyWriterState() = default; + ~EmptyWriterState() = default; + void insertField(uint32_t, Inserter&) override { } }; -void -set(const vespalib::string & value, Symbol itemSymbol, Cursor & cursor) +template +class MultiAttrDFWState : public DocsumFieldWriterState { - cursor.setString(itemSymbol, value); -} + const vespalib::string& _field_name; + const IMultiValueReadView* _read_view; + const MatchingElements* _matching_elements; +public: + MultiAttrDFWState(const vespalib::string& field_name, const IAttributeVector& attr, vespalib::Stash& stash, const MatchingElements* matching_elements); + ~MultiAttrDFWState() override; + void insertField(uint32_t docid, Inserter& target) override; +}; -void -append(const IAttributeVector::WeightedString & element, Cursor& arr) -{ - arr.addString(element.getValue()); -} -void -set(int64_t value, Symbol itemSymbol, Cursor & cursor) +template +MultiAttrDFWState::MultiAttrDFWState(const vespalib::string& field_name, const IAttributeVector& attr, vespalib::Stash& stash, const MatchingElements* matching_elements) + : _field_name(field_name), + _read_view(make_read_view(attr, stash)), + _matching_elements(matching_elements) { - cursor.setLong(itemSymbol, value); } -void -append(const IAttributeVector::WeightedInt & element, Cursor& arr) -{ - arr.addLong(element.getValue()); -} +template +MultiAttrDFWState::~MultiAttrDFWState() = default; +template void -set(double value, Symbol itemSymbol, Cursor & cursor) +set_value(V value, Symbol item_symbol, Cursor& cursor) { - cursor.setDouble(itemSymbol, value); + if constexpr (std::is_same_v) { + cursor.setString(item_symbol, value); + } else if constexpr(std::is_floating_point_v) { + cursor.setDouble(item_symbol, value); + } else { + cursor.setLong(item_symbol, value); + } } +template void -append(const IAttributeVector::WeightedFloat & element, Cursor& arr) +append_value(V value, Cursor& arr) { - arr.addDouble(element.getValue()); + if constexpr (std::is_same_v) { + arr.addString(value); + } else if constexpr(std::is_floating_point_v) { + arr.addDouble(value); + } else { + arr.addLong(value); + } } Memory ITEM("item"); Memory WEIGHT("weight"); -template +template void -MultiAttrDFW::insertField(uint32_t docid, GetDocsumsState* state, ResType, Inserter& target) +MultiAttrDFWState::insertField(uint32_t docid, Inserter& target) { - const auto& attr = get_attribute(*state); - uint32_t entries = attr.getValueCount(docid); - if (entries == 0) { - return; // Don't insert empty fields + using ValueType = multivalue::ValueType_t; + if (!_read_view) { + return; } + auto elements = _read_view->get_values(docid); + if (elements.empty()) { + return; + } + Cursor &arr = target.insertArray(elements.size()); - std::vector elements(entries); - entries = std::min(entries, attr.get(docid, elements.data(), entries)); - Cursor &arr = target.insertArray(entries); - - if (_filter_elements) { - const auto& matching_elems = state->get_matching_elements(*_matching_elems_fields) - .get_matching_elements(docid, getAttributeName()); - if (!matching_elems.empty() && matching_elems.back() < entries) { - if (_is_weighted_set) { + if (_matching_elements) { + const auto& matching_elems = _matching_elements->get_matching_elements(docid, _field_name); + if (!matching_elems.empty() && matching_elems.back() < elements.size()) { + if constexpr (multivalue::is_WeightedValue_v) { Symbol itemSymbol = arr.resolve(ITEM); Symbol weightSymbol = arr.resolve(WEIGHT); for (uint32_t id_to_keep : matching_elems) { - const DataType & element = elements[id_to_keep]; + auto& element = elements[id_to_keep]; Cursor& elemC = arr.addObject(); - set(element.getValue(), itemSymbol, elemC); - elemC.setLong(weightSymbol, element.getWeight()); + set_value(element.value(), itemSymbol, elemC); + elemC.setLong(weightSymbol, element.weight()); } } else { for (uint32_t id_to_keep : matching_elems) { - append(elements[id_to_keep], arr); + append_value(elements[id_to_keep], arr); } } } } else { - if (_is_weighted_set) { + if constexpr (multivalue::is_WeightedValue_v) { Symbol itemSymbol = arr.resolve(ITEM); Symbol weightSymbol = arr.resolve(WEIGHT); for (const auto & element : elements) { Cursor& elemC = arr.addObject(); - set(element.getValue(), itemSymbol, elemC); - elemC.setLong(weightSymbol, element.getWeight()); + set_value(element.value(), itemSymbol, elemC); + elemC.setLong(weightSymbol, element.weight()); } } else { for (const auto & element : elements) { - append(element, arr); + append_value(element, arr); } } } } -std::unique_ptr -create_multi_writer(const IAttributeVector& attr, - bool filter_elements, - std::shared_ptr matching_elems_fields) +class MultiAttrDFW : public AttrDFW { +private: + bool _filter_elements; + uint32_t _state_index; // index into _fieldWriterStates in GetDocsumsState + std::shared_ptr _matching_elems_fields; + +public: + explicit MultiAttrDFW(const vespalib::string& attr_name, bool filter_elements, std::shared_ptr matching_elems_fields) + : AttrDFW(attr_name), + _filter_elements(filter_elements), + _matching_elems_fields(std::move(matching_elems_fields)) + { + if (filter_elements && _matching_elems_fields) { + _matching_elems_fields->add_field(attr_name); + } + } + bool setFieldWriterStateIndex(uint32_t fieldWriterStateIndex) override; + void insertField(uint32_t docid, GetDocsumsState* state, ResType type, Inserter& target) override; +}; + +bool +MultiAttrDFW::setFieldWriterStateIndex(uint32_t fieldWriterStateIndex) +{ + _state_index = fieldWriterStateIndex; + return true; +} + +template +DocsumFieldWriterState* +make_field_writer_state_helper(const vespalib::string& field_name, const IAttributeVector& attr, vespalib::Stash& stash, const MatchingElements* matching_elements) { - auto type = attr.getBasicType(); bool is_weighted_set = attr.hasWeightedSetType(); + if (is_weighted_set) { + return &stash.create>>(field_name, attr, stash, matching_elements); + } else { + return &stash.create>(field_name, attr, stash, matching_elements); + } +} + +DocsumFieldWriterState* +make_field_writer_state(const vespalib::string& field_name, const IAttributeVector& attr, vespalib::Stash& stash, const MatchingElements* matching_elements) +{ + auto type = attr.getBasicType(); switch (type) { - case BasicType::NONE: - case BasicType::STRING: { - return std::make_unique>(attr.getName(), is_weighted_set, - filter_elements, std::move(matching_elems_fields)); + case BasicType::Type::STRING: + return make_field_writer_state_helper(field_name, attr, stash, matching_elements); + case BasicType::Type::INT8: + return make_field_writer_state_helper(field_name, attr, stash, matching_elements); + case BasicType::Type::INT16: + return make_field_writer_state_helper(field_name, attr, stash, matching_elements); + case BasicType::Type::INT32: + return make_field_writer_state_helper(field_name, attr, stash, matching_elements); + case BasicType::Type::INT64: + return make_field_writer_state_helper(field_name, attr, stash, matching_elements); + case BasicType::Type::FLOAT: + return make_field_writer_state_helper(field_name, attr, stash, matching_elements); + case BasicType::Type::DOUBLE: + return make_field_writer_state_helper(field_name, attr, stash, matching_elements); + default: + ; + } + return &stash.create(); +} + +void +MultiAttrDFW::insertField(uint32_t docid, GetDocsumsState *state, ResType, vespalib::slime::Inserter &target) +{ + auto& field_writer_state = state->_fieldWriterStates[_state_index]; + if (!field_writer_state) { + const MatchingElements *matching_elements = nullptr; + if (_filter_elements) { + matching_elements = &state->get_matching_elements(*_matching_elems_fields); + } + const auto& attr = get_attribute(*state); + field_writer_state = make_field_writer_state(getAttributeName(), attr, state->get_stash(), matching_elements); } - case BasicType::BOOL: - case BasicType::UINT2: - case BasicType::UINT4: + field_writer_state->insertField(docid, target); +} + +std::unique_ptr +create_multi_writer(const IAttributeVector& attr, bool filter_elements, std::shared_ptr matching_elems_fields) +{ + auto type = attr.getBasicType(); + switch (type) { + case BasicType::STRING: case BasicType::INT8: case BasicType::INT16: case BasicType::INT32: - case BasicType::INT64: { - return std::make_unique>(attr.getName(), is_weighted_set, - filter_elements, std::move(matching_elems_fields)); - } + case BasicType::INT64: case BasicType::FLOAT: - case BasicType::DOUBLE: { - return std::make_unique>(attr.getName(), is_weighted_set, - filter_elements, std::move(matching_elems_fields)); - } + case BasicType::DOUBLE: + return std::make_unique(attr.getName(), filter_elements, std::move(matching_elems_fields)); default: // should not happen LOG(error, "Bad value for attribute type: %u", type); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.h b/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.h index 1127af6d6bd..c25aca15200 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.h @@ -92,7 +92,7 @@ public: // used by RankFeaturesDFW FeatureSet::SP _rankFeatures; - // Used by AttributeCombinerDFW when filtering is enabled + // Used by AttributeCombinerDFW and MultiAttrDFW when filtering is enabled std::unique_ptr _matching_elements; GetDocsumsState(const GetDocsumsState &) = delete; -- cgit v1.2.3