diff options
5 files changed, 134 insertions, 38 deletions
diff --git a/document/src/vespa/document/fieldvalue/mapfieldvalue.h b/document/src/vespa/document/fieldvalue/mapfieldvalue.h index d761a37612e..bed4275d61f 100644 --- a/document/src/vespa/document/fieldvalue/mapfieldvalue.h +++ b/document/src/vespa/document/fieldvalue/mapfieldvalue.h @@ -72,8 +72,8 @@ public: mutable pair _current; }; class const_iterator { - typedef std::pair<const FieldValue *, const FieldValue *> pair; public: + typedef std::pair<const FieldValue *, const FieldValue *> pair; const_iterator(const MapFieldValue & map, size_t index) : _map(&map), _index(index) { } bool operator == (const const_iterator & rhs) const { return _map == rhs._map && _index == rhs._index; } bool operator != (const const_iterator & rhs) const { return _map != rhs._map || _index != rhs._index; } @@ -147,6 +147,20 @@ public: const_iterator find(const FieldValue& fv) const; iterator find(const FieldValue& fv); + bool has_no_erased_keys() const { + return (_keys->size() == _count) && (_values->size() == _count); + } + + /** + * Returns the key-value pair at the given position in the underlying arrays. + * + * Note: Should only be used when has_no_erased_keys() returns true. + * Otherwise you might access elements that are conceptually removed. + */ + const_iterator::pair operator[](size_t idx) const { + return const_iterator::pair(&(*_keys)[idx], &(*_values)[idx]); + } + FieldValue::UP createValue() const; DECLARE_IDENTIFIABLE_ABSTRACT(MapFieldValue); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp index 812c19bc3f2..443634b3e3f 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp @@ -26,6 +26,12 @@ using vespalib::slime::inject; namespace search::docsummary { +const std::vector<uint32_t>& +MatchedElementsFilterDFW::get_matching_elements(uint32_t docid, GetDocsumsState& state) const +{ + return state.get_matching_elements(*_struct_field_mapper).get_matching_elements(docid, _input_field_name); +} + MatchedElementsFilterDFW::MatchedElementsFilterDFW(const std::string& input_field_name, uint32_t input_field_enum, std::shared_ptr<StructFieldMapper> struct_field_mapper) : _input_field_name(input_field_name), @@ -68,33 +74,30 @@ decode_input_field_to_slime(const ResEntry& entry, search::RawBuf& target_buf, S } void -convert_input_field_to_slime(const FieldValue& input_field_value, Slime& input_field_as_slime) +filter_matching_elements_in_input_field_while_converting_to_slime(const FieldValue& input_field_value, + const std::vector<uint32_t>& matching_elems, + vespalib::slime::Inserter& target) { - // This is the same conversion that happens in proton::DocumentStoreAdapter. - auto converted = SummaryFieldConverter::convertSummaryField(false, input_field_value); + // This is a similar conversion that happens in proton::DocumentStoreAdapter. + // Only difference is that we filter matched elements on the fly. + auto converted = SummaryFieldConverter::convert_field_with_filter(false, input_field_value, matching_elems); // This should hold as we also have asserted that (type == ResType::RES_JSONSTRING); assert(converted->getClass().inherits(LiteralFieldValueB::classId)); auto& literal = static_cast<const LiteralFieldValueB&>(*converted); vespalib::stringref buf = literal.getValueRef(); + Slime input_field_as_slime; BinaryFormat::decode(vespalib::Memory(buf.data(), buf.size()), input_field_as_slime); + inject(input_field_as_slime.get(), target); } bool resolve_input_field_as_slime(GeneralResult& result, GetDocsumsState& state, - int entry_idx, const vespalib::string& input_field_name, - Slime& input_field_as_slime) + int entry_idx, Slime& input_field_as_slime) { ResEntry* entry = result.GetEntry(entry_idx); if (entry != nullptr) { decode_input_field_to_slime(*entry, state._docSumFieldSpace, input_field_as_slime); return true; - } else { - // Use the document instance if the input field is not in the docsum blob. - auto field_value = result.get_field_value(input_field_name); - if (field_value) { - convert_input_field_to_slime(*field_value, input_field_as_slime); - return true; - } } return false; } @@ -124,12 +127,16 @@ MatchedElementsFilterDFW::insertField(uint32_t docid, GeneralResult* result, Get assert(type == ResType::RES_JSONSTRING); int entry_idx = result->GetClass()->GetIndexFromEnumValue(_input_field_enum); Slime input_field; - if (resolve_input_field_as_slime(*result, *state, entry_idx, _input_field_name, input_field)) { + if (resolve_input_field_as_slime(*result, *state, entry_idx, input_field)) { Slime output_field; - filter_matching_elements_in_input_field(input_field, - state->get_matching_elements(*_struct_field_mapper) - .get_matching_elements(docid, _input_field_name), output_field); + filter_matching_elements_in_input_field(input_field, get_matching_elements(docid, *state), output_field); inject(output_field.get(), target); + } else { + // Use the document instance if the input field is not in the docsum blob. + auto field_value = result->get_field_value(_input_field_name); + if (field_value) { + filter_matching_elements_in_input_field_while_converting_to_slime(*field_value, get_matching_elements(docid, *state), target); + } } } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h index d232ad7ae0c..966b5b95fa7 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h @@ -18,6 +18,8 @@ private: uint32_t _input_field_enum; std::shared_ptr<StructFieldMapper> _struct_field_mapper; + const std::vector<uint32_t>& get_matching_elements(uint32_t docid, GetDocsumsState& state) const; + public: MatchedElementsFilterDFW(const std::string& input_field_name, uint32_t input_field_enum, std::shared_ptr<StructFieldMapper> struct_field_mapper); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp index 1cac789ad21..7368a199569 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp @@ -331,9 +331,44 @@ SummaryFieldValueConverter::~SummaryFieldValueConverter() = default; using namespace vespalib::slime::convenience; + + class SlimeFiller : public ConstFieldValueVisitor { +private: + class MapFieldValueInserter { + private: + Cursor& _array; + Symbol _key_sym; + Symbol _val_sym; + bool _tokenize; + + public: + MapFieldValueInserter(Inserter& parent_inserter, bool tokenize) + : _array(parent_inserter.insertArray()), + _key_sym(_array.resolve("key")), + _val_sym(_array.resolve("value")), + _tokenize(tokenize) + { + } + void insert_entry(const FieldValue& key, const FieldValue& value) { + Cursor& c = _array.addObject(); + ObjectSymbolInserter ki(c, _key_sym); + ObjectSymbolInserter vi(c, _val_sym); + SlimeFiller key_conv(ki, _tokenize); + SlimeFiller val_conv(vi, _tokenize); + + key.accept(key_conv); + value.accept(val_conv); + } + }; + Inserter &_inserter; bool _tokenize; + const std::vector<uint32_t>* _matching_elems; + + bool filter_matching_elements() const { + return _matching_elems != nullptr; + } void visit(const AnnotationReferenceFieldValue & v ) override { (void)v; @@ -351,20 +386,17 @@ class SlimeFiller : public ConstFieldValueVisitor { } void visit(const MapFieldValue & v) override { - Cursor &a = _inserter.insertArray(); - Symbol keysym = a.resolve("key"); - Symbol valsym = a.resolve("value"); - for (const auto & entry : v) { - Cursor &c = a.addObject(); - ObjectSymbolInserter ki(c, keysym); - ObjectSymbolInserter vi(c, valsym); - SlimeFiller keyConv(ki, _tokenize); - SlimeFiller valConv(vi, _tokenize); - - const FieldValue &key = *(entry.first); - key.accept(keyConv); - const FieldValue &val = *(entry.second); - val.accept(valConv); + MapFieldValueInserter map_inserter(_inserter, _tokenize); + if (filter_matching_elements()) { + assert(v.has_no_erased_keys()); + for (uint32_t id_to_keep : (*_matching_elems)) { + auto entry = v[id_to_keep]; + map_inserter.insert_entry(*entry.first, *entry.second); + } + } else { + for (const auto &entry : v) { + map_inserter.insert_entry(*entry.first, *entry.second); + } } } @@ -373,8 +405,14 @@ class SlimeFiller : public ConstFieldValueVisitor { if (value.size() > 0) { ArrayInserter ai(a); SlimeFiller conv(ai, _tokenize); - for (const FieldValue &fv : value) { - fv.accept(conv); + if (filter_matching_elements()) { + for (uint32_t id_to_keep : (*_matching_elems)) { + value[id_to_keep].accept(conv); + } + } else { + for (const FieldValue &fv : value) { + fv.accept(conv); + } } } } @@ -481,20 +519,38 @@ class SlimeFiller : public ConstFieldValueVisitor { public: SlimeFiller(Inserter &inserter, bool tokenize) - : _inserter(inserter), _tokenize(tokenize) {} + : _inserter(inserter), + _tokenize(tokenize), + _matching_elems() + {} + + SlimeFiller(Inserter& inserter, bool tokenize, const std::vector<uint32_t>* matching_elems) + : _inserter(inserter), + _tokenize(tokenize), + _matching_elems(matching_elems) + {} }; class SlimeConverter : public FieldValueConverter { +private: bool _tokenize; + const std::vector<uint32_t>* _matching_elems; + public: SlimeConverter(bool tokenize) - : _tokenize(tokenize) + : _tokenize(tokenize), + _matching_elems() + {} + + SlimeConverter(bool tokenize, const std::vector<uint32_t>& matching_elems) + : _tokenize(tokenize), + _matching_elems(&matching_elems) {} FieldValue::UP convert(const FieldValue &input) override { vespalib::Slime slime; SlimeInserter inserter(slime); - SlimeFiller visitor(inserter, _tokenize); + SlimeFiller visitor(inserter, _tokenize, _matching_elems); input.accept(visitor); search::RawBuf rbuf(4096); search::SlimeOutputRawBufAdapter adapter(rbuf); @@ -514,5 +570,14 @@ SummaryFieldConverter::convertSummaryField(bool markup, return SummaryFieldValueConverter(markup, subConv).convert(value); } +FieldValue::UP +SummaryFieldConverter::convert_field_with_filter(bool markup, + const document::FieldValue& value, + const std::vector<uint32_t>& matching_elems) +{ + SlimeConverter sub_conv(markup, matching_elems); + return SummaryFieldValueConverter(markup, sub_conv).convert(value); +} + } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h index 96db6026e88..840de9073c9 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h @@ -12,8 +12,16 @@ namespace search::docsummary { class SummaryFieldConverter { public: - static document::FieldValue::UP - convertSummaryField(bool markup, const document::FieldValue &value); + static document::FieldValue::UP convertSummaryField(bool markup, const document::FieldValue &value); + + /** + * Converts the given field value to slime, only keeping the elements that are contained in the matching elements vector. + * + * Filtering occurs when the field value is an ArrayFieldValue or MapFieldValue. + */ + static document::FieldValue::UP convert_field_with_filter(bool markup, + const document::FieldValue& value, + const std::vector<uint32_t>& matching_elems); }; } |