From 7505a365c8b9c5ce02012aecd613f51a51a273ce Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Tue, 3 Dec 2019 08:56:10 +0000 Subject: Extend MatchedElementsFilterDFW to get the input field from the document instance if it is not in the docsum blob. --- .../matched_elements_filter_test.cpp | 36 +++++++++++++-- .../searchsummary/docsummary/general_result.cpp | 13 +++++- .../searchsummary/docsummary/general_result.h | 8 ++++ .../docsummary/matched_elements_filter_dfw.cpp | 53 ++++++++++++++++++---- 4 files changed, 97 insertions(+), 13 deletions(-) (limited to 'searchsummary') diff --git a/searchsummary/src/tests/docsummary/matched_elements_filter/matched_elements_filter_test.cpp b/searchsummary/src/tests/docsummary/matched_elements_filter/matched_elements_filter_test.cpp index bbcab709e42..9019a212f3f 100644 --- a/searchsummary/src/tests/docsummary/matched_elements_filter/matched_elements_filter_test.cpp +++ b/searchsummary/src/tests/docsummary/matched_elements_filter/matched_elements_filter_test.cpp @@ -67,6 +67,7 @@ class DocsumStore { private: ResultConfig _config; ResultPacker _packer; + DocumentType _doc_type; StructDataType::UP _elem_type; ArrayDataType _array_type; MapDataType _map_type; @@ -91,10 +92,14 @@ public: DocsumStore() : _config(), _packer(&_config), + _doc_type("test"), _elem_type(make_struct_elem_type()), _array_type(*_elem_type), _map_type(*DataType::STRING, *_elem_type) { + _doc_type.addField(Field("array_in_doc", _array_type, true)); + _doc_type.addField(Field("map_in_doc", _map_type, true)); + auto* result_class = _config.AddResultClass("test", class_id); EXPECT_TRUE(result_class->AddConfigEntry("array", ResType::RES_JSONSTRING)); EXPECT_TRUE(result_class->AddConfigEntry("map", ResType::RES_JSONSTRING)); @@ -106,12 +111,14 @@ public: const ResultClass* get_class() const { return _config.LookupResultClass(class_id); } search::docsummary::DocsumStoreValue getMappedDocsum() { assert(_packer.Init(class_id)); + auto doc = std::make_unique(_doc_type, DocumentId("id:test:test::0")); { ArrayFieldValue array_value(_array_type); array_value.append(make_elem_value("a", 3)); array_value.append(make_elem_value("b", 5)); array_value.append(make_elem_value("c", 7)); write_field_value(array_value); + doc->setValue("array_in_doc", array_value); } { MapFieldValue map_value(_map_type); @@ -119,6 +126,7 @@ public: map_value.put(StringFieldValue("b"), *make_elem_value("b", 5)); map_value.put(StringFieldValue("c"), *make_elem_value("c", 7)); write_field_value(map_value); + doc->setValue("map_in_doc", map_value); } { MapFieldValue map2_value(_map_type); @@ -128,7 +136,7 @@ public: const char* buf; uint32_t buf_len; assert(_packer.GetDocsumBlob(&buf, &buf_len)); - return DocsumStoreValue(buf, buf_len); + return DocsumStoreValue(buf, buf_len, std::move(doc)); } }; @@ -186,7 +194,8 @@ private: Slime run_filter_field_writer(const std::string& input_field_name, const ElementVector& matching_elements) { auto writer = make_field_writer(input_field_name); GeneralResult result(_doc_store.get_class()); - result.inplaceUnpack(_doc_store.getMappedDocsum()); + auto docsum = _doc_store.getMappedDocsum(); + result.inplaceUnpack(docsum); StateCallback callback(input_field_name, matching_elements); GetDocsumsState state(callback); Slime slime; @@ -206,7 +215,6 @@ public: ~MatchedElementsFilterTest() {} std::unique_ptr make_field_writer(const std::string& input_field_name) { int input_field_enum = _doc_store.get_config().GetFieldNameEnum().Lookup(input_field_name.c_str()); - EXPECT_GE(input_field_enum, 0); return MatchedElementsFilterDFW::create(input_field_name, input_field_enum, _attr_ctx, _mapper); } @@ -229,6 +237,17 @@ TEST_F(MatchedElementsFilterTest, filters_elements_in_array_field_value) "{'name':'c','weight':7}]"); } +TEST_F(MatchedElementsFilterTest, filters_elements_in_array_field_value_when_input_field_is_not_in_docsum_blob) +{ + expect_filtered("array_in_doc", {}, "[]"); + expect_filtered("array_in_doc", {0}, "[{'name':'a','weight':3}]"); + expect_filtered("array_in_doc", {1}, "[{'name':'b','weight':5}]"); + expect_filtered("array_in_doc", {2}, "[{'name':'c','weight':7}]"); + expect_filtered("array_in_doc", {0, 1, 2}, "[{'name':'a','weight':3}," + "{'name':'b','weight':5}," + "{'name':'c','weight':7}]"); +} + TEST_F(MatchedElementsFilterTest, struct_field_mapper_is_setup_for_array_field_value) { auto writer = make_field_writer("array"); @@ -248,6 +267,17 @@ TEST_F(MatchedElementsFilterTest, filters_elements_in_map_field_value) "{'key':'c','value':{'name':'c','weight':7}}]"); } +TEST_F(MatchedElementsFilterTest, filters_elements_in_map_field_value_when_input_field_is_not_in_docsum_blob) +{ + expect_filtered("map_in_doc", {}, "[]"); + expect_filtered("map_in_doc", {0}, "[{'key':'a','value':{'name':'a','weight':3}}]"); + expect_filtered("map_in_doc", {1}, "[{'key':'b','value':{'name':'b','weight':5}}]"); + expect_filtered("map_in_doc", {2}, "[{'key':'c','value':{'name':'c','weight':7}}]"); + expect_filtered("map_in_doc", {0, 1, 2}, "[{'key':'a','value':{'name':'a','weight':3}}," + "{'key':'b','value':{'name':'b','weight':5}}," + "{'key':'c','value':{'name':'c','weight':7}}]"); +} + TEST_F(MatchedElementsFilterTest, struct_field_mapper_is_setup_for_map_field_value) { { diff --git a/searchsummary/src/vespa/searchsummary/docsummary/general_result.cpp b/searchsummary/src/vespa/searchsummary/docsummary/general_result.cpp index 8e4c6bbb4ce..a6a5263f5ac 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/general_result.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/general_result.cpp @@ -2,6 +2,7 @@ #include "general_result.h" #include "resultconfig.h" +#include #include #include @@ -59,7 +60,8 @@ GeneralResult::GeneralResult(const ResultClass *resClass) _entrycnt(0), _entries(nullptr), _buf(nullptr), - _bufEnd(nullptr) + _bufEnd(nullptr), + _document() { } @@ -90,6 +92,15 @@ GeneralResult::GetEntryFromEnumValue(uint32_t value) return (idx >= 0 && (uint32_t)idx < _entrycnt) ? &_entries[idx] : nullptr; } +std::unique_ptr +GeneralResult::get_field_value(const vespalib::string& field_name) const +{ + if (_document != nullptr) { + return _document->getValue(field_name); + } + return std::unique_ptr(); +} + bool GeneralResult::unpack(const char *buf, const size_t buflen) { diff --git a/searchsummary/src/vespa/searchsummary/docsummary/general_result.h b/searchsummary/src/vespa/searchsummary/docsummary/general_result.h index a4cdd1b7f69..4920985d9ae 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/general_result.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/general_result.h @@ -5,6 +5,11 @@ #include "resultclass.h" #include "docsumstorevalue.h" +namespace document { +class Document; +class FieldValue; +} + namespace search::docsummary { class GeneralResult @@ -18,6 +23,7 @@ private: ResEntry *_entries; char *_buf; // allocated in same chunk as _entries char *_bufEnd; // first byte after _buf + const document::Document* _document; bool InBuf(const void *pt) const { return ((const char *)pt >= _buf && @@ -35,10 +41,12 @@ public: ResEntry *GetEntry(uint32_t idx); ResEntry *GetEntry(const char *name); ResEntry *GetEntryFromEnumValue(uint32_t val); + std::unique_ptr get_field_value(const vespalib::string& field_name) const; bool unpack(const char *buf, const size_t buflen); bool inplaceUnpack(const DocsumStoreValue &value) { if (value.valid()) { + _document = value.get_document(); return unpack(value.fieldsPt(), value.fieldsSz()); } else { return false; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp index 69085258a43..812c19bc3f2 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp @@ -3,6 +3,9 @@ #include "docsumstate.h" #include "matched_elements_filter_dfw.h" #include "struct_fields_resolver.h" +#include "summaryfieldconverter.h" +#include +#include #include #include #include @@ -11,6 +14,8 @@ #include #include +using document::FieldValue; +using document::LiteralFieldValueB; using vespalib::Slime; using vespalib::slime::ArrayInserter; using vespalib::slime::BinaryFormat; @@ -54,12 +59,44 @@ MatchedElementsFilterDFW::~MatchedElementsFilterDFW() = default; namespace { void -decode_input_field(const ResEntry& entry, search::RawBuf& target_buf, Slime& input_field) +decode_input_field_to_slime(const ResEntry& entry, search::RawBuf& target_buf, Slime& input_field_as_slime) { const char* buf; uint32_t buf_len; entry._resolve_field(&buf, &buf_len, &target_buf); - BinaryFormat::decode(vespalib::Memory(buf, buf_len), input_field); + BinaryFormat::decode(vespalib::Memory(buf, buf_len), input_field_as_slime); +} + +void +convert_input_field_to_slime(const FieldValue& input_field_value, Slime& input_field_as_slime) +{ + // This is the same conversion that happens in proton::DocumentStoreAdapter. + auto converted = SummaryFieldConverter::convertSummaryField(false, input_field_value); + // This should hold as we also have asserted that (type == ResType::RES_JSONSTRING); + assert(converted->getClass().inherits(LiteralFieldValueB::classId)); + auto& literal = static_cast(*converted); + vespalib::stringref buf = literal.getValueRef(); + BinaryFormat::decode(vespalib::Memory(buf.data(), buf.size()), input_field_as_slime); +} + +bool +resolve_input_field_as_slime(GeneralResult& result, GetDocsumsState& state, + int entry_idx, const vespalib::string& input_field_name, + Slime& input_field_as_slime) +{ + ResEntry* entry = result.GetEntry(entry_idx); + if (entry != nullptr) { + decode_input_field_to_slime(*entry, state._docSumFieldSpace, input_field_as_slime); + return true; + } else { + // Use the document instance if the input field is not in the docsum blob. + auto field_value = result.get_field_value(input_field_name); + if (field_value) { + convert_input_field_to_slime(*field_value, input_field_as_slime); + return true; + } + } + return false; } void @@ -86,14 +123,12 @@ MatchedElementsFilterDFW::insertField(uint32_t docid, GeneralResult* result, Get { assert(type == ResType::RES_JSONSTRING); int entry_idx = result->GetClass()->GetIndexFromEnumValue(_input_field_enum); - ResEntry* entry = result->GetEntry(entry_idx); - if (entry != nullptr) { - Slime input_field; - decode_input_field(*entry, state->_docSumFieldSpace, input_field); - + Slime input_field; + if (resolve_input_field_as_slime(*result, *state, entry_idx, _input_field_name, input_field)) { Slime output_field; - filter_matching_elements_in_input_field(input_field, state->get_matching_elements(*_struct_field_mapper).get_matching_elements(docid, _input_field_name), output_field); - + filter_matching_elements_in_input_field(input_field, + state->get_matching_elements(*_struct_field_mapper) + .get_matching_elements(docid, _input_field_name), output_field); inject(output_field.get(), target); } } -- cgit v1.2.3