From 5364af49db3478c2852f3a12161250a099ee71db Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Fri, 16 Sep 2022 12:51:31 +0200 Subject: Add slime filler filter. --- .../docsummary/slime_filler/slime_filler_test.cpp | 77 ++++++++++++++++++---- .../vespa/searchsummary/docsummary/CMakeLists.txt | 1 + .../searchsummary/docsummary/slime_filler.cpp | 44 ++++++++----- .../vespa/searchsummary/docsummary/slime_filler.h | 4 +- .../docsummary/slime_filler_filter.cpp | 67 +++++++++++++++++++ .../searchsummary/docsummary/slime_filler_filter.h | 25 +++++++ .../docsummary/summaryfieldconverter.cpp | 2 +- 7 files changed, 189 insertions(+), 31 deletions(-) create mode 100644 searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.cpp create mode 100644 searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.h (limited to 'searchsummary') diff --git a/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp b/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp index 4ebdfb10cb7..6f3faeb69d5 100644 --- a/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp +++ b/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -88,6 +89,7 @@ using search::docsummary::IStringFieldConverter; using search::docsummary::DocsumFieldWriter; using search::docsummary::ResultConfig; using search::docsummary::SlimeFiller; +using search::docsummary::SlimeFillerFilter; using search::linguistics::SPANTREE_NAME; using search::linguistics::TERM; using vespalib::SimpleBuffer; @@ -176,6 +178,8 @@ get_document_types_config() .addField("d", nested_type_id) .addField("e", nested_type_id) .addField("f", nested_type_id)) + .addField("nested_array", Array(nested_type_id)) + .addField("nested_map", Map(DataType::T_STRING, nested_type_id)) .addField("ref", ref_type_id), Struct("indexingdocument.body")) .referenceType(ref_type_id, ref_target_doctype_id); @@ -228,9 +232,11 @@ protected: ArrayFieldValue make_array(); WeightedSetFieldValue make_weighted_set(); MapFieldValue make_map(); + StructFieldValue make_nested_value(int i); void expect_insert(const vespalib::string& exp, const FieldValue& fv, const std::vector* matching_elems); void expect_insert(const vespalib::string& exp, const FieldValue& fv); void expect_insert_filtered(const vespalib::string& exp, const FieldValue& fv, const std::vector& matching_elems); + void expect_insert(const vespalib::string& exp, const FieldValue& fv, SlimeFillerFilter& filter); void expect_insert_callback(const vespalib::string& exp, const FieldValue& fv, bool tokenize); }; @@ -345,6 +351,21 @@ SlimeFillerTest::make_map() return map; } +StructFieldValue +SlimeFillerTest::make_nested_value(int i) +{ + StructFieldValue nested(get_data_type("nested")); + StructFieldValue nested2(get_data_type("nested")); + nested.setValue("a", IntFieldValue(42 + 100 * i)); + nested.setValue("b", IntFieldValue(44 + 100 * i)); + nested.setValue("c", IntFieldValue(46 + 100 * i)); + nested2.setValue("a", IntFieldValue(62 + 100 * i)); + nested2.setValue("c", IntFieldValue(66 + 100 * i)); + nested.setValue("d", nested2); + nested.setValue("f", nested2); + return nested; +} + void SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv, const std::vector* matching_elems) { @@ -352,8 +373,6 @@ SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv SlimeInserter inserter(slime); SlimeFiller filler(inserter, matching_elems); fv.accept(filler); - SimpleBuffer buf; - JsonFormat::encode(slime, buf, true); auto act = slime_to_string(slime); EXPECT_EQ(exp, act); } @@ -370,6 +389,17 @@ SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv expect_insert(exp, fv, nullptr); } +void +SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv, SlimeFillerFilter& filter) +{ + Slime slime; + SlimeInserter inserter(slime); + SlimeFiller filler(inserter, nullptr, &filter); + fv.accept(filler); + auto act = slime_to_string(slime); + EXPECT_EQ(exp, act); +} + void SlimeFillerTest::expect_insert_callback(const vespalib::string& exp, const FieldValue& fv, bool tokenize) { @@ -378,7 +408,7 @@ SlimeFillerTest::expect_insert_callback(const vespalib::string& exp, const Field MockJuniperConverter converter; AnnotationConverter annotation_converter(converter); PassThroughStringFieldConverter passthrough_converter(converter); - SlimeFiller filler(inserter, tokenize ? (IStringFieldConverter*) &annotation_converter : (IStringFieldConverter*) &passthrough_converter); + SlimeFiller filler(inserter, tokenize ? (IStringFieldConverter*) &annotation_converter : (IStringFieldConverter*) &passthrough_converter, nullptr); fv.accept(filler); auto act_null = slime_to_string(slime); EXPECT_EQ("null", act_null); @@ -581,17 +611,38 @@ TEST_F(SlimeFillerTest, insert_map_filtered) TEST_F(SlimeFillerTest, insert_struct) { - StructFieldValue nested(get_data_type("nested")); - StructFieldValue nested2(get_data_type("nested")); - nested.setValue("a", IntFieldValue(42)); - nested.setValue("b", IntFieldValue(44)); - nested.setValue("c", IntFieldValue(46)); - nested2.setValue("a", IntFieldValue(62)); - nested2.setValue("c", IntFieldValue(66)); - nested.setValue("d", nested2); - nested.setValue("f", nested2); - // Field order depends on assigned field ids, cf. document::Field::calculateIdV7() + auto nested = make_nested_value(0); + // Field order depends on assigned field ids, cf. document::Field::calculateIdV7(), and symbol insertion order in slime expect_insert(R"({"f":{"c":66,"a":62},"c":46,"a":42,"b":44,"d":{"c":66,"a":62}})", nested); + SlimeFillerFilter filter; + filter.add("a").add("c").add("f.a").add("d"); + expect_insert(R"({"f":{"a":62},"a":42,"c":46,"d":{"a":62,"c":66}})", nested, filter); +} + +TEST_F(SlimeFillerTest, insert_struct_array) +{ + ArrayFieldValue array(get_data_type("Array")); + for (int i = 0; i < 3; ++i) { + array.add(make_nested_value(i)); + } + expect_insert(R"([{"f":{"c":66,"a":62},"c":46,"a":42,"b":44,"d":{"c":66,"a":62}},{"f":{"c":166,"a":162},"c":146,"a":142,"b":144,"d":{"c":166,"a":162}},{"f":{"c":266,"a":262},"c":246,"a":242,"b":244,"d":{"c":266,"a":262}}])", array); + SlimeFillerFilter filter; + filter.add("a").add("c").add("f.a").add("d"); + expect_insert(R"([{"f":{"a":62},"a":42,"c":46,"d":{"a":62,"c":66}},{"f":{"a":162},"a":142,"c":146,"d":{"a":162,"c":166}},{"f":{"a":262},"a":242,"c":246,"d":{"a":262,"c":266}}])", array, filter); +} + +TEST_F(SlimeFillerTest, insert_struct_map) +{ + MapFieldValue map(get_data_type("Map")); + for (int i = 0; i < 3; ++i) { + vespalib::asciistream key; + key << "key" << (i + 1); + map.put(StringFieldValue(key.str()), make_nested_value(i)); + } + expect_insert(R"([{"key":"key1","value":{"f":{"c":66,"a":62},"c":46,"a":42,"b":44,"d":{"c":66,"a":62}}},{"key":"key2","value":{"f":{"c":166,"a":162},"c":146,"a":142,"b":144,"d":{"c":166,"a":162}}},{"key":"key3","value":{"f":{"c":266,"a":262},"c":246,"a":242,"b":244,"d":{"c":266,"a":262}}}])", map); + SlimeFillerFilter filter; + filter.add("value.a").add("value.c").add("value.f.a").add("value.d"); + expect_insert(R"([{"key":"key1","value":{"f":{"a":62},"a":42,"c":46,"d":{"a":62,"c":66}}},{"key":"key2","value":{"f":{"a":162},"a":142,"c":146,"d":{"a":162,"c":166}}},{"key":"key3","value":{"f":{"a":262},"a":242,"c":246,"d":{"a":262,"c":266}}}])", map, filter); } TEST_F(SlimeFillerTest, insert_string_with_callback) diff --git a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt index 6aba9614e73..be435b49348 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt +++ b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt @@ -34,6 +34,7 @@ vespa_add_library(searchsummary_docsummary OBJECT searchdatatype.cpp simple_dfw.cpp slime_filler.cpp + slime_filler_filter.cpp struct_fields_resolver.cpp struct_map_attribute_combiner_dfw.cpp summaryfeaturesdfw.cpp diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp index 230c03d6644..94774c1bee4 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp @@ -5,6 +5,7 @@ #include "i_string_field_converter.h" #include "resultconfig.h" #include "searchdatatype.h" +#include "slime_filler_filter.h" #include #include #include @@ -65,23 +66,27 @@ private: Cursor& _array; Symbol _key_sym; Symbol _val_sym; + std::optional _filter; public: - MapFieldValueInserter(Inserter& parent_inserter) + MapFieldValueInserter(Inserter& parent_inserter, std::optional filter) : _array(parent_inserter.insertArray()), _key_sym(_array.resolve("key")), - _val_sym(_array.resolve("value")) + _val_sym(_array.resolve("value")), + _filter(std::move(filter)) { } void insert_entry(const FieldValue& key, const FieldValue& value) { Cursor& c = _array.addObject(); ObjectSymbolInserter ki(c, _key_sym); - ObjectSymbolInserter vi(c, _val_sym); SlimeFiller key_conv(ki); - SlimeFiller val_conv(vi); key.accept(key_conv); - value.accept(val_conv); + if (_filter.has_value()) { + ObjectSymbolInserter vi(c, _val_sym); + SlimeFiller val_conv(vi, nullptr, _filter.value()); + value.accept(val_conv); + } } }; @@ -90,21 +95,24 @@ public: SlimeFiller::SlimeFiller(Inserter& inserter) : _inserter(inserter), _matching_elems(nullptr), - _string_converter(nullptr) + _string_converter(nullptr), + _filter(nullptr) { } SlimeFiller::SlimeFiller(Inserter& inserter, const std::vector* matching_elems) : _inserter(inserter), _matching_elems(matching_elems), - _string_converter(nullptr) + _string_converter(nullptr), + _filter(nullptr) { } -SlimeFiller::SlimeFiller(Inserter& inserter, IStringFieldConverter* string_converter) +SlimeFiller::SlimeFiller(Inserter& inserter, IStringFieldConverter* string_converter, const SlimeFillerFilter* filter) : _inserter(inserter), _matching_elems(nullptr), - _string_converter(string_converter) + _string_converter(string_converter), + _filter(filter) { } @@ -136,7 +144,7 @@ SlimeFiller::visit(const MapFieldValue& v) if (empty_or_empty_after_filtering(v)) { return; } - MapFieldValueInserter map_inserter(_inserter); + MapFieldValueInserter map_inserter(_inserter, SlimeFillerFilter::get_filter(_filter, "value")); if (filter_matching_elements()) { assert(v.has_no_erased_keys()); for (uint32_t id_to_keep : (*_matching_elems)) { @@ -158,7 +166,7 @@ SlimeFiller::visit(const ArrayFieldValue& value) } Cursor& a = _inserter.insertArray(); ArrayInserter ai(a); - SlimeFiller conv(ai, _string_converter); + SlimeFiller conv(ai, _string_converter, _filter); if (filter_matching_elements()) { for (uint32_t id_to_keep : (*_matching_elems)) { value[id_to_keep].accept(conv); @@ -266,11 +274,15 @@ SlimeFiller::visit(const StructFieldValue& value) } Cursor& c = _inserter.insertObject(); for (StructFieldValue::const_iterator itr = value.begin(); itr != value.end(); ++itr) { - Memory keymem(itr.field().getName()); - ObjectInserter vi(c, keymem); - SlimeFiller conv(vi); - FieldValue::UP nextValue(value.getValue(itr.field())); - (*nextValue).accept(conv); + auto& name = itr.field().getName(); + auto sub_filter = SlimeFillerFilter::get_filter(_filter, name); + if (sub_filter.has_value()) { + Memory keymem(name); + ObjectInserter vi(c, keymem); + SlimeFiller conv(vi, nullptr, sub_filter.value()); + FieldValue::UP nextValue(value.getValue(itr.field())); + (*nextValue).accept(conv); + } } } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h index 329dd3c6bb2..a81a20814c4 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h @@ -11,6 +11,7 @@ namespace vespalib::slime { struct Inserter; } namespace search::docsummary { class IStringFieldConverter; +class SlimeFillerFilter; /* * Class inserting a field value into a slime object. @@ -20,6 +21,7 @@ class SlimeFiller : public document::ConstFieldValueVisitor { vespalib::slime::Inserter& _inserter; const std::vector* _matching_elems; IStringFieldConverter* _string_converter; + const SlimeFillerFilter* _filter; bool filter_matching_elements() const { return _matching_elems != nullptr; @@ -51,7 +53,7 @@ class SlimeFiller : public document::ConstFieldValueVisitor { public: SlimeFiller(vespalib::slime::Inserter& inserter); SlimeFiller(vespalib::slime::Inserter& inserter, const std::vector* matching_elems); - SlimeFiller(vespalib::slime::Inserter& inserter, IStringFieldConverter* string_converter); + SlimeFiller(vespalib::slime::Inserter& inserter, IStringFieldConverter* string_converter, const SlimeFillerFilter* filter); ~SlimeFiller() override; }; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.cpp new file mode 100644 index 00000000000..db28a1ae5cf --- /dev/null +++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.cpp @@ -0,0 +1,67 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "slime_filler_filter.h" +#include +#include + +namespace search::docsummary { + +SlimeFillerFilter::SlimeFillerFilter() + : _filter() +{ +} + +SlimeFillerFilter::~SlimeFillerFilter() = default; + +std::optional +SlimeFillerFilter::get_filter(vespalib::stringref field_name) const +{ + auto itr = _filter.find(field_name); + if (itr == _filter.end()) { + return std::nullopt; + } + return itr->second.get(); +} + +std::optional +SlimeFillerFilter::get_filter(const SlimeFillerFilter* filter, vespalib::stringref field_name) +{ + return (filter != nullptr) ? filter->get_filter(field_name) : nullptr; +} + +bool +SlimeFillerFilter::empty() const { return _filter.empty(); } + +SlimeFillerFilter& +SlimeFillerFilter::add(vespalib::stringref field_path) +{ + vespalib::stringref field_name; + vespalib::stringref remaining_path; + auto dot_pos = field_path.find('.'); + if (dot_pos != vespalib::string::npos) { + field_name = field_path.substr(0, dot_pos); + remaining_path = field_path.substr(dot_pos + 1); + } else { + field_name = field_path; + } + auto itr = _filter.find(field_name); + if (itr != _filter.end()) { + if (itr->second) { + if (remaining_path.empty()) { + itr->second.reset(); + } else { + itr->second->add(remaining_path); + } + } + } else { + auto insres = _filter.insert(std::make_pair(field_name, std::unique_ptr())); + assert(insres.second); + if (!remaining_path.empty()) { + insres.first->second = std::make_unique(); + insres.first->second->add(remaining_path); + } + } + return *this; +} + +} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.h b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.h new file mode 100644 index 00000000000..8f2c5dea392 --- /dev/null +++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.h @@ -0,0 +1,25 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +namespace search::docsummary { + +/* + * Class filtering which fields to render in a struct field. + */ +class SlimeFillerFilter { + vespalib::hash_map> _filter; + std::optional get_filter(vespalib::stringref field_name) const; +public: + SlimeFillerFilter(); + ~SlimeFillerFilter(); + static std::optional get_filter(const SlimeFillerFilter*, vespalib::stringref field_name); + bool empty() const; + SlimeFillerFilter& add(vespalib::stringref field_path); +}; + +} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp index 5c6a87664e5..dd5a59e46af 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp @@ -37,7 +37,7 @@ SummaryFieldConverter::insert_juniper_field(const document::FieldValue& value, v CheckUndefinedValueVisitor check_undefined; value.accept(check_undefined); if (!check_undefined.is_undefined()) { - SlimeFiller visitor(inserter, &converter); + SlimeFiller visitor(inserter, &converter, nullptr); value.accept(visitor); } } -- cgit v1.2.3