aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2022-09-16 13:44:25 +0200
committer GitHub <noreply@github.com> 2022-09-16 13:44:25 +0200
commit dbf46805190b41f461acca68955b23e47662dd66 (patch)
tree bb931ade23ecf84f3303469adfa8da255349ddce
parent dee9fbfc53658c1261f83fd9dc41f8a890bbe8c0 (diff)
parent 071d3ba3b99c9bd61b31eb08f7778afd094bd779 (diff)
Merge pull request #24093 from vespa-engine/toregge/add-slime-filler-filter
Add slime filler filter.
-rw-r--r-- searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp | 77
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt | 1
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp | 44
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h | 4
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.cpp | 67
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.h | 29
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp | 2
7 files changed, 193 insertions, 31 deletions
diff --git a/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp b/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp
index 4ebdfb10cb7..6f3faeb69d5 100644
--- a/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp
+++ b/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp
@@ -40,6 +40,7 @@
#include <vespa/searchsummary/docsummary/linguisticsannotation.h>
#include <vespa/searchsummary/docsummary/resultconfig.h>
#include <vespa/searchsummary/docsummary/slime_filler.h>
+#include <vespa/searchsummary/docsummary/slime_filler_filter.h>
#include <vespa/vespalib/data/slime/binary_format.h>
#include <vespa/vespalib/data/slime/json_format.h>
#include <vespa/vespalib/data/slime/slime.h>
@@ -88,6 +89,7 @@ using search::docsummary::IStringFieldConverter;
using search::docsummary::DocsumFieldWriter;
using search::docsummary::ResultConfig;
using search::docsummary::SlimeFiller;
+using search::docsummary::SlimeFillerFilter;
using search::linguistics::SPANTREE_NAME;
using search::linguistics::TERM;
using vespalib::SimpleBuffer;
@@ -176,6 +178,8 @@ get_document_types_config()
.addField("d", nested_type_id)
.addField("e", nested_type_id)
.addField("f", nested_type_id))
+ .addField("nested_array", Array(nested_type_id))
+ .addField("nested_map", Map(DataType::T_STRING, nested_type_id))
.addField("ref", ref_type_id),
Struct("indexingdocument.body"))
.referenceType(ref_type_id, ref_target_doctype_id);
@@ -228,9 +232,11 @@ protected:
ArrayFieldValue make_array();
WeightedSetFieldValue make_weighted_set();
MapFieldValue make_map();
+ StructFieldValue make_nested_value(int i);
void expect_insert(const vespalib::string& exp, const FieldValue& fv, const std::vector<uint32_t>* matching_elems);
void expect_insert(const vespalib::string& exp, const FieldValue& fv);
void expect_insert_filtered(const vespalib::string& exp, const FieldValue& fv, const std::vector<uint32_t>& matching_elems);
+ void expect_insert(const vespalib::string& exp, const FieldValue& fv, SlimeFillerFilter& filter);
void expect_insert_callback(const vespalib::string& exp, const FieldValue& fv, bool tokenize);
};
@@ -345,6 +351,21 @@ SlimeFillerTest::make_map()
return map;
}
+StructFieldValue
+SlimeFillerTest::make_nested_value(int i) // Builds a "nested" struct whose int values are all offset by 100 * i.
+{
+ StructFieldValue nested(get_data_type("nested"));
+ StructFieldValue nested2(get_data_type("nested")); // inner struct, stored below under both "d" and "f"
+ nested.setValue("a", IntFieldValue(42 + 100 * i));
+ nested.setValue("b", IntFieldValue(44 + 100 * i));
+ nested.setValue("c", IntFieldValue(46 + 100 * i));
+ nested2.setValue("a", IntFieldValue(62 + 100 * i)); // the inner struct only gets "a" and "c"
+ nested2.setValue("c", IntFieldValue(66 + 100 * i));
+ nested.setValue("d", nested2);
+ nested.setValue("f", nested2); // outer field "e" is never set by this helper
+ return nested;
+}
+
void
SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv, const std::vector<uint32_t>* matching_elems)
{
@@ -352,8 +373,6 @@ SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv
SlimeInserter inserter(slime);
SlimeFiller filler(inserter, matching_elems);
fv.accept(filler);
- SimpleBuffer buf;
- JsonFormat::encode(slime, buf, true);
auto act = slime_to_string(slime);
EXPECT_EQ(exp, act);
}
@@ -371,6 +390,17 @@ SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv
}
void
+SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv, SlimeFillerFilter& filter) // Renders fv through a SlimeFiller restricted by filter and compares the result with exp.
+{
+ Slime slime;
+ SlimeInserter inserter(slime);
+ SlimeFiller filler(inserter, nullptr, &filter); // nullptr: no string field converter is used
+ fv.accept(filler);
+ auto act = slime_to_string(slime);
+ EXPECT_EQ(exp, act);
+}
+
+void
SlimeFillerTest::expect_insert_callback(const vespalib::string& exp, const FieldValue& fv, bool tokenize)
{
Slime slime;
@@ -378,7 +408,7 @@ SlimeFillerTest::expect_insert_callback(const vespalib::string& exp, const Field
MockJuniperConverter converter;
AnnotationConverter annotation_converter(converter);
PassThroughStringFieldConverter passthrough_converter(converter);
- SlimeFiller filler(inserter, tokenize ? (IStringFieldConverter*) &annotation_converter : (IStringFieldConverter*) &passthrough_converter);
+ SlimeFiller filler(inserter, tokenize ? (IStringFieldConverter*) &annotation_converter : (IStringFieldConverter*) &passthrough_converter, nullptr);
fv.accept(filler);
auto act_null = slime_to_string(slime);
EXPECT_EQ("null", act_null);
@@ -581,17 +611,38 @@ TEST_F(SlimeFillerTest, insert_map_filtered)
TEST_F(SlimeFillerTest, insert_struct)
{
- StructFieldValue nested(get_data_type("nested"));
- StructFieldValue nested2(get_data_type("nested"));
- nested.setValue("a", IntFieldValue(42));
- nested.setValue("b", IntFieldValue(44));
- nested.setValue("c", IntFieldValue(46));
- nested2.setValue("a", IntFieldValue(62));
- nested2.setValue("c", IntFieldValue(66));
- nested.setValue("d", nested2);
- nested.setValue("f", nested2);
- // Field order depends on assigned field ids, cf. document::Field::calculateIdV7()
+ auto nested = make_nested_value(0); // i == 0 gives the same values the removed inline setup used
+ // Field order depends on assigned field ids, cf. document::Field::calculateIdV7(), and symbol insertion order in slime
expect_insert(R"({"f":{"c":66,"a":62},"c":46,"a":42,"b":44,"d":{"c":66,"a":62}})", nested);
+ SlimeFillerFilter filter;
+ filter.add("a").add("c").add("f.a").add("d"); // keep "a", "c", all of "d", but only "a" inside "f"
+ expect_insert(R"({"f":{"a":62},"a":42,"c":46,"d":{"a":62,"c":66}})", nested, filter); // "b" and "f.c" are filtered out
+}
+
+TEST_F(SlimeFillerTest, insert_struct_array) // Checks struct field filtering for structs stored in an array.
+{
+ ArrayFieldValue array(get_data_type("Array<nested>"));
+ for (int i = 0; i < 3; ++i) {
+ array.add(make_nested_value(i)); // element i carries values offset by 100 * i
+ }
+ expect_insert(R"([{"f":{"c":66,"a":62},"c":46,"a":42,"b":44,"d":{"c":66,"a":62}},{"f":{"c":166,"a":162},"c":146,"a":142,"b":144,"d":{"c":166,"a":162}},{"f":{"c":266,"a":262},"c":246,"a":242,"b":244,"d":{"c":266,"a":262}}])", array);
+ SlimeFillerFilter filter;
+ filter.add("a").add("c").add("f.a").add("d"); // same field paths as in insert_struct; no extra path component for the array level
+ expect_insert(R"([{"f":{"a":62},"a":42,"c":46,"d":{"a":62,"c":66}},{"f":{"a":162},"a":142,"c":146,"d":{"a":162,"c":166}},{"f":{"a":262},"a":242,"c":246,"d":{"a":262,"c":266}}])", array, filter); // every element is filtered the same way
+}
+
+TEST_F(SlimeFillerTest, insert_struct_map) // Checks struct field filtering for structs stored as map values.
+{
+ MapFieldValue map(get_data_type("Map<String,nested>"));
+ for (int i = 0; i < 3; ++i) {
+ vespalib::asciistream key;
+ key << "key" << (i + 1); // keys are "key1", "key2", "key3"
+ map.put(StringFieldValue(key.str()), make_nested_value(i));
+ }
+ expect_insert(R"([{"key":"key1","value":{"f":{"c":66,"a":62},"c":46,"a":42,"b":44,"d":{"c":66,"a":62}}},{"key":"key2","value":{"f":{"c":166,"a":162},"c":146,"a":142,"b":144,"d":{"c":166,"a":162}}},{"key":"key3","value":{"f":{"c":266,"a":262},"c":246,"a":242,"b":244,"d":{"c":266,"a":262}}}])", map);
+ SlimeFillerFilter filter;
+ filter.add("value.a").add("value.c").add("value.f.a").add("value.d"); // paths are rooted at "value", the name the map entry's value is rendered under
+ expect_insert(R"([{"key":"key1","value":{"f":{"a":62},"a":42,"c":46,"d":{"a":62,"c":66}}},{"key":"key2","value":{"f":{"a":162},"a":142,"c":146,"d":{"a":162,"c":166}}},{"key":"key3","value":{"f":{"a":262},"a":242,"c":246,"d":{"a":262,"c":266}}}])", map, filter); // keys are rendered in every entry; only the values are reduced
}
TEST_F(SlimeFillerTest, insert_string_with_callback)
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
index 6aba9614e73..be435b49348 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
+++ b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
@@ -34,6 +34,7 @@ vespa_add_library(searchsummary_docsummary OBJECT
searchdatatype.cpp
simple_dfw.cpp
slime_filler.cpp
+ slime_filler_filter.cpp
struct_fields_resolver.cpp
struct_map_attribute_combiner_dfw.cpp
summaryfeaturesdfw.cpp
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp
index 230c03d6644..94774c1bee4 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp
@@ -5,6 +5,7 @@
#include "i_string_field_converter.h"
#include "resultconfig.h"
#include "searchdatatype.h"
+#include "slime_filler_filter.h"
#include <vespa/document/datatype/positiondatatype.h>
#include <vespa/document/fieldvalue/arrayfieldvalue.h>
#include <vespa/document/fieldvalue/boolfieldvalue.h>
@@ -65,23 +66,27 @@ private:
Cursor& _array;
Symbol _key_sym;
Symbol _val_sym;
+ std::optional<const SlimeFillerFilter*> _filter;
public:
- MapFieldValueInserter(Inserter& parent_inserter)
+ MapFieldValueInserter(Inserter& parent_inserter, std::optional<const SlimeFillerFilter*> filter) // filter: unset => entry values are not rendered; set => passed (possibly as nullptr) to the filler of each value
: _array(parent_inserter.insertArray()),
_key_sym(_array.resolve("key")),
- _val_sym(_array.resolve("value"))
+ _val_sym(_array.resolve("value")),
+ _filter(std::move(filter)) // the optional only wraps a raw pointer
{
}
void insert_entry(const FieldValue& key, const FieldValue& value) {
Cursor& c = _array.addObject();
ObjectSymbolInserter ki(c, _key_sym);
- ObjectSymbolInserter vi(c, _val_sym);
SlimeFiller key_conv(ki);
- SlimeFiller val_conv(vi);
key.accept(key_conv);
- value.accept(val_conv);
+ if (_filter.has_value()) { // unset optional: the value is skipped, the key above has still been rendered
+ ObjectSymbolInserter vi(c, _val_sym);
+ SlimeFiller val_conv(vi, nullptr, _filter.value()); // value() may be nullptr, i.e. no filtering below the value
+ value.accept(val_conv);
+ }
}
};
@@ -90,21 +95,24 @@ public:
SlimeFiller::SlimeFiller(Inserter& inserter)
: _inserter(inserter),
_matching_elems(nullptr),
- _string_converter(nullptr)
+ _string_converter(nullptr),
+ _filter(nullptr) // nullptr filter: no struct fields are filtered out
{
}
SlimeFiller::SlimeFiller(Inserter& inserter, const std::vector<uint32_t>* matching_elems)
: _inserter(inserter),
_matching_elems(matching_elems),
- _string_converter(nullptr)
+ _string_converter(nullptr),
+ _filter(nullptr) // nullptr filter: no struct fields are filtered out
{
}
-SlimeFiller::SlimeFiller(Inserter& inserter, IStringFieldConverter* string_converter)
+SlimeFiller::SlimeFiller(Inserter& inserter, IStringFieldConverter* string_converter, const SlimeFillerFilter* filter) // filter may be nullptr, in which case no struct fields are filtered out
: _inserter(inserter),
_matching_elems(nullptr),
- _string_converter(string_converter)
+ _string_converter(string_converter),
+ _filter(filter) // stored as a raw pointer, not copied
{
}
@@ -136,7 +144,7 @@ SlimeFiller::visit(const MapFieldValue& v)
if (empty_or_empty_after_filtering(v)) {
return;
}
- MapFieldValueInserter map_inserter(_inserter);
+ MapFieldValueInserter map_inserter(_inserter, SlimeFillerFilter::get_filter(_filter, "value"));
if (filter_matching_elements()) {
assert(v.has_no_erased_keys());
for (uint32_t id_to_keep : (*_matching_elems)) {
@@ -158,7 +166,7 @@ SlimeFiller::visit(const ArrayFieldValue& value)
}
Cursor& a = _inserter.insertArray();
ArrayInserter ai(a);
- SlimeFiller conv(ai, _string_converter);
+ SlimeFiller conv(ai, _string_converter, _filter);
if (filter_matching_elements()) {
for (uint32_t id_to_keep : (*_matching_elems)) {
value[id_to_keep].accept(conv);
@@ -266,11 +274,15 @@ SlimeFiller::visit(const StructFieldValue& value)
}
Cursor& c = _inserter.insertObject();
for (StructFieldValue::const_iterator itr = value.begin(); itr != value.end(); ++itr) {
- Memory keymem(itr.field().getName());
- ObjectInserter vi(c, keymem);
- SlimeFiller conv(vi);
- FieldValue::UP nextValue(value.getValue(itr.field()));
- (*nextValue).accept(conv);
+ auto& name = itr.field().getName();
+ auto sub_filter = SlimeFillerFilter::get_filter(_filter, name);
+ if (sub_filter.has_value()) {
+ Memory keymem(name);
+ ObjectInserter vi(c, keymem);
+ SlimeFiller conv(vi, nullptr, sub_filter.value());
+ FieldValue::UP nextValue(value.getValue(itr.field()));
+ (*nextValue).accept(conv);
+ }
}
}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h
index 329dd3c6bb2..a81a20814c4 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h
+++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h
@@ -11,6 +11,7 @@ namespace vespalib::slime { struct Inserter; }
namespace search::docsummary {
class IStringFieldConverter;
+class SlimeFillerFilter;
/*
* Class inserting a field value into a slime object.
@@ -20,6 +21,7 @@ class SlimeFiller : public document::ConstFieldValueVisitor {
vespalib::slime::Inserter& _inserter;
const std::vector<uint32_t>* _matching_elems;
IStringFieldConverter* _string_converter;
+ const SlimeFillerFilter* _filter;
bool filter_matching_elements() const {
return _matching_elems != nullptr;
@@ -51,7 +53,7 @@ class SlimeFiller : public document::ConstFieldValueVisitor {
public:
SlimeFiller(vespalib::slime::Inserter& inserter);
SlimeFiller(vespalib::slime::Inserter& inserter, const std::vector<uint32_t>* matching_elems);
- SlimeFiller(vespalib::slime::Inserter& inserter, IStringFieldConverter* string_converter);
+ SlimeFiller(vespalib::slime::Inserter& inserter, IStringFieldConverter* string_converter, const SlimeFillerFilter* filter);
~SlimeFiller() override;
};
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.cpp
new file mode 100644
index 00000000000..db28a1ae5cf
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.cpp
@@ -0,0 +1,67 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "slime_filler_filter.h"
+#include <vespa/vespalib/stllike/hash_map.hpp>
+#include <cassert>
+
+namespace search::docsummary {
+
+SlimeFillerFilter::SlimeFillerFilter() // Creates a filter level without entries.
+ : _filter() // until add() is called, get_filter() on this object reports every field as blocked
+{
+}
+
+SlimeFillerFilter::~SlimeFillerFilter() = default; // NOTE(review): presumably out-of-line so the hash_map.hpp template code is only needed in this file - confirm
+
+std::optional<const SlimeFillerFilter*>
+SlimeFillerFilter::get_filter(vespalib::stringref field_name) const // Looks up field_name among the entries of this filter level.
+{
+ auto itr = _filter.find(field_name);
+ if (itr == _filter.end()) {
+ return std::nullopt; // field not listed at this level: blocked
+ }
+ return itr->second.get(); // listed: nullptr when the whole field passes, otherwise the filter for its sub fields
+}
+
+std::optional<const SlimeFillerFilter*>
+SlimeFillerFilter::get_filter(const SlimeFillerFilter* filter, vespalib::stringref field_name) // Null-tolerant lookup: a nullptr filter lets every field through.
+{
+ return (filter != nullptr) ? filter->get_filter(field_name) : nullptr; // the nullptr arm converts to an engaged optional holding nullptr (field allowed, no sub-filter), not to std::nullopt
+}
+
+bool
+SlimeFillerFilter::empty() const { return _filter.empty(); } // true when this level has no entries, i.e. no path has been added to it
+
+SlimeFillerFilter&
+SlimeFillerFilter::add(vespalib::stringref field_path) // Adds a dotted field path (e.g. "f.a") to the fields that pass the filter; returns *this so calls can be chained.
+{
+ vespalib::stringref field_name;
+ vespalib::stringref remaining_path;
+ auto dot_pos = field_path.find('.'); // split at the first '.' only; the remainder is handled by the next level
+ if (dot_pos != vespalib::string::npos) {
+ field_name = field_path.substr(0, dot_pos);
+ remaining_path = field_path.substr(dot_pos + 1);
+ } else {
+ field_name = field_path; // no dot: remaining_path stays empty, meaning the whole field
+ }
+ auto itr = _filter.find(field_name);
+ if (itr != _filter.end()) {
+ if (itr->second) { // entry currently restricted to some sub fields
+ if (remaining_path.empty()) {
+ itr->second.reset(); // whole field requested: drop the sub-filter so everything below passes
+ } else {
+ itr->second->add(remaining_path); // allow one more sub path
+ }
+ } // else: the whole field already passes, so there is nothing to widen
+ } else {
+ auto insres = _filter.insert(std::make_pair(field_name, std::unique_ptr<SlimeFillerFilter>())); // new entry; a null sub-filter means the whole field passes
+ assert(insres.second); // find() above missed, so the insert is expected to add a new entry
+ if (!remaining_path.empty()) { // only a sub path was requested: create the next level
+ insres.first->second = std::make_unique<SlimeFillerFilter>();
+ insres.first->second->add(remaining_path);
+ }
+ }
+ return *this;
+}
+
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.h b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.h
new file mode 100644
index 00000000000..ba7ba6fe159
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.h
@@ -0,0 +1,29 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <optional>
+
+namespace search::docsummary {
+
+/*
+ * Class filtering which fields to render in a struct field.
+ */
+class SlimeFillerFilter { // NOTE(review): uses std::unique_ptr but <memory> is not included directly in this header
+ vespalib::hash_map<vespalib::string, std::unique_ptr<SlimeFillerFilter>> _filter; // field name -> filter for its sub fields; a null pointer means the whole field is rendered
+ std::optional<const SlimeFillerFilter*> get_filter(vespalib::stringref field_name) const; // lookup at this level only; unset when the field is not listed
+public:
+ SlimeFillerFilter();
+ ~SlimeFillerFilter();
+ /*
+ * The return value is not set if the field is blocked by the filter; otherwise it is set
+ * to the filter for the next level (nullptr: no filtering below). A nullptr filter blocks nothing.
+ */
+ static std::optional<const SlimeFillerFilter*> get_filter(const SlimeFillerFilter* filter, vespalib::stringref field_name);
+ bool empty() const; // true when no field path has been added to this level
+ SlimeFillerFilter& add(vespalib::stringref field_path); // field_path uses '.' between levels, e.g. "f.a"; returns *this for chaining
+};
+
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp
index 5c6a87664e5..dd5a59e46af 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp
@@ -37,7 +37,7 @@ SummaryFieldConverter::insert_juniper_field(const document::FieldValue& value, v
CheckUndefinedValueVisitor check_undefined;
value.accept(check_undefined);
if (!check_undefined.is_undefined()) {
- SlimeFiller visitor(inserter, &converter);
+ SlimeFiller visitor(inserter, &converter, nullptr);
value.accept(visitor);
}
}