summaryrefslogtreecommitdiffstats
path: root/searchsummary
diff options
context:
space:
mode:
authorGeir Storli <geirst@verizonmedia.com>2020-01-10 14:25:31 +0000
committerGeir Storli <geirst@verizonmedia.com>2020-01-10 14:25:31 +0000
commit4e13de16e6d74bf12fbfc45754fed4ba34e54b64 (patch)
tree2b0ae0b4912adb08cdef38ccffd6fe714108eb0f /searchsummary
parent46d808803c2bf8bb974cdf3d38ef6ceab4fc95f7 (diff)
Optimize handling of "matched-elements-only" when the input field is retrieved from the document instance.
In this case we now filter on matching elements while converting to slime, instead of first converting to slime and then filtering, as was done before.
Diffstat (limited to 'searchsummary')
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp39
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h2
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp103
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h12
4 files changed, 119 insertions, 37 deletions
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp
index 812c19bc3f2..443634b3e3f 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp
@@ -26,6 +26,12 @@ using vespalib::slime::inject;
namespace search::docsummary {
+const std::vector<uint32_t>&
+MatchedElementsFilterDFW::get_matching_elements(uint32_t docid, GetDocsumsState& state) const
+{
+ return state.get_matching_elements(*_struct_field_mapper).get_matching_elements(docid, _input_field_name);
+}
+
MatchedElementsFilterDFW::MatchedElementsFilterDFW(const std::string& input_field_name, uint32_t input_field_enum,
std::shared_ptr<StructFieldMapper> struct_field_mapper)
: _input_field_name(input_field_name),
@@ -68,33 +74,30 @@ decode_input_field_to_slime(const ResEntry& entry, search::RawBuf& target_buf, S
}
void
-convert_input_field_to_slime(const FieldValue& input_field_value, Slime& input_field_as_slime)
+filter_matching_elements_in_input_field_while_converting_to_slime(const FieldValue& input_field_value,
+ const std::vector<uint32_t>& matching_elems,
+ vespalib::slime::Inserter& target)
{
- // This is the same conversion that happens in proton::DocumentStoreAdapter.
- auto converted = SummaryFieldConverter::convertSummaryField(false, input_field_value);
+ // This is a similar conversion that happens in proton::DocumentStoreAdapter.
+ // Only difference is that we filter matched elements on the fly.
+ auto converted = SummaryFieldConverter::convert_field_with_filter(false, input_field_value, matching_elems);
// This should hold as we also have asserted that (type == ResType::RES_JSONSTRING);
assert(converted->getClass().inherits(LiteralFieldValueB::classId));
auto& literal = static_cast<const LiteralFieldValueB&>(*converted);
vespalib::stringref buf = literal.getValueRef();
+ Slime input_field_as_slime;
BinaryFormat::decode(vespalib::Memory(buf.data(), buf.size()), input_field_as_slime);
+ inject(input_field_as_slime.get(), target);
}
bool
resolve_input_field_as_slime(GeneralResult& result, GetDocsumsState& state,
- int entry_idx, const vespalib::string& input_field_name,
- Slime& input_field_as_slime)
+ int entry_idx, Slime& input_field_as_slime)
{
ResEntry* entry = result.GetEntry(entry_idx);
if (entry != nullptr) {
decode_input_field_to_slime(*entry, state._docSumFieldSpace, input_field_as_slime);
return true;
- } else {
- // Use the document instance if the input field is not in the docsum blob.
- auto field_value = result.get_field_value(input_field_name);
- if (field_value) {
- convert_input_field_to_slime(*field_value, input_field_as_slime);
- return true;
- }
}
return false;
}
@@ -124,12 +127,16 @@ MatchedElementsFilterDFW::insertField(uint32_t docid, GeneralResult* result, Get
assert(type == ResType::RES_JSONSTRING);
int entry_idx = result->GetClass()->GetIndexFromEnumValue(_input_field_enum);
Slime input_field;
- if (resolve_input_field_as_slime(*result, *state, entry_idx, _input_field_name, input_field)) {
+ if (resolve_input_field_as_slime(*result, *state, entry_idx, input_field)) {
Slime output_field;
- filter_matching_elements_in_input_field(input_field,
- state->get_matching_elements(*_struct_field_mapper)
- .get_matching_elements(docid, _input_field_name), output_field);
+ filter_matching_elements_in_input_field(input_field, get_matching_elements(docid, *state), output_field);
inject(output_field.get(), target);
+ } else {
+ // Use the document instance if the input field is not in the docsum blob.
+ auto field_value = result->get_field_value(_input_field_name);
+ if (field_value) {
+ filter_matching_elements_in_input_field_while_converting_to_slime(*field_value, get_matching_elements(docid, *state), target);
+ }
}
}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h
index d232ad7ae0c..966b5b95fa7 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h
+++ b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h
@@ -18,6 +18,8 @@ private:
uint32_t _input_field_enum;
std::shared_ptr<StructFieldMapper> _struct_field_mapper;
+ const std::vector<uint32_t>& get_matching_elements(uint32_t docid, GetDocsumsState& state) const;
+
public:
MatchedElementsFilterDFW(const std::string& input_field_name, uint32_t input_field_enum,
std::shared_ptr<StructFieldMapper> struct_field_mapper);
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp
index 1cac789ad21..7368a199569 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp
@@ -331,9 +331,44 @@ SummaryFieldValueConverter::~SummaryFieldValueConverter() = default;
using namespace vespalib::slime::convenience;
+
+
class SlimeFiller : public ConstFieldValueVisitor {
+private:
+ class MapFieldValueInserter {
+ private:
+ Cursor& _array;
+ Symbol _key_sym;
+ Symbol _val_sym;
+ bool _tokenize;
+
+ public:
+ MapFieldValueInserter(Inserter& parent_inserter, bool tokenize)
+ : _array(parent_inserter.insertArray()),
+ _key_sym(_array.resolve("key")),
+ _val_sym(_array.resolve("value")),
+ _tokenize(tokenize)
+ {
+ }
+ void insert_entry(const FieldValue& key, const FieldValue& value) {
+ Cursor& c = _array.addObject();
+ ObjectSymbolInserter ki(c, _key_sym);
+ ObjectSymbolInserter vi(c, _val_sym);
+ SlimeFiller key_conv(ki, _tokenize);
+ SlimeFiller val_conv(vi, _tokenize);
+
+ key.accept(key_conv);
+ value.accept(val_conv);
+ }
+ };
+
Inserter &_inserter;
bool _tokenize;
+ const std::vector<uint32_t>* _matching_elems;
+
+ bool filter_matching_elements() const {
+ return _matching_elems != nullptr;
+ }
void visit(const AnnotationReferenceFieldValue & v ) override {
(void)v;
@@ -351,20 +386,17 @@ class SlimeFiller : public ConstFieldValueVisitor {
}
void visit(const MapFieldValue & v) override {
- Cursor &a = _inserter.insertArray();
- Symbol keysym = a.resolve("key");
- Symbol valsym = a.resolve("value");
- for (const auto & entry : v) {
- Cursor &c = a.addObject();
- ObjectSymbolInserter ki(c, keysym);
- ObjectSymbolInserter vi(c, valsym);
- SlimeFiller keyConv(ki, _tokenize);
- SlimeFiller valConv(vi, _tokenize);
-
- const FieldValue &key = *(entry.first);
- key.accept(keyConv);
- const FieldValue &val = *(entry.second);
- val.accept(valConv);
+ MapFieldValueInserter map_inserter(_inserter, _tokenize);
+ if (filter_matching_elements()) {
+ assert(v.has_no_erased_keys());
+ for (uint32_t id_to_keep : (*_matching_elems)) {
+ auto entry = v[id_to_keep];
+ map_inserter.insert_entry(*entry.first, *entry.second);
+ }
+ } else {
+ for (const auto &entry : v) {
+ map_inserter.insert_entry(*entry.first, *entry.second);
+ }
}
}
@@ -373,8 +405,14 @@ class SlimeFiller : public ConstFieldValueVisitor {
if (value.size() > 0) {
ArrayInserter ai(a);
SlimeFiller conv(ai, _tokenize);
- for (const FieldValue &fv : value) {
- fv.accept(conv);
+ if (filter_matching_elements()) {
+ for (uint32_t id_to_keep : (*_matching_elems)) {
+ value[id_to_keep].accept(conv);
+ }
+ } else {
+ for (const FieldValue &fv : value) {
+ fv.accept(conv);
+ }
}
}
}
@@ -481,20 +519,38 @@ class SlimeFiller : public ConstFieldValueVisitor {
public:
SlimeFiller(Inserter &inserter, bool tokenize)
- : _inserter(inserter), _tokenize(tokenize) {}
+ : _inserter(inserter),
+ _tokenize(tokenize),
+ _matching_elems()
+ {}
+
+ SlimeFiller(Inserter& inserter, bool tokenize, const std::vector<uint32_t>* matching_elems)
+ : _inserter(inserter),
+ _tokenize(tokenize),
+ _matching_elems(matching_elems)
+ {}
};
class SlimeConverter : public FieldValueConverter {
+private:
bool _tokenize;
+ const std::vector<uint32_t>* _matching_elems;
+
public:
SlimeConverter(bool tokenize)
- : _tokenize(tokenize)
+ : _tokenize(tokenize),
+ _matching_elems()
+ {}
+
+ SlimeConverter(bool tokenize, const std::vector<uint32_t>& matching_elems)
+ : _tokenize(tokenize),
+ _matching_elems(&matching_elems)
{}
FieldValue::UP convert(const FieldValue &input) override {
vespalib::Slime slime;
SlimeInserter inserter(slime);
- SlimeFiller visitor(inserter, _tokenize);
+ SlimeFiller visitor(inserter, _tokenize, _matching_elems);
input.accept(visitor);
search::RawBuf rbuf(4096);
search::SlimeOutputRawBufAdapter adapter(rbuf);
@@ -514,5 +570,14 @@ SummaryFieldConverter::convertSummaryField(bool markup,
return SummaryFieldValueConverter(markup, subConv).convert(value);
}
+FieldValue::UP
+SummaryFieldConverter::convert_field_with_filter(bool markup,
+ const document::FieldValue& value,
+ const std::vector<uint32_t>& matching_elems)
+{
+ SlimeConverter sub_conv(markup, matching_elems);
+ return SummaryFieldValueConverter(markup, sub_conv).convert(value);
+}
+
}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h
index 96db6026e88..840de9073c9 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h
+++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h
@@ -12,8 +12,16 @@ namespace search::docsummary {
class SummaryFieldConverter
{
public:
- static document::FieldValue::UP
- convertSummaryField(bool markup, const document::FieldValue &value);
+ static document::FieldValue::UP convertSummaryField(bool markup, const document::FieldValue &value);
+
+ /**
+ * Converts the given field value to slime, only keeping the elements that are contained in the matching elements vector.
+ *
+ * Filtering occurs when the field value is an ArrayFieldValue or MapFieldValue.
+ */
+ static document::FieldValue::UP convert_field_with_filter(bool markup,
+ const document::FieldValue& value,
+ const std::vector<uint32_t>& matching_elems);
};
}