aboutsummaryrefslogtreecommitdiffstats
path: root/searchsummary
diff options
context:
space:
mode:
author Geir Storli <geirst@verizonmedia.com> 2019-10-04 08:56:23 +0000
committer Geir Storli <geirst@verizonmedia.com> 2019-10-04 09:13:31 +0000
commit dea97e69131b865669760daab6af068958d5a6b4 (patch)
tree 2f01de81988ebe18da019d43ecb5ca74fd9edd40 /searchsummary
parent 6fe52f2ed1665f6fe29b74bbaec4db2c889ebacf (diff)
Add docsum field writer that filters matched elements from a complex field retrieved from document store.
Diffstat (limited to 'searchsummary')
-rw-r--r-- searchsummary/CMakeLists.txt 1
-rw-r--r-- searchsummary/src/tests/docsummary/matched_elements_filter/CMakeLists.txt 10
-rw-r--r-- searchsummary/src/tests/docsummary/matched_elements_filter/matched_elements_filter_test.cpp 205
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt 31
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp 87
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h 27
6 files changed, 346 insertions, 15 deletions
diff --git a/searchsummary/CMakeLists.txt b/searchsummary/CMakeLists.txt
index 4df636e0219..2a23dd4c495 100644
--- a/searchsummary/CMakeLists.txt
+++ b/searchsummary/CMakeLists.txt
@@ -25,6 +25,7 @@ vespa_define_module(
src/tests/docsumformat
src/tests/docsummary
src/tests/docsummary/attribute_combiner
+ src/tests/docsummary/matched_elements_filter
src/tests/docsummary/slime_summary
src/tests/extractkeywords
)
diff --git a/searchsummary/src/tests/docsummary/matched_elements_filter/CMakeLists.txt b/searchsummary/src/tests/docsummary/matched_elements_filter/CMakeLists.txt
new file mode 100644
index 00000000000..a87f5638acc
--- /dev/null
+++ b/searchsummary/src/tests/docsummary/matched_elements_filter/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# The test is written with GoogleTest, so the package must be available.
+find_package(GTest REQUIRED)
+
+# Test executable built from the single test source and linked against searchsummary.
+vespa_add_executable(searchsummary_matched_elements_filter_test_app TEST
+    SOURCES
+    matched_elements_filter_test.cpp
+    DEPENDS
+    searchsummary
+    GTest::GTest
+)
+
+# Register the executable as a test that is run under its own name.
+vespa_add_test(NAME searchsummary_matched_elements_filter_test_app COMMAND searchsummary_matched_elements_filter_test_app)
diff --git a/searchsummary/src/tests/docsummary/matched_elements_filter/matched_elements_filter_test.cpp b/searchsummary/src/tests/docsummary/matched_elements_filter/matched_elements_filter_test.cpp
new file mode 100644
index 00000000000..40d0285b1ec
--- /dev/null
+++ b/searchsummary/src/tests/docsummary/matched_elements_filter/matched_elements_filter_test.cpp
@@ -0,0 +1,205 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/document/datatype/datatype.h>
+#include <vespa/document/datatype/structdatatype.h>
+#include <vespa/document/document.h>
+#include <vespa/searchlib/common/matching_elements.h>
+#include <vespa/searchlib/util/slime_output_raw_buf_adapter.h>
+#include <vespa/searchsummary/docsummary/docsumstate.h>
+#include <vespa/searchsummary/docsummary/idocsumenvironment.h>
+#include <vespa/searchsummary/docsummary/matched_elements_filter_dfw.h>
+#include <vespa/searchsummary/docsummary/resultconfig.h>
+#include <vespa/searchsummary/docsummary/resultpacker.h>
+#include <vespa/searchsummary/docsummary/summaryfieldconverter.h>
+#include <vespa/vespalib/data/slime/json_format.h>
+#include <vespa/vespalib/data/slime/slime.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <iostream>
+
+#include <vespa/log/log.h>
+LOG_SETUP("matched_elements_filter_test");
+
+using search::MatchingElements;
+using search::StructFieldMapper;
+using vespalib::Slime;
+
+using namespace document;
+using namespace search::docsummary;
+using namespace vespalib::slime;
+
+using ElementVector = std::vector<uint32_t>;
+
+/**
+ * Test helper that owns a Slime object decoded from some serialized input.
+ *
+ * Both constructors are explicit so that a std::string or a Slime is never
+ * silently converted into a SlimeValue.
+ */
+struct SlimeValue {
+    Slime slime;
+
+    /**
+     * Decodes 'json_input' as JSON into 'slime'.
+     * A decode that consumes no input is reported as a (non-fatal) test failure.
+     */
+    explicit SlimeValue(const std::string& json_input)
+        : slime()
+    {
+        size_t used = JsonFormat::decode(json_input, slime);
+        // Unsigned literal: 'used' is a size_t, so avoid a signed/unsigned comparison.
+        EXPECT_GT(used, 0u);
+    }
+
+    /**
+     * Decodes the binary slime format stored as the string value at the root of
+     * 'slime_with_raw_field' into 'slime'.
+     * A decode that consumes no input is reported as a (non-fatal) test failure.
+     */
+    explicit SlimeValue(const Slime& slime_with_raw_field)
+        : slime()
+    {
+        size_t used = BinaryFormat::decode(slime_with_raw_field.get().asString(), slime);
+        EXPECT_GT(used, 0u);
+    }
+};
+
+/**
+ * Creates the struct data type "elem" used as the element type of the test fields.
+ * It has a string field "name" and an int field "weight".
+ */
+StructDataType::UP
+make_struct_elem_type()
+{
+    auto elem_type = std::make_unique<StructDataType>("elem");
+    elem_type->addField(Field("name", *DataType::STRING, true));
+    elem_type->addField(Field("weight", *DataType::INT, true));
+    return elem_type;
+}
+
+// Id of the result class registered in, and looked up from, the result config.
+constexpr uint32_t class_id{3};
+// Local document id that matching elements are registered for and that the field writer is invoked with.
+constexpr uint32_t doc_id{2};
+
+/**
+ * Test fixture component that owns the result config and produces a packed
+ * docsum blob with two fields:
+ *   - "array": array of "elem" structs
+ *   - "map":   map from string to "elem" struct
+ * Both fields are registered as RES_JSONSTRING and contain the elements a, b and c.
+ */
+class DocsumStore {
+private:
+    ResultConfig _config;
+    ResultPacker _packer;
+    StructDataType::UP _elem_type;
+    ArrayDataType _array_type;
+    MapDataType _map_type;
+
+    // Creates an "elem" struct value with the given name and weight.
+    StructFieldValue::UP make_elem_value(const std::string& name, int weight) const {
+        auto result = std::make_unique<StructFieldValue>(*_elem_type);
+        result->setValue("name", StringFieldValue(name));
+        result->setValue("weight", IntFieldValue(weight));
+        return result;
+    }
+
+    // Converts 'value' to its summary representation (expected to be a raw field value)
+    // and appends it to the docsum being packed.
+    void write_field_value(const FieldValue& value) {
+        auto converted = SummaryFieldConverter::convertSummaryField(false, value);
+        const auto* raw_field = dynamic_cast<const RawFieldValue*>(converted.get());
+        ASSERT_TRUE(raw_field);
+        auto raw_buf = raw_field->getAsRaw();
+        bool result = _packer.AddLongString(raw_buf.first, raw_buf.second);
+        ASSERT_TRUE(result);
+    }
+
+public:
+    DocsumStore()
+        : _config(),
+          _packer(&_config),
+          _elem_type(make_struct_elem_type()),
+          _array_type(*_elem_type),
+          _map_type(*DataType::STRING, *_elem_type)
+    {
+        auto* result_class = _config.AddResultClass("test", class_id);
+        EXPECT_TRUE(result_class->AddConfigEntry("array", ResType::RES_JSONSTRING));
+        EXPECT_TRUE(result_class->AddConfigEntry("map", ResType::RES_JSONSTRING));
+        _config.CreateEnumMaps();
+    }
+    const ResultConfig& get_config() const { return _config; }
+    const ResultClass* get_class() const { return _config.LookupResultClass(class_id); }
+
+    /**
+     * Packs the "array" and "map" fields and returns the resulting docsum blob.
+     *
+     * The packer calls are made outside of assert(): when NDEBUG is defined the
+     * argument of assert() is not evaluated, which would skip the calls entirely
+     * and return an uninitialized buffer pointer and length.
+     */
+    search::docsummary::DocsumStoreValue getMappedDocsum() {
+        bool init_ok = _packer.Init(class_id);
+        assert(init_ok);
+        (void) init_ok; // only read by assert()
+        {
+            ArrayFieldValue array_value(_array_type);
+            array_value.append(make_elem_value("a", 3));
+            array_value.append(make_elem_value("b", 5));
+            array_value.append(make_elem_value("c", 7));
+            write_field_value(array_value);
+        }
+        {
+            MapFieldValue map_value(_map_type);
+            map_value.put(StringFieldValue("a"), *make_elem_value("a", 3));
+            map_value.put(StringFieldValue("b"), *make_elem_value("b", 5));
+            map_value.put(StringFieldValue("c"), *make_elem_value("c", 7));
+            write_field_value(map_value);
+        }
+        const char* buf = nullptr;
+        uint32_t buf_len = 0;
+        bool blob_ok = _packer.GetDocsumBlob(&buf, &buf_len);
+        assert(blob_ok);
+        (void) blob_ok; // only read by assert()
+        return DocsumStoreValue(buf, buf_len);
+    }
+};
+
+/**
+ * Docsum state callback stub for the test.
+ * The feature and location hooks do nothing; fill_matching_elements() reports
+ * the configured element ids as matching for the configured field of 'doc_id'.
+ */
+class StateCallback : public GetDocsumsStateCallback {
+private:
+    std::string   _field_name;
+    ElementVector _matching_elements;
+
+public:
+    StateCallback(const std::string& field_name, const ElementVector& matching_elements)
+        : _field_name(field_name),
+          _matching_elements(matching_elements)
+    {
+    }
+    ~StateCallback() = default;
+
+    // Not relevant for this test: intentionally empty.
+    void FillSummaryFeatures(GetDocsumsState*, IDocsumEnvironment*) override {}
+    void FillRankFeatures(GetDocsumsState*, IDocsumEnvironment*) override {}
+    void ParseLocation(GetDocsumsState*) override {}
+
+    // Returns a fresh MatchingElements instance holding the configured elements; the mapper argument is ignored.
+    std::unique_ptr<MatchingElements> fill_matching_elements(const StructFieldMapper&) override {
+        auto elements = std::make_unique<MatchingElements>();
+        elements->add_matching_elements(doc_id, _field_name, _matching_elements);
+        return elements;
+    }
+};
+
+/**
+ * Test fixture that runs a MatchedElementsFilterDFW over a field of the docsum
+ * produced by DocsumStore and compares the filtered output with expected JSON.
+ */
+class MatchedElementsFilterTest : public ::testing::Test {
+private:
+    DocsumStore _store;
+
+    /**
+     * Runs the field writer for 'input_field_name' with 'matching_elements'
+     * reported as matching, and returns the decoded content of the written field.
+     */
+    SlimeValue run_filter_field_writer(const std::string& input_field_name, const ElementVector& matching_elements) {
+        int input_field_enum = _store.get_config().GetFieldNameEnum().Lookup(input_field_name.c_str());
+        EXPECT_GE(input_field_enum, 0);
+        MatchedElementsFilterDFW writer(input_field_name, input_field_enum);
+
+        GeneralResult docsum(_store.get_class());
+        docsum.inplaceUnpack(_store.getMappedDocsum());
+        StateCallback state_callback(input_field_name, matching_elements);
+        GetDocsumsState state(state_callback);
+        Slime written;
+        SlimeInserter inserter(written);
+
+        writer.insertField(doc_id, &docsum, &state, ResType::RES_JSONSTRING, inserter);
+        return SlimeValue(written);
+    }
+
+public:
+    MatchedElementsFilterTest()
+        : _store()
+    {
+    }
+
+    // Expects that filtering 'input_field_name' with 'matching_elements' yields 'exp_slime_as_json'.
+    void expect_filtered(const std::string& input_field_name, const ElementVector& matching_elements, const std::string& exp_slime_as_json) {
+        SlimeValue act = run_filter_field_writer(input_field_name, matching_elements);
+        SlimeValue exp(exp_slime_as_json);
+        EXPECT_EQ(exp.slime, act.slime);
+    }
+};
+
+// Only the array elements whose index is listed as matching are kept, in index order.
+TEST_F(MatchedElementsFilterTest, filters_elements_in_array_field_value)
+{
+    // JSON for each element of the "array" field, in element order.
+    const std::string elem_a = "{'name':'a','weight':3}";
+    const std::string elem_b = "{'name':'b','weight':5}";
+    const std::string elem_c = "{'name':'c','weight':7}";
+
+    expect_filtered("array", {}, "[]");
+    expect_filtered("array", {0}, "[" + elem_a + "]");
+    expect_filtered("array", {1}, "[" + elem_b + "]");
+    expect_filtered("array", {2}, "[" + elem_c + "]");
+    expect_filtered("array", {0, 1, 2}, "[" + elem_a + "," + elem_b + "," + elem_c + "]");
+}
+
+// Only the map entries whose index is listed as matching are kept, in index order.
+TEST_F(MatchedElementsFilterTest, filters_elements_in_map_field_value)
+{
+    // JSON for each key/value entry of the "map" field, in entry order.
+    const std::string entry_a = "{'key':'a','value':{'name':'a','weight':3}}";
+    const std::string entry_b = "{'key':'b','value':{'name':'b','weight':5}}";
+    const std::string entry_c = "{'key':'c','value':{'name':'c','weight':7}}";
+
+    expect_filtered("map", {}, "[]");
+    expect_filtered("map", {0}, "[" + entry_a + "]");
+    expect_filtered("map", {1}, "[" + entry_b + "]");
+    expect_filtered("map", {2}, "[" + entry_c + "]");
+    expect_filtered("map", {0, 1, 2}, "[" + entry_a + "," + entry_b + "," + entry_c + "]");
+}
+
+// The writer reads its input from the document store, so it must not report itself as generated.
+TEST_F(MatchedElementsFilterTest, field_writer_is_not_generated_as_it_depends_on_data_from_document_store)
+{
+    const std::string field_name = "foo";
+    const uint32_t field_enum = 0;
+    MatchedElementsFilterDFW filter(field_name, field_enum);
+    EXPECT_FALSE(filter.IsGenerated());
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
index dccf72b2fe7..fb6a399e71c 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
+++ b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
@@ -4,29 +4,30 @@ vespa_add_library(searchsummary_docsummary OBJECT
array_attribute_combiner_dfw.cpp
attribute_combiner_dfw.cpp
attribute_field_writer.cpp
- resultclass.cpp
- resultconfig.cpp
- resultpacker.cpp
- urlresult.cpp
- getdocsumargs.cpp
- docsumstate.cpp
+ attributedfw.cpp
+ docsumconfig.cpp
docsumfieldwriter.cpp
+ docsumstate.cpp
docsumwriter.cpp
- keywordextractor.cpp
- attributedfw.cpp
dynamicteaserdfw.cpp
- docsumconfig.cpp
- rankfeaturesdfw.cpp
- summaryfeaturesdfw.cpp
- juniperproperties.cpp
- textextractordfw.cpp
geoposdfw.cpp
- tokenizer.cpp
- positionsdfw.cpp
+ getdocsumargs.cpp
+ juniperproperties.cpp
+ keywordextractor.cpp
linguisticsannotation.cpp
+ matched_elements_filter_dfw.cpp
+ positionsdfw.cpp
+ rankfeaturesdfw.cpp
+ resultclass.cpp
+ resultconfig.cpp
+ resultpacker.cpp
searchdatatype.cpp
struct_map_attribute_combiner_dfw.cpp
+ summaryfeaturesdfw.cpp
summaryfieldconverter.cpp
+ textextractordfw.cpp
+ tokenizer.cpp
+ urlresult.cpp
AFTER
searchsummary_config
)
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp
new file mode 100644
index 00000000000..1b7533b53e3
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp
@@ -0,0 +1,87 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "docsumstate.h"
+#include "matched_elements_filter_dfw.h"
+#include <vespa/searchlib/common/matching_elements.h>
+#include <vespa/searchlib/common/struct_field_mapper.h>
+#include <vespa/vespalib/data/slime/binary_format.h>
+#include <vespa/vespalib/data/slime/slime.h>
+#include <vespa/vespalib/data/smart_buffer.h>
+#include <cassert>
+
+using vespalib::Slime;
+using vespalib::slime::ArrayInserter;
+using vespalib::slime::BinaryFormat;
+using vespalib::slime::Inserter;
+using vespalib::slime::Inspector;
+using vespalib::slime::SlimeInserter;
+using vespalib::slime::inject;
+
+namespace search::docsummary {
+
+MatchedElementsFilterDFW::MatchedElementsFilterDFW(const std::string& input_field_name, uint32_t input_field_enum)
+ : _input_field_name(input_field_name),
+ _input_field_enum(input_field_enum),
+ _struct_field_mapper(std::make_shared<StructFieldMapper>())
+{
+ // TODO: Take struct field mapper in constructor and populate based on available attribute vectors.
+}
+
+MatchedElementsFilterDFW::~MatchedElementsFilterDFW() = default;
+
+namespace {
+
+/**
+ * Resolves the content of 'entry' (using 'target_buf' as scratch space for the
+ * resolve call) and decodes it as binary slime into 'input_field'.
+ *
+ * The out-variables start as an empty range so that decoding never reads an
+ * indeterminate pointer or length if the resolve call leaves them untouched.
+ * NOTE(review): whether _resolve_field() can leave them unset is not visible here - confirm.
+ */
+void
+decode_input_field(const ResEntry& entry, search::RawBuf& target_buf, Slime& input_field)
+{
+    const char* buf = nullptr;
+    uint32_t buf_len = 0;
+    entry._resolve_field(&buf, &buf_len, &target_buf);
+    BinaryFormat::decode(vespalib::Memory(buf, buf_len), input_field);
+}
+
+/**
+ * Writes an array to 'output_field' containing the entries of the root of
+ * 'input_field' whose index occurs in 'matching_elems'.
+ *
+ * Both sequences are walked in step, which relies on 'matching_elems' being
+ * in strictly increasing order (checked by the assert in debug builds).
+ */
+void
+filter_matching_elements_in_input_field(const Slime& input_field, const std::vector<uint32_t>& matching_elems, Slime& output_field)
+{
+    SlimeInserter output_inserter(output_field);
+    Inspector& input_inspector = input_field.get();
+    ArrayInserter array_inserter(output_inserter.insertArray());
+
+    const size_t num_entries = input_inspector.entries();
+    const auto elems_end = matching_elems.end();
+    auto elems_itr = matching_elems.begin();
+    size_t i = 0;
+    // Stop as soon as either the input entries or the matching element ids are exhausted.
+    while ((i < num_entries) && (elems_itr != elems_end)) {
+        assert(*elems_itr >= i);
+        if (*elems_itr == i) {
+            inject(input_inspector[i], array_inserter);
+            ++elems_itr;
+        }
+        ++i;
+    }
+}
+
+/**
+ * Encodes 'output_field' in binary slime format and inserts the encoded bytes
+ * as a string value via 'target'.
+ */
+void
+encode_output_field(const Slime& output_field, Inserter& target)
+{
+    vespalib::SmartBuffer encode_buf(4096);
+    BinaryFormat::encode(output_field, encode_buf);
+    vespalib::Memory encoded = encode_buf.obtain();
+    target.insertString(encoded);
+}
+
+}
+
+/**
+ * Decodes the input field from 'result', keeps only the elements reported as
+ * matching for 'docid', and inserts the re-encoded field via 'target'.
+ * Nothing is inserted when 'result' has no entry for the input field.
+ * Only RES_JSONSTRING is supported as 'type'.
+ */
+void
+MatchedElementsFilterDFW::insertField(uint32_t docid, GeneralResult* result, GetDocsumsState *state,
+                                      ResType type, vespalib::slime::Inserter& target)
+{
+    assert(type == ResType::RES_JSONSTRING);
+    int entry_idx = result->GetClass()->GetIndexFromEnumValue(_input_field_enum);
+    ResEntry* entry = result->GetEntry(entry_idx);
+    if (entry == nullptr) {
+        return;
+    }
+
+    Slime input_field;
+    decode_input_field(*entry, state->_docSumFieldSpace, input_field);
+
+    const auto& matching_elems = state->get_matching_elements(*_struct_field_mapper).get_matching_elements(docid, _input_field_name);
+    Slime output_field;
+    filter_matching_elements_in_input_field(input_field, matching_elems, output_field);
+
+    encode_output_field(output_field, target);
+}
+
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h
new file mode 100644
index 00000000000..b96d3595b0a
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h
@@ -0,0 +1,27 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "docsumfieldwriter.h"
+
+namespace search::docsummary {
+
+/**
+ * Field writer that filters matched elements (according to the query) from a complex field
+ * (map of primitives, map of struct, array of struct) that is retrieved from the document store.
+ *
+ * The input field is identified both by name (used when looking up matching elements)
+ * and by enum value (used when locating the field in the unpacked docsum).
+ */
+class MatchedElementsFilterDFW : public IDocsumFieldWriter {
+private:
+    std::string                        _input_field_name;
+    uint32_t                           _input_field_enum;
+    std::shared_ptr<StructFieldMapper> _struct_field_mapper;
+
+public:
+    MatchedElementsFilterDFW(const std::string& input_field_name, uint32_t input_field_enum);
+    ~MatchedElementsFilterDFW();
+
+    // Always false: this writer reports itself as not generated.
+    bool IsGenerated() const override { return false; }
+
+    // Inserts the filtered version of the input field for 'docid' via 'target'.
+    void insertField(uint32_t docid,
+                     GeneralResult* result,
+                     GetDocsumsState *state,
+                     ResType type,
+                     vespalib::slime::Inserter& target) override;
+};
+
+}