aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2023-01-24 16:37:15 +0100
committerTor Egge <Tor.Egge@online.no>2023-01-24 16:37:15 +0100
commit5c2aca998192db6b0d4cbcd054aa11db158b298b (patch)
treebeea1df066868901c116801fa27b61bfb004bcdf
parentbb1a582cbf3de4854243f88f05a73b355f00a3d0 (diff)
Add new KeywordExtractor with two factories (one each for indexed search
and streaming search).
-rw-r--r--searchsummary/CMakeLists.txt1
-rw-r--r--searchsummary/src/tests/docsummary/keyword_extractor_factory/CMakeLists.txt9
-rw-r--r--searchsummary/src/tests/docsummary/keyword_extractor_factory/keyword_extractor_factory_test.cpp73
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt2
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor.cpp22
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor.h24
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor_factory.cpp41
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor_factory.h26
-rw-r--r--streamingvisitors/CMakeLists.txt1
-rw-r--r--streamingvisitors/src/tests/keyword_extractor_factory/CMakeLists.txt9
-rw-r--r--streamingvisitors/src/tests/keyword_extractor_factory/keyword_extractor_factory_test.cpp116
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt1
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/keyword_extractor_factory.cpp80
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/keyword_extractor_factory.h39
14 files changed, 444 insertions, 0 deletions
diff --git a/searchsummary/CMakeLists.txt b/searchsummary/CMakeLists.txt
index 9c9079e6ed5..451c90c752d 100644
--- a/searchsummary/CMakeLists.txt
+++ b/searchsummary/CMakeLists.txt
@@ -20,6 +20,7 @@ vespa_define_module(
src/tests/docsummary/attribute_combiner
src/tests/docsummary/attributedfw
src/tests/docsummary/document_id_dfw
+ src/tests/docsummary/keyword_extractor_factory
src/tests/docsummary/matched_elements_filter
src/tests/docsummary/result_class
src/tests/docsummary/slime_filler
diff --git a/searchsummary/src/tests/docsummary/keyword_extractor_factory/CMakeLists.txt b/searchsummary/src/tests/docsummary/keyword_extractor_factory/CMakeLists.txt
new file mode 100644
index 00000000000..1cb555f3bd8
--- /dev/null
+++ b/searchsummary/src/tests/docsummary/keyword_extractor_factory/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Test executable for the indexed-search keyword extractor factory.
+# It is built from a single source file and depends on the searchsummary
+# library and GoogleTest.
+vespa_add_executable(searchsummary_keyword_extractor_factory_test_app TEST
+ SOURCES
+ keyword_extractor_factory_test.cpp
+ DEPENDS
+ searchsummary
+ GTest::GTest
+)
+# Registers the executable above as a test; the test name and the command are the same.
+vespa_add_test(NAME searchsummary_keyword_extractor_factory_test_app COMMAND searchsummary_keyword_extractor_factory_test_app)
diff --git a/searchsummary/src/tests/docsummary/keyword_extractor_factory/keyword_extractor_factory_test.cpp b/searchsummary/src/tests/docsummary/keyword_extractor_factory/keyword_extractor_factory_test.cpp
new file mode 100644
index 00000000000..8ba91699ae6
--- /dev/null
+++ b/searchsummary/src/tests/docsummary/keyword_extractor_factory/keyword_extractor_factory_test.cpp
@@ -0,0 +1,73 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchcommon/common/schema.h>
+#include <vespa/searchsummary/docsummary/i_keyword_extractor.h>
+#include <vespa/searchsummary/docsummary/keyword_extractor_factory.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using search::docsummary::IKeywordExtractor;
+using search::docsummary::IKeywordExtractorFactory;
+using search::docsummary::KeywordExtractorFactory;
+using search::index::Schema;
+
+using FieldSet = Schema::FieldSet;
+
+/*
+ * Test fixture that owns a schema and a lazily created
+ * KeywordExtractorFactory built from that schema.
+ *
+ * The factory is dropped every time the schema is changed, so the next
+ * check_index() call builds a fresh factory that sees the change.
+ */
+class KeywordExtractorFactoryTest : public testing::Test {
+    std::unique_ptr<IKeywordExtractorFactory> _factory;
+    Schema                                    _schema;
+
+protected:
+    KeywordExtractorFactoryTest();
+    ~KeywordExtractorFactoryTest() override;
+
+    // (Re)creates the factory from the current state of the schema.
+    void make_factory() {
+        _factory = std::make_unique<KeywordExtractorFactory>(_schema);
+    }
+
+    // Returns true when index_name is a legal index for the extractor that
+    // the factory creates for summary_field.
+    bool check_index(const vespalib::string &index_name, const vespalib::string& summary_field) {
+        if (_factory == nullptr) {
+            make_factory();
+        }
+        return _factory->make(summary_field)->isLegalIndex(index_name);
+    }
+
+    // Adds a field set named field_set_name containing field_names to the
+    // schema and invalidates the current factory.
+    void add_field_set(const vespalib::string& field_set_name, const std::vector<vespalib::string>& field_names) {
+        FieldSet new_set(field_set_name);
+        for (const auto& member : field_names) {
+            new_set.addField(member);
+        }
+        _schema.addFieldSet(new_set);
+        _factory.reset();
+    }
+};
+
+
+// The fixture starts out without a factory; check_index() creates one on
+// demand.
+KeywordExtractorFactoryTest::KeywordExtractorFactoryTest()
+    : testing::Test(),
+      _factory(nullptr)
+{
+}
+
+// Defined out of line, next to the constructor.
+KeywordExtractorFactoryTest::~KeywordExtractorFactoryTest() = default;
+
+// With no field sets in the schema, the only legal index for a summary
+// field is the index with the same name as the summary field itself.
+TEST_F(KeywordExtractorFactoryTest, empty_schema)
+{
+ EXPECT_TRUE(check_index("foo", "foo"));
+ EXPECT_FALSE(check_index("bar", "foo"));
+ EXPECT_FALSE(check_index("foo", "bar"));
+}
+
+// A field set containing the summary field is a legal index for that field,
+// in addition to the index named after the field itself.
+TEST_F(KeywordExtractorFactoryTest, field_set_is_checked)
+{
+ add_field_set("ab", {"cd", "de"});
+ add_field_set("gh", {"cd"});
+ // "cd" is a member of both field sets.
+ EXPECT_TRUE(check_index("cd", "cd"));
+ EXPECT_TRUE(check_index("ab", "cd"));
+ EXPECT_TRUE(check_index("gh", "cd"));
+ // "de" is only a member of field set "ab".
+ EXPECT_TRUE(check_index("de", "de"));
+ EXPECT_TRUE(check_index("ab", "de"));
+ EXPECT_FALSE(check_index("gh", "de"));
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
index cfc3eb6536d..34e902461f4 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
+++ b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
@@ -23,6 +23,8 @@ vespa_add_library(searchsummary_docsummary OBJECT
juniper_dfw_term_visitor.cpp
juniper_query_adapter.cpp
juniperproperties.cpp
+ keyword_extractor.cpp
+ keyword_extractor_factory.cpp
legacy_keyword_extractor.cpp
legacy_keyword_extractor_factory.cpp
linguisticsannotation.cpp
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor.cpp b/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor.cpp
new file mode 100644
index 00000000000..71b685c6317
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor.cpp
@@ -0,0 +1,22 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "keyword_extractor.h"
+#include <vespa/vespalib/stllike/hash_set.hpp>
+
+namespace search::docsummary {
+
+// The set of legal index names is taken by value and moved into the member,
+// so callers that pass an rvalue avoid copying the set.
+KeywordExtractor::KeywordExtractor(StringSet indexes)
+    : IKeywordExtractor(),
+      _indexes(std::move(indexes))
+{
+}
+
+// Defined here rather than in the header.
+KeywordExtractor::~KeywordExtractor() = default;
+
+// Returns true when idx is one of the index names this extractor was
+// constructed with.
+bool
+KeywordExtractor::isLegalIndex(vespalib::stringref idx) const
+{
+    const bool is_known_index = _indexes.contains(idx);
+    return is_known_index;
+}
+
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor.h b/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor.h
new file mode 100644
index 00000000000..a2b1fba96f1
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor.h
@@ -0,0 +1,24 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "i_keyword_extractor.h"
+#include <vespa/vespalib/stllike/hash_set.h>
+
+namespace search::docsummary {
+
+/*
+ * Class for checking if query term index name indicates that
+ * related query term is useful from the perspective of juniper.
+ *
+ * An instance holds a set of index names. isLegalIndex() reports whether
+ * a given index name is a member of that set.
+ */
+class KeywordExtractor : public IKeywordExtractor
+{
+    using StringSet = vespalib::hash_set<vespalib::string>;
+    StringSet _indexes; // index names accepted by isLegalIndex()
+public:
+    // explicit: a set of strings must never silently convert into an
+    // extractor. The set is taken by value so callers can move it in.
+    explicit KeywordExtractor(StringSet indexes);
+    ~KeywordExtractor() override;
+    // Returns true if idx is one of the index names given at construction.
+    bool isLegalIndex(vespalib::stringref idx) const override;
+};
+
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor_factory.cpp b/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor_factory.cpp
new file mode 100644
index 00000000000..f749e6e42a1
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor_factory.cpp
@@ -0,0 +1,41 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "keyword_extractor_factory.h"
+#include "keyword_extractor.h"
+#include <vespa/searchcommon/common/schema.h>
+#include <vespa/vespalib/stllike/hash_map.hpp>
+#include <vespa/vespalib/stllike/hash_set.hpp>
+
+namespace search::docsummary {
+
+// Inverts the field sets of the schema: for every field that is a member of
+// a field set, remember the names of all field sets it belongs to.
+KeywordExtractorFactory::KeywordExtractorFactory(const search::index::Schema& schema)
+    : IKeywordExtractorFactory(),
+      _index_map()
+{
+    const uint32_t num_field_sets = schema.getNumFieldSets();
+    for (uint32_t set_idx = 0; set_idx < num_field_sets; ++set_idx) {
+        const auto& field_set = schema.getFieldSet(set_idx);
+        for (const auto& member : field_set.getFields()) {
+            // operator[] creates the (empty) vector on first use.
+            _index_map[member].emplace_back(field_set.getName());
+        }
+    }
+}
+
+// Defined here rather than in the header.
+KeywordExtractorFactory::~KeywordExtractorFactory() = default;
+
+// Creates an extractor for input_field. The legal indexes are the field
+// itself plus every field set that contains the field.
+std::shared_ptr<const IKeywordExtractor>
+KeywordExtractorFactory::make(vespalib::stringref input_field) const
+{
+    vespalib::hash_set<vespalib::string> legal_indexes;
+    // The field is always searchable through an index with its own name.
+    legal_indexes.insert(input_field);
+    if (auto pos = _index_map.find(input_field); pos != _index_map.end()) {
+        for (const auto& field_set_name : pos->second) {
+            legal_indexes.insert(field_set_name);
+        }
+    }
+    return std::make_shared<KeywordExtractor>(std::move(legal_indexes));
+}
+
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor_factory.h b/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor_factory.h
new file mode 100644
index 00000000000..e22475eb842
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor_factory.h
@@ -0,0 +1,26 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "i_keyword_extractor_factory.h"
+#include <vespa/searchcommon/common/schema.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vector>
+
+namespace search::index { class Schema; }
+
+namespace search::docsummary {
+
+/*
+ * Class for creating an instance of IKeywordExtractor.
+ *
+ * The factory is built from a schema. For each field that is a member of
+ * one or more field sets it remembers the names of those field sets, and
+ * make() hands out extractors that accept the field's own name and the
+ * names of the field sets containing it.
+ */
+class KeywordExtractorFactory : public IKeywordExtractorFactory
+{
+    // field name -> names of the field sets that contain the field
+    vespalib::hash_map<vespalib::string, std::vector<vespalib::string>> _index_map;
+public:
+    // explicit: a Schema must never silently convert into a factory.
+    explicit KeywordExtractorFactory(const search::index::Schema& schema);
+    ~KeywordExtractorFactory() override;
+    // Creates an extractor for the given input field.
+    std::shared_ptr<const IKeywordExtractor> make(vespalib::stringref input_field) const override;
+};
+
+}
diff --git a/streamingvisitors/CMakeLists.txt b/streamingvisitors/CMakeLists.txt
index 0e7789a21b9..adfee1a76ae 100644
--- a/streamingvisitors/CMakeLists.txt
+++ b/streamingvisitors/CMakeLists.txt
@@ -26,6 +26,7 @@ vespa_define_module(
src/tests/charbuffer
src/tests/docsum
src/tests/document
+ src/tests/keyword_extractor_factory
src/tests/searcher
src/tests/textutil
)
diff --git a/streamingvisitors/src/tests/keyword_extractor_factory/CMakeLists.txt b/streamingvisitors/src/tests/keyword_extractor_factory/CMakeLists.txt
new file mode 100644
index 00000000000..54e2368f200
--- /dev/null
+++ b/streamingvisitors/src/tests/keyword_extractor_factory/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Test executable for the streaming-search keyword extractor factory.
+# It is built from a single source file and depends on the streamingvisitors
+# library and GoogleTest.
+vespa_add_executable(streamingvisitors_keyword_extractor_factory_test_app TEST
+ SOURCES
+ keyword_extractor_factory_test.cpp
+ DEPENDS
+ streamingvisitors
+ GTest::GTest
+)
+# Registers the executable above as a test; the test name and the command are the same.
+vespa_add_test(NAME streamingvisitors_keyword_extractor_factory_test_app COMMAND streamingvisitors_keyword_extractor_factory_test_app)
diff --git a/streamingvisitors/src/tests/keyword_extractor_factory/keyword_extractor_factory_test.cpp b/streamingvisitors/src/tests/keyword_extractor_factory/keyword_extractor_factory_test.cpp
new file mode 100644
index 00000000000..6ed4dfa1425
--- /dev/null
+++ b/streamingvisitors/src/tests/keyword_extractor_factory/keyword_extractor_factory_test.cpp
@@ -0,0 +1,116 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchsummary/docsummary/i_keyword_extractor.h>
+#include <vespa/vsm/vsm/keyword_extractor_factory.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using search::docsummary::IKeywordExtractor;
+using search::docsummary::IKeywordExtractorFactory;
+using vespa::config::search::vsm::VsmfieldsConfig;
+using vespa::config::search::vsm::VsmfieldsConfigBuilder;
+using vespa::config::search::vsm::VsmsummaryConfig;
+using vespa::config::search::vsm::VsmsummaryConfigBuilder;
+using vsm::KeywordExtractorFactory;
+
+/*
+ * Test fixture that owns vsmfields and vsmsummary config builders and a
+ * lazily created vsm::KeywordExtractorFactory built from them.
+ *
+ * The factory is dropped every time one of the configs is changed, so the
+ * next check_index() call builds a fresh factory that sees the change.
+ */
+class KeywordExtractorFactoryTest : public testing::Test {
+    std::unique_ptr<IKeywordExtractorFactory> _factory;
+    VsmfieldsConfigBuilder                    _fields;
+    VsmsummaryConfigBuilder                   _summary;
+protected:
+    KeywordExtractorFactoryTest();
+    ~KeywordExtractorFactoryTest() override;
+
+    // (Re)creates the factory from the current state of the two configs.
+    void make_factory() {
+        _factory = std::make_unique<KeywordExtractorFactory>(_fields, _summary);
+    }
+
+    // Returns true when index_name is a legal index for the extractor that
+    // the factory creates for summary_field.
+    bool check_index(const vespalib::string &index_name, const vespalib::string& summary_field) {
+        if (_factory == nullptr) {
+            make_factory();
+        }
+        return _factory->make(summary_field)->isLegalIndex(index_name);
+    }
+
+    // Adds a summary field mapping (summary field -> source document fields)
+    // to the vsmsummary config and invalidates the current factory.
+    void add_summary_field(const vespalib::string& summary_field_name, const std::vector<vespalib::string>& field_names)
+    {
+        VsmsummaryConfigBuilder::Fieldmap mapping;
+        mapping.summary = summary_field_name;
+        for (const auto& source_name : field_names) {
+            VsmsummaryConfigBuilder::Fieldmap::Document source;
+            source.field = source_name;
+            mapping.document.emplace_back(source);
+        }
+        _summary.fieldmap.emplace_back(mapping);
+        _factory.reset();
+    }
+
+    // Adds an index covering the given document fields to the vsmfields
+    // config and invalidates the current factory. All indexes end up in a
+    // single document type, which is created on first use.
+    void add_index(const vespalib::string& index_name, const std::vector<vespalib::string>& field_names)
+    {
+        auto& doc_types = _fields.documenttype;
+        if (doc_types.empty()) {
+            doc_types.resize(1);
+            doc_types.back().name = "dummy";
+        }
+        VsmfieldsConfigBuilder::Documenttype::Index new_index;
+        new_index.name = index_name;
+        for (const auto& member_name : field_names) {
+            VsmfieldsConfigBuilder::Documenttype::Index::Field member;
+            member.name = member_name;
+            new_index.field.emplace_back(member);
+        }
+        doc_types.back().index.emplace_back(new_index);
+        _factory.reset();
+    }
+};
+
+
+// The fixture starts out without a factory; check_index() creates one on
+// demand.
+KeywordExtractorFactoryTest::KeywordExtractorFactoryTest()
+    : testing::Test(),
+      _factory(nullptr)
+{
+}
+
+// Defined out of line, next to the constructor.
+KeywordExtractorFactoryTest::~KeywordExtractorFactoryTest() = default;
+
+// With no indexes configured nothing is legal, not even an index with the
+// same name as the summary field.
+TEST_F(KeywordExtractorFactoryTest, empty_config)
+{
+ EXPECT_FALSE(check_index("foo", "foo"));
+}
+
+// A summary field without an explicit mapping is treated as the document
+// field of the same name, so summary field "bar" inherits index "foo".
+TEST_F(KeywordExtractorFactoryTest, implied_identity_mapping_for_summary_field)
+{
+ add_index("foo", {"bar"});
+ // No document field "foo" is covered by any index.
+ EXPECT_FALSE(check_index("foo", "foo"));
+ EXPECT_TRUE(check_index("foo", "bar"));
+}
+
+// A summary field with two source document fields gets the indexes of both
+// source fields.
+TEST_F(KeywordExtractorFactoryTest, two_source_fields_for_summary_field)
+{
+ add_index("bar", {"bar"});
+ add_index("baz", {"baz"});
+ add_summary_field("foo", {"bar", "baz"});
+ // "foo" is only a summary field name; there is no index called "foo".
+ EXPECT_FALSE(check_index("foo", "foo"));
+ EXPECT_TRUE(check_index("bar", "foo"));
+ EXPECT_TRUE(check_index("bar", "bar"));
+ EXPECT_TRUE(check_index("baz", "foo"));
+ EXPECT_TRUE(check_index("baz", "baz"));
+}
+
+// As above, but with an extra index "both" that covers both source fields.
+TEST_F(KeywordExtractorFactoryTest, two_source_fields_for_summary_field_and_multiple_indexes)
+{
+ add_index("bar", {"bar"});
+ add_index("baz", {"baz"});
+ add_index("both", {"bar", "baz"});
+ add_summary_field("foo", {"bar", "baz"});
+ // Summary field "foo" gets every index of its two source fields.
+ EXPECT_FALSE(check_index("foo", "foo"));
+ EXPECT_TRUE(check_index("both", "foo"));
+ EXPECT_TRUE(check_index("bar", "foo"));
+ EXPECT_TRUE(check_index("baz", "foo"));
+ // Summary field "bar" (identity mapping) only gets the indexes covering "bar".
+ EXPECT_TRUE(check_index("both", "bar"));
+ EXPECT_TRUE(check_index("bar", "bar"));
+ EXPECT_FALSE(check_index("baz", "bar"));
+ // Summary field "baz" (identity mapping) only gets the indexes covering "baz".
+ EXPECT_TRUE(check_index("both", "baz"));
+ EXPECT_FALSE(check_index("bar", "baz"));
+ EXPECT_TRUE(check_index("baz", "baz"));
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt b/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt
index cf121aead4b..67acbc1a391 100644
--- a/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt
+++ b/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt
@@ -6,6 +6,7 @@ vespa_add_library(vsm_vsmbase OBJECT
docsum_field_writer_factory.cpp
fieldsearchspec.cpp
flattendocsumwriter.cpp
+ keyword_extractor_factory.cpp
snippetmodifier.cpp
vsm-adapter.cpp
DEPENDS
diff --git a/streamingvisitors/src/vespa/vsm/vsm/keyword_extractor_factory.cpp b/streamingvisitors/src/vespa/vsm/vsm/keyword_extractor_factory.cpp
new file mode 100644
index 00000000000..5319f554c81
--- /dev/null
+++ b/streamingvisitors/src/vespa/vsm/vsm/keyword_extractor_factory.cpp
@@ -0,0 +1,80 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "keyword_extractor_factory.h"
+#include <vespa/searchsummary/docsummary/keyword_extractor.h>
+#include <vespa/vespalib/stllike/hash_map.hpp>
+#include <vespa/vespalib/stllike/hash_set.hpp>
+#include <cassert>
+#include <vespa/log/log.h>
+LOG_SETUP(".vsm.keyword_extractor_factory");
+
+using search::docsummary::IKeywordExtractor;
+using search::docsummary::IKeywordExtractorFactory;
+using search::docsummary::KeywordExtractor;
+using vespa::config::search::vsm::VsmfieldsConfig;
+using vespa::config::search::vsm::VsmsummaryConfig;
+
+namespace vsm {
+
+// Builds both lookup tables from the configs up front; make() afterwards
+// only reads from them.
+KeywordExtractorFactory::KeywordExtractorFactory(VsmfieldsConfig& vsm_fields_config,
+                                                 VsmsummaryConfig& vsm_summary_config)
+    : IKeywordExtractorFactory(),
+      _index_map(),
+      _field_map()
+{
+    populate_index_map(vsm_fields_config);   // document field -> indexes
+    populate_field_map(vsm_summary_config);  // vsm summary field -> document fields
+}
+
+// Defined here rather than in the header.
+KeywordExtractorFactory::~KeywordExtractorFactory() = default;
+
+// Records, for every document field mentioned by an index in any document
+// type, the names of the indexes that cover the field.
+void
+KeywordExtractorFactory::populate_index_map(VsmfieldsConfig& vsm_fields_config)
+{
+    for (const auto& document_type : vsm_fields_config.documenttype) {
+        for (const auto& index_spec : document_type.index) {
+            const auto& index_name = index_spec.name;
+            for (const auto& indexed_field : index_spec.field) {
+                _index_map[indexed_field.name].insert(index_name);
+            }
+        }
+    }
+}
+
+// Records, for every vsm summary field in the field map config, the names
+// of the document fields it is built from.
+void
+KeywordExtractorFactory::populate_field_map(VsmsummaryConfig& vsm_summary_config)
+{
+    for (const auto& mapping : vsm_summary_config.fieldmap) {
+        const auto& summary_name = mapping.summary;
+        for (const auto& source : mapping.document) {
+            _field_map[summary_name].insert(source.field);
+        }
+    }
+}
+
+// Adds the names of all indexes covering the document field `field` to
+// `indexes`. Does nothing when no index covers the field.
+void
+KeywordExtractorFactory::populate_indexes(StringSet& indexes, const vespalib::string& field) const
+{
+    auto pos = _index_map.find(field);
+    if (pos == _index_map.end()) {
+        return;
+    }
+    for (const auto& index_name : pos->second) {
+        indexes.insert(index_name);
+    }
+}
+
+// Creates an extractor for the vsm summary field input_field. The legal
+// indexes are those covering the document fields the summary field is
+// built from.
+std::shared_ptr<const IKeywordExtractor>
+KeywordExtractorFactory::make(vespalib::stringref input_field) const
+{
+    StringSet legal_indexes;
+    if (auto mapped = _field_map.find(input_field); mapped != _field_map.end()) {
+        for (const auto& document_field : mapped->second) {
+            populate_indexes(legal_indexes, document_field);
+        }
+    } else {
+        // Assume identity mapping vsm summary field -> document field
+        populate_indexes(legal_indexes, input_field);
+    }
+    return std::make_shared<KeywordExtractor>(std::move(legal_indexes));
+}
+
+}
diff --git a/streamingvisitors/src/vespa/vsm/vsm/keyword_extractor_factory.h b/streamingvisitors/src/vespa/vsm/vsm/keyword_extractor_factory.h
new file mode 100644
index 00000000000..6ffcbd6f84b
--- /dev/null
+++ b/streamingvisitors/src/vespa/vsm/vsm/keyword_extractor_factory.h
@@ -0,0 +1,39 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchsummary/docsummary/i_keyword_extractor_factory.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vespa/vespalib/stllike/hash_set.h>
+#include <vespa/vsm/config/config-vsmfields.h>
+#include <vespa/vsm/config/config-vsmsummary.h>
+
+namespace vsm {
+
+/*
+ * Factory creating IKeywordExtractor instances for streaming search.
+ *
+ * The summary framework in the searchsummary module treats vsm summary
+ * fields as document fields, cf. IDocsumStoreDocument. The factory
+ * therefore resolves a vsm summary field to its source document fields
+ * (or to the document field of the same name when no mapping exists) and
+ * collects the indexes covering those fields.
+ */
+class KeywordExtractorFactory : public search::docsummary::IKeywordExtractorFactory
+{
+public:
+    using VsmfieldsConfig = vespa::config::search::vsm::VsmfieldsConfig;
+    using VsmsummaryConfig = vespa::config::search::vsm::VsmsummaryConfig;
+private:
+    using StringSet = vespalib::hash_set<vespalib::string>;
+    using StringSetMap = vespalib::hash_map<vespalib::string, StringSet>;
+
+    StringSetMap _index_map; // document field -> indexes
+    StringSetMap _field_map; // vsm summary field -> document fields
+
+    // Fills _index_map from the vsmfields config.
+    void populate_index_map(VsmfieldsConfig& vsm_fields_config);
+    // Fills _field_map from the vsmsummary config.
+    void populate_field_map(VsmsummaryConfig& vsm_summary_config);
+    // Adds the indexes covering document field `field` to `indexes`.
+    void populate_indexes(StringSet& indexes, const vespalib::string& field) const;
+public:
+    KeywordExtractorFactory(VsmfieldsConfig& vsm_fields_config,
+                            VsmsummaryConfig& vsm_summary_config);
+    ~KeywordExtractorFactory() override;
+    // Creates an extractor for the given vsm summary field.
+    std::shared_ptr<const search::docsummary::IKeywordExtractor> make(vespalib::stringref input_field) const override;
+};
+
+}