about | summary | refs | log | tree | commit | diff | stats
path: root/streamingvisitors/src/tests
diff options
context:
space:
mode:
author: Tor Egge <Tor.Egge@online.no> 2023-01-24 16:37:15 +0100
committer: Tor Egge <Tor.Egge@online.no> 2023-01-24 16:37:15 +0100
commit: 5c2aca998192db6b0d4cbcd054aa11db158b298b (patch)
tree: beea1df066868901c116801fa27b61bfb004bcdf /streamingvisitors/src/tests
parent: bb1a582cbf3de4854243f88f05a73b355f00a3d0 (diff)
Add new KeywordExtractor with two factories (one each for indexed search
and streaming search).
Diffstat (limited to 'streamingvisitors/src/tests')
-rw-r--r-- streamingvisitors/src/tests/keyword_extractor_factory/CMakeLists.txt 9
-rw-r--r-- streamingvisitors/src/tests/keyword_extractor_factory/keyword_extractor_factory_test.cpp 116
2 files changed, 125 insertions, 0 deletions
diff --git a/streamingvisitors/src/tests/keyword_extractor_factory/CMakeLists.txt b/streamingvisitors/src/tests/keyword_extractor_factory/CMakeLists.txt
new file mode 100644
index 00000000000..54e2368f200
--- /dev/null
+++ b/streamingvisitors/src/tests/keyword_extractor_factory/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(streamingvisitors_keyword_extractor_factory_test_app TEST  # executable target for the keyword extractor factory test
+ SOURCES
+ keyword_extractor_factory_test.cpp  # the only source file of this target
+ DEPENDS
+ streamingvisitors  # presumably the library that provides vsm::KeywordExtractorFactory - confirm
+ GTest::GTest  # GoogleTest, which the test source is written against
+)
+vespa_add_test(NAME streamingvisitors_keyword_extractor_factory_test_app COMMAND streamingvisitors_keyword_extractor_factory_test_app)  # test entry whose command is the executable declared above
diff --git a/streamingvisitors/src/tests/keyword_extractor_factory/keyword_extractor_factory_test.cpp b/streamingvisitors/src/tests/keyword_extractor_factory/keyword_extractor_factory_test.cpp
new file mode 100644
index 00000000000..6ed4dfa1425
--- /dev/null
+++ b/streamingvisitors/src/tests/keyword_extractor_factory/keyword_extractor_factory_test.cpp
@@ -0,0 +1,116 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchsummary/docsummary/i_keyword_extractor.h>
+#include <vespa/vsm/vsm/keyword_extractor_factory.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using search::docsummary::IKeywordExtractor;
+using search::docsummary::IKeywordExtractorFactory;
+using vespa::config::search::vsm::VsmfieldsConfig;
+using vespa::config::search::vsm::VsmfieldsConfigBuilder;
+using vespa::config::search::vsm::VsmsummaryConfig;
+using vespa::config::search::vsm::VsmsummaryConfigBuilder;
+using vsm::KeywordExtractorFactory;
+
+class KeywordExtractorFactoryTest : public testing::Test {
+    std::unique_ptr<IKeywordExtractorFactory> _factory;  // created on demand, dropped on every config change
+    VsmfieldsConfigBuilder                    _fields;   // indexes and the fields they cover
+    VsmsummaryConfigBuilder                   _summary;  // summary fields and their source document fields
+protected:
+    KeywordExtractorFactoryTest();
+    ~KeywordExtractorFactoryTest() override;
+
+    // Builds a factory from the current contents of _fields and _summary.
+    void make_factory() { _factory = std::make_unique<KeywordExtractorFactory>(_fields, _summary); }
+
+    // Asks the extractor made for summary_field whether index_name is a legal index,
+    // creating the factory first if none exists.
+    bool check_index(const vespalib::string &index_name, const vespalib::string& summary_field) {
+        if (_factory == nullptr) { make_factory(); }
+        return _factory->make(summary_field)->isLegalIndex(index_name);
+    }
+
+    // Appends a fieldmap entry that maps summary_field_name to the given document fields,
+    // then drops the factory so that the next check_index() rebuilds it.
+    void add_summary_field(const vespalib::string& summary_field_name, const std::vector<vespalib::string>& field_names) {
+        VsmsummaryConfigBuilder::Fieldmap mapping;
+        mapping.summary = summary_field_name;
+        for (const vespalib::string& source_field : field_names) {
+            VsmsummaryConfigBuilder::Fieldmap::Document source;
+            source.field = source_field;
+            mapping.document.push_back(source);
+        }
+        _summary.fieldmap.push_back(mapping);
+        _factory.reset();
+    }
+    // Appends an index named index_name covering field_names to the last document type
+    // (a single type named "dummy" is created when there is none yet), then drops the factory.
+    void add_index(const vespalib::string& index_name, const std::vector<vespalib::string>& field_names) {
+        if (_fields.documenttype.empty()) {
+            _fields.documenttype.resize(1);
+            _fields.documenttype.back().name = "dummy";
+        }
+        VsmfieldsConfigBuilder::Documenttype::Index new_index;
+        new_index.name = index_name;
+        for (const vespalib::string& member : field_names) {
+            VsmfieldsConfigBuilder::Documenttype::Index::Field entry;
+            entry.name = member;
+            new_index.field.push_back(entry);
+        }
+        _fields.documenttype.back().index.push_back(new_index);
+        _factory.reset();
+    }
+};
+
+
+// Special members are defined out of line. Default construction leaves the
+// factory pointer null (check_index() creates the factory on demand) and
+// default-constructs both config builders; destruction needs nothing beyond
+// the member destructors.
+KeywordExtractorFactoryTest::KeywordExtractorFactoryTest() = default;
+
+KeywordExtractorFactoryTest::~KeywordExtractorFactoryTest() = default;
+
+TEST_F(KeywordExtractorFactoryTest, empty_config) {
+    // Nothing has been added to either config builder, so no index is expected to be legal.
+    EXPECT_FALSE(check_index("foo", "foo"));
+}
+
+TEST_F(KeywordExtractorFactoryTest, implied_identity_mapping_for_summary_field) {
+    // No fieldmap is added: summary field "bar" is expected to resolve to document field "bar", which index "foo" covers.
+    add_index("foo", {"bar"});
+    EXPECT_FALSE(check_index("foo", "foo"));
+    EXPECT_TRUE(check_index("foo", "bar"));
+}
+
+TEST_F(KeywordExtractorFactoryTest, two_source_fields_for_summary_field) {
+    // Summary field "foo" is sourced from "bar" and "baz", each of which is covered by an index of the same name.
+    add_index("bar", {"bar"});
+    add_index("baz", {"baz"});
+    add_summary_field("foo", {"bar", "baz"});
+    EXPECT_FALSE(check_index("foo", "foo"));
+    EXPECT_TRUE(check_index("bar", "foo"));
+    EXPECT_TRUE(check_index("bar", "bar"));
+    EXPECT_TRUE(check_index("baz", "foo"));
+    EXPECT_TRUE(check_index("baz", "baz"));
+}
+
+TEST_F(KeywordExtractorFactoryTest, two_source_fields_for_summary_field_and_multiple_indexes) {
+    // Same setup as the two-source case plus index "both", which covers "bar" and "baz" at once.
+    add_index("bar", {"bar"});
+    add_index("baz", {"baz"});
+    add_index("both", {"bar", "baz"});
+    add_summary_field("foo", {"bar", "baz"});
+    EXPECT_FALSE(check_index("foo", "foo"));
+    EXPECT_TRUE(check_index("both", "foo"));
+    EXPECT_TRUE(check_index("bar", "foo"));
+    EXPECT_TRUE(check_index("baz", "foo"));
+    EXPECT_TRUE(check_index("both", "bar"));
+    EXPECT_TRUE(check_index("bar", "bar"));
+    EXPECT_FALSE(check_index("baz", "bar"));
+    EXPECT_TRUE(check_index("both", "baz"));
+    EXPECT_FALSE(check_index("bar", "baz"));
+    EXPECT_TRUE(check_index("baz", "baz"));
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()