diff options
41 files changed, 529 insertions, 529 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp index 2b3a3de3b45..16545058d67 100644 --- a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp +++ b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp @@ -10,8 +10,8 @@ #include <vespa/searchcore/proton/flushengine/shrink_lid_space_flush_target.h> #include <vespa/vespalib/util/lambdatask.h> #include <vespa/searchsummary/docsummary/docsum_field_writer_factory.h> -#include <vespa/searchsummary/docsummary/i_keyword_extractor.h> -#include <vespa/searchsummary/docsummary/legacy_keyword_extractor_factory.h> +#include <vespa/searchsummary/docsummary/i_query_term_filter.h> +#include <vespa/searchsummary/docsummary/legacy_query_term_filter_factory.h> #include <vespa/vespalib/util/exceptions.h> #include <vespa/fastlib/text/normwordfolder.h> #include <vespa/config-summary.h> @@ -96,8 +96,8 @@ SummarySetup(const vespalib::string & baseDir, const SummaryConfig & summaryCfg, _juniperConfig = std::make_unique<juniper::Juniper>(&_juniperProps, _wordFolder.get()); auto resultConfig = std::make_unique<ResultConfig>(); (void) schema; - std::unique_ptr<IKeywordExtractorFactory> keyword_extractor_factory = std::make_unique<LegacyKeywordExtractorFactory>(std::shared_ptr<IKeywordExtractor>()); - auto docsum_field_writer_factory = std::make_unique<DocsumFieldWriterFactory>(summaryCfg.usev8geopositions, *this, *keyword_extractor_factory); + std::unique_ptr<IQueryTermFilterFactory> query_term_filter_factory = std::make_unique<LegacyQueryTermFilterFactory>(std::shared_ptr<IQueryTermFilter>()); + auto docsum_field_writer_factory = std::make_unique<DocsumFieldWriterFactory>(summaryCfg.usev8geopositions, *this, *query_term_filter_factory); if (!resultConfig->readConfig(summaryCfg, make_string("SummaryManager(%s)", baseDir.c_str()).c_str(), *docsum_field_writer_factory)) { std::ostringstream oss; diff --git a/searchsummary/CMakeLists.txt b/searchsummary/CMakeLists.txt index 451c90c752d..47d5756f3ee 100644 --- a/searchsummary/CMakeLists.txt +++ b/searchsummary/CMakeLists.txt @@ -20,8 +20,8 @@ vespa_define_module( src/tests/docsummary/attribute_combiner src/tests/docsummary/attributedfw src/tests/docsummary/document_id_dfw - src/tests/docsummary/keyword_extractor_factory src/tests/docsummary/matched_elements_filter + src/tests/docsummary/query_term_filter_factory src/tests/docsummary/result_class src/tests/docsummary/slime_filler src/tests/docsummary/slime_filler_filter diff --git a/searchsummary/src/tests/docsummary/keyword_extractor_factory/CMakeLists.txt b/searchsummary/src/tests/docsummary/keyword_extractor_factory/CMakeLists.txt deleted file mode 100644 index 1cb555f3bd8..00000000000 --- a/searchsummary/src/tests/docsummary/keyword_extractor_factory/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchsummary_keyword_extractor_factory_test_app TEST - SOURCES - keyword_extractor_factory_test.cpp - DEPENDS - searchsummary - GTest::GTest -) -vespa_add_test(NAME searchsummary_keyword_extractor_factory_test_app COMMAND searchsummary_keyword_extractor_factory_test_app) diff --git a/searchsummary/src/tests/docsummary/keyword_extractor_factory/keyword_extractor_factory_test.cpp b/searchsummary/src/tests/docsummary/keyword_extractor_factory/keyword_extractor_factory_test.cpp deleted file mode 100644 index f65df393ed9..00000000000 --- a/searchsummary/src/tests/docsummary/keyword_extractor_factory/keyword_extractor_factory_test.cpp +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/searchcommon/common/schema.h> -#include <vespa/searchsummary/docsummary/i_keyword_extractor.h> -#include <vespa/searchsummary/docsummary/keyword_extractor_factory.h> -#include <vespa/vespalib/gtest/gtest.h> - -using search::docsummary::IKeywordExtractor; -using search::docsummary::IKeywordExtractorFactory; -using search::docsummary::KeywordExtractorFactory; -using search::index::Schema; - -using FieldSet = Schema::FieldSet; - -class KeywordExtractorFactoryTest : public testing::Test { - std::unique_ptr<IKeywordExtractorFactory> _factory; - Schema _schema; - -protected: - KeywordExtractorFactoryTest(); - ~KeywordExtractorFactoryTest() override; - - void make_factory() { - _factory = std::make_unique<KeywordExtractorFactory>(_schema); - } - - bool check_index(const vespalib::string &index_name, const vespalib::string& summary_field) { - if (!_factory) { - make_factory(); - } - auto extractor = _factory->make(summary_field); - return extractor->isLegalIndex(index_name); - } - - void add_field_set(const vespalib::string& field_set_name, const std::vector<vespalib::string>& field_names) { - FieldSet field_set(field_set_name); - for (auto& field_name : field_names) { - field_set.addField(field_name); - } - _schema.addFieldSet(field_set); - _factory.reset(); - } -}; - - -KeywordExtractorFactoryTest::KeywordExtractorFactoryTest() - : testing::Test(), - _factory() -{ -} - -KeywordExtractorFactoryTest::~KeywordExtractorFactoryTest() = default; - -TEST_F(KeywordExtractorFactoryTest, empty_schema) -{ - EXPECT_TRUE(check_index("foo", "foo")); - EXPECT_FALSE(check_index("bar", "foo")); - EXPECT_FALSE(check_index("foo", "bar")); -} - -TEST_F(KeywordExtractorFactoryTest, field_set_is_checked) -{ - add_field_set("ab", {"cd", "de"}); - add_field_set("gh", {"cd"}); - add_field_set("default", {"de"}); - EXPECT_TRUE(check_index("cd", "cd")); - EXPECT_TRUE(check_index("ab", "cd")); - EXPECT_TRUE(check_index("gh", "cd")); - EXPECT_FALSE(check_index("default", "cd")); - EXPECT_FALSE(check_index("", "cd")); - EXPECT_TRUE(check_index("de", "de")); - EXPECT_TRUE(check_index("ab", "de")); - EXPECT_FALSE(check_index("gh", "de")); - EXPECT_TRUE(check_index("default", "de")); - EXPECT_TRUE(check_index("", "de")); -} - -GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchsummary/src/tests/docsummary/query_term_filter_factory/CMakeLists.txt b/searchsummary/src/tests/docsummary/query_term_filter_factory/CMakeLists.txt new file mode 100644 index 00000000000..12138948aca --- /dev/null +++ b/searchsummary/src/tests/docsummary/query_term_filter_factory/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchsummary_query_term_filter_factory_test_app TEST + SOURCES + query_term_filter_factory_test.cpp + DEPENDS + searchsummary + GTest::GTest +) +vespa_add_test(NAME searchsummary_query_term_filter_factory_test_app COMMAND searchsummary_query_term_filter_factory_test_app) diff --git a/searchsummary/src/tests/docsummary/query_term_filter_factory/query_term_filter_factory_test.cpp b/searchsummary/src/tests/docsummary/query_term_filter_factory/query_term_filter_factory_test.cpp new file mode 100644 index 00000000000..c966edb3958 --- /dev/null +++ b/searchsummary/src/tests/docsummary/query_term_filter_factory/query_term_filter_factory_test.cpp @@ -0,0 +1,78 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchcommon/common/schema.h> +#include <vespa/searchsummary/docsummary/i_query_term_filter.h> +#include <vespa/searchsummary/docsummary/query_term_filter_factory.h> +#include <vespa/vespalib/gtest/gtest.h> + +using search::docsummary::IQueryTermFilter; +using search::docsummary::IQueryTermFilterFactory; +using search::docsummary::QueryTermFilterFactory; +using search::index::Schema; + +using FieldSet = Schema::FieldSet; + +class QueryTermFilterFactoryTest : public testing::Test { + std::unique_ptr<IQueryTermFilterFactory> _factory; + Schema _schema; + +protected: + QueryTermFilterFactoryTest(); + ~QueryTermFilterFactoryTest() override; + + void make_factory() { + _factory = std::make_unique<QueryTermFilterFactory>(_schema); + } + + bool check_view(const vespalib::string& view, const vespalib::string& summary_field) { + if (!_factory) { + make_factory(); + } + auto query_term_filter = _factory->make(summary_field); + return query_term_filter->use_view(view); + } + + void add_field_set(const vespalib::string& field_set_name, const std::vector<vespalib::string>& field_names) { + FieldSet field_set(field_set_name); + for (auto& field_name : field_names) { + field_set.addField(field_name); + } + _schema.addFieldSet(field_set); + _factory.reset(); + } +}; + + +QueryTermFilterFactoryTest::QueryTermFilterFactoryTest() + : testing::Test(), + _factory() +{ +} + +QueryTermFilterFactoryTest::~QueryTermFilterFactoryTest() = default; + +TEST_F(QueryTermFilterFactoryTest, empty_schema) +{ + EXPECT_TRUE(check_view("foo", "foo")); + EXPECT_FALSE(check_view("bar", "foo")); + EXPECT_FALSE(check_view("foo", "bar")); +} + +TEST_F(QueryTermFilterFactoryTest, field_set_is_checked) +{ + add_field_set("ab", {"cd", "de"}); + add_field_set("gh", {"cd"}); + add_field_set("default", {"de"}); + EXPECT_TRUE(check_view("cd", "cd")); + EXPECT_TRUE(check_view("ab", "cd")); + EXPECT_TRUE(check_view("gh", "cd")); + EXPECT_FALSE(check_view("default", "cd")); + EXPECT_FALSE(check_view("", "cd")); + EXPECT_TRUE(check_view("de", "de")); + EXPECT_TRUE(check_view("ab", "de")); + EXPECT_FALSE(check_view("gh", "de")); + EXPECT_TRUE(check_view("default", "de")); + EXPECT_TRUE(check_view("", "de")); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt index 34e902461f4..a95b2322947 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt +++ b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt @@ -23,13 +23,13 @@ vespa_add_library(searchsummary_docsummary OBJECT juniper_dfw_term_visitor.cpp juniper_query_adapter.cpp juniperproperties.cpp - keyword_extractor.cpp - keyword_extractor_factory.cpp - legacy_keyword_extractor.cpp - legacy_keyword_extractor_factory.cpp + legacy_query_term_filter.cpp + legacy_query_term_filter_factory.cpp linguisticsannotation.cpp matched_elements_filter_dfw.cpp positionsdfw.cpp + query_term_filter.cpp + query_term_filter_factory.cpp rankfeaturesdfw.cpp res_config_entry.cpp resultclass.cpp diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp index ee94b0d452d..d6f06c9161e 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp @@ -20,10 +20,10 @@ using vespalib::IllegalArgumentException; namespace search::docsummary { -DocsumFieldWriterFactory::DocsumFieldWriterFactory(bool use_v8_geo_positions, const IDocsumEnvironment& env, const IKeywordExtractorFactory& keyword_extractor_factory) +DocsumFieldWriterFactory::DocsumFieldWriterFactory(bool use_v8_geo_positions, const IDocsumEnvironment& env, const IQueryTermFilterFactory& query_term_filter_factory) : _use_v8_geo_positions(use_v8_geo_positions), _env(env), - _keyword_extractor_factory(keyword_extractor_factory), + _query_term_filter_factory(query_term_filter_factory), _matching_elems_fields(std::make_shared<MatchingElementsFields>()) { } @@ -66,7 +66,7 @@ DocsumFieldWriterFactory::create_docsum_field_writer(const vespalib::string& fie auto fw = std::make_unique<DynamicTeaserDFW>(getEnvironment().getJuniper()); auto fw_ptr = fw.get(); fieldWriter = std::move(fw); - if (!fw_ptr->Init(field_name.c_str(), source, _keyword_extractor_factory)) { + if (!fw_ptr->Init(field_name.c_str(), source, _query_term_filter_factory)) { throw IllegalArgumentException("Failed to initialize DynamicTeaserDFW."); } } else { diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.h b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.h index 88fe5563193..e50fb85cca6 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.h @@ -9,7 +9,7 @@ namespace search { class MatchingElementsFields; } namespace search::docsummary { class IDocsumEnvironment; -class IKeywordExtractorFactory; +class IQueryTermFilterFactory; /* * Factory class for creating docsum field writers. @@ -18,13 +18,13 @@ class DocsumFieldWriterFactory : public IDocsumFieldWriterFactory { bool _use_v8_geo_positions; const IDocsumEnvironment& _env; - const IKeywordExtractorFactory& _keyword_extractor_factory; + const IQueryTermFilterFactory& _query_term_filter_factory; protected: std::shared_ptr<MatchingElementsFields> _matching_elems_fields; const IDocsumEnvironment& getEnvironment() const noexcept { return _env; } bool has_attribute_manager() const noexcept; public: - DocsumFieldWriterFactory(bool use_v8_geo_positions, const IDocsumEnvironment& env, const IKeywordExtractorFactory& keyword_extractor_factory); + DocsumFieldWriterFactory(bool use_v8_geo_positions, const IDocsumEnvironment& env, const IQueryTermFilterFactory& query_term_filter_factory); ~DocsumFieldWriterFactory() override; std::unique_ptr<DocsumFieldWriter> create_docsum_field_writer(const vespalib::string& field_name, const vespalib::string& command, diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp index 20cb9b47f4c..10bf986eebf 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp @@ -4,7 +4,7 @@ #include "docsumstate.h" #include "docsum_field_writer_state.h" #include "i_docsum_store_document.h" -#include "legacy_keyword_extractor.h" +#include "legacy_query_term_filter.h" #include <vespa/document/fieldvalue/fieldvalue.h> #include <vespa/searchlib/attribute/iattributemanager.h> #include <vespa/vespalib/util/issue.h> diff --git a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp index 3dfcc419408..d76e4af84d3 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp @@ -4,7 +4,7 @@ #include "docsumstate.h" #include "i_docsum_store_document.h" #include "i_juniper_converter.h" -#include "i_keyword_extractor_factory.h" +#include "i_query_term_filter_factory.h" #include "juniper_query_adapter.h" #include <vespa/document/fieldvalue/stringfieldvalue.h> #include <vespa/vespalib/objects/hexdump.h> @@ -23,7 +23,7 @@ JuniperDFW::JuniperDFW(const juniper::Juniper * juniper) : _input_field_name(), _juniperConfig(), _juniper(juniper), - _keyword_extractor() + _query_term_filter() { } @@ -34,7 +34,7 @@ bool JuniperDFW::Init( const char *fieldName, const vespalib::string& inputField, - const IKeywordExtractorFactory& keyword_extractor_factory) + const IQueryTermFilterFactory& query_term_filter_factory) { bool rc = true; _juniperConfig = _juniper->CreateConfig(fieldName); @@ -44,7 +44,7 @@ JuniperDFW::Init( } _input_field_name = inputField; - _keyword_extractor = keyword_extractor_factory.make(_input_field_name); + _query_term_filter = query_term_filter_factory.make(_input_field_name); return rc; } @@ -52,9 +52,9 @@ bool JuniperTeaserDFW::Init( const char *fieldName, const vespalib::string& inputField, - const IKeywordExtractorFactory& keyword_extractor_factory) + const IQueryTermFilterFactory& query_term_filter_factory) { - return JuniperDFW::Init(fieldName, inputField, keyword_extractor_factory); + return JuniperDFW::Init(fieldName, inputField, query_term_filter_factory); } void @@ -62,7 +62,7 @@ DynamicTeaserDFW::insert_juniper_field(uint32_t docid, vespalib::stringref input { auto& query = state._dynteaser.get_query(_input_field_name); if (!query) { - JuniperQueryAdapter iq(_keyword_extractor.get(), + JuniperQueryAdapter iq(_query_term_filter.get(), state._args.getStackDump(), &state._args.highlightTerms()); query = _juniper->CreateQueryHandle(iq, nullptr); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/i_keyword_extractor_factory.h b/searchsummary/src/vespa/searchsummary/docsummary/i_keyword_extractor_factory.h deleted file mode 100644 index 264af049251..00000000000 --- a/searchsummary/src/vespa/searchsummary/docsummary/i_keyword_extractor_factory.h +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vespa/vespalib/stllike/string.h> -#include <memory> - -namespace search::docsummary { - -class IKeywordExtractor; - -/* - * Interface class for creating an instance of IKeywordExtractor for a - * specific input field. - */ -class IKeywordExtractorFactory -{ -public: - virtual ~IKeywordExtractorFactory() = default; - - virtual std::shared_ptr<const IKeywordExtractor> make(vespalib::stringref input_field) const = 0; -}; - -} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/i_keyword_extractor.h b/searchsummary/src/vespa/searchsummary/docsummary/i_query_term_filter.h index f9f2db871f9..a666a3f4198 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/i_keyword_extractor.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/i_query_term_filter.h @@ -10,12 +10,12 @@ namespace search::docsummary { * Interface class for checking if query term index name indicates that * related query term is useful from the perspective of juniper. */ -class IKeywordExtractor +class IQueryTermFilter { public: - virtual ~IKeywordExtractor() = default; + virtual ~IQueryTermFilter() = default; - virtual bool isLegalIndex(vespalib::stringref idx) const = 0; + virtual bool use_view(vespalib::stringref view) const = 0; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/i_query_term_filter_factory.h b/searchsummary/src/vespa/searchsummary/docsummary/i_query_term_filter_factory.h new file mode 100644 index 00000000000..ac8130f16f3 --- /dev/null +++ b/searchsummary/src/vespa/searchsummary/docsummary/i_query_term_filter_factory.h @@ -0,0 +1,24 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include <memory> + +namespace search::docsummary { + +class IQueryTermFilter; + +/* + * Interface class for creating an instance of IQueryTermFilter for a + * specific input field. + */ +class IQueryTermFilterFactory +{ +public: + virtual ~IQueryTermFilterFactory() = default; + + virtual std::shared_ptr<const IQueryTermFilter> make(vespalib::stringref input_field) const = 0; +}; + +} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/juniper_query_adapter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/juniper_query_adapter.cpp index 360e97e0c04..80e21f0be96 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/juniper_query_adapter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/juniper_query_adapter.cpp @@ -1,7 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "juniper_query_adapter.h" -#include "i_keyword_extractor.h" +#include "i_query_term_filter.h" #include "juniper_dfw_query_item.h" #include "juniper_dfw_term_visitor.h" #include <vespa/searchlib/fef/properties.h> @@ -10,9 +10,9 @@ namespace search::docsummary { -JuniperQueryAdapter::JuniperQueryAdapter(const IKeywordExtractor *kwExtractor, vespalib::stringref buf, +JuniperQueryAdapter::JuniperQueryAdapter(const IQueryTermFilter *query_term_filter, vespalib::stringref buf, const search::fef::Properties *highlightTerms) - : _kwExtractor(kwExtractor), + : _query_term_filter(query_term_filter), _buf(buf), _highlightTerms(highlightTerms) { @@ -154,11 +154,11 @@ JuniperQueryAdapter::Traverse(juniper::IQueryVisitor *v) const bool JuniperQueryAdapter::UsefulIndex(const juniper::QueryItem* item) const { - if (_kwExtractor == nullptr) { + if (_query_term_filter == nullptr) { return true; } auto index = item->get_index(); - return _kwExtractor->isLegalIndex(index); + return _query_term_filter->use_view(index); } } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/juniper_query_adapter.h b/searchsummary/src/vespa/searchsummary/docsummary/juniper_query_adapter.h index 851f0d26746..3fe004cd3d6 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/juniper_query_adapter.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/juniper_query_adapter.h @@ -10,7 +10,7 @@ namespace search::fef { class Properties; } namespace search::docsummary { -class IKeywordExtractor; +class IQueryTermFilter; /* * Class implementing an adapter used by juniper to examine the current @@ -19,14 +19,14 @@ class IKeywordExtractor; class JuniperQueryAdapter : public juniper::IQuery { private: - const IKeywordExtractor *_kwExtractor; + const IQueryTermFilter *_query_term_filter; const vespalib::stringref _buf; const search::fef::Properties *_highlightTerms; public: JuniperQueryAdapter(const JuniperQueryAdapter&) = delete; JuniperQueryAdapter operator= (const JuniperQueryAdapter&) = delete; - JuniperQueryAdapter(const IKeywordExtractor *kwExtractor, vespalib::stringref buf, + JuniperQueryAdapter(const IQueryTermFilter *query_term_filter, vespalib::stringref buf, const search::fef::Properties *highlightTerms = nullptr); ~JuniperQueryAdapter() override; bool skipItem(search::SimpleQueryStackDumpIterator *iterator) const; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h b/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h index 528268b8fff..a28b6273e4c 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h @@ -13,8 +13,8 @@ namespace vespalib::slime { struct Inserter; } namespace search::docsummary { -class IKeywordExtractor; -class IKeywordExtractorFactory; +class IQueryTermFilter; +class IQueryTermFilterFactory; class JuniperDFW : public DocsumFieldWriter { @@ -22,7 +22,7 @@ public: virtual bool Init( const char *fieldName, const vespalib::string& inputField, - const IKeywordExtractorFactory& keyword_extractor_factory); + const IQueryTermFilterFactory& query_term_filter_factory); protected: explicit JuniperDFW(const juniper::Juniper * juniper); ~JuniperDFW() override; @@ -30,7 +30,7 @@ protected: vespalib::string _input_field_name; std::unique_ptr<juniper::Config> _juniperConfig; const juniper::Juniper *_juniper; - std::shared_ptr<const IKeywordExtractor> _keyword_extractor; + std::shared_ptr<const IQueryTermFilter> _query_term_filter; private: bool isGenerated() const override { return false; } JuniperDFW(const JuniperDFW &); @@ -43,7 +43,7 @@ class JuniperTeaserDFW : public JuniperDFW public: bool Init(const char *fieldName, const vespalib::string& inputField, - const IKeywordExtractorFactory& keyword_extractor_factory) override; + const IQueryTermFilterFactory& query_term_filter_factory) override; protected: explicit JuniperTeaserDFW(const juniper::Juniper * juniper) : JuniperDFW(juniper) { } }; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor.cpp b/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor.cpp deleted file mode 100644 index 19c24cdc4f7..00000000000 --- a/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "keyword_extractor.h" -#include <vespa/vespalib/stllike/hash_set.hpp> - -namespace search::docsummary { - -KeywordExtractor::KeywordExtractor(StringSet indexes) - : IKeywordExtractor(), - _indexes(std::move(indexes)) -{ - if (_indexes.contains("default")) { - _indexes.insert(""); - } -} - -KeywordExtractor::~KeywordExtractor() = default; - -bool -KeywordExtractor::isLegalIndex(vespalib::stringref idx) const -{ - return _indexes.contains(idx); -} - -} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor_factory.cpp b/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor_factory.cpp deleted file mode 100644 index f749e6e42a1..00000000000 --- a/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor_factory.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "keyword_extractor_factory.h" -#include "keyword_extractor.h" -#include <vespa/searchcommon/common/schema.h> -#include <vespa/vespalib/stllike/hash_map.hpp> -#include <vespa/vespalib/stllike/hash_set.hpp> - -namespace search::docsummary { - -KeywordExtractorFactory::KeywordExtractorFactory(const search::index::Schema& schema) - : IKeywordExtractorFactory(), - _index_map() -{ - for (uint32_t i = 0; i < schema.getNumFieldSets(); ++i) { - auto& field_set = schema.getFieldSet(i); - auto& fields = field_set.getFields(); - for (auto& field : fields) { - auto& vec = _index_map[field]; - vec.emplace_back(field_set.getName()); - } - } -} - -KeywordExtractorFactory::~KeywordExtractorFactory() = default; - -std::shared_ptr<const IKeywordExtractor> -KeywordExtractorFactory::make(vespalib::stringref input_field) const -{ - vespalib::hash_set<vespalib::string> indexes; - indexes.insert(input_field); - auto itr = _index_map.find(input_field); - if (itr != _index_map.end()) { - for (auto& index : itr->second) { - indexes.insert(index); - } - } - return std::make_shared<KeywordExtractor>(std::move(indexes)); -} - -} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/legacy_keyword_extractor_factory.cpp b/searchsummary/src/vespa/searchsummary/docsummary/legacy_keyword_extractor_factory.cpp deleted file mode 100644 index b967f7ffeff..00000000000 --- a/searchsummary/src/vespa/searchsummary/docsummary/legacy_keyword_extractor_factory.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "legacy_keyword_extractor_factory.h" - -namespace search::docsummary { - -LegacyKeywordExtractorFactory::LegacyKeywordExtractorFactory(std::shared_ptr<const IKeywordExtractor> keyword_extractor) - : IKeywordExtractorFactory(), - _keyword_extractor(std::move(keyword_extractor)) -{ -} - -LegacyKeywordExtractorFactory::~LegacyKeywordExtractorFactory() = default; - -std::shared_ptr<const IKeywordExtractor> -LegacyKeywordExtractorFactory::make(vespalib::stringref) const -{ - return _keyword_extractor; -} - -} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/legacy_keyword_extractor_factory.h b/searchsummary/src/vespa/searchsummary/docsummary/legacy_keyword_extractor_factory.h deleted file mode 100644 index 630a37cec4d..00000000000 --- a/searchsummary/src/vespa/searchsummary/docsummary/legacy_keyword_extractor_factory.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include "i_keyword_extractor_factory.h" - -namespace search::docsummary { - -/* - * Class for creating an instance of IKeywordExtractor. - */ -class LegacyKeywordExtractorFactory : public IKeywordExtractorFactory -{ - std::shared_ptr<const IKeywordExtractor> _keyword_extractor; -public: - explicit LegacyKeywordExtractorFactory(std::shared_ptr<const IKeywordExtractor> keyword_extractor); - virtual ~LegacyKeywordExtractorFactory(); - std::shared_ptr<const IKeywordExtractor> make(vespalib::stringref) const override; -}; - -} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/legacy_keyword_extractor.cpp b/searchsummary/src/vespa/searchsummary/docsummary/legacy_query_term_filter.cpp index b3c644ad36d..9cc392fc219 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/legacy_keyword_extractor.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/legacy_query_term_filter.cpp @@ -1,6 +1,6 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "legacy_keyword_extractor.h" +#include "legacy_query_term_filter.h" #include "idocsumenvironment.h" #include <vespa/searchlib/parsequery/stackdumpiterator.h> #include <vespa/vespalib/stllike/hashtable.hpp> @@ -17,37 +17,37 @@ bool useful(search::ParseItem::ItemCreator creator) } -LegacyKeywordExtractor::LegacyKeywordExtractor() - : IKeywordExtractor(), +LegacyQueryTermFilter::LegacyQueryTermFilter() + : IQueryTermFilter(), _legalPrefixes(), _legalIndexes() { } -LegacyKeywordExtractor::~LegacyKeywordExtractor() = default; +LegacyQueryTermFilter::~LegacyQueryTermFilter() = default; bool -LegacyKeywordExtractor::isLegalIndexName(const char *idxName) const +LegacyQueryTermFilter::isLegalIndexName(const char *idxName) const { return _legalIndexes.find(idxName) != _legalIndexes.end(); } -LegacyKeywordExtractor::IndexPrefix::IndexPrefix(const char *prefix) noexcept +LegacyQueryTermFilter::IndexPrefix::IndexPrefix(const char *prefix) noexcept : _prefix(prefix) { } -LegacyKeywordExtractor::IndexPrefix::~IndexPrefix() = default; +LegacyQueryTermFilter::IndexPrefix::~IndexPrefix() = default; bool -LegacyKeywordExtractor::IndexPrefix::Match(const char *idxName) const +LegacyQueryTermFilter::IndexPrefix::Match(const char *idxName) const { return vespalib::starts_with(idxName, _prefix); } void -LegacyKeywordExtractor::addLegalIndexSpec(const char *spec) +LegacyQueryTermFilter::addLegalIndexSpec(const char *spec) { if (spec == nullptr) return; @@ -85,7 +85,7 @@ LegacyKeywordExtractor::addLegalIndexSpec(const char *spec) vespalib::string -LegacyKeywordExtractor::getLegalIndexSpec() +LegacyQueryTermFilter::getLegalIndexSpec() { vespalib::string spec; @@ -110,12 +110,12 @@ LegacyKeywordExtractor::getLegalIndexSpec() bool -LegacyKeywordExtractor::isLegalIndex(vespalib::stringref idx) const +LegacyQueryTermFilter::use_view(vespalib::stringref view) const { vespalib::string resolvedIdxName; - if ( ! idx.empty() ) { - resolvedIdxName = idx; + if ( ! view.empty() ) { + resolvedIdxName = view; } else { resolvedIdxName = "__defaultindex"; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/legacy_keyword_extractor.h b/searchsummary/src/vespa/searchsummary/docsummary/legacy_query_term_filter.h index b339294f2f1..dfda568bd9b 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/legacy_keyword_extractor.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/legacy_query_term_filter.h @@ -2,12 +2,12 @@ #pragma once -#include "i_keyword_extractor.h" +#include "i_query_term_filter.h" #include <vespa/vespalib/stllike/hash_set.h> namespace search::docsummary { -class LegacyKeywordExtractor : public IKeywordExtractor +class LegacyQueryTermFilter : public IQueryTermFilter { public: @@ -44,10 +44,10 @@ private: } bool isLegalIndexName(const char *idxName) const; public: - LegacyKeywordExtractor(); - LegacyKeywordExtractor(const LegacyKeywordExtractor &) = delete; - LegacyKeywordExtractor& operator=(const LegacyKeywordExtractor &) = delete; - ~LegacyKeywordExtractor(); + LegacyQueryTermFilter(); + LegacyQueryTermFilter(const LegacyQueryTermFilter &) = delete; + LegacyQueryTermFilter& operator=(const LegacyQueryTermFilter &) = delete; + ~LegacyQueryTermFilter(); /** @@ -78,7 +78,7 @@ public: * * @return true if the given index name is legal. **/ - bool isLegalIndex(vespalib::stringref idx) const override; + bool use_view(vespalib::stringref idx) const override; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/legacy_query_term_filter_factory.cpp b/searchsummary/src/vespa/searchsummary/docsummary/legacy_query_term_filter_factory.cpp new file mode 100644 index 00000000000..77ccd494fac --- /dev/null +++ b/searchsummary/src/vespa/searchsummary/docsummary/legacy_query_term_filter_factory.cpp @@ -0,0 +1,21 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "legacy_query_term_filter_factory.h" + +namespace search::docsummary { + +LegacyQueryTermFilterFactory::LegacyQueryTermFilterFactory(std::shared_ptr<const IQueryTermFilter> query_term_filter) + : IQueryTermFilterFactory(), + _query_term_filter(std::move(query_term_filter)) +{ +} + +LegacyQueryTermFilterFactory::~LegacyQueryTermFilterFactory() = default; + +std::shared_ptr<const IQueryTermFilter> +LegacyQueryTermFilterFactory::make(vespalib::stringref) const +{ + return _query_term_filter; +} + +} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/legacy_query_term_filter_factory.h b/searchsummary/src/vespa/searchsummary/docsummary/legacy_query_term_filter_factory.h new file mode 100644 index 00000000000..2e7fdd63204 --- /dev/null +++ b/searchsummary/src/vespa/searchsummary/docsummary/legacy_query_term_filter_factory.h @@ -0,0 +1,21 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "i_query_term_filter_factory.h" + +namespace search::docsummary { + +/* + * Class for creating an instance of IQueryTermFilter. + */ +class LegacyQueryTermFilterFactory : public IQueryTermFilterFactory +{ + std::shared_ptr<const IQueryTermFilter> _query_term_filter; +public: + explicit LegacyQueryTermFilterFactory(std::shared_ptr<const IQueryTermFilter> query_term_filter); + virtual ~LegacyQueryTermFilterFactory(); + std::shared_ptr<const IQueryTermFilter> make(vespalib::stringref) const override; +}; + +} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/query_term_filter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/query_term_filter.cpp new file mode 100644 index 00000000000..a8be797ae98 --- /dev/null +++ b/searchsummary/src/vespa/searchsummary/docsummary/query_term_filter.cpp @@ -0,0 +1,25 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "query_term_filter.h" +#include <vespa/vespalib/stllike/hash_set.hpp> + +namespace search::docsummary { + +QueryTermFilter::QueryTermFilter(StringSet views) + : IQueryTermFilter(), + _views(std::move(views)) +{ + if (_views.contains("default")) { + _views.insert(""); + } +} + +QueryTermFilter::~QueryTermFilter() = default; + +bool +QueryTermFilter::use_view(vespalib::stringref view) const +{ + return _views.contains(view); +} + +} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor.h b/searchsummary/src/vespa/searchsummary/docsummary/query_term_filter.h index a2b1fba96f1..e5b0949bf29 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/query_term_filter.h @@ -2,7 +2,7 @@ #pragma once -#include "i_keyword_extractor.h" +#include "i_query_term_filter.h" #include <vespa/vespalib/stllike/hash_set.h> namespace search::docsummary { @@ -11,14 +11,14 @@ namespace search::docsummary { * Class for checking if query term index name indicates that * related query term is useful from the perspective of juniper. */ -class KeywordExtractor : public IKeywordExtractor +class QueryTermFilter : public IQueryTermFilter { using StringSet = vespalib::hash_set<vespalib::string>; - StringSet _indexes; + StringSet _views; public: - KeywordExtractor(StringSet indexes); - ~KeywordExtractor() override; - bool isLegalIndex(vespalib::stringref idx) const override; + QueryTermFilter(StringSet views); + ~QueryTermFilter() override; + bool use_view(vespalib::stringref view) const override; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/query_term_filter_factory.cpp b/searchsummary/src/vespa/searchsummary/docsummary/query_term_filter_factory.cpp new file mode 100644 index 00000000000..69a67d2461c --- /dev/null +++ b/searchsummary/src/vespa/searchsummary/docsummary/query_term_filter_factory.cpp @@ -0,0 +1,41 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "query_term_filter_factory.h" +#include "query_term_filter.h" +#include <vespa/searchcommon/common/schema.h> +#include <vespa/vespalib/stllike/hash_map.hpp> +#include <vespa/vespalib/stllike/hash_set.hpp> + +namespace search::docsummary { + +QueryTermFilterFactory::QueryTermFilterFactory(const search::index::Schema& schema) + : IQueryTermFilterFactory(), + _view_map() +{ + for (uint32_t i = 0; i < schema.getNumFieldSets(); ++i) { + auto& field_set = schema.getFieldSet(i); + auto& fields = field_set.getFields(); + for (auto& field : fields) { + auto& vec = _view_map[field]; + vec.emplace_back(field_set.getName()); + } + } +} + +QueryTermFilterFactory::~QueryTermFilterFactory() = default; + +std::shared_ptr<const IQueryTermFilter> +QueryTermFilterFactory::make(vespalib::stringref input_field) const +{ + vespalib::hash_set<vespalib::string> views; + views.insert(input_field); + auto itr = _view_map.find(input_field); + if (itr != _view_map.end()) { + for (auto& index : itr->second) { + views.insert(index); + } + } + return std::make_shared<QueryTermFilter>(std::move(views)); +} + +} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor_factory.h b/searchsummary/src/vespa/searchsummary/docsummary/query_term_filter_factory.h index e22475eb842..d125aa78c33 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/keyword_extractor_factory.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/query_term_filter_factory.h @@ -2,7 +2,7 @@ #pragma once -#include "i_keyword_extractor_factory.h" +#include "i_query_term_filter_factory.h" #include <vespa/searchcommon/common/schema.h> #include <vespa/vespalib/stllike/hash_map.h> #include <vector> @@ -12,15 +12,15 @@ namespace search::index { class Schema; } namespace search::docsummary { /* - * Class for creating an instance of IKeywordExtractor. + * Class for creating an instance of IQueryTermFilter. */ -class KeywordExtractorFactory : public IKeywordExtractorFactory +class QueryTermFilterFactory : public IQueryTermFilterFactory { - vespalib::hash_map<vespalib::string, std::vector<vespalib::string>> _index_map; + vespalib::hash_map<vespalib::string, std::vector<vespalib::string>> _view_map; public: - KeywordExtractorFactory(const search::index::Schema& schema); - ~KeywordExtractorFactory() override; - std::shared_ptr<const IKeywordExtractor> make(vespalib::stringref input_field) const override; + QueryTermFilterFactory(const search::index::Schema& schema); + ~QueryTermFilterFactory() override; + std::shared_ptr<const IQueryTermFilter> make(vespalib::stringref input_field) const override; }; } diff --git a/streamingvisitors/CMakeLists.txt b/streamingvisitors/CMakeLists.txt index adfee1a76ae..2c7f01ddf37 100644 --- a/streamingvisitors/CMakeLists.txt +++ b/streamingvisitors/CMakeLists.txt @@ -26,7 +26,7 @@ vespa_define_module( src/tests/charbuffer src/tests/docsum src/tests/document - src/tests/keyword_extractor_factory + src/tests/query_term_filter_factory src/tests/searcher src/tests/textutil ) diff --git a/streamingvisitors/src/tests/keyword_extractor_factory/CMakeLists.txt b/streamingvisitors/src/tests/keyword_extractor_factory/CMakeLists.txt deleted file mode 100644 index 54e2368f200..00000000000 --- a/streamingvisitors/src/tests/keyword_extractor_factory/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(streamingvisitors_keyword_extractor_factory_test_app TEST - SOURCES - keyword_extractor_factory_test.cpp - DEPENDS - streamingvisitors - GTest::GTest -) -vespa_add_test(NAME streamingvisitors_keyword_extractor_factory_test_app COMMAND streamingvisitors_keyword_extractor_factory_test_app) diff --git a/streamingvisitors/src/tests/keyword_extractor_factory/keyword_extractor_factory_test.cpp b/streamingvisitors/src/tests/keyword_extractor_factory/keyword_extractor_factory_test.cpp deleted file mode 100644 index e06217a35f0..00000000000 --- a/streamingvisitors/src/tests/keyword_extractor_factory/keyword_extractor_factory_test.cpp +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/searchsummary/docsummary/i_keyword_extractor.h> -#include <vespa/vsm/vsm/keyword_extractor_factory.h> -#include <vespa/vespalib/gtest/gtest.h> - -using search::docsummary::IKeywordExtractor; -using search::docsummary::IKeywordExtractorFactory; -using vespa::config::search::vsm::VsmfieldsConfig; -using vespa::config::search::vsm::VsmfieldsConfigBuilder; -using vespa::config::search::vsm::VsmsummaryConfig; -using vespa::config::search::vsm::VsmsummaryConfigBuilder; -using vsm::KeywordExtractorFactory; - -class KeywordExtractorFactoryTest : public testing::Test { - std::unique_ptr<IKeywordExtractorFactory> _factory; - VsmfieldsConfigBuilder _fields; - VsmsummaryConfigBuilder _summary; -protected: - KeywordExtractorFactoryTest(); - ~KeywordExtractorFactoryTest() override; - - void make_factory() { - _factory = std::make_unique<KeywordExtractorFactory>(_fields, _summary); - } - - bool check_index(const vespalib::string &index_name, const vespalib::string& summary_field) { - if (!_factory) { - make_factory(); - } - auto extractor = _factory->make(summary_field); - return extractor->isLegalIndex(index_name); - } - - void add_summary_field(const vespalib::string& summary_field_name, const std::vector<vespalib::string>& field_names) - { - VsmsummaryConfigBuilder::Fieldmap field_map; - field_map.summary = summary_field_name; - for (auto& field_name : field_names) { - VsmsummaryConfigBuilder::Fieldmap::Document document; - document.field = field_name; - field_map.document.emplace_back(document); - } - _summary.fieldmap.emplace_back(field_map); - _factory.reset(); - } - void add_index(const vespalib::string& index_name, const std::vector<vespalib::string>& field_names) - { - if (_fields.documenttype.empty()) { - _fields.documenttype.resize(1); - _fields.documenttype.back().name = "dummy"; - } - VsmfieldsConfigBuilder::Documenttype::Index index; - index.name = index_name; - for (auto& field_name : field_names) { - VsmfieldsConfigBuilder::Documenttype::Index::Field field; - field.name = field_name; - index.field.emplace_back(field); - } - _fields.documenttype.back().index.emplace_back(index); - _factory.reset(); - } -}; - - -KeywordExtractorFactoryTest::KeywordExtractorFactoryTest() - : testing::Test(), - _factory() -{ -} - -KeywordExtractorFactoryTest::~KeywordExtractorFactoryTest() = default; - -TEST_F(KeywordExtractorFactoryTest, empty_config) -{ - EXPECT_FALSE(check_index("foo", "foo")); -} - -TEST_F(KeywordExtractorFactoryTest, implied_identity_mapping_for_summary_field) -{ - add_index("foo", {"bar"}); - EXPECT_FALSE(check_index("foo", "foo")); - EXPECT_TRUE(check_index("foo", "bar")); -} - -TEST_F(KeywordExtractorFactoryTest, two_source_fields_for_summary_field) -{ - add_index("bar", {"bar"}); - add_index("baz", {"baz"}); - add_summary_field("foo", {"bar", "baz"}); - EXPECT_FALSE(check_index("foo", "foo")); - EXPECT_TRUE(check_index("bar", "foo")); - EXPECT_TRUE(check_index("bar", "bar")); - EXPECT_TRUE(check_index("baz", "foo")); - EXPECT_TRUE(check_index("baz", "baz")); -} - -TEST_F(KeywordExtractorFactoryTest, two_source_fields_for_summary_field_and_multiple_indexes) -{ - add_index("bar", {"bar"}); - add_index("baz", {"baz"}); - add_index("both", {"bar", "baz"}); - add_index("default", {"baz"}); - add_summary_field("foo", {"bar", "baz"}); - EXPECT_FALSE(check_index("foo", "foo")); - EXPECT_TRUE(check_index("both", "foo")); - EXPECT_TRUE(check_index("bar", "foo")); - EXPECT_TRUE(check_index("baz", "foo")); - EXPECT_TRUE(check_index("default", "foo")); - EXPECT_TRUE(check_index("", "foo")); - EXPECT_TRUE(check_index("both", "bar")); - EXPECT_TRUE(check_index("bar", "bar")); - EXPECT_FALSE(check_index("baz", "bar")); - EXPECT_FALSE(check_index("default", "bar")); - EXPECT_FALSE(check_index("", "bar")); - EXPECT_TRUE(check_index("both", "baz")); - EXPECT_FALSE(check_index("bar", "baz")); - EXPECT_TRUE(check_index("baz", "baz")); - EXPECT_TRUE(check_index("default", "baz")); - EXPECT_TRUE(check_index("", "baz")); -} - -GTEST_MAIN_RUN_ALL_TESTS() diff --git a/streamingvisitors/src/tests/query_term_filter_factory/CMakeLists.txt b/streamingvisitors/src/tests/query_term_filter_factory/CMakeLists.txt new file mode 100644 index 00000000000..219602a8c03 --- /dev/null +++ b/streamingvisitors/src/tests/query_term_filter_factory/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(streamingvisitors_query_term_filter_factory_test_app TEST + SOURCES + query_term_filter_factory_test.cpp + DEPENDS + streamingvisitors + GTest::GTest +) +vespa_add_test(NAME streamingvisitors_query_term_filter_factory_test_app COMMAND streamingvisitors_query_term_filter_factory_test_app) diff --git a/streamingvisitors/src/tests/query_term_filter_factory/query_term_filter_factory_test.cpp b/streamingvisitors/src/tests/query_term_filter_factory/query_term_filter_factory_test.cpp new file mode 100644 index 00000000000..54eaa8b0ee1 --- /dev/null +++ b/streamingvisitors/src/tests/query_term_filter_factory/query_term_filter_factory_test.cpp @@ -0,0 +1,123 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchsummary/docsummary/i_query_term_filter.h> +#include <vespa/vsm/vsm/query_term_filter_factory.h> +#include <vespa/vespalib/gtest/gtest.h> + +using search::docsummary::IQueryTermFilter; +using search::docsummary::IQueryTermFilterFactory; +using vespa::config::search::vsm::VsmfieldsConfig; +using vespa::config::search::vsm::VsmfieldsConfigBuilder; +using vespa::config::search::vsm::VsmsummaryConfig; +using vespa::config::search::vsm::VsmsummaryConfigBuilder; +using vsm::QueryTermFilterFactory; + +class QueryTermFilterFactoryTest : public testing::Test { + std::unique_ptr<IQueryTermFilterFactory> _factory; + VsmfieldsConfigBuilder _fields; + VsmsummaryConfigBuilder _summary; +protected: + QueryTermFilterFactoryTest(); + ~QueryTermFilterFactoryTest() override; + + void make_factory() { + _factory = std::make_unique<QueryTermFilterFactory>(_fields, _summary); + } + + bool check_view(const vespalib::string& view, const vespalib::string& summary_field) { + if (!_factory) { + make_factory(); + } + auto query_term_filter = _factory->make(summary_field); + return query_term_filter->use_view(view); + } + + void add_summary_field(const vespalib::string& summary_field_name, const std::vector<vespalib::string>& field_names) + { + VsmsummaryConfigBuilder::Fieldmap field_map; + field_map.summary = summary_field_name; + for (auto& field_name : field_names) { + VsmsummaryConfigBuilder::Fieldmap::Document document; + document.field = field_name; + field_map.document.emplace_back(document); + } + _summary.fieldmap.emplace_back(field_map); + _factory.reset(); + } + void add_index(const vespalib::string& index_name, const std::vector<vespalib::string>& field_names) + { + if (_fields.documenttype.empty()) { + _fields.documenttype.resize(1); + _fields.documenttype.back().name = "dummy"; + } + VsmfieldsConfigBuilder::Documenttype::Index index; + index.name = index_name; + for (auto& field_name : field_names) { + VsmfieldsConfigBuilder::Documenttype::Index::Field field; + field.name = field_name; + index.field.emplace_back(field); + } + _fields.documenttype.back().index.emplace_back(index); + _factory.reset(); + } +}; + + +QueryTermFilterFactoryTest::QueryTermFilterFactoryTest() + : testing::Test(), + _factory() +{ +} + +QueryTermFilterFactoryTest::~QueryTermFilterFactoryTest() = default; + +TEST_F(QueryTermFilterFactoryTest, empty_config) +{ + EXPECT_FALSE(check_view("foo", "foo")); +} + +TEST_F(QueryTermFilterFactoryTest, implied_identity_mapping_for_summary_field) +{ + add_index("foo", {"bar"}); + EXPECT_FALSE(check_view("foo", "foo")); + EXPECT_TRUE(check_view("foo", "bar")); +} + +TEST_F(QueryTermFilterFactoryTest, two_source_fields_for_summary_field) +{ + add_index("bar", {"bar"}); + add_index("baz", {"baz"}); + add_summary_field("foo", {"bar", "baz"}); + EXPECT_FALSE(check_view("foo", "foo")); + EXPECT_TRUE(check_view("bar", "foo")); + EXPECT_TRUE(check_view("bar", "bar")); + EXPECT_TRUE(check_view("baz", "foo")); + EXPECT_TRUE(check_view("baz", "baz")); +} + +TEST_F(QueryTermFilterFactoryTest, two_source_fields_for_summary_field_and_multiple_indexes) +{ + add_index("bar", {"bar"}); + add_index("baz", {"baz"}); + add_index("both", {"bar", "baz"}); + add_index("default", {"baz"}); + add_summary_field("foo", {"bar", "baz"}); + EXPECT_FALSE(check_view("foo", "foo")); + EXPECT_TRUE(check_view("both", "foo")); + EXPECT_TRUE(check_view("bar", "foo")); + EXPECT_TRUE(check_view("baz", "foo")); + EXPECT_TRUE(check_view("default", "foo")); + EXPECT_TRUE(check_view("", "foo")); + EXPECT_TRUE(check_view("both", "bar")); + EXPECT_TRUE(check_view("bar", "bar")); + EXPECT_FALSE(check_view("baz", "bar")); + EXPECT_FALSE(check_view("default", "bar")); + EXPECT_FALSE(check_view("", "bar")); + EXPECT_TRUE(check_view("both", "baz")); + EXPECT_FALSE(check_view("bar", "baz")); + EXPECT_TRUE(check_view("baz", "baz")); + EXPECT_TRUE(check_view("default", "baz")); + EXPECT_TRUE(check_view("", "baz")); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt b/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt index 67acbc1a391..741d2d7a731 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt +++ b/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt @@ -6,7 +6,7 @@ vespa_add_library(vsm_vsmbase OBJECT docsum_field_writer_factory.cpp fieldsearchspec.cpp flattendocsumwriter.cpp - keyword_extractor_factory.cpp + query_term_filter_factory.cpp snippetmodifier.cpp vsm-adapter.cpp DEPENDS diff --git a/streamingvisitors/src/vespa/vsm/vsm/docsum_field_writer_factory.cpp b/streamingvisitors/src/vespa/vsm/vsm/docsum_field_writer_factory.cpp index f3c383a4ba5..36873b713aa 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/docsum_field_writer_factory.cpp +++ b/streamingvisitors/src/vespa/vsm/vsm/docsum_field_writer_factory.cpp @@ -14,7 +14,7 @@ using search::docsummary::CopyDFW; using search::docsummary::DocsumFieldWriter; using search::docsummary::EmptyDFW; using search::docsummary::IDocsumEnvironment; -using search::docsummary::IKeywordExtractorFactory; +using search::docsummary::IQueryTermFilterFactory; using search::docsummary::MatchedElementsFilterDFW; using vespa::config::search::vsm::VsmfieldsConfig; @@ -37,8 +37,8 @@ void populate_fields(MatchingElementsFields& fields, VsmfieldsConfig& fields_con } -DocsumFieldWriterFactory::DocsumFieldWriterFactory(bool use_v8_geo_positions, const IDocsumEnvironment& env, const IKeywordExtractorFactory& keyword_extractor_factory, const vespa::config::search::vsm::VsmfieldsConfig& vsm_fields_config) - : search::docsummary::DocsumFieldWriterFactory(use_v8_geo_positions, env, keyword_extractor_factory), +DocsumFieldWriterFactory::DocsumFieldWriterFactory(bool use_v8_geo_positions, const IDocsumEnvironment& env, const IQueryTermFilterFactory& query_term_filter_factory, const vespa::config::search::vsm::VsmfieldsConfig& vsm_fields_config) + : search::docsummary::DocsumFieldWriterFactory(use_v8_geo_positions, env, query_term_filter_factory), _vsm_fields_config(vsm_fields_config) { } diff --git a/streamingvisitors/src/vespa/vsm/vsm/docsum_field_writer_factory.h b/streamingvisitors/src/vespa/vsm/vsm/docsum_field_writer_factory.h index 81acd0c7668..078c466d3d2 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/docsum_field_writer_factory.h +++ b/streamingvisitors/src/vespa/vsm/vsm/docsum_field_writer_factory.h @@ -16,7 +16,7 @@ class DocsumFieldWriterFactory : public search::docsummary::DocsumFieldWriterFac const vespa::config::search::vsm::VsmfieldsConfig& _vsm_fields_config; public: - DocsumFieldWriterFactory(bool use_v8_geo_positions, const search::docsummary::IDocsumEnvironment& env, const search::docsummary::IKeywordExtractorFactory& keyword_extractor_factory, const vespa::config::search::vsm::VsmfieldsConfig& vsm_fields_config); + DocsumFieldWriterFactory(bool use_v8_geo_positions, const search::docsummary::IDocsumEnvironment& env, const search::docsummary::IQueryTermFilterFactory& query_term_filter_factory, const vespa::config::search::vsm::VsmfieldsConfig& vsm_fields_config); ~DocsumFieldWriterFactory() override; std::unique_ptr<search::docsummary::DocsumFieldWriter> create_docsum_field_writer(const vespalib::string& field_name, diff --git a/streamingvisitors/src/vespa/vsm/vsm/keyword_extractor_factory.cpp b/streamingvisitors/src/vespa/vsm/vsm/keyword_extractor_factory.cpp deleted file mode 100644 index 5319f554c81..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/keyword_extractor_factory.cpp +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "keyword_extractor_factory.h" -#include <vespa/searchsummary/docsummary/keyword_extractor.h> -#include <vespa/vespalib/stllike/hash_map.hpp> -#include <vespa/vespalib/stllike/hash_set.hpp> -#include <cassert> -#include <vespa/log/log.h> -LOG_SETUP(".vsm.keyword_extractor_factory"); - -using search::docsummary::IKeywordExtractor; -using search::docsummary::IKeywordExtractorFactory; -using search::docsummary::KeywordExtractor; -using vespa::config::search::vsm::VsmfieldsConfig; -using vespa::config::search::vsm::VsmsummaryConfig; - -namespace vsm { - -KeywordExtractorFactory::KeywordExtractorFactory(VsmfieldsConfig& vsm_fields_config, - VsmsummaryConfig& vsm_summary_config) - : IKeywordExtractorFactory(), - _index_map(), - _field_map() -{ - populate_index_map(vsm_fields_config); - populate_field_map(vsm_summary_config); -} - -KeywordExtractorFactory::~KeywordExtractorFactory() = default; - -void -KeywordExtractorFactory::populate_index_map(VsmfieldsConfig& vsm_fields_config) -{ - for (auto& doctype : vsm_fields_config.documenttype) { - for (auto& index : doctype.index) { - for (auto& field : index.field) { - _index_map[field.name].insert(index.name); - } - } - } -} - -void -KeywordExtractorFactory::populate_field_map(VsmsummaryConfig& vsm_summary_config) -{ - for (auto& summary_field : vsm_summary_config.fieldmap) { - for (auto& document : summary_field.document) { - _field_map[summary_field.summary].insert(document.field); - } - } -} - -void -KeywordExtractorFactory::populate_indexes(StringSet& indexes, const vespalib::string& field) const -{ - auto itr = _index_map.find(field); - if (itr != _index_map.end()) { - for (auto& index : itr->second) { - indexes.insert(index); - } - } -} - -std::shared_ptr<const IKeywordExtractor> -KeywordExtractorFactory::make(vespalib::stringref input_field) const -{ - StringSet indexes; - auto itr = _field_map.find(input_field); - if (itr != _field_map.end()) { - for (auto& field : itr->second) { - populate_indexes(indexes, field); - } - } else { - // Assume identity mapping vsm summary field -> document field - populate_indexes(indexes, input_field); - } - return std::make_shared<KeywordExtractor>(std::move(indexes)); -} - -} diff --git a/streamingvisitors/src/vespa/vsm/vsm/query_term_filter_factory.cpp b/streamingvisitors/src/vespa/vsm/vsm/query_term_filter_factory.cpp new file mode 100644 index 00000000000..262a557334e --- /dev/null +++ b/streamingvisitors/src/vespa/vsm/vsm/query_term_filter_factory.cpp @@ -0,0 +1,80 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "query_term_filter_factory.h" +#include <vespa/searchsummary/docsummary/query_term_filter.h> +#include <vespa/vespalib/stllike/hash_map.hpp> +#include <vespa/vespalib/stllike/hash_set.hpp> +#include <cassert> +#include <vespa/log/log.h> +LOG_SETUP(".vsm.query_term_filter_factory"); + +using search::docsummary::IQueryTermFilter; +using search::docsummary::IQueryTermFilterFactory; +using search::docsummary::QueryTermFilter; +using vespa::config::search::vsm::VsmfieldsConfig; +using vespa::config::search::vsm::VsmsummaryConfig; + +namespace vsm { + +QueryTermFilterFactory::QueryTermFilterFactory(VsmfieldsConfig& vsm_fields_config, + VsmsummaryConfig& vsm_summary_config) + : IQueryTermFilterFactory(), + _view_map(), + _field_map() +{ + populate_view_map(vsm_fields_config); + populate_field_map(vsm_summary_config); +} + +QueryTermFilterFactory::~QueryTermFilterFactory() = default; + +void +QueryTermFilterFactory::populate_view_map(VsmfieldsConfig& vsm_fields_config) +{ + for (auto& doctype : vsm_fields_config.documenttype) { + for (auto& index : doctype.index) { + for (auto& field : index.field) { + _view_map[field.name].insert(index.name); + } + } + } +} + +void +QueryTermFilterFactory::populate_field_map(VsmsummaryConfig& vsm_summary_config) +{ + for (auto& summary_field : vsm_summary_config.fieldmap) { + for (auto& document : summary_field.document) { + _field_map[summary_field.summary].insert(document.field); + } + } +} + +void +QueryTermFilterFactory::populate_views(StringSet& views, const vespalib::string& field) const +{ + auto itr = _view_map.find(field); + if (itr != _view_map.end()) { + for (auto& index : itr->second) { + views.insert(index); + } + } +} + +std::shared_ptr<const IQueryTermFilter> +QueryTermFilterFactory::make(vespalib::stringref input_field) const +{ + StringSet views; + auto itr = _field_map.find(input_field); + if (itr != _field_map.end()) { + for (auto& field : itr->second) { + populate_views(views, field); + } + } else { + // Assume identity mapping vsm summary field -> document field + populate_views(views, input_field); + } + return std::make_shared<QueryTermFilter>(std::move(views)); +} + +} diff --git a/streamingvisitors/src/vespa/vsm/vsm/keyword_extractor_factory.h b/streamingvisitors/src/vespa/vsm/vsm/query_term_filter_factory.h index 6ffcbd6f84b..a0f518b90b0 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/keyword_extractor_factory.h +++ b/streamingvisitors/src/vespa/vsm/vsm/query_term_filter_factory.h @@ -2,7 +2,7 @@ #pragma once -#include <vespa/searchsummary/docsummary/i_keyword_extractor_factory.h> +#include <vespa/searchsummary/docsummary/i_query_term_filter_factory.h> #include <vespa/vespalib/stllike/hash_map.h> #include <vespa/vespalib/stllike/hash_set.h> #include <vespa/vsm/config/config-vsmfields.h> @@ -11,12 +11,12 @@ namespace vsm { /* - * Class for creating an instance of IKeywordExtractor for streaming search. + * Class for creating an instance of IQueryTermFilter for streaming search. * * vsm summary fields are treated as document fields by the summary framework * in the searchsummary module, cf. IDocsumStoreDocument. */ -class KeywordExtractorFactory : public search::docsummary::IKeywordExtractorFactory +class QueryTermFilterFactory : public search::docsummary::IQueryTermFilterFactory { public: using VsmfieldsConfig = vespa::config::search::vsm::VsmfieldsConfig; @@ -24,16 +24,16 @@ public: private: using StringSet = vespalib::hash_set<vespalib::string>; using StringSetMap = vespalib::hash_map<vespalib::string, StringSet>; - StringSetMap _index_map; // document field -> indexes + StringSetMap _view_map; // document field -> views StringSetMap _field_map; // vsm summary field -> document fields - void populate_index_map(VsmfieldsConfig& vsm_fields_config); + void populate_view_map(VsmfieldsConfig& vsm_fields_config); void populate_field_map(VsmsummaryConfig& vsm_summary_config); - void populate_indexes(StringSet& indexes, const vespalib::string& field) const; + void populate_views(StringSet& views, const vespalib::string& field) const; public: - KeywordExtractorFactory(VsmfieldsConfig& vsm_fields_config, + QueryTermFilterFactory(VsmfieldsConfig& vsm_fields_config, VsmsummaryConfig& vsm_summary_config); - ~KeywordExtractorFactory() override; - std::shared_ptr<const search::docsummary::IKeywordExtractor> make(vespalib::stringref input_field) const override; + ~QueryTermFilterFactory() override; + std::shared_ptr<const search::docsummary::IQueryTermFilter> make(vespalib::stringref input_field) const override; }; } diff --git a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.cpp b/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.cpp index 13442eac1f8..8e5c5cfd8f1 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.cpp +++ b/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.cpp @@ -4,17 +4,17 @@ #include "docsum_field_writer_factory.h" #include "i_matching_elements_filler.h" #include <vespa/searchlib/common/matching_elements.h> -#include <vespa/searchsummary/docsummary/legacy_keyword_extractor.h> -#include <vespa/searchsummary/docsummary/legacy_keyword_extractor_factory.h> +#include <vespa/searchsummary/docsummary/legacy_query_term_filter.h> +#include <vespa/searchsummary/docsummary/legacy_query_term_filter_factory.h> #include <vespa/searchsummary/config/config-juniperrc.h> #include <vespa/log/log.h> LOG_SETUP(".vsm.vsm-adapter"); -using search::docsummary::IKeywordExtractorFactory; +using search::docsummary::IQueryTermFilterFactory; using search::docsummary::ResConfigEntry; -using search::docsummary::LegacyKeywordExtractor; -using search::docsummary::LegacyKeywordExtractorFactory; +using search::docsummary::LegacyQueryTermFilter; +using search::docsummary::LegacyQueryTermFilterFactory; using search::MatchingElements; using config::ConfigSnapshot; using vespa::config::search::SummaryConfig; @@ -148,20 +148,20 @@ VSMAdapter::configure(const VSMConfigSnapshot & snapshot) docsumTools->setJuniper(std::move(juniper)); // init keyword extractor - auto kwExtractor = std::make_unique<LegacyKeywordExtractor>(); - kwExtractor->addLegalIndexSpec(_highlightindexes.c_str()); - vespalib::string spec = kwExtractor->getLegalIndexSpec(); + auto query_term_filter = std::make_unique<LegacyQueryTermFilter>(); + query_term_filter->addLegalIndexSpec(_highlightindexes.c_str()); + vespalib::string spec = query_term_filter->getLegalIndexSpec(); LOG(debug, "index highlight spec: '%s'", spec.c_str()); // init result config auto resCfg = std::make_unique<ResultConfig>(); - std::unique_ptr<IKeywordExtractorFactory> keyword_extractor_factory = std::make_unique<LegacyKeywordExtractorFactory>(std::move(kwExtractor)); - auto docsum_field_writer_factory = std::make_unique<DocsumFieldWriterFactory>(summary.get()->usev8geopositions, *docsumTools, *keyword_extractor_factory, *_fieldsCfg.get()); + std::unique_ptr<IQueryTermFilterFactory> query_term_filter_factory = std::make_unique<LegacyQueryTermFilterFactory>(std::move(query_term_filter)); + auto docsum_field_writer_factory = std::make_unique<DocsumFieldWriterFactory>(summary.get()->usev8geopositions, *docsumTools, *query_term_filter_factory, *_fieldsCfg.get()); if ( !resCfg->readConfig(*summary.get(), _configId.c_str(), *docsum_field_writer_factory)) { throw std::runtime_error("(re-)configuration of VSM (docsum tools) failed due to bad summary config"); } docsum_field_writer_factory.reset(); - keyword_extractor_factory.reset(); + query_term_filter_factory.reset(); // create dynamic docsum writer auto writer = std::make_unique<DynamicDocsumWriter>(std::move(resCfg)); |